Browse Source

merged from rel098 upto r1276
(and fixed @groupby type bug caught by test_20)



git-svn-id: svn://svn.sphinxsearch.com/sphinx/trunk@1277 406a0c4d-033a-0410-8de8-e80135713968

shodan 18 years ago
parent
commit
f2feaa548f
100 changed files with 5108 additions and 715 deletions
  1. 1 0
      api/java/SphinxClient.java
  2. 2 2
      api/ruby/README
  3. 7 8
      api/ruby/Rakefile
  4. 1 0
      api/ruby/init.rb
  5. 5 0
      api/ruby/install.rb
  6. 6 0
      api/ruby/lib/sphinx.rb
  7. 1020 0
      api/ruby/lib/sphinx/client.rb
  8. 44 0
      api/ruby/lib/sphinx/request.rb
  9. 69 0
      api/ruby/lib/sphinx/response.rb
  10. 103 0
      api/ruby/spec/client_response_spec.rb
  11. 546 0
      api/ruby/spec/client_spec.rb
  12. 8 0
      api/ruby/spec/fixtures/default_search.php
  13. 8 0
      api/ruby/spec/fixtures/default_search_index.php
  14. 11 0
      api/ruby/spec/fixtures/excerpt_custom.php
  15. 8 0
      api/ruby/spec/fixtures/excerpt_default.php
  16. 11 0
      api/ruby/spec/fixtures/excerpt_flags.php
  17. 9 0
      api/ruby/spec/fixtures/field_weights.php
  18. 9 0
      api/ruby/spec/fixtures/filter.php
  19. 9 0
      api/ruby/spec/fixtures/filter_exclude.php
  20. 9 0
      api/ruby/spec/fixtures/filter_float_range.php
  21. 9 0
      api/ruby/spec/fixtures/filter_float_range_exclude.php
  22. 9 0
      api/ruby/spec/fixtures/filter_range.php
  23. 9 0
      api/ruby/spec/fixtures/filter_range_exclude.php
  24. 10 0
      api/ruby/spec/fixtures/filter_ranges.php
  25. 10 0
      api/ruby/spec/fixtures/filters.php
  26. 13 0
      api/ruby/spec/fixtures/filters_different.php
  27. 9 0
      api/ruby/spec/fixtures/geo_anchor.php
  28. 9 0
      api/ruby/spec/fixtures/group_by_attr.php
  29. 9 0
      api/ruby/spec/fixtures/group_by_attrpair.php
  30. 9 0
      api/ruby/spec/fixtures/group_by_day.php
  31. 9 0
      api/ruby/spec/fixtures/group_by_day_sort.php
  32. 9 0
      api/ruby/spec/fixtures/group_by_month.php
  33. 9 0
      api/ruby/spec/fixtures/group_by_week.php
  34. 9 0
      api/ruby/spec/fixtures/group_by_year.php
  35. 10 0
      api/ruby/spec/fixtures/group_distinct.php
  36. 9 0
      api/ruby/spec/fixtures/id_range.php
  37. 9 0
      api/ruby/spec/fixtures/id_range64.php
  38. 9 0
      api/ruby/spec/fixtures/index_weights.php
  39. 8 0
      api/ruby/spec/fixtures/keywords.php
  40. 9 0
      api/ruby/spec/fixtures/limits.php
  41. 9 0
      api/ruby/spec/fixtures/limits_cutoff.php
  42. 9 0
      api/ruby/spec/fixtures/limits_max.php
  43. 9 0
      api/ruby/spec/fixtures/limits_max_cutoff.php
  44. 9 0
      api/ruby/spec/fixtures/match_all.php
  45. 9 0
      api/ruby/spec/fixtures/match_any.php
  46. 9 0
      api/ruby/spec/fixtures/match_boolean.php
  47. 9 0
      api/ruby/spec/fixtures/match_extended.php
  48. 9 0
      api/ruby/spec/fixtures/match_extended2.php
  49. 9 0
      api/ruby/spec/fixtures/match_fullscan.php
  50. 9 0
      api/ruby/spec/fixtures/match_phrase.php
  51. 9 0
      api/ruby/spec/fixtures/max_query_time.php
  52. 12 0
      api/ruby/spec/fixtures/miltiple_queries.php
  53. 9 0
      api/ruby/spec/fixtures/ranking_bm25.php
  54. 9 0
      api/ruby/spec/fixtures/ranking_none.php
  55. 9 0
      api/ruby/spec/fixtures/ranking_proximity_bm25.php
  56. 9 0
      api/ruby/spec/fixtures/ranking_wordcount.php
  57. 9 0
      api/ruby/spec/fixtures/retries.php
  58. 9 0
      api/ruby/spec/fixtures/retries_delay.php
  59. 9 0
      api/ruby/spec/fixtures/sort_attr_asc.php
  60. 9 0
      api/ruby/spec/fixtures/sort_attr_desc.php
  61. 9 0
      api/ruby/spec/fixtures/sort_expr.php
  62. 9 0
      api/ruby/spec/fixtures/sort_extended.php
  63. 9 0
      api/ruby/spec/fixtures/sort_relevance.php
  64. 9 0
      api/ruby/spec/fixtures/sort_time_segments.php
  65. 1181 0
      api/ruby/spec/fixtures/sphinxapi.php
  66. 8 0
      api/ruby/spec/fixtures/update_attributes.php
  67. 9 0
      api/ruby/spec/fixtures/weights.php
  68. 67 0
      api/ruby/spec/sphinx/sphinx.conf
  69. 86 0
      api/ruby/spec/sphinx/sphinx_test.sql
  70. 3 0
      api/ruby/sphinx.yml.tpl
  71. 75 0
      api/ruby/tasks/sphinx.rake
  72. 0 493
      contrib/rubyapi/lib/sphinx.rb
  73. 93 84
      doc/sphinx.html
  74. 10 1
      doc/sphinx.txt
  75. 11 1
      doc/sphinx.xml
  76. 2 1
      src/indexer.cpp
  77. 247 77
      src/sphinx.cpp
  78. 13 6
      src/sphinx.h
  79. 3 3
      src/sphinxexcerpt.cpp
  80. 18 15
      src/sphinxquery.cpp
  81. 14 4
      src/sphinxsort.cpp
  82. 13 0
      src/sphinxstd.h
  83. 150 3
      src/tests.cpp
  84. 125 17
      test/helpers.inc
  85. 0 0
      test/test_02/model.bin
  86. 0 0
      test/test_20/model.bin
  87. 122 0
      test/test_20/test.xml
  88. 0 0
      test/test_21/model.bin
  89. 87 0
      test/test_21/test.xml
  90. 1 0
      test/test_22/model.bin
  91. 57 0
      test/test_22/test.xml
  92. 0 0
      test/test_23/model.bin
  93. 114 0
      test/test_23/test.xml
  94. 0 0
      test/test_24/model.bin
  95. 113 0
      test/test_24/test.xml
  96. 0 0
      test/test_25/model.bin
  97. 114 0
      test/test_25/test.xml
  98. 1 0
      test/test_26/model.bin
  99. 80 0
      test/test_26/test.xml
  100. 0 0
      test/test_27/model.bin

+ 1 - 0
api/java/SphinxClient.java

@@ -959,6 +959,7 @@ public class SphinxClient
 			req.writeInt ( ((Integer) opts.get("around")).intValue() );
 
 			/* send documents */
+			req.writeInt ( docs.length );
 			for ( int i=0; i<docs.length; i++ )
 				writeNetUTF8 ( req, docs[i] );
 

+ 2 - 2
contrib/rubyapi/README → api/ruby/README

@@ -1,4 +1,4 @@
-=Sphinx Client Library 0.1.0
+=Sphinx Client API 0.4.0-r1112
 
 This document gives an overview of what is Sphinx itself and how to use in 
 within Ruby on Rails. For more information or documentation, 
@@ -24,7 +24,7 @@ You can create the documentation by running:
 ==Latest version
 
 You can always get latest version from
-http://kpumuk.info/projects/ror-plugins/using-sphinx-search-engine-in-ruby-on-rails
+http://kpumuk.info/projects/ror-plugins/sphinx
 
 ==Credits
 

+ 7 - 8
contrib/rubyapi/Rakefile → api/ruby/Rakefile

@@ -1,21 +1,20 @@
 require 'rake'
-require 'rake/testtask'
+require 'spec/rake/spectask'
 require 'rake/rdoctask'
 
 desc 'Default: run unit tests.'
-task :default => :test
+task :default => :spec
 
-desc 'Test the sphinx plugin.'
-Rake::TestTask.new(:test) do |t|
+desc 'Test the magic_enum plugin.'
+Spec::Rake::SpecTask.new(:spec) do |t|
   t.libs << 'lib'
-  t.pattern = 'test/**/*_test.rb'
-  t.verbose = true
+  t.pattern = 'spec/*_spec.rb'
 end
 
-desc 'Generate documentation for the sphinx plugin.'
+desc 'Generate documentation for the magic_enum plugin.'
 Rake::RDocTask.new(:rdoc) do |rdoc|
   rdoc.rdoc_dir = 'rdoc'
-  rdoc.title    = 'Sphinx'
+  rdoc.title    = 'Sphinx Client API'
   rdoc.options << '--line-numbers' << '--inline-source'
   rdoc.rdoc_files.include('README')
   rdoc.rdoc_files.include('lib/**/*.rb')

+ 1 - 0
api/ruby/init.rb

@@ -0,0 +1 @@
+require File.dirname(__FILE__) + '/lib/sphinx'

+ 5 - 0
api/ruby/install.rb

@@ -0,0 +1,5 @@
+require 'fileutils'
+
+sphinx_config = File.dirname(__FILE__) + '/../../../config/sphinx.yml'
+FileUtils.cp File.dirname(__FILE__) + '/sphinx.yml.tpl', sphinx_config unless File.exist?(sphinx_config)
+puts IO.read(File.join(File.dirname(__FILE__), 'README'))

+ 6 - 0
api/ruby/lib/sphinx.rb

@@ -0,0 +1,6 @@
+require File.dirname(__FILE__) + '/sphinx/request'
+require File.dirname(__FILE__) + '/sphinx/response'
+require File.dirname(__FILE__) + '/sphinx/client'
+
+module Sphinx
+end

+ 1020 - 0
api/ruby/lib/sphinx/client.rb

@@ -0,0 +1,1020 @@
+# = client.rb - Sphinx Client API
+# 
+# Author::    Dmytro Shteflyuk <mailto:[email protected]>.
+# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
+# License::   Distributes under the same terms as Ruby
+# Version::   0.4.0-r1112
+# Website::   http://kpumuk.info/projects/ror-plugins/sphinx
+#
+# This library is distributed under the terms of the Ruby license.
+# You can freely distribute/modify this library.
+
+# ==Sphinx Client API
+# 
+# The Sphinx Client API is used to communicate with <tt>searchd</tt>
+# daemon and get search results from Sphinx.
+# 
+# ===Usage
+# 
+#   sphinx = Sphinx::Client.new
+#   result = sphinx.Query('test')
+#   ids = result['matches'].map { |match| match['id'] }.join(',')
+#   posts = Post.find :all, :conditions => "id IN (#{ids})"
+#   
+#   docs = posts.map(&:body)
+#   excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
+
+require 'socket'
+
+module Sphinx
+  # :stopdoc:
+
+  class SphinxError < StandardError; end
+  class SphinxArgumentError < SphinxError; end
+  class SphinxConnectError < SphinxError; end
+  class SphinxResponseError < SphinxError; end
+  class SphinxInternalError < SphinxError; end
+  class SphinxTemporaryError < SphinxError; end
+  class SphinxUnknownError < SphinxError; end
+
+  # :startdoc:
+
+  class Client
+  
+    # :stopdoc:
+  
+    # Known searchd commands
+  
+    # search command
+    SEARCHD_COMMAND_SEARCH   = 0
+    # excerpt command
+    SEARCHD_COMMAND_EXCERPT  = 1
+    # update command
+    SEARCHD_COMMAND_UPDATE   = 2 
+    # keywords command
+    SEARCHD_COMMAND_KEYWORDS = 3 
+  
+    # Current client-side command implementation versions
+    
+    # search command version
+    VER_COMMAND_SEARCH   = 0x113
+    # excerpt command version
+    VER_COMMAND_EXCERPT  = 0x100
+    # update command version
+    VER_COMMAND_UPDATE   = 0x101
+    # keywords command version
+    VER_COMMAND_KEYWORDS = 0x100
+    
+    # Known searchd status codes
+  
+    # general success, command-specific reply follows
+    SEARCHD_OK      = 0
+    # general failure, command-specific reply may follow
+    SEARCHD_ERROR   = 1
+    # temporary failure, client should retry later
+    SEARCHD_RETRY   = 2
+    # general success, warning message and command-specific reply follow 
+    SEARCHD_WARNING = 3    
+    
+    # :startdoc:
+  
+    # Known match modes
+  
+    # match all query words
+    SPH_MATCH_ALL       = 0 
+    # match any query word
+    SPH_MATCH_ANY       = 1 
+    # match this exact phrase
+    SPH_MATCH_PHRASE    = 2 
+    # match this boolean query
+    SPH_MATCH_BOOLEAN   = 3 
+    # match this extended query
+    SPH_MATCH_EXTENDED  = 4 
+    # match all document IDs w/o fulltext query, apply filters
+    SPH_MATCH_FULLSCAN  = 5
+    # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
+    SPH_MATCH_EXTENDED2 = 6
+    
+    # Known ranking modes (ext2 only)
+  
+    # default mode, phrase proximity major factor and BM25 minor one
+    SPH_RANK_PROXIMITY_BM25 = 0
+    # statistical mode, BM25 ranking only (faster but worse quality)
+    SPH_RANK_BM25           = 1
+    # no ranking, all matches get a weight of 1
+    SPH_RANK_NONE           = 2
+    # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
+    SPH_RANK_WORDCOUNT      = 3
+    
+    # Known sort modes
+  
+    # sort by document relevance desc, then by date
+    SPH_SORT_RELEVANCE     = 0
+    # sort by document date desc, then by relevance desc
+    SPH_SORT_ATTR_DESC     = 1
+    # sort by document date asc, then by relevance desc
+    SPH_SORT_ATTR_ASC      = 2
+    # sort by time segments (hour/day/week/etc) desc, then by relevance desc
+    SPH_SORT_TIME_SEGMENTS = 3
+    # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
+    SPH_SORT_EXTENDED      = 4
+    # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
+    SPH_SORT_EXPR          = 5
+    
+    # Known filter types
+    
+    # filter by integer values set
+    SPH_FILTER_VALUES      = 0
+    # filter by integer range
+    SPH_FILTER_RANGE       = 1
+    # filter by float range
+    SPH_FILTER_FLOATRANGE  = 2
+    
+    # Known attribute types
+  
+    # this attr is just an integer
+    SPH_ATTR_INTEGER   = 1
+    # this attr is a timestamp
+    SPH_ATTR_TIMESTAMP = 2
+    # this attr is an ordinal string number (integer at search time, 
+    # specially handled at indexing time)
+    SPH_ATTR_ORDINAL   = 3
+    # this attr is a boolean bit field
+    SPH_ATTR_BOOL      = 4
+    # this attr is a float
+    SPH_ATTR_FLOAT     = 5
+    # this attr has multiple values (0 or more)
+    SPH_ATTR_MULTI     = 0x40000000
+    
+    # Known grouping functions
+  
+    # group by day
+    SPH_GROUPBY_DAY      = 0
+    # group by week
+    SPH_GROUPBY_WEEK     = 1 
+    # group by month
+    SPH_GROUPBY_MONTH    = 2 
+    # group by year
+    SPH_GROUPBY_YEAR     = 3
+    # group by attribute value
+    SPH_GROUPBY_ATTR     = 4
+    # group by sequential attrs pair
+    SPH_GROUPBY_ATTRPAIR = 5
+    
+    # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values. 
+    def initialize
+      # per-client-object settings
+      @host          = 'localhost'             # searchd host (default is "localhost")
+      @port          = 3312                    # searchd port (default is 3312)
+      
+      # per-query settings
+      @offset        = 0                       # how many records to seek from result-set start (default is 0)
+      @limit         = 20                      # how many records to return from result-set starting at offset (default is 20)
+      @mode          = SPH_MATCH_ALL           # query matching mode (default is SPH_MATCH_ALL)
+      @weights       = []                      # per-field weights (default is 1 for all fields)
+      @sort          = SPH_SORT_RELEVANCE      # match sorting mode (default is SPH_SORT_RELEVANCE)
+      @sortby        = ''                      # attribute to sort by (default is "")
+      @min_id        = 0                       # min ID to match (default is 0, which means no limit)
+      @max_id        = 0                       # max ID to match (default is 0, which means no limit)
+      @filters       = []                      # search filters
+      @groupby       = ''                      # group-by attribute name
+      @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
+      @groupsort     = '@group desc'           # group-by sorting clause (to sort groups in result set with)
+      @groupdistinct = ''                      # group-by count-distinct attribute
+      @maxmatches    = 1000                    # max matches to retrieve
+      @cutoff        = 0                       # cutoff to stop searching at (default is 0)
+      @retrycount    = 0                       # distributed retries count
+      @retrydelay    = 0                       # distributed retries delay
+      @anchor        = []                      # geographical anchor point
+      @indexweights  = []                      # per-index weights
+      @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
+      @maxquerytime  = 0                       # max query time, milliseconds (default is 0, do not limit) 
+      @fieldweights  = {}                      # per-field-name weights
+    
+      # per-reply fields (for single-query case)
+      @error         = ''                      # last error message
+      @warning       = ''                      # last warning message
+      
+      @reqs          = []                      # requests storage (for multi-query case)
+      @mbenc         = ''                      # stored mbstring encoding
+    end
+  
+    # Get last error message.
+    def GetLastError
+      @error
+    end
+    
+    # Get last warning message.
+    def GetLastWarning
+      @warning
+    end
+    
+    # Set searchd host name (string) and port (integer).
+    def SetServer(host, port)
+      assert { host.instance_of? String }
+      assert { port.instance_of? Fixnum }
+
+      @host = host
+      @port = port
+    end
+   
+    # Set offset and count into result set,
+    # and optionally set max-matches and cutoff limits.
+    def SetLimits(offset, limit, max = 0, cutoff = 0)
+      assert { offset.instance_of? Fixnum }
+      assert { limit.instance_of? Fixnum }
+      assert { max.instance_of? Fixnum }
+      assert { offset >= 0 }
+      assert { limit > 0 }
+      assert { max >= 0 }
+
+      @offset = offset
+      @limit = limit
+      @maxmatches = max if max > 0
+      @cutoff = cutoff if cutoff > 0
+    end
+    
+    # Set maximum query time, in milliseconds, per-index,
+    # integer, 0 means "do not limit"
+    def SetMaxQueryTime(max)
+      assert { max.instance_of? Fixnum }
+      assert { max >= 0 }
+      @maxquerytime = max
+    end
+    
+    # Set matching mode.
+    def SetMatchMode(mode)
+      assert { mode == SPH_MATCH_ALL \
+            || mode == SPH_MATCH_ANY \
+            || mode == SPH_MATCH_PHRASE \
+            || mode == SPH_MATCH_BOOLEAN \
+            || mode == SPH_MATCH_EXTENDED \
+            || mode == SPH_MATCH_FULLSCAN \
+            || mode == SPH_MATCH_EXTENDED2 }
+
+      @mode = mode
+    end
+    
+    # Set ranking mode.
+    def SetRankingMode(ranker)
+      assert { ranker == SPH_RANK_PROXIMITY_BM25 \
+            || ranker == SPH_RANK_BM25 \
+            || ranker == SPH_RANK_NONE \
+            || ranker == SPH_RANK_WORDCOUNT }
+
+      @ranker = ranker
+    end
+    
+    # Set matches sorting mode.
+    def SetSortMode(mode, sortby = '')
+      assert { mode == SPH_SORT_RELEVANCE \
+            || mode == SPH_SORT_ATTR_DESC \
+            || mode == SPH_SORT_ATTR_ASC \
+            || mode == SPH_SORT_TIME_SEGMENTS \
+            || mode == SPH_SORT_EXTENDED \
+            || mode == SPH_SORT_EXPR }
+      assert { sortby.instance_of? String }
+      assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
+
+      @sort = mode
+      @sortby = sortby
+    end
+    
+    # Bind per-field weights by order.
+    #
+    # DEPRECATED; use SetFieldWeights() instead.
+    def SetWeights(weights)
+      assert { weights.instance_of? Array }
+      weights.each do |weight|
+        assert { weight.instance_of? Fixnum }
+      end
+
+      @weights = weights
+    end
+
+    # Bind per-field weights by name.
+    #
+    # Takes string (field name) to integer name (field weight) hash as an argument.
+    # * Takes precedence over SetWeights().
+    # * Unknown names will be silently ignored.
+    # * Unbound fields will be silently given a weight of 1.
+    def SetFieldWeights(weights)
+      assert { weights.instance_of? Hash }
+      weights.each do |name, weight|
+        assert { name.instance_of? String }
+        assert { weight.instance_of? Fixnum }
+      end
+
+      @fieldweights = weights
+    end
+    
+    # Bind per-index weights by name.
+    def SetIndexWeights(weights)
+      assert { weights.instance_of? Hash }
+      weights.each do |index, weight|
+        assert { index.instance_of? String }
+        assert { weight.instance_of? Fixnum }
+      end
+      
+      @indexweights = weights
+    end
+    
+    # Set IDs range to match.
+    # 
+    # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive). 
+    def SetIDRange(min, max)
+      assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
+      assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
+      assert { min <= max }
+
+      @min_id = min
+      @max_id = max
+    end
+    
+    # Set values filter.
+    # 
+    # Only match those records where <tt>attribute</tt> column values
+    # are in specified set.
+    def SetFilter(attribute, values, exclude = false)
+      assert { attribute.instance_of? String }
+      assert { values.instance_of? Array }
+      assert { !values.empty? }
+
+      if values.instance_of?(Array) && values.size > 0
+        values.each do |value|
+          assert { value.instance_of? Fixnum }
+        end
+      
+        @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
+      end
+    end
+    
+    # Set range filter.
+    # 
+    # Only match those records where <tt>attribute</tt> column value
+    # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
+    def SetFilterRange(attribute, min, max, exclude = false)
+      assert { attribute.instance_of? String }
+      assert { min.instance_of? Fixnum }
+      assert { max.instance_of? Fixnum }
+      assert { min <= max }
+    
+      @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
+    end
+    
+    # Set float range filter.
+    #
+    # Only match those records where <tt>attribute</tt> column value
+    # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
+    def SetFilterFloatRange(attribute, min, max, exclude = false)
+      assert { attribute.instance_of? String }
+      assert { min.instance_of? Float }
+      assert { max.instance_of? Float }
+      assert { min <= max }
+    
+      @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
+    end
+    
+    # Setup anchor point for geosphere distance calculations.
+    #
+    # Required to use <tt>@geodist</tt> in filters and sorting
+    # distance will be computed to this point. Latitude and longitude 
+    # must be in radians.
+    #
+    # * <tt>attrlat</tt> -- is the name of latitude attribute
+    # * <tt>attrlong</tt> -- is the name of longitude attribute
+    # * <tt>lat</tt> -- is anchor point latitude, in radians
+    # * <tt>long</tt> -- is anchor point longitude, in radians
+    def SetGeoAnchor(attrlat, attrlong, lat, long)
+      assert { attrlat.instance_of? String }
+      assert { attrlong.instance_of? String }
+      assert { lat.instance_of? Float }
+      assert { long.instance_of? Float }
+
+      @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
+    end
+    
+    # Set grouping attribute and function.
+    #
+    # In grouping mode, all matches are assigned to different groups
+    # based on grouping function value.
+    #
+    # Each group keeps track of the total match count, and the best match
+    # (in this group) according to current sorting function.
+    #
+    # The final result set contains one best match per group, with
+    # grouping function value and matches count attached.
+    #
+    # Groups in result set could be sorted by any sorting clause,
+    # including both document attributes and the following special
+    # internal Sphinx attributes:
+    #
+    # * @id - match document ID;
+    # * @weight, @rank, @relevance -  match weight;
+    # * @group - groupby function value;
+    # * @count - amount of matches in group.
+    #
+    # the default mode is to sort by groupby value in descending order,
+    # ie. by '@group desc'.
+    #
+    # 'total_found' would contain total amount of matching groups over
+    # the whole index.
+    #
+    # WARNING: grouping is done in fixed memory and thus its results
+    # are only approximate; so there might be more groups reported
+    # in total_found than actually present. @count might also
+    # be underestimated. 
+    #
+    # For example, if sorting by relevance and grouping by "published"
+    # attribute with SPH_GROUPBY_DAY function, then the result set will
+    # contain one most relevant match per each day when there were any
+    # matches published, with day number and per-day match count attached,
+    # and sorted by day number in descending order (ie. recent days first).
+    def SetGroupBy(attribute, func, groupsort = '@group desc')
+      assert { attribute.instance_of? String }
+      assert { groupsort.instance_of? String }
+      assert { func == SPH_GROUPBY_DAY \
+            || func == SPH_GROUPBY_WEEK \
+            || func == SPH_GROUPBY_MONTH \
+            || func == SPH_GROUPBY_YEAR \
+            || func == SPH_GROUPBY_ATTR \
+            || func == SPH_GROUPBY_ATTRPAIR }
+
+      @groupby = attribute
+      @groupfunc = func
+      @groupsort = groupsort
+    end
+    
+    # Set count-distinct attribute for group-by queries.
+    def SetGroupDistinct(attribute)
+      assert { attribute.instance_of? String }
+      @groupdistinct = attribute
+    end
+    
+    # Set distributed retries count and delay.
+    def SetRetries(count, delay = 0)
+      assert { count.instance_of? Fixnum }
+      assert { delay.instance_of? Fixnum }
+      
+      @retrycount = count
+      @retrydelay = delay
+    end
+    
+    # Clear all filters (for multi-queries).
+    def ResetFilters
+      @filters = []
+      @anchor = []
+    end
+    
+    # Clear groupby settings (for multi-queries).
+    def ResetGroupBy
+      @groupby       = ''
+      @groupfunc     = SPH_GROUPBY_DAY
+      @groupsort     = '@group desc'
+      @groupdistinct = ''
+    end
+    
+    # Connect to searchd server and run given search query.
+    #
+    # <tt>query</tt> is query string
+
+    # <tt>index</tt> is index name (or names) to query. default value is "*" which means
+    # to query all indexes. Accepted characters for index names are letters, numbers,
+    # dash, and underscore; everything else is considered a separator. Therefore,
+    # all the following calls are valid and will search two indexes:
+    #
+    #   sphinx.Query('test query', 'main delta')
+    #   sphinx.Query('test query', 'main;delta')
+    #   sphinx.Query('test query', 'main, delta')
+    #
+    # Index order matters. If identical IDs are found in two or more indexes,
+    # weight and attribute values from the very last matching index will be used
+    # for sorting and returning to client. Therefore, in the example above,
+    # matches from "delta" index will always "win" over matches from "main".
+    #
+    # Returns false on failure.
+    # Returns hash which has the following keys on success:
+    # 
+    # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
+    # * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
+    # * <tt>'total_found'</tt> -- total amount of matching documents in index
+    # * <tt>'time'</tt> -- search time
+    # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
+    def Query(query, index = '*', comment = '')
+      assert { @reqs.empty? }
+      @reqs = []
+      
+      self.AddQuery(query, index, comment)
+      results = self.RunQueries
+      
+      # probably network error; error message should be already filled
+      return false unless results.instance_of?(Array)
+      
+      @error = results[0]['error']
+      @warning = results[0]['warning']
+      
+      return false if results[0]['status'] == SEARCHD_ERROR
+      return results[0]
+    end
+    
+    # Add query to batch.
+    #
+    # Batch queries enable searchd to perform internal optimizations,
+    # if possible; and reduce network connection overheads in all cases.
+    #
+    # For instance, running exactly the same query with different
+    # groupby settings will enable searchd to perform expensive
+    # full-text search and ranking operation only once, but compute
+    # multiple groupby results from its output.
+    #
+    # Parameters are exactly the same as in <tt>Query</tt> call.
+    # Returns index to results array returned by <tt>RunQueries</tt> call.
+    def AddQuery(query, index = '*', comment = '')
+      # build request
+  
+      # mode and limits
+      request = Request.new
+      request.put_int @offset, @limit, @mode, @ranker, @sort
+      request.put_string @sortby
+      # query itself
+      request.put_string query
+      # weights
+      request.put_int_array @weights
+      # indexes
+      request.put_string index
+      # id64 range marker
+      request.put_int 1
+      # id64 range
+      request.put_int64 @min_id.to_i, @max_id.to_i 
+      
+      # filters
+      request.put_int @filters.length
+      @filters.each do |filter|
+        request.put_string filter['attr']
+        request.put_int filter['type']
+
+        case filter['type']
+          when SPH_FILTER_VALUES
+            request.put_int_array filter['values']
+          when SPH_FILTER_RANGE
+            request.put_int filter['min'], filter['max']
+          when SPH_FILTER_FLOATRANGE
+            request.put_float filter['min'], filter['max']
+          else
+            raise SphinxInternalError, 'Internal error: unhandled filter type'
+        end
+        request.put_int filter['exclude'] ? 1 : 0
+      end
+      
+      # group-by clause, max-matches count, group-sort clause, cutoff count
+      request.put_int @groupfunc
+      request.put_string @groupby
+      request.put_int @maxmatches
+      request.put_string @groupsort
+      request.put_int @cutoff, @retrycount, @retrydelay
+      request.put_string @groupdistinct
+      
+      # anchor point
+      if @anchor.empty?
+        request.put_int 0
+      else
+        request.put_int 1
+        request.put_string @anchor['attrlat'], @anchor['attrlong']
+        request.put_float @anchor['lat'], @anchor['long']
+      end
+      
+      # per-index weights
+      request.put_int @indexweights.length
+      @indexweights.each do |idx, weight|
+        request.put_string idx
+        request.put_int weight
+      end
+      
+      # max query time
+      request.put_int @maxquerytime
+      
+      # per-field weights
+      request.put_int @fieldweights.length
+      @fieldweights.each do |field, weight|
+        request.put_string field
+        request.put_int weight
+      end
+      
+      request.put_string comment
+      
+      # store request to requests array
+      @reqs << request.to_s;
+      return @reqs.length - 1
+    end
+    
+    # Run queries batch.
+    #
+    # Returns an array of result sets on success.
+    # Returns false on network IO failure.
+    #
+    # Each result set in returned array is a hash which contains
+    # the same keys as the hash returned by <tt>Query</tt>, plus:
+    #
+    # * <tt>'error'</tt> -- search error for this query
+    # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
+    def RunQueries
+      if @reqs.empty?
+        @error = 'No queries defined, issue AddQuery() first'
+        return false
+      end
+
+      req = @reqs.join('')
+      nreqs = @reqs.length
+      @reqs = []
+      response = PerformRequest(:search, req, nreqs)
+     
+      # parse response
+      begin
+        results = []
+        ires = 0
+        while ires < nreqs
+          ires += 1
+          result = {}
+          
+          result['error'] = ''
+          result['warning'] = ''
+          
+          # extract status
+          status = result['status'] = response.get_int
+          if status != SEARCHD_OK
+            message = response.get_string
+            if status == SEARCHD_WARNING
+              result['warning'] = message
+            else
+              result['error'] = message
+              results << result
+              next
+            end
+          end
+      
+          # read schema
+          fields = []
+          attrs = {}
+          attrs_names_in_order = []
+          
+          nfields = response.get_int
+          while nfields > 0
+            nfields -= 1
+            fields << response.get_string
+          end
+          result['fields'] = fields
+      
+          nattrs = response.get_int
+          while nattrs > 0
+            nattrs -= 1
+            attr = response.get_string
+            type = response.get_int
+            attrs[attr] = type
+            attrs_names_in_order << attr
+          end
+          result['attrs'] = attrs
+          
+          # read match count
+          count = response.get_int
+          id64 = response.get_int
+          
+          # read matches
+          result['matches'] = []
+          while count > 0
+            count -= 1
+            
+            if id64 != 0
+              doc = response.get_int64
+              weight = response.get_int
+            else
+              doc, weight = response.get_ints(2)
+            end
+      
+            r = {} # This is a single result put in the result['matches'] array
+            r['id'] = doc
+            r['weight'] = weight
+            attrs_names_in_order.each do |a|
+              r['attrs'] ||= {}
+  
+              # handle floats
+              if attrs[a] == SPH_ATTR_FLOAT
+                r['attrs'][a] = response.get_float
+              else
+                # handle everything else as unsigned ints
+                val = response.get_int
+                if (attrs[a] & SPH_ATTR_MULTI) != 0
+                  r['attrs'][a] = []
+                  1.upto(val) do
+                    r['attrs'][a] << response.get_int
+                  end
+                else
+                  r['attrs'][a] = val
+                end
+              end
+            end
+            result['matches'] << r
+          end
+          result['total'], result['total_found'], msecs, words = response.get_ints(4)
+          result['time'] = '%.3f' % (msecs / 1000.0)
+  
+          result['words'] = {}
+          while words > 0
+            words -= 1
+            word = response.get_string
+            docs, hits = response.get_ints(2)
+            result['words'][word] = { 'docs' => docs, 'hits' => hits }
+          end
+          
+          results << result
+        end
+      #rescue EOFError
+      #  @error = 'incomplete reply'
+      #  raise SphinxResponseError, @error
+      end
+      
+      return results
+    end
+  
+    # Connect to searchd server and generate excerpts (snippets) from given documents.
+    #
+    # * <tt>docs</tt> -- an array of strings which represent the documents' contents
+    # * <tt>index</tt> -- a string specifying the index which settings will be used
+    # for stemming, lexing and case folding
+    # * <tt>words</tt> -- a string which contains the words to highlight
+    # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
+    # 
+    # You can use following parameters:
+    # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
+    # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
+    # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
+    # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
+    # * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
+    # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
+    # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
+    # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
+    # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
+    #
+    # Returns false on failure.
+    # Returns an array of string excerpts on success.
+    def BuildExcerpts(docs, index, words, opts = {})
+      assert { docs.instance_of? Array }
+      assert { index.instance_of? String }
+      assert { words.instance_of? String }
+      assert { opts.instance_of? Hash }
+
+      # fixup options; work on a copy so the caller's hash is not mutated
+      opts = opts.dup
+      opts['before_match'] ||= '<b>'
+      opts['after_match'] ||= '</b>'
+      opts['chunk_separator'] ||= ' ... '
+      opts['limit'] ||= 256
+      opts['around'] ||= 5
+      opts['exact_phrase'] ||= false
+      opts['single_passage'] ||= false
+      opts['use_boundaries'] ||= false
+      opts['weight_order'] ||= false
+      
+      # build request
+      
+      # v.1.0 req: pack boolean options into a flags bitmask
+      flags = 1
+      flags |= 2  if opts['exact_phrase']
+      flags |= 4  if opts['single_passage']
+      flags |= 8  if opts['use_boundaries']
+      flags |= 16 if opts['weight_order']
+      
+      request = Request.new
+      request.put_int 0, flags # mode=0, then flags
+      # req index
+      request.put_string index
+      # req words
+      request.put_string words
+  
+      # options
+      request.put_string opts['before_match']
+      request.put_string opts['after_match']
+      request.put_string opts['chunk_separator']
+      request.put_int opts['limit'].to_i, opts['around'].to_i
+      
+      # documents
+      request.put_int docs.size
+      docs.each do |doc|
+        assert { doc.instance_of? String }
+
+        request.put_string doc
+      end
+      
+      response = PerformRequest(:excerpt, request)
+      
+      # parse response: one excerpt string per input document
+      begin
+        res = []
+        docs.each do |doc|
+          res << response.get_string
+        end
+      rescue EOFError
+        @error = 'incomplete reply'
+        raise SphinxResponseError, @error
+      end
+      return res
+    end
+    
+    # Connect to searchd server, and generate keyword list for a given query.
+    #
+    # * <tt>query</tt> -- a string with the query to tokenize
+    # * <tt>index</tt> -- a string with the index whose settings are used for tokenization
+    # * <tt>hits</tt> -- whether to gather per-keyword docs/hits statistics
+    #
+    # Returns an array of words on success.
+    def BuildKeywords(query, index, hits)
+      assert { query.instance_of? String }
+      assert { index.instance_of? String }
+      # NOTE: Ruby has no Boolean class (the original check raised NameError
+      # under $DEBUG); accept the two concrete boolean singletons instead
+      assert { hits.instance_of?(TrueClass) or hits.instance_of?(FalseClass) }
+      
+      # build request
+      request = Request.new
+      # v.1.0 req
+      request.put_string query # req query
+      request.put_string index # req index
+      request.put_int hits ? 1 : 0
+
+      response = PerformRequest(:keywords, request)
+      
+      # parse response
+      begin
+        res = []
+        nwords = response.get_int
+        0.upto(nwords - 1) do |i|
+          tokenized = response.get_string
+          normalized = response.get_string
+          
+          entry = { 'tokenized' => tokenized, 'normalized' => normalized }
+          # docs/hits statistics are only present when they were requested
+          entry['docs'], entry['hits'] = response.get_ints(2) if hits
+          
+          res << entry
+        end
+      rescue EOFError
+        @error = 'incomplete reply'
+        raise SphinxResponseError, @error
+      end
+      
+      return res
+    end
+
+    # Update specified attributes on specified documents.
+    #
+    # * <tt>index</tt> is a name of the index to be updated
+    # * <tt>attrs</tt> is an array of attribute name strings.
+    # * <tt>values</tt> is a hash where key is document id, and value is an array of
+    # new attribute values
+    #
+    # Returns number of actually updated documents (0 or more) on success.
+    # Returns -1 on failure.
+    #
+    # Usage example:
+    #    sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
+    def UpdateAttributes(index, attrs, values)
+      # verify everything
+      assert { index.instance_of? String }
+      
+      assert { attrs.instance_of? Array }
+      attrs.each do |attr|
+        assert { attr.instance_of? String }
+      end
+      
+      assert { values.instance_of? Hash }
+      values.each do |id, entry|
+        # document ids are serialized as 64-bit ints (see put_int64 below) and
+        # may exceed the Fixnum range, so accept any Integer subclass
+        assert { id.kind_of? Integer }
+        assert { entry.instance_of? Array }
+        assert { entry.length == attrs.length }
+        entry.each do |v|
+          assert { v.kind_of? Integer }
+        end
+      end
+      
+      # build request
+      request = Request.new
+      request.put_string index
+      
+      request.put_int attrs.length
+      request.put_string(*attrs)
+      
+      request.put_int values.length
+      values.each do |id, entry|
+        request.put_int64 id
+        request.put_int(*entry)
+      end
+      
+      response = PerformRequest(:update, request)
+      
+      # parse response: a single int with the number of updated documents
+      begin
+        return response.get_int
+      rescue EOFError
+        @error = 'incomplete reply'
+        raise SphinxResponseError, @error
+      end
+    end
+  
+    protected
+    
+      # Connect to searchd server.
+      #
+      # Returns an open TCPSocket with the protocol handshake completed.
+      # Raises SphinxConnectError on connection or handshake failure.
+      def Connect
+        begin
+          sock = TCPSocket.new(@host, @port)
+        rescue
+          @error = "connection to #{@host}:#{@port} failed"
+          raise SphinxConnectError, @error
+        end
+        
+        # check the protocol version announced by the server; v is nil when the
+        # server closed the connection before sending 4 bytes
+        v = sock.recv(4).unpack('N*').first
+        if v.nil? or v < 1
+          sock.close
+          @error = "expected searchd protocol version 1+, got version '#{v}'"
+          raise SphinxConnectError, @error
+        end
+        
+        # announce our own protocol version
+        sock.send([1].pack('N'), 0)
+        sock
+      end
+      
+      # Get and check response packet from searchd server.
+      def GetResponse(sock, client_version)
+        response = ''
+        len = 0
+        
+        header = sock.recv(8)
+        if header.length == 8
+          status, ver, len = header.unpack('n2N')
+          left = len.to_i
+          while left > 0 do
+            begin
+              chunk = sock.recv(left)
+              if chunk
+                response << chunk
+                left -= chunk.length
+              end
+            rescue EOFError
+              break
+            end
+          end
+        end
+        sock.close
+    
+        # check response
+        read = response.length
+        if response.empty? or read != len.to_i
+          @error = len \
+            ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
+            : 'received zero-sized searchd response'
+          raise SphinxResponseError, @error
+        end
+        
+        # check status
+        if (status == SEARCHD_WARNING)
+          wlen = response[0, 4].unpack('N*').first
+          @warning = response[4, wlen]
+          return response[4 + wlen, response.length - 4 - wlen]
+        end
+
+        if status == SEARCHD_ERROR
+          @error = 'searchd error: ' + response[4, response.length - 4]
+          raise SphinxInternalError, @error
+        end
+    
+        if status == SEARCHD_RETRY
+          @error = 'temporary searchd error: ' + response[4, response.length - 4]
+          raise SphinxTemporaryError, @error
+        end
+    
+        unless status == SEARCHD_OK
+          @error = "unknown status code: '#{status}'"
+          raise SphinxUnknownError, @error
+        end
+        
+        # check version
+        if ver < client_version
+          @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
+            "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
+        end
+        
+        return response
+      end
+      
+      # Connect, send query, get response.
+      def PerformRequest(command, request, additional = nil)
+        cmd = command.to_s.upcase
+        command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
+        command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
+        
+        sock = self.Connect
+        len = request.to_s.length + (additional != nil ? 4 : 0)
+        header = [command_id, command_ver, len].pack('nnN')
+        header << [additional].pack('N') if additional != nil
+        sock.send(header + request.to_s, 0)
+        response = self.GetResponse(sock, command_ver)
+        return Response.new(response)
+      end
+      
+      # :stopdoc:
+      def assert
+        raise 'Assertion failed!' unless yield if $DEBUG
+      end
+      # :startdoc:
+  end
+end

+ 44 - 0
api/ruby/lib/sphinx/request.rb

@@ -0,0 +1,44 @@
+module Sphinx
+  # Pack ints, floats, strings, and arrays to internal representation
+  # needed by Sphinx search engine.
+  class Request
+    # Initialize new request.
+    def initialize
+      @request = ''
+    end
+    
+    # Put int(s) to request.
+    def put_int(*ints)
+      ints.each { |i| @request << [i].pack('N') }
+    end
+
+    # Put 64-bit int(s) to request.
+    def put_int64(*ints)
+      ints.each { |i| @request << [i >> 32, i & ((1 << 32) - 1)].pack('NN') }
+    end
+
+    # Put string(s) to request (first length, then the string itself).
+    def put_string(*strings)
+      strings.each { |s| @request << [s.length].pack('N') + s }
+    end
+    
+    # Put float(s) to request.
+    def put_float(*floats)
+      floats.each do |f|
+        t1 = [f].pack('f') # machine order
+        t2 = t1.unpack('L*').first # int in machine order
+        @request << [t2].pack('N')
+      end
+    end
+    
+    # Put array of ints to request (first length, then the array itself)
+    def put_int_array(arr)
+      put_int arr.length, *arr
+    end
+    
+    # Returns the entire message
+    def to_s
+      @request
+    end
+  end
+end

+ 69 - 0
api/ruby/lib/sphinx/response.rb

@@ -0,0 +1,69 @@
+module Sphinx
+  # Unpack the internal binary Sphinx representation (big-endian / network
+  # byte order) of ints, floats, and strings produced by searchd.
+  class Response
+    # Initialize a new response wrapper around the raw reply string.
+    def initialize(response)
+      @response = response
+      @position = 0
+      @size = response.length
+    end
+    
+    # Gets current stream position.
+    def position
+      @position
+    end
+    
+    # Gets response size.
+    def size
+      @size
+    end
+    
+    # Returns <tt>true</tt> when response stream is out.
+    def eof?
+      @position >= @size
+    end
+
+    # Get a 32-bit unsigned int from stream.
+    def get_int
+      raise EOFError if @position + 4 > @size
+      value = @response[@position, 4].unpack('N*').first
+      @position += 4
+      return value
+    end
+
+    # Get 64-bit int from stream (two big-endian 32-bit halves, high first).
+    def get_int64
+      raise EOFError if @position + 8 > @size
+      # the first 'N*' already consumes both 32-bit words; the trailing 'N*'
+      # in the format is redundant but harmless
+      hi, lo = @response[@position, 8].unpack('N*N*')
+      @position += 8
+      return (hi << 32) + lo
+    end
+
+    # Get array of <tt>count</tt> ints from stream.
+    def get_ints(count)
+      length = 4 * count
+      raise EOFError if @position + length > @size
+      values = @response[@position, length].unpack('N*' * count)
+      @position += length
+      return values
+    end
+    
+    # Get a length-prefixed string from stream.
+    # NOTE(review): the prefix is a byte count; indexing with it assumes
+    # byte-oriented strings (Ruby 1.8 semantics) -- verify under Ruby 1.9+
+    def get_string
+      length = get_int
+      raise EOFError if @position + length > @size
+      value = length > 0 ? @response[@position, length] : ''
+      @position += length
+      return value
+    end
+    
+    # Get a single-precision float from stream: read the big-endian word,
+    # then reinterpret its bits as a machine-order float.
+    def get_float
+      raise EOFError if @position + 4 > @size
+      uval = @response[@position, 4].unpack('N*').first;
+      @position += 4
+      return ([uval].pack('L')).unpack('f*').first
+    end
+  end
+end

+ 103 - 0
api/ruby/spec/client_response_spec.rb

@@ -0,0 +1,103 @@
+require File.dirname(__FILE__) + '/../init'
+
+# To execute these tests you need to execute sphinx_test.sql and configure sphinx using sphinx.conf
+# (both files are placed under sphinx directory)
+# Integration specs: require a running searchd loaded with the test data.
+context 'The SphinxApi connected to Sphinx' do
+  setup do
+    @sphinx = Sphinx::Client.new
+  end
+  
+  specify 'should parse response in Query method' do
+    result = @sphinx.Query('wifi', 'test1')
+    validate_results_wifi(result)
+  end
+
+  specify 'should process 64-bit keys in Query method' do
+    result = @sphinx.Query('wifi', 'test2')
+    result['total_found'].should == 3
+    result['matches'].length.should == 3
+    result['matches'][0]['id'].should == 4294967298
+    result['matches'][1]['id'].should == 4294967299
+    result['matches'][2]['id'].should == 4294967297
+  end
+  
+  specify 'should parse batch-query responce in RunQueries method' do
+    @sphinx.AddQuery('wifi', 'test1')
+    @sphinx.AddQuery('gprs', 'test1')
+    results = @sphinx.RunQueries
+    results.should be_an_instance_of(Array)
+    results.length.should == 2
+    validate_results_wifi(results[0])
+  end
+  
+  specify 'should parse response in BuildExcerpts method' do
+    result = @sphinx.BuildExcerpts(['what the world', 'London is the capital of Great Britain'], 'test1', 'the')
+    result.should == ['what <b>the</b> world', 'London is <b>the</b> capital of Great Britain']
+  end
+
+  specify 'should parse response in BuildKeywords method' do
+    result = @sphinx.BuildKeywords('wifi gprs', 'test1', true)
+    result.should == [
+      { 'normalized' => 'wifi', 'tokenized' => 'wifi', 'hits' => 6, 'docs' => 3 },
+      { 'normalized' => 'gprs', 'tokenized' => 'gprs', 'hits' => 1, 'docs' => 1 }
+    ]
+  end
+
+  specify 'should parse response in UpdateAttributes method' do
+    @sphinx.UpdateAttributes('test1', ['group_id'], { 2 => [1] }).should == 1
+    result = @sphinx.Query('wifi', 'test1')
+    result['matches'][0]['attrs']['group_id'].should == 1
+    @sphinx.UpdateAttributes('test1', ['group_id'], { 2 => [2] }).should == 1
+    result = @sphinx.Query('wifi', 'test1')
+    result['matches'][0]['attrs']['group_id'].should == 2
+  end
+  
+  specify 'should process errors in Query method' do
+    @sphinx.Query('wifi', 'fakeindex').should be_false
+    @sphinx.GetLastError.length.should_not == 0
+  end
+
+  specify 'should process errors in RunQueries method' do
+    @sphinx.AddQuery('wifi', 'fakeindex')
+    r = @sphinx.RunQueries
+    r[0]['error'].length.should_not == 0
+  end
+  
+  # Shared assertions for the canonical 'wifi' query against index test1.
+  def validate_results_wifi(result)
+    result['total_found'].should == 3
+    result['matches'].length.should == 3
+    result['time'].should_not be_nil
+    result['attrs'].should == {
+      'group_id' => Sphinx::Client::SPH_ATTR_INTEGER,
+      'created_at' => Sphinx::Client::SPH_ATTR_TIMESTAMP,
+      'rating' => Sphinx::Client::SPH_ATTR_FLOAT,
+      'tags' => Sphinx::Client::SPH_ATTR_MULTI | Sphinx::Client::SPH_ATTR_INTEGER
+    }
+    result['fields'].should == [ 'name', 'description' ]
+    result['total'].should == 3
+    result['matches'].should be_an_instance_of(Array)
+    
+    result['matches'][0]['id'].should == 2
+    result['matches'][0]['weight'].should == 2
+    result['matches'][0]['attrs']['group_id'].should == 2
+    result['matches'][0]['attrs']['created_at'].should == 1175658555
+    result['matches'][0]['attrs']['tags'].should == [5, 6, 7, 8]
+    ('%0.2f' % result['matches'][0]['attrs']['rating']).should == '54.85'
+    
+    result['matches'][1]['id'].should == 3
+    result['matches'][1]['weight'].should == 2
+    result['matches'][1]['attrs']['group_id'].should == 1
+    result['matches'][1]['attrs']['created_at'].should == 1175658647
+    result['matches'][1]['attrs']['tags'].should == [1, 7, 9, 10]
+    ('%0.2f' % result['matches'][1]['attrs']['rating']).should == '16.25'
+
+    result['matches'][2]['id'].should == 1
+    result['matches'][2]['weight'].should == 1
+    result['matches'][2]['attrs']['group_id'].should == 1
+    result['matches'][2]['attrs']['created_at'].should == 1175658490
+    result['matches'][2]['attrs']['tags'].should == [1, 2, 3, 4]
+    ('%0.2f' % result['matches'][2]['attrs']['rating']).should == '13.32'
+    
+    result['words'].should == { 'wifi' => { 'hits' => 6, 'docs' => 3 } }
+  end
+end

+ 546 - 0
api/ruby/spec/client_spec.rb

@@ -0,0 +1,546 @@
+require File.dirname(__FILE__) + '/../init'
+
+module SphinxFixtureHelper
+  # Runs the named PHP fixture script and returns its stdout: the reference
+  # binary request produced by the PHP Sphinx client for the same API calls.
+  def sphinx_fixture(name)
+    `php #{File.dirname(__FILE__)}/fixtures/#{name}.php`
+  end
+end
+
+# Unit specs for the protected Connect method; the TCP socket is mocked, so
+# no live searchd is needed.
+describe 'The Connect method of SphinxApi' do
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+  end
+
+  it 'should establish TCP connection to the server and initialize session' do
+    TCPSocket.should_receive(:new).with('localhost', 3312).and_return(@sock)
+    # server announces protocol version 1; client replies with its own
+    @sock.should_receive(:recv).with(4).and_return([1].pack('N'))
+    @sock.should_receive(:send).with([1].pack('N'), 0)
+    @sphinx.send(:Connect).should be(@sock)
+  end
+
+  it 'should raise exception when searchd protocol is not 1+' do
+    TCPSocket.should_receive(:new).with('localhost', 3312).and_return(@sock)
+    @sock.should_receive(:recv).with(4).and_return([0].pack('N'))
+    @sock.should_receive(:close)
+    lambda { @sphinx.send(:Connect) }.should raise_error(Sphinx::SphinxConnectError)
+    @sphinx.GetLastError.should == 'expected searchd protocol version 1+, got version \'0\''
+  end
+
+  it 'should raise exception on connection error' do
+    TCPSocket.should_receive(:new).with('localhost', 3312).and_raise(Errno::EBADF)
+    lambda { @sphinx.send(:Connect) }.should raise_error(Sphinx::SphinxConnectError)
+    @sphinx.GetLastError.should == 'connection to localhost:3312 failed'
+  end
+
+  it 'should use custom host and port' do
+    @sphinx.SetServer('anotherhost', 55555)
+    TCPSocket.should_receive(:new).with('anotherhost', 55555).and_raise(Errno::EBADF)
+    lambda { @sphinx.send(:Connect) }.should raise_error(Sphinx::SphinxConnectError)
+  end
+end
+
+# Unit specs for the protected GetResponse method. Mocked packets are built
+# with the same layout searchd uses: an 8-byte header packed as 'n2N'
+# (status, version, body length) followed by the body.
+describe 'The GetResponse method of SphinxApi' do
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+    # GetResponse must always close the socket, whatever happens
+    @sock.should_receive(:close)
+  end
+  
+  it 'should receive response' do
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_OK, 1, 4].pack('n2N'))
+    @sock.should_receive(:recv).with(4).and_return([0].pack('N'))
+    @sphinx.send(:GetResponse, @sock, 1)
+  end
+
+  it 'should raise exception on zero-sized response' do
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_OK, 1, 0].pack('n2N'))
+    lambda { @sphinx.send(:GetResponse, @sock, 1) }.should raise_error(Sphinx::SphinxResponseError)
+  end
+
+  it 'should raise exception when response is incomplete' do
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_OK, 1, 4].pack('n2N'))
+    @sock.should_receive(:recv).with(4).and_raise(EOFError)
+    lambda { @sphinx.send(:GetResponse, @sock, 1) }.should raise_error(Sphinx::SphinxResponseError)
+  end
+
+  it 'should set warning message when SEARCHD_WARNING received' do
+    # warning body: 4-byte message length, message, then the real payload
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_WARNING, 1, 14].pack('n2N'))
+    @sock.should_receive(:recv).with(14).and_return([5].pack('N') + 'helloworld')
+    @sphinx.send(:GetResponse, @sock, 1).should == 'world'
+    @sphinx.GetLastWarning.should == 'hello'
+  end
+
+  it 'should raise exception when SEARCHD_ERROR received' do
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_ERROR, 1, 9].pack('n2N'))
+    @sock.should_receive(:recv).with(9).and_return([1].pack('N') + 'hello')
+    lambda { @sphinx.send(:GetResponse, @sock, 1) }.should raise_error(Sphinx::SphinxInternalError)
+    @sphinx.GetLastError.should == 'searchd error: hello'
+  end
+
+  it 'should raise exception when SEARCHD_RETRY received' do
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_RETRY, 1, 9].pack('n2N'))
+    @sock.should_receive(:recv).with(9).and_return([1].pack('N') + 'hello')
+    lambda { @sphinx.send(:GetResponse, @sock, 1) }.should raise_error(Sphinx::SphinxTemporaryError)
+    @sphinx.GetLastError.should == 'temporary searchd error: hello'
+  end
+
+  it 'should raise exception when unknown status received' do
+    @sock.should_receive(:recv).with(8).and_return([65535, 1, 9].pack('n2N'))
+    @sock.should_receive(:recv).with(9).and_return([1].pack('N') + 'hello')
+    lambda { @sphinx.send(:GetResponse, @sock, 1) }.should raise_error(Sphinx::SphinxUnknownError)
+    @sphinx.GetLastError.should == 'unknown status code: \'65535\''
+  end
+
+  it 'should set warning when server is older than client' do
+    @sock.should_receive(:recv).with(8).and_return([Sphinx::Client::SEARCHD_OK, 1, 9].pack('n2N'))
+    @sock.should_receive(:recv).with(9).and_return([1].pack('N') + 'hello')
+    @sphinx.send(:GetResponse, @sock, 5)
+    @sphinx.GetLastWarning.should == 'searchd command v.0.1 older than client\'s v.0.5, some options might not work'
+  end
+end
+
+describe 'The Query method of SphinxApi' do
+  include SphinxFixtureHelper
+
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+    @sphinx.stub!(:Connect).and_return(@sock)
+    @sphinx.stub!(:GetResponse).and_raise(Sphinx::SphinxError)
+  end
+
+  it 'should generate valid request with default parameters' do
+    expected = sphinx_fixture('default_search')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with default parameters and index' do
+    expected = sphinx_fixture('default_search_index')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.Query('query', 'index') rescue nil?
+  end
+  
+  it 'should generate valid request with limits' do
+    expected = sphinx_fixture('limits')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetLimits(10, 20)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with limits and max number to retrieve' do
+    expected = sphinx_fixture('limits_max')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetLimits(10, 20, 30)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with limits and cutoff to retrieve' do
+    expected = sphinx_fixture('limits_cutoff')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetLimits(10, 20, 30, 40)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with max query time specified' do
+    expected = sphinx_fixture('max_query_time')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMaxQueryTime(1000)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_ALL' do
+    expected = sphinx_fixture('match_all')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_ALL)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_ANY' do
+    expected = sphinx_fixture('match_any')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_ANY)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_PHRASE' do
+    expected = sphinx_fixture('match_phrase')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_PHRASE)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_BOOLEAN' do
+    expected = sphinx_fixture('match_boolean')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_BOOLEAN)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_EXTENDED' do
+    expected = sphinx_fixture('match_extended')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_FULLSCAN' do
+    expected = sphinx_fixture('match_fullscan')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_FULLSCAN)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with match SPH_MATCH_EXTENDED2' do
+    expected = sphinx_fixture('match_extended2')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED2)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with ranking mode SPH_RANK_PROXIMITY_BM25' do
+    expected = sphinx_fixture('ranking_proximity_bm25')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetRankingMode(Sphinx::Client::SPH_RANK_PROXIMITY_BM25)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with ranking mode SPH_RANK_BM25' do
+    expected = sphinx_fixture('ranking_bm25')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetRankingMode(Sphinx::Client::SPH_RANK_BM25)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with ranking mode SPH_RANK_NONE' do
+    expected = sphinx_fixture('ranking_none')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetRankingMode(Sphinx::Client::SPH_RANK_NONE)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with ranking mode SPH_RANK_WORDCOUNT' do
+    expected = sphinx_fixture('ranking_wordcount')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetRankingMode(Sphinx::Client::SPH_RANK_WORDCOUNT)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with sort mode SPH_SORT_RELEVANCE' do
+    expected = sphinx_fixture('sort_relevance')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetSortMode(Sphinx::Client::SPH_SORT_RELEVANCE)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with sort mode SPH_SORT_ATTR_DESC' do
+    expected = sphinx_fixture('sort_attr_desc')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetSortMode(Sphinx::Client::SPH_SORT_ATTR_DESC, 'sortby')
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with sort mode SPH_SORT_ATTR_ASC' do
+    expected = sphinx_fixture('sort_attr_asc')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetSortMode(Sphinx::Client::SPH_SORT_ATTR_ASC, 'sortby')
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with sort mode SPH_SORT_TIME_SEGMENTS' do
+    expected = sphinx_fixture('sort_time_segments')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetSortMode(Sphinx::Client::SPH_SORT_TIME_SEGMENTS, 'sortby')
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with sort mode SPH_SORT_EXTENDED' do
+    expected = sphinx_fixture('sort_extended')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetSortMode(Sphinx::Client::SPH_SORT_EXTENDED, 'sortby')
+    @sphinx.Query('query') rescue nil?
+  end
+
+
+  it 'should generate valid request with sort mode SPH_SORT_EXPR' do
+    expected = sphinx_fixture('sort_EXPR')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetSortMode(Sphinx::Client::SPH_SORT_EXPR, 'sortby')
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with weights' do
+    expected = sphinx_fixture('weights')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetWeights([10, 20, 30, 40])
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with field weights' do
+    expected = sphinx_fixture('field_weights')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFieldWeights({'field1' => 10, 'field2' => 20})
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with index weights' do
+    expected = sphinx_fixture('index_weights')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetIndexWeights({'index1' => 10, 'index2' => 20})
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with ID range' do
+    expected = sphinx_fixture('id_range')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetIDRange(10, 20)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with ID range and 64-bit ints' do
+    expected = sphinx_fixture('id_range64')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetIDRange(8589934591, 17179869183)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with values filter' do
+    expected = sphinx_fixture('filter')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilter('attr', [10, 20, 30])
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with two values filters' do
+    expected = sphinx_fixture('filters')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilter('attr2', [40, 50])
+    @sphinx.SetFilter('attr1', [10, 20, 30])
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with values filter excluded' do
+    expected = sphinx_fixture('filter_exclude')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilter('attr', [10, 20, 30], true)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with values filter range' do
+    expected = sphinx_fixture('filter_range')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilterRange('attr', 10, 20)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with two filter ranges' do
+    expected = sphinx_fixture('filter_ranges')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilterRange('attr2', 30, 40)
+    @sphinx.SetFilterRange('attr1', 10, 20)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with filter range excluded' do
+    expected = sphinx_fixture('filter_range_exclude')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilterRange('attr', 10, 20, true)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with float filter range' do
+    expected = sphinx_fixture('filter_float_range')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilterFloatRange('attr', 10.5, 20.3)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with float filter excluded' do
+    expected = sphinx_fixture('filter_float_range_exclude')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilterFloatRange('attr', 10.5, 20.3, true)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with different filters' do
+    expected = sphinx_fixture('filters_different')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetFilterRange('attr1', 10, 20, true)
+    @sphinx.SetFilter('attr3', [30, 40, 50])
+    @sphinx.SetFilterRange('attr1', 60, 70)
+    @sphinx.SetFilter('attr2', [80, 90, 100], true)
+    @sphinx.SetFilterFloatRange('attr1', 60.8, 70.5)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with geographical anchor point' do
+    expected = sphinx_fixture('geo_anchor')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGeoAnchor('attrlat', 'attrlong', 20.3, 40.7)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_DAY' do
+    expected = sphinx_fixture('group_by_day')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_DAY)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_WEEK' do
+    expected = sphinx_fixture('group_by_week')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_WEEK)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_MONTH' do
+    expected = sphinx_fixture('group_by_month')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_MONTH)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_YEAR' do
+    expected = sphinx_fixture('group_by_year')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_YEAR)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_ATTR' do
+    expected = sphinx_fixture('group_by_attr')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_ATTR)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_ATTRPAIR' do
+    expected = sphinx_fixture('group_by_attrpair')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_ATTRPAIR)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with group by SPH_GROUPBY_DAY with sort' do
+    expected = sphinx_fixture('group_by_day_sort')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_DAY, 'somesort')
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with count-distinct attribute' do
+    expected = sphinx_fixture('group_distinct')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_DAY)
+    @sphinx.SetGroupDistinct('attr')
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with retries count specified' do
+    expected = sphinx_fixture('retries')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetRetries(10)
+    @sphinx.Query('query') rescue nil?
+  end
+
+  it 'should generate valid request with retries count and delay specified' do
+    expected = sphinx_fixture('retries_delay')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.SetRetries(10, 20)
+    @sphinx.Query('query') rescue nil?
+  end
+end
+
+describe 'The RunQueries method of SphinxApi' do
+  include SphinxFixtureHelper
+
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+    @sphinx.stub!(:Connect).and_return(@sock)
+    @sphinx.stub!(:GetResponse).and_raise(Sphinx::SphinxError)
+  end
+
+  it 'should generate valid request for multiple queries' do
+    expected = sphinx_fixture('miltiple_queries')
+    @sock.should_receive(:send).with(expected, 0)
+    
+    @sphinx.SetRetries(10, 20)
+    @sphinx.AddQuery('test1')
+    @sphinx.SetGroupBy('attr', Sphinx::Client::SPH_GROUPBY_DAY)
+    @sphinx.AddQuery('test2') rescue nil?
+    
+    @sphinx.RunQueries rescue nil?
+  end
+end
+
+describe 'The BuildExcerpts method of SphinxApi' do
+  include SphinxFixtureHelper
+
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+    @sphinx.stub!(:Connect).and_return(@sock)
+    @sphinx.stub!(:GetResponse).and_raise(Sphinx::SphinxError)
+  end
+  
+  it 'should generate valid request with default parameters' do
+    expected = sphinx_fixture('excerpt_default')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.BuildExcerpts(['10', '20'], 'index', 'word1 word2') rescue nil?
+  end
+
+  it 'should generate valid request with custom parameters' do
+    expected = sphinx_fixture('excerpt_custom')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.BuildExcerpts(['10', '20'], 'index', 'word1 word2', { 'before_match' => 'before',
+                                                                  'after_match' => 'after',
+                                                                  'chunk_separator' => 'separator',
+                                                                  'limit' => 10 }) rescue nil?
+  end
+  
+  it 'should generate valid request with flags' do
+    expected = sphinx_fixture('excerpt_flags')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.BuildExcerpts(['10', '20'], 'index', 'word1 word2', { 'exact_phrase' => true,
+                                                                  'single_passage' => true,
+                                                                  'use_boundaries' => true,
+                                                                  'weight_order' => true }) rescue nil?
+  end
+end
+
+describe 'The BuildKeywords method of SphinxApi' do
+  include SphinxFixtureHelper
+
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+    @sphinx.stub!(:Connect).and_return(@sock)
+    @sphinx.stub!(:GetResponse).and_raise(Sphinx::SphinxError)
+  end
+  
+  it 'should generate valid request' do
+    expected = sphinx_fixture('keywords')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.BuildKeywords('test', 'index', true) rescue nil?
+  end
+end
+
+describe 'The UpdateAttributes method of SphinxApi' do
+  include SphinxFixtureHelper
+
+  before(:each) do
+    @sphinx = Sphinx::Client.new
+    @sock = mock('TCPSocket')
+    @sphinx.stub!(:Connect).and_return(@sock)
+    @sphinx.stub!(:GetResponse).and_raise(Sphinx::SphinxError)
+  end
+  
+  it 'should generate valid request' do
+    expected = sphinx_fixture('update_attributes')
+    @sock.should_receive(:send).with(expected, 0)
+    @sphinx.UpdateAttributes('index', ['group'], { 123 => [456] }) rescue nil?
+  end
+end

+ 8 - 0
api/ruby/spec/fixtures/default_search.php

@@ -0,0 +1,8 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->Query('query');
+
+?>

+ 8 - 0
api/ruby/spec/fixtures/default_search_index.php

@@ -0,0 +1,8 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->Query('query', 'index');
+
+?>

+ 11 - 0
api/ruby/spec/fixtures/excerpt_custom.php

@@ -0,0 +1,11 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->BuildExcerpts(array('10', '20'), 'index', 'word1 word2', array('before_match' => 'before',
+                                                                    'after_match' => 'after',
+                                                                    'chunk_separator' => 'separator',
+                                                                    'limit' => 10));
+
+?>

+ 8 - 0
api/ruby/spec/fixtures/excerpt_default.php

@@ -0,0 +1,8 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->BuildExcerpts(array('10', '20'), 'index', 'word1 word2');
+
+?>

+ 11 - 0
api/ruby/spec/fixtures/excerpt_flags.php

@@ -0,0 +1,11 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->BuildExcerpts(array('10', '20'), 'index', 'word1 word2', array('exact_phrase' => true,
+                                                                    'single_passage' => true,
+                                                                    'use_boundaries' => true,
+                                                                    'weight_order' => true));
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/field_weights.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFieldWeights(array('field1' => 10, 'field2' => 20));
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/filter.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilter('attr', array(10, 20, 30));
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/filter_exclude.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilter('attr', array(10, 20, 30), true);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/filter_float_range.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilterFloatRange('attr', 10.5, 20.3);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/filter_float_range_exclude.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilterFloatRange('attr', 10.5, 20.3, true);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/filter_range.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilterRange('attr', 10, 20);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/filter_range_exclude.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilterRange('attr', 10, 20, true);
+$cl->Query('query');
+
+?>

+ 10 - 0
api/ruby/spec/fixtures/filter_ranges.php

@@ -0,0 +1,10 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilterRange('attr2', 30, 40);
+$cl->SetFilterRange('attr1', 10, 20);
+$cl->Query('query');
+
+?>

+ 10 - 0
api/ruby/spec/fixtures/filters.php

@@ -0,0 +1,10 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilter('attr2', array(40, 50));
+$cl->SetFilter('attr1', array(10, 20, 30));
+$cl->Query('query');
+
+?>

+ 13 - 0
api/ruby/spec/fixtures/filters_different.php

@@ -0,0 +1,13 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetFilterRange('attr1', 10, 20, true);
+$cl->SetFilter('attr3', array(30, 40, 50));
+$cl->SetFilterRange('attr1', 60, 70);
+$cl->SetFilter('attr2', array(80, 90, 100), true);
+$cl->SetFilterFloatRange('attr1', 60.8, 70.5);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/geo_anchor.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGeoAnchor('attrlat', 'attrlong', 20.3, 40.7);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_attr.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_ATTR);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_attrpair.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_ATTRPAIR);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_day.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_DAY);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_day_sort.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_DAY, 'somesort');
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_month.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_MONTH);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_week.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_WEEK);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/group_by_year.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_YEAR);
+$cl->Query('query');
+
+?>

+ 10 - 0
api/ruby/spec/fixtures/group_distinct.php

@@ -0,0 +1,10 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetGroupBy('attr', SPH_GROUPBY_DAY);
+$cl->SetGroupDistinct('attr');
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/id_range.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetIDRange(10, 20);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/id_range64.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetIDRange(8589934591, 17179869183);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/index_weights.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetIndexWeights(array('index1' => 10, 'index2' => 20));
+$cl->Query('query');
+
+?>

+ 8 - 0
api/ruby/spec/fixtures/keywords.php

@@ -0,0 +1,8 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->BuildKeywords('test', 'index', true);
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/limits.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetLimits(10, 20);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/limits_cutoff.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetLimits(10, 20, 30, 40);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/limits_max.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetLimits(10, 20, 30);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/limits_max_cutoff.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetLimits(10, 20, 30, 40);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_all.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_ALL);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_any.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_ANY);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_boolean.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_BOOLEAN);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_extended.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_EXTENDED);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_extended2.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_EXTENDED2);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_fullscan.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_FULLSCAN);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/match_phrase.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMatchMode(SPH_MATCH_PHRASE);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/max_query_time.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetMaxQueryTime(1000);
+$cl->Query('query');
+
+?>

+ 12 - 0
api/ruby/spec/fixtures/miltiple_queries.php

@@ -0,0 +1,12 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRetries(10, 20);
+$cl->AddQuery('test1');
+$cl->SetGroupBy('attr', SPH_GROUPBY_DAY);
+$cl->AddQuery('test2');
+$cl->RunQueries();
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/ranking_bm25.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRankingMode(SPH_RANK_BM25);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/ranking_none.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRankingMode(SPH_RANK_NONE);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/ranking_proximity_bm25.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRankingMode(SPH_RANK_PROXIMITY_BM25);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/ranking_wordcount.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRankingMode(SPH_RANK_WORDCOUNT);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/retries.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRetries(10);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/retries_delay.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetRetries(10, 20);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/sort_attr_asc.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetSortMode(SPH_SORT_ATTR_ASC, 'sortby');
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/sort_attr_desc.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetSortMode(SPH_SORT_ATTR_DESC, 'sortby');
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/sort_expr.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetSortMode(SPH_SORT_EXPR, 'sortby');
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/sort_extended.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetSortMode(SPH_SORT_EXTENDED, 'sortby');
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/sort_relevance.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetSortMode(SPH_SORT_RELEVANCE);
+$cl->Query('query');
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/sort_time_segments.php

@@ -0,0 +1,9 @@
+<?php
+
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetSortMode(SPH_SORT_TIME_SEGMENTS, 'sortby');
+$cl->Query('query');
+
+?>

+ 1181 - 0
api/ruby/spec/fixtures/sphinxapi.php

@@ -0,0 +1,1181 @@
+<?php
+
+//
+// $Id$
+//
+
+//
+// Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+/////////////////////////////////////////////////////////////////////////////
+// PHP version of Sphinx searchd client (PHP API)
+/////////////////////////////////////////////////////////////////////////////
+
+/// known searchd commands
+define ( "SEARCHD_COMMAND_SEARCH",	0 );
+define ( "SEARCHD_COMMAND_EXCERPT",	1 );
+define ( "SEARCHD_COMMAND_UPDATE",	2 );
+define ( "SEARCHD_COMMAND_KEYWORDS",3 );
+
+/// current client-side command implementation versions
+define ( "VER_COMMAND_SEARCH",		0x113 );
+define ( "VER_COMMAND_EXCERPT",		0x100 );
+define ( "VER_COMMAND_UPDATE",		0x101 );
+define ( "VER_COMMAND_KEYWORDS",	0x100 );
+
+/// known searchd status codes
+define ( "SEARCHD_OK",				0 );
+define ( "SEARCHD_ERROR",			1 );
+define ( "SEARCHD_RETRY",			2 );
+define ( "SEARCHD_WARNING",			3 );
+
+/// known match modes
+define ( "SPH_MATCH_ALL",			0 );
+define ( "SPH_MATCH_ANY",			1 );
+define ( "SPH_MATCH_PHRASE",		2 );
+define ( "SPH_MATCH_BOOLEAN",		3 );
+define ( "SPH_MATCH_EXTENDED",		4 );
+define ( "SPH_MATCH_FULLSCAN",		5 );
+define ( "SPH_MATCH_EXTENDED2",		6 );	// extended engine V2 (TEMPORARY, WILL BE REMOVED)
+
+/// known ranking modes (ext2 only)
+define ( "SPH_RANK_PROXIMITY_BM25",	0 );	///< default mode, phrase proximity major factor and BM25 minor one
+define ( "SPH_RANK_BM25",			1 );	///< statistical mode, BM25 ranking only (faster but worse quality)
+define ( "SPH_RANK_NONE",			2 );	///< no ranking, all matches get a weight of 1
+define ( "SPH_RANK_WORDCOUNT",		3 );	///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
+
+/// known sort modes
+define ( "SPH_SORT_RELEVANCE",		0 );
+define ( "SPH_SORT_ATTR_DESC",		1 );
+define ( "SPH_SORT_ATTR_ASC",		2 );
+define ( "SPH_SORT_TIME_SEGMENTS", 	3 );
+define ( "SPH_SORT_EXTENDED", 		4 );
+define ( "SPH_SORT_EXPR", 			5 );
+
+/// known filter types
+define ( "SPH_FILTER_VALUES",		0 );
+define ( "SPH_FILTER_RANGE",		1 );
+define ( "SPH_FILTER_FLOATRANGE",	2 );
+
+/// known attribute types
+define ( "SPH_ATTR_INTEGER",		1 );
+define ( "SPH_ATTR_TIMESTAMP",		2 );
+define ( "SPH_ATTR_ORDINAL",		3 );
+define ( "SPH_ATTR_BOOL",			4 );
+define ( "SPH_ATTR_FLOAT",			5 );
+define ( "SPH_ATTR_MULTI",			0x40000000 );
+
+/// known grouping functions
+define ( "SPH_GROUPBY_DAY",			0 );
+define ( "SPH_GROUPBY_WEEK",		1 );
+define ( "SPH_GROUPBY_MONTH",		2 );
+define ( "SPH_GROUPBY_YEAR",		3 );
+define ( "SPH_GROUPBY_ATTR",		4 );
+define ( "SPH_GROUPBY_ATTRPAIR",	5 );
+
+
+/// portably pack numeric to 64 unsigned bits, network order
+function sphPack64 ( $v )
+{
+	assert ( is_numeric($v) );
+
+	// x64 route
+	if ( PHP_INT_SIZE>=8 )
+	{
+		$i = (int)$v;
+		return pack ( "NN", $i>>32, $i&((1<<32)-1) );
+	}
+
+	// x32 route, bcmath
+	$x = "4294967296";
+	if ( function_exists("bcmul") )
+	{
+		$h = bcdiv ( $v, $x, 0 );
+		$l = bcmod ( $v, $x );
+		return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
+	}
+
+	// x32 route, 15 or less decimal digits
+	// we can use float, because its actually double and has 52 precision bits
+	if ( strlen($v)<=15 )
+	{
+		$f = (float)$v;
+		$h = (int)($f/$x);
+		$l = (int)($f-$x*$h);
+		return pack ( "NN", $h, $l );
+	}
+
+	// x32 route, 16 or more decimal digits
+	// well, let me know if you *really* need this
+	die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
+}
+
+
+/// portably unpack 64 unsigned bits, network order to numeric
+function sphUnpack64 ( $v )
+{
+	list($h,$l) = array_values ( unpack ( "N*N*", $v ) );
+
+	// x64 route
+	if ( PHP_INT_SIZE>=8 )
+	{
+		if ( $h<0 ) $h += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again
+		if ( $l<0 ) $l += (1<<32);
+		return ($h<<32) + $l;
+	}
+
+	// x32 route
+	$h = sprintf ( "%u", $h );
+	$l = sprintf ( "%u", $l );
+	$x = "4294967296";
+
+	// bcmath
+	if ( function_exists("bcmul") )
+		return bcadd ( $l, bcmul ( $x, $h ) );
+
+	// no bcmath, 15 or less decimal digits
+	// we can use float, because its actually double and has 52 precision bits
+	if ( $h<1048576 )
+	{
+		$f = ((float)$h)*$x + (float)$l;
+		return sprintf ( "%.0f", $f ); // builtin conversion is only about 39-40 bits precise!
+	}
+
+	// x32 route, 16 or more decimal digits
+	// well, let me know if you *really* need this
+	die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
+}
+
+
+/// sphinx searchd client class
+class SphinxClient
+{
+	var $_host;			///< searchd host (default is "localhost")
+	var $_port;			///< searchd port (default is 3312)
+	var $_offset;		///< how many records to seek from result-set start (default is 0)
+	var $_limit;		///< how many records to return from result-set starting at offset (default is 20)
+	var $_mode;			///< query matching mode (default is SPH_MATCH_ALL)
+	var $_weights;		///< per-field weights (default is 1 for all fields)
+	var $_sort;			///< match sorting mode (default is SPH_SORT_RELEVANCE)
+	var $_sortby;		///< attribute to sort by (default is "")
+	var $_min_id;		///< min ID to match (default is 0, which means no limit)
+	var $_max_id;		///< max ID to match (default is 0, which means no limit)
+	var $_filters;		///< search filters
+	var $_groupby;		///< group-by attribute name
+	var $_groupfunc;	///< group-by function (to pre-process group-by attribute value with)
+	var $_groupsort;	///< group-by sorting clause (to sort groups in result set with)
+	var $_groupdistinct;///< group-by count-distinct attribute
+	var $_maxmatches;	///< max matches to retrieve
+	var $_cutoff;		///< cutoff to stop searching at (default is 0)
+	var $_retrycount;	///< distributed retries count
+	var $_retrydelay;	///< distributed retries delay
+	var $_anchor;		///< geographical anchor point
+	var $_indexweights;	///< per-index weights
+	var $_ranker;		///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
+	var $_maxquerytime;	///< max query time, milliseconds (default is 0, do not limit)
+	var $_fieldweights;	///< per-field-name weights
+
+	var $_error;		///< last error message
+	var $_warning;		///< last warning message
+
+	var $_reqs;			///< requests array for multi-query
+	var $_mbenc;		///< stored mbstring encoding
+	var $_arrayresult;	///< whether $result["matches"] should be a hash or an array
+
+	/////////////////////////////////////////////////////////////////////////////
+	// common stuff
+	/////////////////////////////////////////////////////////////////////////////
+
+	/// create a new client object and fill defaults
+	function SphinxClient ()
+	{
+		// per-client-object settings
+		$this->_host		= "localhost";
+		$this->_port		= 3312;
+
+		// per-query settings
+		$this->_offset		= 0;
+		$this->_limit		= 20;
+		$this->_mode		= SPH_MATCH_ALL;
+		$this->_weights		= array ();
+		$this->_sort		= SPH_SORT_RELEVANCE;
+		$this->_sortby		= "";
+		$this->_min_id		= 0;
+		$this->_max_id		= 0;
+		$this->_filters		= array ();
+		$this->_groupby		= "";
+		$this->_groupfunc	= SPH_GROUPBY_DAY;
+		$this->_groupsort	= "@group desc";
+		$this->_groupdistinct= "";
+		$this->_maxmatches	= 1000;
+		$this->_cutoff		= 0;
+		$this->_retrycount	= 0;
+		$this->_retrydelay	= 0;
+		$this->_anchor		= array ();
+		$this->_indexweights= array ();
+		$this->_ranker		= SPH_RANK_PROXIMITY_BM25;
+		$this->_maxquerytime= 0;
+		$this->_fieldweights= array();
+
+		$this->_error		= ""; // per-reply fields (for single-query case)
+		$this->_warning		= "";
+		$this->_reqs		= array ();	// requests storage (for multi-query case)
+		$this->_mbenc		= "";
+		$this->_arrayresult	= false;
+	}
+
+	/// get last error message (string)
+	function GetLastError ()
+	{
+		return $this->_error;
+	}
+
+	/// get last warning message (string)
+	function GetLastWarning ()
+	{
+		return $this->_warning;
+	}
+
+	/// set searchd host name (string) and port (integer)
+	function SetServer ( $host, $port )
+	{
+		assert ( is_string($host) );
+		assert ( is_int($port) );
+		$this->_host = $host;
+		$this->_port = $port;
+	}
+
+	/////////////////////////////////////////////////////////////////////////////
+
+	/// enter mbstring workaround mode
+	function _MBPush ()
+	{
+		$this->_mbenc = "";
+		if ( ini_get ( "mbstring.func_overload" ) & 2 )
+		{
+			$this->_mbenc = mb_internal_encoding();
+			mb_internal_encoding ( "latin1" );
+		}
+    }
+
+	/// leave mbstring workaround mode
+	function _MBPop ()
+	{
+		if ( $this->_mbenc )
+			mb_internal_encoding ( $this->_mbenc );
+	}
+
+	/// connect to searchd server
+	function _Connect ()
+	{
+		return fopen('php://stdout', 'w');
+	}
+	
+	function _OldConnect()
+	{
+		if (!( $fp = @fsockopen ( $this->_host, $this->_port ) ) )
+		{
+			$this->_error = "connection to {$this->_host}:{$this->_port} failed";
+			return false;
+		}
+
+		// check version
+		list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
+		$v = (int)$v;
+		if ( $v<1 )
+		{
+			fclose ( $fp );
+			$this->_error = "expected searchd protocol version 1+, got version '$v'";
+			return false;
+		}
+
+		// all ok, send my version
+		fwrite ( $fp, pack ( "N", 1 ) );
+		return $fp;
+	}
+
+	/// get and check response packet from searchd server
+	function _GetResponse ( $fp, $client_ver )
+	{
+		return false;
+	}
+	
+	function _OldGetResponse ( $fp, $client_ver )
+	{
+		$response = "";
+		$len = 0;
+
+		$header = fread ( $fp, 8 );
+		if ( strlen($header)==8 )
+		{
+			list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
+			$left = $len;
+			while ( $left>0 && !feof($fp) )
+			{
+				$chunk = fread ( $fp, $left );
+				if ( $chunk )
+				{
+					$response .= $chunk;
+					$left -= strlen($chunk);
+				}
+			}
+		}
+		fclose ( $fp );
+
+		// check response
+		$read = strlen ( $response );
+		if ( !$response || $read!=$len )
+		{
+			$this->_error = $len
+				? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
+				: "received zero-sized searchd response";
+			return false;
+		}
+
+		// check status
+		if ( $status==SEARCHD_WARNING )
+		{
+			list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
+			$this->_warning = substr ( $response, 4, $wlen );
+			return substr ( $response, 4+$wlen );
+		}
+		if ( $status==SEARCHD_ERROR )
+		{
+			$this->_error = "searchd error: " . substr ( $response, 4 );
+			return false;
+		}
+		if ( $status==SEARCHD_RETRY )
+		{
+			$this->_error = "temporary searchd error: " . substr ( $response, 4 );
+			return false;
+		}
+		if ( $status!=SEARCHD_OK )
+		{
+			$this->_error = "unknown status code '$status'";
+			return false;
+		}
+
+		// check version
+		if ( $ver<$client_ver )
+		{
+			$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
+				$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
+		}
+
+		return $response;
+	}
+
+	/////////////////////////////////////////////////////////////////////////////
+	// searching
+	/////////////////////////////////////////////////////////////////////////////
+
+	/// set offset and count into result set,
+	/// and optionally set max-matches and cutoff limits
+	/// $offset/$limit select the result window; $max caps how many matches
+	/// searchd keeps per query, $cutoff stops searching after that many matches
+	function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
+	{
+		assert ( is_int($offset) );
+		assert ( is_int($limit) );
+		assert ( $offset>=0 );
+		assert ( $limit>0 );
+		assert ( $max>=0 );
+		$this->_offset = $offset;
+		$this->_limit = $limit;
+		// zero means "keep the current setting"; only positive values override
+		if ( $max>0 )
+			$this->_maxmatches = $max;
+		if ( $cutoff>0 )
+			$this->_cutoff = $cutoff;
+	}
+
+	/// set maximum query time, in milliseconds, per-index
+	/// integer, 0 means "do not limit"
+	function SetMaxQueryTime ( $max )
+	{
+		assert ( is_int($max) );
+		assert ( $max>=0 );
+		$this->_maxquerytime = $max;
+	}
+
+	/// set matching mode
+	/// $mode must be one of the SPH_MATCH_xxx constants
+	function SetMatchMode ( $mode )
+	{
+		assert ( $mode==SPH_MATCH_ALL
+			|| $mode==SPH_MATCH_ANY
+			|| $mode==SPH_MATCH_PHRASE
+			|| $mode==SPH_MATCH_BOOLEAN
+			|| $mode==SPH_MATCH_EXTENDED
+			|| $mode==SPH_MATCH_FULLSCAN
+			|| $mode==SPH_MATCH_EXTENDED2 );
+		$this->_mode = $mode;
+	}
+
+	/// set ranking mode
+	/// $ranker must be one of the SPH_RANK_xxx constants
+	function SetRankingMode ( $ranker )
+	{
+		assert ( $ranker==SPH_RANK_PROXIMITY_BM25
+			|| $ranker==SPH_RANK_BM25
+			|| $ranker==SPH_RANK_NONE
+			|| $ranker==SPH_RANK_WORDCOUNT );
+		$this->_ranker = $ranker;
+	}
+
+	/// set matches sorting mode
+	/// $sortby is the sort clause string; it is required (non-empty)
+	/// for every mode except SPH_SORT_RELEVANCE
+	function SetSortMode ( $mode, $sortby="" )
+	{
+		assert (
+			$mode==SPH_SORT_RELEVANCE ||
+			$mode==SPH_SORT_ATTR_DESC ||
+			$mode==SPH_SORT_ATTR_ASC ||
+			$mode==SPH_SORT_TIME_SEGMENTS ||
+			$mode==SPH_SORT_EXTENDED ||
+			$mode==SPH_SORT_EXPR );
+		assert ( is_string($sortby) );
+		assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );
+
+		$this->_sort = $mode;
+		$this->_sortby = $sortby;
+	}
+
+	/// bind per-field weights by order
+	/// DEPRECATED; use SetFieldWeights() instead
+	function SetWeights ( $weights )
+	{
+		assert ( is_array($weights) );
+		foreach ( $weights as $weight )
+			assert ( is_int($weight) );
+
+		$this->_weights = $weights;
+	}
+
+	/// bind per-field weights by name
+	/// $weights maps field name (string) to an integer weight
+	function SetFieldWeights ( $weights )
+	{
+		assert ( is_array($weights) );
+		foreach ( $weights as $name=>$weight )
+		{
+			assert ( is_string($name) );
+			assert ( is_int($weight) );
+		}
+		$this->_fieldweights = $weights;
+	}
+
+	/// bind per-index weights by name
+	/// $weights maps index name (string) to an integer weight
+	function SetIndexWeights ( $weights )
+	{
+		assert ( is_array($weights) );
+		foreach ( $weights as $index=>$weight )
+		{
+			assert ( is_string($index) );
+			assert ( is_int($weight) );
+		}
+		$this->_indexweights = $weights;
+	}
+
+	/// set IDs range to match
+	/// only match records if document ID is between $min and $max (inclusive)
+	/// note: is_numeric (not is_int) is checked on purpose — 64-bit ids may
+	/// arrive as strings/floats; they are serialized via sphPack64() in AddQuery()
+	function SetIDRange ( $min, $max )
+	{
+		assert ( is_numeric($min) );
+		assert ( is_numeric($max) );
+		assert ( $min<=$max );
+		$this->_min_id = $min;
+		$this->_max_id = $max;
+	}
+
+	/// set values set filter
+	/// only match records where $attribute value is in given set
+	function SetFilter ( $attribute, $values, $exclude=false )
+	{
+		assert ( is_string($attribute) );
+		assert ( is_array($values) );
+		assert ( count($values) );
+
+		// re-checked at runtime because asserts may be disabled;
+		// an empty/non-array $values is then silently ignored
+		if ( is_array($values) && count($values) )
+		{
+			foreach ( $values as $value )
+				assert ( is_numeric($value) );
+
+			$this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
+		}
+	}
+
+	/// set range filter
+	/// only match records if $attribute value is between $min and $max (inclusive)
+	function SetFilterRange ( $attribute, $min, $max, $exclude=false )
+	{
+		assert ( is_string($attribute) );
+		assert ( is_int($min) );
+		assert ( is_int($max) );
+		assert ( $min<=$max );
+
+		$this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
+	}
+
+	/// set float range filter
+	/// only match records if $attribute value is between $min and $max (inclusive)
+	function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
+	{
+		assert ( is_string($attribute) );
+		assert ( is_float($min) );
+		assert ( is_float($max) );
+		assert ( $min<=$max );
+
+		$this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
+	}
+
+	/// setup anchor point for geosphere distance calculations
+	/// required to use @geodist in filters and sorting
+	/// latitude and longitude must be in radians
+	function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
+	{
+		assert ( is_string($attrlat) );
+		assert ( is_string($attrlong) );
+		assert ( is_float($lat) );
+		assert ( is_float($long) );
+
+		$this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
+	}
+
+	/// set grouping attribute and function
+	/// $func must be one of the SPH_GROUPBY_xxx constants;
+	/// $groupsort controls ordering of the groups themselves
+	function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
+	{
+		assert ( is_string($attribute) );
+		assert ( is_string($groupsort) );
+		assert ( $func==SPH_GROUPBY_DAY
+			|| $func==SPH_GROUPBY_WEEK
+			|| $func==SPH_GROUPBY_MONTH
+			|| $func==SPH_GROUPBY_YEAR
+			|| $func==SPH_GROUPBY_ATTR
+			|| $func==SPH_GROUPBY_ATTRPAIR );
+
+		$this->_groupby = $attribute;
+		$this->_groupfunc = $func;
+		$this->_groupsort = $groupsort;
+	}
+
+	/// set count-distinct attribute for group-by queries
+	function SetGroupDistinct ( $attribute )
+	{
+		assert ( is_string($attribute) );
+		$this->_groupdistinct = $attribute;
+	}
+
+	/// set distributed retries count and delay
+	function SetRetries ( $count, $delay=0 )
+	{
+		assert ( is_int($count) && $count>=0 );
+		assert ( is_int($delay) && $delay>=0 );
+		$this->_retrycount = $count;
+		$this->_retrydelay = $delay;
+	}
+
+	/// set result set format (hash or array; hash by default)
+	/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
+	function SetArrayResult ( $arrayresult )
+	{
+		assert ( is_bool($arrayresult) );
+		$this->_arrayresult = $arrayresult;
+	}
+
+	//////////////////////////////////////////////////////////////////////////////
+
+	/// clear all filters (for multi-queries)
+	/// also clears the geo anchor set via SetGeoAnchor()
+	function ResetFilters ()
+	{
+		$this->_filters = array();
+		$this->_anchor = array();
+	}
+
+	/// clear groupby settings (for multi-queries)
+	/// restores all group-by state to the constructor defaults
+	function ResetGroupBy ()
+	{
+		$this->_groupby		= "";
+		$this->_groupfunc	= SPH_GROUPBY_DAY;
+		$this->_groupsort	= "@group desc";
+		$this->_groupdistinct= "";
+	}
+
+	//////////////////////////////////////////////////////////////////////////////
+
+	/// connect to searchd server, run given search query through given indexes,
+	/// and return the search results
+	/// convenience wrapper around AddQuery()+RunQueries() for a single query;
+	/// returns the result set hash on success, false on failure
+	function Query ( $query, $index="*", $comment="" )
+	{
+		assert ( empty($this->_reqs) );
+
+		$this->AddQuery ( $query, $index, $comment );
+		$results = $this->RunQueries ();
+
+		if ( !is_array($results) )
+			return false; // probably network error; error message should be already filled
+
+		$this->_error = $results[0]["error"];
+		$this->_warning = $results[0]["warning"];
+		if ( $results[0]["status"]==SEARCHD_ERROR )
+			return false;
+		else
+			return $results[0];
+	}
+
+	/// helper to pack floats in network byte order
+	/// reinterprets the float's raw 32 bits as an unsigned long, then repacks big-endian
+	function _PackFloat ( $f )
+	{
+		$t1 = pack ( "f", $f ); // machine order
+		list(,$t2) = unpack ( "L*", $t1 ); // int in machine order
+		return pack ( "N", $t2 );
+	}
+
+	/// add query to multi-query batch
+	/// returns index into results array from RunQueries() call
+	/// serializes all current query settings into the searchd wire format;
+	/// all integers go out big-endian (pack "N"/"n"), strings are length-prefixed
+	function AddQuery ( $query, $index="*", $comment="" )
+	{
+		// mbstring workaround
+		$this->_MBPush ();
+
+		// build request
+		$req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits
+		$req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
+		$req .= pack ( "N", strlen($query) ) . $query; // query itself
+		$req .= pack ( "N", count($this->_weights) ); // weights
+		foreach ( $this->_weights as $weight )
+			$req .= pack ( "N", (int)$weight );
+		$req .= pack ( "N", strlen($index) ) . $index; // indexes
+		$req .= pack ( "N", 1 ); // id64 range marker
+		$req .= sphPack64 ( $this->_min_id ) . sphPack64 ( $this->_max_id ); // id64 range
+
+		// filters
+		$req .= pack ( "N", count($this->_filters) );
+		foreach ( $this->_filters as $filter )
+		{
+			$req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
+			$req .= pack ( "N", $filter["type"] );
+			switch ( $filter["type"] )
+			{
+				case SPH_FILTER_VALUES:
+					$req .= pack ( "N", count($filter["values"]) );
+					foreach ( $filter["values"] as $value )
+						$req .= pack ( "N", floatval($value) ); // this uberhack is to workaround 32bit signed int limit on x32 platforms
+					break;
+
+				case SPH_FILTER_RANGE:
+					$req .= pack ( "NN", $filter["min"], $filter["max"] );
+					break;
+
+				case SPH_FILTER_FLOATRANGE:
+					$req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
+					break;
+
+				default:
+					assert ( 0 && "internal error: unhandled filter type" );
+			}
+			$req .= pack ( "N", $filter["exclude"] );
+		}
+
+		// group-by clause, max-matches count, group-sort clause, cutoff count
+		$req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
+		$req .= pack ( "N", $this->_maxmatches );
+		$req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
+		$req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
+		$req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;
+
+		// anchor point
+		if ( empty($this->_anchor) )
+		{
+			$req .= pack ( "N", 0 );
+		} else
+		{
+			$a =& $this->_anchor;
+			$req .= pack ( "N", 1 );
+			$req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"];
+			$req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"];
+			$req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] );
+		}
+
+		// per-index weights
+		$req .= pack ( "N", count($this->_indexweights) );
+		foreach ( $this->_indexweights as $idx=>$weight )
+			$req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );
+
+		// max query time
+		$req .= pack ( "N", $this->_maxquerytime );
+
+		// per-field weights
+		$req .= pack ( "N", count($this->_fieldweights) );
+		foreach ( $this->_fieldweights as $field=>$weight )
+			$req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );
+
+		// comment
+		$req .= pack ( "N", strlen($comment) ) . $comment;
+
+		// mbstring workaround
+		$this->_MBPop ();
+
+		// store request to requests array
+		$this->_reqs[] = $req;
+		return count($this->_reqs)-1;
+	}
+
+	/// connect to searchd, run queries batch, and return an array of result sets
+	/// returns false on connection/protocol failure; per-query errors are reported
+	/// in each result's "error"/"warning"/"status" fields instead
+	function RunQueries ()
+	{
+		if ( empty($this->_reqs) )
+		{
+			$this->_error = "no queries defined, issue AddQuery() first";
+			return false;
+		}
+
+		// mbstring workaround
+		$this->_MBPush ();
+
+		if (!( $fp = $this->_Connect() ))
+		{
+			$this->_MBPop ();
+			return false;
+		}
+
+		////////////////////////////
+		// send query, get response
+		////////////////////////////
+
+		$nreqs = count($this->_reqs);
+		$req = join ( "", $this->_reqs );
+		$len = 4+strlen($req); // $len covers the nreqs counter plus the request bodies
+		$req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header
+
+		fwrite ( $fp, $req, $len+8 ); // +8 = command/version/length header bytes
+		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ))
+		{
+			$this->_MBPop ();
+			return false;
+		}
+
+		$this->_reqs = array ();
+
+		//////////////////
+		// parse response
+		//////////////////
+
+		$p = 0; // current position
+		$max = strlen($response); // max position for checks, to protect against broken responses
+
+		$results = array ();
+		for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
+		{
+			$results[] = array();
+			$result =& $results[$ires];
+
+			$result["error"] = "";
+			$result["warning"] = "";
+
+			// extract status
+			list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			$result["status"] = $status;
+			if ( $status!=SEARCHD_OK )
+			{
+				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				$message = substr ( $response, $p, $len ); $p += $len;
+
+				if ( $status==SEARCHD_WARNING )
+				{
+					$result["warning"] = $message;
+				} else
+				{
+					$result["error"] = $message;
+					continue;
+				}
+			}
+
+			// read schema
+			$fields = array ();
+			$attrs = array ();
+
+			list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			while ( $nfields-->0 && $p<$max )
+			{
+				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				$fields[] = substr ( $response, $p, $len ); $p += $len;
+			}
+			$result["fields"] = $fields;
+
+			list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			while ( $nattrs-->0 && $p<$max  )
+			{
+				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				$attr = substr ( $response, $p, $len ); $p += $len;
+				list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				$attrs[$attr] = $type;
+			}
+			$result["attrs"] = $attrs;
+
+			// read match count
+			list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+			list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+
+			// read matches
+			$idx = -1;
+			while ( $count-->0 && $p<$max )
+			{
+				// index into result array
+				$idx++;
+
+				// parse document id and weight
+				if ( $id64 )
+				{
+					$doc = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8;
+					list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				} else
+				{
+					list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
+						substr ( $response, $p, 8 ) ) );
+					$p += 8;
+
+					if ( PHP_INT_SIZE>=8 )
+					{
+						// x64 route, workaround broken unpack() in 5.2.2+
+						if ( $doc<0 ) $doc += (1<<32);
+					} else
+					{
+						// x32 route, workaround php signed/unsigned braindamage
+						$doc = sprintf ( "%u", $doc );
+					}
+				}
+				$weight = sprintf ( "%u", $weight );
+
+				// create match entry; matches are keyed by doc id unless
+				// SetArrayResult(true) was used, in which case by ordinal index
+				if ( $this->_arrayresult )
+					$result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
+				else
+					$result["matches"][$doc]["weight"] = $weight;
+
+				// parse and create attributes
+				$attrvals = array ();
+				foreach ( $attrs as $attr=>$type )
+				{
+					// handle floats
+					if ( $type==SPH_ATTR_FLOAT )
+					{
+						list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+						list(,$fval) = unpack ( "f*", pack ( "L", $uval ) ); 
+						$attrvals[$attr] = $fval;
+						continue;
+					}
+
+					// handle everything else as unsigned ints
+					list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+					if ( $type & SPH_ATTR_MULTI )
+					{
+						$attrvals[$attr] = array ();
+						$nvalues = $val;
+						while ( $nvalues-->0 && $p<$max )
+						{
+							list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+							$attrvals[$attr][] = sprintf ( "%u", $val );
+						}
+					} else
+					{
+						$attrvals[$attr] = sprintf ( "%u", $val );
+					}
+				}
+
+				if ( $this->_arrayresult )
+					$result["matches"][$idx]["attrs"] = $attrvals;
+				else
+					$result["matches"][$doc]["attrs"] = $attrvals;
+			}
+
+			// per-query statistics footer
+			list ( $total, $total_found, $msecs, $words ) =
+				array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
+			$result["total"] = sprintf ( "%u", $total );
+			$result["total_found"] = sprintf ( "%u", $total_found );
+			$result["time"] = sprintf ( "%.3f", $msecs/1000 );
+			$p += 16;
+
+			// per-word statistics
+			while ( $words-->0 && $p<$max )
+			{
+				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+				$word = substr ( $response, $p, $len ); $p += $len;
+				list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
+				$result["words"][$word] = array (
+					"docs"=>sprintf ( "%u", $docs ),
+					"hits"=>sprintf ( "%u", $hits ) );
+			}
+		}
+
+		$this->_MBPop ();
+		return $results;
+	}
+
+	/////////////////////////////////////////////////////////////////////////////
+	// excerpts generation
+	/////////////////////////////////////////////////////////////////////////////
+
+	/// connect to searchd server, and generate excerpts (snippets)
+	/// of given documents for given query. returns false on failure,
+	/// an array of snippets on success
+	function BuildExcerpts ( $docs, $index, $words, $opts=array() )
+	{
+		assert ( is_array($docs) );
+		assert ( is_string($index) );
+		assert ( is_string($words) );
+		assert ( is_array($opts) );
+
+		$this->_MBPush ();
+
+		if (!( $fp = $this->_Connect() ))
+		{
+			$this->_MBPop();
+			return false;
+		}
+
+		/////////////////
+		// fixup options
+		/////////////////
+
+		if ( !isset($opts["before_match"]) )		$opts["before_match"] = "<b>";
+		if ( !isset($opts["after_match"]) )			$opts["after_match"] = "</b>";
+		if ( !isset($opts["chunk_separator"]) )		$opts["chunk_separator"] = " ... ";
+		if ( !isset($opts["limit"]) )				$opts["limit"] = 256;
+		if ( !isset($opts["around"]) )				$opts["around"] = 5;
+		if ( !isset($opts["exact_phrase"]) )		$opts["exact_phrase"] = false;
+		if ( !isset($opts["single_passage"]) )		$opts["single_passage"] = false;
+		if ( !isset($opts["use_boundaries"]) )		$opts["use_boundaries"] = false;
+		if ( !isset($opts["weight_order"]) )		$opts["weight_order"] = false;
+
+		/////////////////
+		// build request
+		/////////////////
+
+		// v.1.0 req
+		$flags = 1; // remove spaces
+		if ( $opts["exact_phrase"] )	$flags |= 2;
+		if ( $opts["single_passage"] )	$flags |= 4;
+		if ( $opts["use_boundaries"] )	$flags |= 8;
+		if ( $opts["weight_order"] )	$flags |= 16;
+		$req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
+		$req .= pack ( "N", strlen($index) ) . $index; // req index
+		$req .= pack ( "N", strlen($words) ) . $words; // req words
+
+		// options
+		$req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
+		$req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
+		$req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
+		$req .= pack ( "N", (int)$opts["limit"] );
+		$req .= pack ( "N", (int)$opts["around"] );
+
+		// documents
+		$req .= pack ( "N", count($docs) );
+		foreach ( $docs as $doc )
+		{
+			assert ( is_string($doc) );
+			$req .= pack ( "N", strlen($doc) ) . $doc;
+		}
+
+		////////////////////////////
+		// send query, get response
+		////////////////////////////
+
+		$len = strlen($req);
+		$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
+		$wrote = fwrite ( $fp, $req, $len+8 ); // NOTE(review): $wrote is never checked; short writes go undetected
+		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
+		{
+			$this->_MBPop ();
+			return false;
+		}
+
+		//////////////////
+		// parse response
+		//////////////////
+
+		// one length-prefixed snippet per input document, in order
+		$pos = 0;
+		$res = array ();
+		$rlen = strlen($response);
+		for ( $i=0; $i<count($docs); $i++ )
+		{
+			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
+			$pos += 4;
+
+			if ( $pos+$len > $rlen )
+			{
+				$this->_error = "incomplete reply";
+				$this->_MBPop ();
+				return false;
+			}
+			$res[] = $len ? substr ( $response, $pos, $len ) : "";
+			$pos += $len;
+		}
+
+		$this->_MBPop ();
+		return $res;
+	}
+
+
+	/////////////////////////////////////////////////////////////////////////////
+	// keyword generation
+	/////////////////////////////////////////////////////////////////////////////
+
+	/// connect to searchd server, and generate keyword list for a given query
+	/// returns false on failure,
+	/// an array of words on success; each entry has "tokenized" and "normalized"
+	/// keys, plus "docs"/"hits" statistics when $hits is true
+	function BuildKeywords ( $query, $index, $hits )
+	{
+		assert ( is_string($query) );
+		assert ( is_string($index) );
+		assert ( is_bool($hits) );
+
+		$this->_MBPush ();
+
+		if (!( $fp = $this->_Connect() ))
+		{
+			$this->_MBPop();
+			return false;
+		}
+
+		/////////////////
+		// build request
+		/////////////////
+
+		// v.1.0 req
+		$req  = pack ( "N", strlen($query) ) . $query; // req query
+		$req .= pack ( "N", strlen($index) ) . $index; // req index
+		$req .= pack ( "N", (int)$hits );
+
+		////////////////////////////
+		// send query, get response
+		////////////////////////////
+
+		$len = strlen($req);
+		$req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
+		$wrote = fwrite ( $fp, $req, $len+8 ); // NOTE(review): $wrote is never checked; short writes go undetected
+		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ))
+		{
+			$this->_MBPop ();
+			return false;
+		}
+
+		//////////////////
+		// parse response
+		//////////////////
+
+		$pos = 0;
+		$res = array ();
+		$rlen = strlen($response);
+		list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
+		$pos += 4;
+		for ( $i=0; $i<$nwords; $i++ )
+		{
+			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );	$pos += 4;
+			$tokenized = $len ? substr ( $response, $pos, $len ) : "";
+			$pos += $len;
+
+			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );	$pos += 4;
+			$normalized = $len ? substr ( $response, $pos, $len ) : "";
+			$pos += $len;
+
+			$res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized );
+
+			if ( $hits )
+			{
+				list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
+				$pos += 8;
+				$res [$i]["docs"] = $ndocs;
+				$res [$i]["hits"] = $nhits;
+			}
+
+			if ( $pos > $rlen )
+			{
+				$this->_error = "incomplete reply";
+				$this->_MBPop ();
+				return false;
+			}
+		}
+
+		$this->_MBPop ();
+		return $res;
+	}
+
+	/// escape extended-query-syntax metacharacters in $string by prefixing
+	/// each with a backslash
+	/// NOTE(review): '\"' in single quotes is the two-character sequence
+	/// backslash+double-quote, so a literal " alone is NOT escaped — confirm intent
+	function EscapeString ( $string )
+	{
+		$from = array ( '(',')','|','-','!','@','~','\"','&' );
+		$to   = array ( '\\(','\\)','\\|','\\-','\\!','\\@','\\~','\\\"', '\\&' );
+
+		return str_replace ( $from, $to, $string );
+	}
+
+	/////////////////////////////////////////////////////////////////////////////
+	// attribute updates
+	/////////////////////////////////////////////////////////////////////////////
+
+	/// update given attribute values on given documents in given indexes
+	/// returns amount of updated documents (0 or more) on success, or -1 on failure
+	/// $values maps document id => array of new values, one per entry in $attrs
+	function UpdateAttributes ( $index, $attrs, $values )
+	{
+		// verify everything
+		assert ( is_string($index) );
+
+		assert ( is_array($attrs) );
+		foreach ( $attrs as $attr )
+			assert ( is_string($attr) );
+
+		assert ( is_array($values) );
+		foreach ( $values as $id=>$entry )
+		{
+			assert ( is_numeric($id) );
+			assert ( is_array($entry) );
+			assert ( count($entry)==count($attrs) );
+			foreach ( $entry as $v )
+				assert ( is_int($v) );
+		}
+
+		// build request
+		$req = pack ( "N", strlen($index) ) . $index;
+
+		$req .= pack ( "N", count($attrs) );
+		foreach ( $attrs as $attr )
+			$req .= pack ( "N", strlen($attr) ) . $attr;
+
+		$req .= pack ( "N", count($values) );
+		foreach ( $values as $id=>$entry )
+		{
+			// document ids are 64-bit on the wire
+			$req .= sphPack64 ( $id );
+			foreach ( $entry as $v )
+				$req .= pack ( "N", $v );
+		}
+
+		// mbstring workaround
+		$this->_MBPush ();
+
+		// connect, send query, get response
+		if (!( $fp = $this->_Connect() ))
+		{
+			$this->_MBPop ();
+			return -1;
+		}
+
+		$len = strlen($req);
+		$req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
+		fwrite ( $fp, $req, $len+8 );
+
+		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
+		{
+			$this->_MBPop ();
+			return -1;
+		}
+
+		// parse response
+		list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
+		$this->_MBPop ();
+		return $updated;
+	}
+}
+
+//
+// $Id$
+//
+
+?>

+ 8 - 0
api/ruby/spec/fixtures/update_attributes.php

@@ -0,0 +1,8 @@
+<?php
+
+// spec fixture: issues a single UpdateAttributes() call
+// (used by the Ruby API specs under api/ruby/spec)
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->UpdateAttributes('index', array('group'), array(123 => array(456)));
+
+?>

+ 9 - 0
api/ruby/spec/fixtures/weights.php

@@ -0,0 +1,9 @@
+<?php
+
+// spec fixture: sets ordinal per-field weights via the deprecated
+// SetWeights() call, then runs a single query
+require ("sphinxapi.php");
+
+$cl = new SphinxClient();
+$cl->SetWeights(array(10, 20, 30, 40));
+$cl->Query('query');
+
+?>

+ 67 - 0
api/ruby/spec/sphinx/sphinx.conf

@@ -0,0 +1,67 @@
+# NOTE(review): index/log paths below are hard-coded to a Windows sandbox
+# (e:/work/_sandbox/...) — adjust for the local environment before use
+source src1
+{
+	type				= mysql
+	sql_host			= localhost
+	sql_user			= root
+	sql_pass			=
+	sql_db				= sphinx_test
+	sql_port			= 3306	# optional, default is 3306
+
+	sql_query			= SELECT id, name, description, UNIX_TIMESTAMP(created_at) AS created_at, group_id, rating FROM links
+	sql_attr_uint		= group_id
+	sql_attr_timestamp	= created_at
+	sql_attr_float		= rating
+	sql_attr_multi		= uint tags from query; SELECT link_id, tag_id FROM links_tags
+	sql_query_info		= SELECT * FROM links WHERE id=$id
+}
+
+# src2 feeds the 64-bit document id table (links64); no float/MVA attributes
+source src2
+{
+	type				= mysql
+	sql_host			= localhost
+	sql_user			= root
+	sql_pass			=
+	sql_db				= sphinx_test
+	sql_port			= 3306	# optional, default is 3306
+
+	sql_query			= SELECT id, name, description, UNIX_TIMESTAMP(created_at) AS created_at, group_id FROM links64
+	sql_attr_uint		= group_id
+	sql_attr_timestamp	= created_at
+	sql_query_info		= SELECT * FROM links WHERE id=$id
+}
+
+index test1
+{
+	source				= src1
+	path				= e:/work/_sandbox/sphinx/data/test1
+	docinfo				= extern
+	morphology			= none
+	stopwords			=
+	charset_type		= utf-8
+}
+
+index test2
+{
+	source				= src2
+	path				= e:/work/_sandbox/sphinx/data/test2
+	docinfo				= extern
+	morphology			= none
+	stopwords			=
+	charset_type		= utf-8
+}
+
+indexer
+{
+	mem_limit			= 32M
+}
+
+searchd
+{
+	port				= 3312
+	log					= e:/work/_sandbox/sphinx/log/searchd.log
+	query_log			= e:/work/_sandbox/sphinx/log/query.log
+	read_timeout		= 5
+	max_children		= 30
+	pid_file			= e:/work/_sandbox/sphinx/log/searchd.pid
+	max_matches			= 1000
+}

+ 86 - 0
api/ruby/spec/sphinx/sphinx_test.sql

@@ -0,0 +1,86 @@
+/*
+SQLyog Enterprise - MySQL GUI v5.20
+Host - 5.0.27-community-nt : Database - sphinx_test
+*********************************************************************
+Server version : 5.0.27-community-nt
+*/

+
+SET NAMES utf8;
+
+SET SQL_MODE='';
+
+CREATE database IF NOT EXISTS `sphinx_test`;
+
+USE `sphinx_test`;
+
+/* Table structure for table `links` */
+
+DROP TABLE IF EXISTS `links`;
+
+CREATE TABLE `links` (
+  `id` INT(11) NOT NULL auto_increment,
+  `name` VARCHAR(255) NOT NULL,
+  `created_at` DATETIME NOT NULL,
+  `description` TEXT,
+  `group_id` INT(11) NOT NULL,
+  `rating` FLOAT NOT NULL,
+  PRIMARY KEY  (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+/* Table structure for table `tags` */
+
+DROP TABLE IF EXISTS `tags`;
+
+CREATE TABLE `tags` (
+  `id` INT(11) NOT NULL auto_increment,
+  `tag` VARCHAR(255) NOT NULL,
+  PRIMARY KEY  (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+/* Table structure for table `links_tags` (many-to-many join for MVA attrs) */
+
+DROP TABLE IF EXISTS `links_tags`;
+
+CREATE TABLE `links_tags` (
+  `link_id` INT(11) NOT NULL,
+  `tag_id` INT(11) NOT NULL,
+  PRIMARY KEY  (`link_id`,`tag_id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+/* Table structure for table `links64` */
+
+DROP TABLE IF EXISTS `links64`;
+
+CREATE TABLE `links64` (
+  `id` BIGINT(11) NOT NULL auto_increment,
+  `name` VARCHAR(255) NOT NULL,
+  `created_at` DATETIME NOT NULL,
+  `description` TEXT,
+  `group_id` INT(11) NOT NULL,
+  PRIMARY KEY  (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+/* Data for the table `links` */
+
+INSERT INTO `links`(`id`,`name`,`created_at`,`description`,`group_id`,`rating`) VALUES
+	(1,'Paint Protects WiFi Network from Hackers','2007-04-04 06:48:10','A company known as SEC Technologies has created a special type of paint that blocks Wi-Fi signals so that you can be sure hackers can ',1,13.32),
+	(2,'Airplanes To Become WiFi Hotspots','2007-04-04 06:49:15','Airlines will start turning their airplanes into WiFi hotspots beginning early next year, WSJ reports. Here\'s what you need to know...',2,54.85),
+	(3,'Planet VIP-195 GSM/WiFi Phone With Windows Messanger','2007-04-04 06:50:47','The phone does comply with IEEE 802.11b and IEEE 802.11g to provide phone capability via WiFi. As GSM phone the VIP-195 support 900/1800/1900 band and GPRS too. It comes with simple button to switch between WiFi or GSM mod',1,16.25);
+
+/* Data for the table `tags` */
+INSERT INTO `tags`(`id`,`tag`) VALUES
+	(1, 'tag1'),(2, 'tag2'),(3, 'tag3'),(4, 'tag4'),(5, 'tag5'),
+	(6, 'tag6'),(7, 'tag7'),(8, 'tag8'),(9, 'tag9'),(10, 'tag5');
+
+/* Data for the table `links_tags` */
+INSERT INTO `links_tags`(`link_id`,`tag_id`) VALUES
+	(1, 1),(1, 2),(1, 3),(1, 4),
+	(2, 5),(2, 6),(2, 7),(2, 8),
+	(3, 9),(3, 1),(3, 7),(3, 10);
+	
+/* Data for the table `links64` */
+
+/* ids below exceed 2^32 (4294967296) to exercise 64-bit document id handling */
+INSERT INTO `links64`(`id`,`name`,`created_at`,`description`,`group_id`) VALUES
+	(4294967297,'Paint Protects WiFi Network from Hackers','2007-04-04 06:48:10','A company known as SEC Technologies has created a special type of paint that blocks Wi-Fi signals so that you can be sure hackers can ',1),
+	(4294967298,'Airplanes To Become WiFi Hotspots','2007-04-04 06:49:15','Airlines will start turning their airplanes into WiFi hotspots beginning early next year, WSJ reports. Here\'s what you need to know...',2),
+	(4294967299,'Planet VIP-195 GSM/WiFi Phone With Windows Messanger','2007-04-04 06:50:47','The phone does comply with IEEE 802.11b and IEEE 802.11g to provide phone capability via WiFi. As GSM phone the VIP-195 support 900/1800/1900 band and GPRS too. It comes with simple button to switch between WiFi or GSM mod',1);

+ 3 - 0
api/ruby/sphinx.yml.tpl

@@ -0,0 +1,3 @@
+# template for config/sphinx.yml; consumed by tasks/sphinx.rake
+# indexes is a space-separated list of index names to (re)build
+config_file: /opt/sphinx/etc/sphinx.conf
+root_dir: /opt/sphinx/bin
+indexes: test1 test2

+ 75 - 0
api/ruby/tasks/sphinx.rake

@@ -0,0 +1,75 @@
+namespace :sphinx do
+  desc 'Run indexer for configured indexes'
+  task :index do
+    config = load_config
+    if config[:indexes]
+      system "#{config[:root_dir]}/indexer --config \"#{config[:config_file]}\" #{config[:indexes]}"
+    else
+      puts 'You should specify indexes in sphinx.yml'
+    end
+  end
+
+  desc 'Run indexer for all indexes'
+  task :index_all do
+    config = load_config
+    system "#{config[:root_dir]}/indexer --config \"#{config[:config_file]}\" --all"
+  end
+
+  desc 'Rotate configured indexes and restart searchd server'
+  task :rotate do
+    config = load_config
+    if config[:indexes]
+      system "#{config[:root_dir]}/indexer --config \"#{config[:config_file]}\" --rotate #{config[:indexes]}"
+    else
+      puts 'You should specify indexes in sphinx.yml'
+    end
+  end
+
+  desc 'Rotate all indexes and restart searchd server'
+  task :rotate_all do
+    config = load_config
+    system "#{config[:root_dir]}/indexer --config \"#{config[:config_file]}\" --rotate --all"
+  end
+  
+  desc 'Start searchd server'
+  task :start do
+    config = load_config
+    if File.exists?(config[:pid_file])
+      puts 'Sphinx searchd server is already started.'
+    else
+      system "#{config[:root_dir]}/searchd --config \"#{config[:config_file]}\""
+      puts 'Sphinx searchd server started.'
+    end
+  end
+  
+  desc 'Stop searchd server'
+  task :stop do
+    config = load_config
+    unless File.exists?(config[:pid_file])
+      puts 'Sphinx searchd server is not running.'
+    else
+      pid = File.read(config[:pid_file]).chomp
+      kill 'SIGHUP', pid
+      puts 'Sphinx searchd server stopped.'
+    end
+  end
+  
+  desc 'Restart searchd server'
+  task :restart => [:stop, :start]
+  
+  def load_config
+    return @sphinx_config if @sphinx_config
+
+    options = YAML.load_file(File.dirname(__FILE__) + '/../../../../config/sphinx.yml') rescue {}
+    @sphinx_config = {
+      :config_file => options['config_file'] || '/etc/sphinx.conf',
+      :root_dir => options['root_dir'] || '/usr/bin',
+      :indexes => options['indexes']
+    }
+    sphinx_config = File.read(@sphinx_config[:config_file]) rescue ''
+    
+    sphinx_config =~ /searchd\s*{.*pid_file\s*=\s*(.*?)\n.*}/m
+    @sphinx_config[:pid_file] = $1 || '/var/run/searchd.pid'
+    return @sphinx_config
+  end
+end

+ 0 - 493
contrib/rubyapi/lib/sphinx.rb

@@ -1,493 +0,0 @@
-# = sphinx.rb - Sphinx Client Library
-# 
-# Author::    Dmytro Shteflyuk <mailto:[email protected]>.
-# Copyright:: Copyright (c) 2006 Wildbit, LLC
-# License::   Distributes under the same terms as Ruby
-# Version::   0.1.0
-# Website::   http://kpumuk.info/projects/ror-plugins/using-sphinx-search-engine-in-ruby-on-rails
-#
-# This library is distributed under the terms of the Ruby license.
-# You can freely distribute/modify this library.
-
-# ==Sphinx Client Library 
-# 
-# The Sphinx Client Library is used to communicate with <tt>searchd</tt>
-# daemon and get search results from Sphinx.
-# 
-# ===Usage
-# 
-#   sphinx = Sphinx.new
-#   result = sphinx.query('test')
-#   ids = result[:matches].map { |id, value| id }.join(',')
-#   posts = Post.find :all, :conditions => "id IN (#{ids})"
-#   
-#   docs = posts.map { |post| post.body }
-#   excerpts = sphinx.build_excerpts(docs, 'index', 'test')
-# 
-class Sphinx
-
-  # :stopdoc:
-  class SphinxError < StandardError; end
-  class SphinxConnectError < SphinxError; end
-  class SphinxResponseError < SphinxError; end
-  class SphinxInternalError < SphinxError; end
-  class SphinxTemporaryError < SphinxError; end
-  class SphinxUnknownError < SphinxError; end
-
-  # Known searchd commands
-
-  # search command
-  SEARCHD_COMMAND_SEARCH  = 0
-  # excerpt command
-  SEARCHD_COMMAND_EXCERPT = 1
-
-  # Current client-side command implementation versions
-  
-  # search command version
-  VER_COMMAND_SEARCH  = 0x104
-  # excerpt command version
-  VER_COMMAND_EXCERPT = 0x100
-  
-  # Known searchd status codes
-
-  # match all query words
-  SEARCHD_OK    = 0
-  # match all query words
-  SEARCHD_ERROR = 1
-  # match all query words
-  SEARCHD_RETRY = 2
-  
-  # :startdoc:
-
-  # Known match modes
-
-  # match all query words
-  SPH_MATCH_ALL     = 0 
-  # match any query word
-  SPH_MATCH_ANY     = 1 
-  # match this exact phrase
-  SPH_MATCH_PHRASE  = 2 
-  # match this boolean query
-  SPH_MATCH_BOOLEAN = 3 
-  # match this extended query
-  SPH_MATCH_EXTENDED= 4
-  
-  # Known sort modes
-
-  # sort by document relevance desc, then by date
-  SPH_SORT_RELEVANCE     = 0
-  # sort by document date desc, then by relevance desc
-  SPH_SORT_ATTR_DESC     = 1
-  # sort by document date asc, then by relevance desc
-  SPH_SORT_ATTR_ASC      = 2
-  # sort by time segments (hour/day/week/etc) desc, then by relevance desc
-  SPH_SORT_TIME_SEGMENTS = 3
-  # sort by SQL-like expression (eg. "@weight DESC my_attr ASC")
-  SPH_SORT_EXTENDED      = 4
-
-  # Known attribute types
-
-  # this attr is just an integer
-  SPH_ATTR_INTEGER   = 1
-  # this attr is a timestamp
-  SPH_ATTR_TIMESTAMP = 2 
-  
-  # Known grouping functions
-
-  # group by day
-  SPH_GROUPBY_DAY   = 0
-  # group by week
-  SPH_GROUPBY_WEEK  = 1 
-  # group by month
-  SPH_GROUPBY_MONTH = 2 
-  # group by year
-  SPH_GROUPBY_YEAR  = 3
-  # group by attribute value
-  SPH_GROUPBY_ATTR  = 4
-  
-  # Constructs the Sphinx object and sets options to their default values. 
-  def initialize
-    @host       = 'localhost'         # searchd host (default is "localhost")
-    @port       = 3312                # searchd port (default is 3312)
-    @offset     = 0                   # how much records to seek from result-set start (default is 0)
-    @limit      = 20                  # how much records to return from result-set starting at offset (default is 20)
-    @mode       = SPH_MATCH_ALL       # query matching mode (default is SPH_MATCH_ALL)
-    @weights    = []                  # per-field weights (default is 1 for all fields)
-    @sort       = SPH_SORT_RELEVANCE  # match sorting mode (default is SPH_SORT_RELEVANCE)
-    @sortby     = ''                  # attribute to sort by (defualt is "")
-    @min_id     = 0                   # min ID to match (default is 0)
-    @max_id     = 0xFFFFFFFF          # max ID to match (default is UINT_MAX)
-    @min        = {}                  # attribute name to min-value hash (for range filters)
-    @max        = {}                  # attribute name to max-value hash (for range filters)
-    @filter     = {}                  # attribute name to values set hash (for values-set filters)
-    @groupby    = ''                  # group-by attribute name
-    @groupfunc  = SPH_GROUPBY_DAY     # function to pre-process group-by attribute value with
-    @maxmatches = 1000                # max matches to retrieve
-  
-    @error      = ''                  # last error message
-    @warning    = ''                  # last warning message
-  end
-
-  # Get last error message.
-  def last_error
-    @error
-  end
-  
-  # Get last warning message.
-  def last_warning
-    @warning
-  end
-  
-  # Set searchd server.
-  def set_server(host, port)
-    @host = host
-    @port = port
-  end
- 
-  # Set match offset, count, and max number to retrieve.
-  def set_limits(offset, limit, max = 0)
-    @offset = offset
-    @limit = limit
-    @maxmatches = max if max > 0
-  end
-  
-  # Set match mode.
-  def set_match_mode(mode)
-    @mode = mode
-  end
-  
-  # Set sort mode.
-  def set_sort_mode(mode, sortby = '')
-    @sort = mode
-    @sortby = sortby
-  end
-  
-  # Set per-field weights.
-  def set_weights(weights)
-    @weights = weights
-  end
-  
-  # Set IDs range to match.
-  # 
-  # Only match those records where document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> 
-  # (including <tt>min_id</tt> and <tt>max_id</tt>).
-  def set_id_range(min_id, max_id)
-    @min_id = min_id
-    @max_id = max_id
-  end
-  
-  # Set values filter.
-  # 
-  # Only match those records where <tt>attr</tt> column values
-  # are in specified set.
-  def set_filter(attr, values)
-    @filter[attr] = values
-  end
-  
-  # Set range filter.
-  # 
-  # Only match those records where <tt>attr</tt> column value
-  # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
-  def set_filter_range(attr, min, max)
-    @min[attr] = min
-    @max[attr] = max
-  end
-  
-  # Set grouping.
-  # 
-  # if grouping
-  def set_group_by(attr, func)
-    @groupby = attr
-    @groupfunc = func
-  end
-  
-  # Connect to searchd server and run given search query.
-  #
-  # * <tt>query</tt> -- query string
-  # * <tt>index</tt> -- index name to query, default is "*" which means to query all indexes
-  #
-  # returns hash which has the following keys on success:
-  # 
-  # * <tt>:matches</tt> -- hash which maps found document_id to ( "weight", "group" ) hash
-  # * <tt>:total</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
-  # * <tt>:total_found</tt> -- total amount of matching documents in index
-  # * <tt>:time</tt> -- search time
-  # * <tt>:words</tt> -- hash which maps query terms (stemmed!) to ( :docs, :hits ) hash
-  def query(query, index = '*')
-    sock = connect
-    
-    # build request
-
-    # mode and limits
-    req = [@offset, @limit, @mode, @sort].pack('NNNN')
-    req << [@sortby.length].pack('N')
-    req << @sortby
-    # query itself
-    req << [query.length].pack('N')
-    req << query
-    # weights
-    req << [@weights.length].pack('N')
-    req << @weights.pack('N' * @weights.length)
-    # indexes
-    req << [index.length].pack('N')
-    req << index
-    # id range
-    req << [@min_id.to_i, @max_id.to_i].pack('NN')
-    
-    # filters
-    req << [@min.length + @filter.length].pack('N')
-    @min.each do |attribute, min|
-      req << [attribute.length].pack('N')
-      req << attribute
-      req << [0, min, @max[attribute]].pack('NNN')
-    end
-    
-    @filter.each do |attribute, values|
-      req << [attribute.length].pack('N')
-      req << attribute
-      req << [values.length].pack('N')
-      req << values.pack('N' * values.length)
-    end
-    
-    # group-by
-    req << [@groupfunc, @groupby.length].pack('NN')
-    req << @groupby
-    
-    # max matches to retrieve
-    req << [@maxmatches].pack('N')
-    
-    # send query, get response
-    len = req.length
-    # add header
-    req = [SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, len].pack('nnN') + req
-    sock.send(req, 0)
-    
-    response = get_response(sock, VER_COMMAND_SEARCH)
-    
-    # parse response
-    result = {}
-    max = response.length # protection from broken response
-
-    #read schema
-    p = 0
-    fields = []
-    attrs = {}
-    
-    nfields = response[p, 4].unpack('N*').first
-    p += 4
-    while nfields > 0 and p < max
-      nfields -= 1
-      len = response[p, 4].unpack('N*').first
-      p += 4
-      fields << response[p, len]
-      p += len
-    end
-    result[:fields] = fields
-
-    nattrs = response[p, 4].unpack('N*').first
-    p += 4
-    while nattrs > 0 && p < max
-      nattrs -= 1
-      len = response[p, 4].unpack('N*').first
-      p += 4
-      attr = response[p, len]
-      p += len
-      type = response[p, 4].unpack('N*').first
-      p += 4
-      attrs[attr.to_sym] = type;
-    end
-    result[:attrs] = attrs
-    
-    # read match count
-    count = response[p, 4].unpack('N*').first
-    p += 4
-    
-    # read matches
-    result[:matches] = {}
-    while count > 0 and p < max
-      count -= 1
-      doc, weight = response[p, 8].unpack('N*N*')
-      p += 8
-
-      result[:matches][doc] ||= {}
-      result[:matches][doc][:weight] = weight
-      attrs.each do |attr, type|
-        val = response[p, 4].unpack('N*').first
-        p += 4
-        result[:matches][doc][:attrs] ||= {}
-        result[:matches][doc][:attrs][attr] = val
-      end
-    end
-    result[:total], result[:total_found], result[:time], words = \
-      response[p, 16].unpack('N*N*N*N*')
-    result[:time] = '%.3f' % (result[:time] / 1000)
-    p += 16
-    
-    result[:words] = {}
-    while words > 0 and p < max
-      words -= 1
-      len = response[p, 4].unpack('N*').first
-      p += 4
-      word = response[p, len]
-      p += len
-      docs, hits = response[p, 8].unpack('N*N*')
-      p += 8
-      result[:words][word] = {:docs => docs, :hits => hits}
-    end
-    
-    result
-  end
-
-  # Connect to searchd server and generate exceprts from given documents.
-  #
-  # * <tt>index</tt> -- a string specifiying the index which settings will be used
-  # for stemming, lexing and case folding
-  # * <tt>docs</tt> -- an array of strings which represent the documents' contents
-  # * <tt>words</tt> -- a string which contains the words to highlight
-  # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
-  # 
-  # You can use following parameters:
-  # * <tt>:before_match</tt> -- a string to insert before a set of matching words, default is "<b>"
-  # * <tt>:after_match</tt> -- a string to insert after a set of matching words, default is "<b>"
-  # * <tt>:chunk_separator</tt> -- a string to insert between excerpts chunks, default is " ... "
-  # * <tt>:limit</tt> -- max excerpt size in symbols (codepoints), default is 256
-  # * <tt>:around</tt> -- how much words to highlight around each match, default is 5
-  #
-  # Returns an array of string excerpts on success.
-  def build_excerpts(docs, index, words, opts = {})
-    sock = connect
-
-    # fixup options
-    opts[:before_match] ||= '<b>';
-    opts[:after_match] ||= '</b>';
-    opts[:chunk_separator] ||= ' ... ';
-    opts[:limit] ||= 256;
-    opts[:around] ||= 5;
-    
-    # build request
-    
-    # v.1.0 req
-    req = [0, 1].pack('N2'); # mode=0, flags=1 (remove spaces)
-    # req index
-    req << [index.length].pack('N')
-    req << index
-    # req words
-    req << [words.length].pack('N')
-    req << words
-
-    # options
-    req << [opts[:before_match].length].pack('N')
-    req << opts[:before_match]
-    req << [opts[:after_match].length].pack('N')
-    req << opts[:after_match]
-    req << [opts[:chunk_separator].length].pack('N')
-    req << opts[:chunk_separator]
-    req << [opts[:limit].to_i, opts[:around].to_i].pack('NN')
-    
-    # documents
-    req << [docs.size].pack('N');
-    docs.each do |doc|
-      req << [doc.length].pack('N')
-      req << doc
-    end
-    
-    # send query, get response
-    len = req.length
-    # add header
-    req = [SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, len].pack('nnN') + req
-    sock.send(req, 0)
-    
-    response = get_response(sock, VER_COMMAND_EXCERPT)
-    
-    # parse response
-    p = 0
-    res = []
-    rlen = response.length
-    docs.each do |doc|
-      len = response[p, 4].unpack('N*').first;
-      p += 4
-      if p + len > rlen
-        @error = 'incomplete reply'
-        raise SphinxResponseError, @error
-      end
-      res << response[p, len]
-      p += len
-    end
-    return res;
-  end
-
-  # Connect to searchd server.
-  def connect
-    begin
-      sock = TCPSocket.new(@host, @port)
-    rescue
-      @error = "connection to #{@host}:#{@port} failed"
-      raise SphinxConnectError, @error
-    end
-    
-    v = sock.recv(4).unpack('N*').first
-    if v < 1
-      sock.close
-      @error = "expected searchd protocol version 1+, got version '#{v}'"
-      raise SphinxConnectError, @error
-    end
-    
-    sock.send([1].pack('N'), 0)
-    sock
-  end
-  private :connect
-  
-  # get and check response packet from searchd server
-  def get_response(sock, client_version)
-    header = sock.recv(8)
-    status, ver, len = header.unpack('n2N')
-    response = ''
-    left = len
-    while left > 0 do
-      begin
-        chunk = sock.recv(left)
-        if chunk
-          response << chunk
-          left -= chunk.length
-        end
-      rescue EOFError
-      end
-    end
-    sock.close
-
-    # check response
-    read = response.length
-    if not response or read != len
-      @error = len \
-        ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
-        : "received zero-sized searchd response"
-      raise SphinxResponseError, @error
-    end
-    
-    # check status
-    if status == SEARCHD_ERROR
-      @error = "searchd error: " + response[4,].to_s
-      raise SphinxInternalError, @error
-    end
-
-    if status == SEARCHD_RETRY
-      @error = "temporary searchd error: " + response[4,]
-      raise SphinxTemporaryError, @error
-    end
-
-    unless status == SEARCHD_OK
-      @error = "unknown status code '#{status}'"
-      raise SphinxUnknownError, @error
-    end
-    
-    # check version
-    if ver < client_version
-      @warning = "searchd command v.%d.%d older than client's v.%d.%d, some options might not work" % \
-        ver >> 8, ver & 0xff, client_ver >> 8, client_ver & 0xff
-    end
-    
-    return response
-  end
-  private :get_response
- 
-end

File diff suppressed because it is too large
+ 93 - 84
doc/sphinx.html


+ 10 - 1
doc/sphinx.txt

@@ -967,6 +967,7 @@ Example 4. Fully automated live updates
    | source main
    | {
    |     # ...
+   |     sql_query_pre = SET NAMES utf8
    |     sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM documents
    |     sql_query = SELECT id, title, body FROM documents \
    |         WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
@@ -974,7 +975,7 @@ Example 4. Fully automated live updates
    | 
    | source delta : main
    | {
-   |     sql_query_pre =
+   |     sql_query_pre = SET NAMES utf8
    |     sql_query = SELECT id, title, body FROM documents \
    |         WHERE id>( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
    | }
@@ -994,6 +995,14 @@ Example 4. Fully automated live updates
    |     path = /path/to/delta
    | }
 
+Note how we're overriding sql_query_pre in the delta source. We need to
+explicitly have that override. Otherwise REPLACE query would be run when
+indexing delta source too, effectively nullifying it. However, when we
+issue the directive in the inherited source for the first time, it removes
+all inherited values, so the encoding setup is also lost. So sql_query_pre
+in the delta can not just be empty; and we need to issue the encoding setup
+query explicitly once again.
+
 3.11. Index merging
 -------------------
 

+ 11 - 1
doc/sphinx.xml

@@ -919,6 +919,7 @@ CREATE TABLE sph_counter
 source main
 {
     # ...
+    sql_query_pre = SET NAMES utf8
     sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM documents
     sql_query = SELECT id, title, body FROM documents \
         WHERE id&lt;=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
@@ -926,7 +927,7 @@ source main
 
 source delta : main
 {
-    sql_query_pre =
+    sql_query_pre = SET NAMES utf8
     sql_query = SELECT id, title, body FROM documents \
         WHERE id&gt;( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
 }
@@ -948,6 +949,15 @@ index delta : main
 </programlisting>
 </example>
 </para>
+<para>
+Note how we're overriding <code>sql_query_pre</code> in the delta source.
+We need to explicitly have that override. Otherwise <code>REPLACE</code> query
+would be run when indexing delta source too, effectively nullifying it. However,
+when we issue the directive in the inherited source for the first time, it removes 
+<emphasis>all</emphasis> inherited values, so the encoding setup is also lost.
+So <code>sql_query_pre</code> in the delta can not just be empty; and we need
+to issue the encoding setup query explicitly once again.
+</para>
 </sect2>
 
 

+ 2 - 1
src/indexer.cpp

@@ -938,7 +938,8 @@ bool DoMerge ( const CSphConfigSection & hDst, const char * sDst,
 	if ( !pDst->Merge ( pSrc, tPurge, bMergeKillLists ) )
 		sphDie ( "failed to merge index '%s' into index '%s': %s", sSrc, sDst, pDst->GetLastError().cstr() );
 	tmMerge += sphLongTimer ();
-	printf ( "merged in %.1f sec\n", tmMerge );
+	if ( !g_bQuiet )
+		printf ( "merged in %.1f sec\n", tmMerge );
 
 	// pick up merge result
 	const char * sPath = hDst["path"].cstr();

+ 247 - 77
src/sphinx.cpp

@@ -2159,11 +2159,11 @@ class CSphTokenizerTraits : public ISphTokenizer
 public:
 	CSphTokenizerTraits ();
 
-	virtual const BYTE *	GetTokenStart () const;
-	virtual const BYTE *	GetTokenEnd () const;
-	virtual const BYTE *	GetBufferPtr () const;
-	virtual const BYTE *	GetBufferEnd () const;
-	virtual void			AdvanceBufferPtr ( int iOffset );
+	virtual const char *	GetTokenStart () const		{ return (const char *) m_pTokenStart; }
+	virtual const char *	GetTokenEnd () const		{ return (const char *) m_pTokenEnd; }
+	virtual const char *	GetBufferPtr () const		{ return (const char *) m_pCur; }
+	virtual const char *	GetBufferEnd () const		{ return (const char *) m_pBufferMax; }
+	virtual void			SetBufferPtr ( const char * sNewPtr );
 
 protected:
 	BYTE * GetTokenSyn ();
@@ -2887,6 +2887,12 @@ void SaveDictionarySettings ( CSphWriter & tWriter, CSphDict * pDict )
 	WriteFileInfo ( tWriter, tWFFileInfo );
 }
 
+
+static inline bool ShortTokenFilter ( BYTE * pToken, int iLen )
+{
+	return pToken [0] == '*' || ( iLen > 0 && pToken [iLen-1] == '*' );
+}
+
 /////////////////////////////////////////////////////////////////////////////
 
 ISphTokenizer::ISphTokenizer ()
@@ -2896,6 +2902,7 @@ ISphTokenizer::ISphTokenizer ()
 	, m_bWasSpecial ( false )
 	, m_bEscaped ( false )
 	, m_iOvershortCount ( 0 )
+	, m_bShortTokenFilter ( false )
 {}
 
 
@@ -3250,35 +3257,12 @@ CSphTokenizerTraits<IS_UTF8>::CSphTokenizerTraits ()
 	m_pAccum = m_sAccum;
 }
 
-template < bool IS_UTF8 >
-const BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenStart () const
-{
-	return m_pTokenStart;
-}
-
-template < bool IS_UTF8 >
-const BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenEnd () const
-{
-	return m_pTokenEnd;
-}
-
-template < bool IS_UTF8 >
-const BYTE * CSphTokenizerTraits<IS_UTF8>::GetBufferPtr () const
-{
-	return m_pCur;
-}
-
-template < bool IS_UTF8 >
-const BYTE * CSphTokenizerTraits<IS_UTF8>::GetBufferEnd () const
-{
-	return m_pBufferMax;
-}
 
 template < bool IS_UTF8 >
-void CSphTokenizerTraits<IS_UTF8>::AdvanceBufferPtr ( int iOffset )
+void CSphTokenizerTraits<IS_UTF8>::SetBufferPtr ( const char * sNewPtr )
 {
-	assert ( iOffset >= 0 );
-	m_pCur = Min ( m_pBufferMax, m_pCur + iOffset );
+	assert ( (BYTE*)sNewPtr>=m_pBuffer && (BYTE*)sNewPtr<=m_pBufferMax );
+	m_pCur = Min ( m_pBufferMax, Max ( m_pBuffer, (BYTE*)sNewPtr ) );
 	m_iAccum = 0;
 	m_pAccum = m_sAccum;
 }
@@ -3327,7 +3311,7 @@ static inline SynCheck_e SynCheckPrefix ( const CSphSynonym & tCandidate, int iO
 static inline bool IsSeparator ( int iFolded, bool bFirst )
 {
 	// eternal separator
-	if ( ( iFolded & MASK_CODEPOINT )==0 )
+	if ( iFolded<0 || ( iFolded & MASK_CODEPOINT )==0 )
 		return true;
 
 	// just a codepoint
@@ -3463,7 +3447,7 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 			int iTest;
 
 			int iMasked = ( iCode & MASK_CODEPOINT );
-			if ( iFolded==0 )
+			if ( iFolded<=0 )
 			{
 				sTest[0] = MAGIC_SYNONYM_WHITESPACE;
 				iTest = 1;
@@ -3496,7 +3480,7 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 
 			#define LOC_REFINE_BREAK() \
 			{ \
-				if ( iExact>=0 ) { m_pCur = pExact; LOC_RETURN_SYNONYM ( iExact ); } \
+				if ( iExact>=0 ) { m_pCur = pCur = pExact; LOC_RETURN_SYNONYM ( iExact ); } \
 				break; \
 			}
 
@@ -3510,7 +3494,7 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 			}
 
 			// this is to catch intermediate separators (eg. "OS/2/3")
-			bool bMaybeSeparator = ( iFolded & FLAG_CODEPOINT_SYNONYM )!=0;
+			bool bMaybeSeparator = ( iFolded & FLAG_CODEPOINT_SYNONYM )!=0 || ( iFolded<0 );
 
 			SynCheck_e eStart = SynCheckPrefix ( m_dSynonyms[iSynStart], iSynOff, sTest, iTest, bMaybeSeparator );
 			if ( eStart==SYNCHECK_EXACT )
@@ -3673,8 +3657,23 @@ BYTE * CSphTokenizerTraits<IS_UTF8>::GetTokenSyn ()
 		// return accumulated token
 		if ( m_iAccum<m_tSettings.m_iMinWordLen )
 		{
+			if ( m_bShortTokenFilter )
+			{
+				*m_pAccum = '\0';
+
+				if ( ShortTokenFilter ( m_sAccum, m_iAccum ) )
+				{
+					m_iLastTokenLen = m_iAccum;
+					m_pTokenEnd = pCur;
+					m_iAccum = 0;
+					return m_sAccum;
+				}
+			}
+
 			if ( m_iAccum )
 				m_iOvershortCount++;
+
+			m_iAccum = 0;
 			continue;
 		}
 
@@ -3822,10 +3821,21 @@ BYTE * CSphTokenizer_SBCS::GetToken ()
 			pCur = m_pCur;
 			if ( m_iAccum<m_tSettings.m_iMinWordLen )
 			{
-				m_bBoundary = m_bTokenBoundary = false;
-				m_iAccum = 0;
-				m_iLastTokenLen = 0;
-				return NULL;
+				bool bShortToken = false;
+				if ( m_bShortTokenFilter )
+				{
+					m_sAccum [m_iAccum] = '\0';
+					if ( ShortTokenFilter ( m_sAccum, m_iAccum ) )
+						bShortToken = true;
+				}
+
+				if ( !bShortToken )
+				{
+					m_bBoundary = m_bTokenBoundary = false;
+					m_iAccum = 0;
+					m_iLastTokenLen = 0;
+					return NULL;
+				}
 			}
 		} else
 		{
@@ -3859,10 +3869,22 @@ BYTE * CSphTokenizer_SBCS::GetToken ()
 		{
 			if ( m_iAccum<m_tSettings.m_iMinWordLen )
 			{
-				if ( m_iAccum )
-					m_iOvershortCount++;
-				m_iAccum = 0;
-				continue;
+				bool bShortToken = false;
+				if ( m_bShortTokenFilter )
+				{
+					m_sAccum[m_iAccum] = '\0';
+					if ( ShortTokenFilter ( m_sAccum, m_iAccum ) )
+						bShortToken = true;
+				}
+
+				if ( !bShortToken )
+				{
+					if ( m_iAccum )
+						m_iOvershortCount++;
+
+					m_iAccum = 0;
+					continue;
+				}
 			}
 
 			m_iLastTokenLen = m_iAccum;
@@ -3884,9 +3906,24 @@ BYTE * CSphTokenizer_SBCS::GetToken ()
 			// skip short words
 			if ( m_iAccum<m_tSettings.m_iMinWordLen )
 			{
-				m_iAccum = 0;
 				if ( m_iAccum )
 					m_iOvershortCount++;
+
+				bool bShortToken = false;
+				if ( m_bShortTokenFilter )
+				{
+					m_sAccum[m_iAccum] = '\0';
+					if ( ShortTokenFilter ( m_sAccum, m_iAccum ) )
+						bShortToken = true;
+				}
+
+				if ( !bShortToken )
+				{
+					if ( m_iAccum )
+						m_iOvershortCount++;
+
+					m_iAccum = 0;
+				}
 			}
 
 			m_pTokenEnd = m_pCur;
@@ -3987,8 +4024,12 @@ BYTE * CSphTokenizer_UTF8::GetToken ()
 			FlushAccum ();
 			if ( m_iLastTokenLen<m_tSettings.m_iMinWordLen )
 			{
-				m_iLastTokenLen = 0;
-				return NULL;
+				
+				if ( !m_bShortTokenFilter || !ShortTokenFilter ( m_sAccum, m_iLastTokenLen ) )
+				{
+					m_iLastTokenLen = 0;
+					return NULL;
+				}
 			}
 
 			// return trailing word
@@ -4022,9 +4063,18 @@ BYTE * CSphTokenizer_UTF8::GetToken ()
 			FlushAccum ();
 			if ( m_iLastTokenLen<m_tSettings.m_iMinWordLen )
 			{
-				if ( m_iLastTokenLen )
-					m_iOvershortCount++;
-				continue;
+				if ( m_bShortTokenFilter && ShortTokenFilter ( m_sAccum, m_iLastTokenLen ) )
+				{
+					m_pTokenEnd = pCur;
+					return m_sAccum;
+				}
+				else
+				{
+					if ( m_iLastTokenLen )
+						m_iOvershortCount++;
+
+					continue;
+				}
 			}
 			else
 			{
@@ -4044,9 +4094,15 @@ BYTE * CSphTokenizer_UTF8::GetToken ()
 			// skip short words preceding specials
 			if ( m_iAccum<m_tSettings.m_iMinWordLen )
 			{
-				if ( m_iAccum )
-					m_iOvershortCount++;
-				FlushAccum ();
+				m_sAccum[m_iAccum] = '\0';
+
+				if ( !m_bShortTokenFilter || !ShortTokenFilter ( m_sAccum, m_iAccum ) )
+				{
+					if ( m_iAccum )
+						m_iOvershortCount++;
+
+					FlushAccum ();
+				}
 			}
 
 			m_pTokenEnd = m_pCur;
@@ -8072,17 +8128,18 @@ bool CSphIndex_VLN::Merge ( CSphIndex * pSource, CSphVector<CSphFilter> & dFilte
 	tMerge.m_eDocinfo = m_tSettings.m_eDocinfo;
 
 	int iMinAttrSize = m_tSchema.GetRowSize();
-	assert ( iMinAttrSize );
-
-	tMerge.m_pMinRowitems = new CSphRowitem [ iMinAttrSize ];
-	for( int i = 0; i < iMinAttrSize; i++ )
+	if ( iMinAttrSize )
 	{
-		if ( bDstEmpty )
-			tMerge.m_pMinRowitems[i] = pSrcIndex->m_tMin.m_pRowitems[i];
-		else if ( bSrcEmpty )
-			tMerge.m_pMinRowitems[i] = m_tMin.m_pRowitems[i];
-		else
-			tMerge.m_pMinRowitems[i] = Min ( m_tMin.m_pRowitems[i], pSrcIndex->m_tMin.m_pRowitems[i] );
+		tMerge.m_pMinRowitems = new CSphRowitem [ iMinAttrSize ];
+		for( int i = 0; i < iMinAttrSize; i++ )
+		{
+			if ( bDstEmpty )
+				tMerge.m_pMinRowitems[i] = pSrcIndex->m_tMin.m_pRowitems[i];
+			else if ( bSrcEmpty )
+				tMerge.m_pMinRowitems[i] = m_tMin.m_pRowitems[i];
+			else
+				tMerge.m_pMinRowitems[i] = Min ( m_tMin.m_pRowitems[i], pSrcIndex->m_tMin.m_pRowitems[i] );
+		}
 	}
 
 	// fixup filters
@@ -8184,9 +8241,16 @@ bool CSphIndex_VLN::Merge ( CSphIndex * pSource, CSphVector<CSphFilter> & dFilte
 		{
 			assert ( uProgress & 0x03 );
 
-			MergeWordData ( tDstWord, tSrcWord );
-			
-			tDstWord.Write();
+			if ( tDstWord.IsEmpty () )
+				tSrcWord.Write();
+			else if ( tSrcWord.IsEmpty () )
+				tDstWord.Write ();
+			else
+			{
+				MergeWordData ( tDstWord, tSrcWord );
+				tDstWord.Write();
+			}
+
 			m_tProgress.m_iWords++;
 
 			if ( !tDstWord.Read() )
@@ -8443,10 +8507,13 @@ protected:
 SphWordID_t CSphDictStar::GetWordID ( BYTE * pWord )
 {
 	char sBuf [ 16+3*SPH_MAX_WORD_LEN ];
+	assert ( strlen ( (const char*)pWord ) < 16+3*SPH_MAX_WORD_LEN );
 
 	m_pDict->ApplyStemmers ( pWord );
 
 	int iLen = strlen ( (const char*)pWord );
+	assert ( iLen < 16+3*SPH_MAX_WORD_LEN - 1 );
+
 	memcpy ( sBuf, pWord, iLen+1 );
 
 	if ( iLen )
@@ -8493,6 +8560,8 @@ SphWordID_t	CSphDictStarV8::GetWordID ( BYTE * pWord )
 	char sBuf [ 16+3*SPH_MAX_WORD_LEN ];
 
 	int iLen = strlen ( (const char*)pWord );
+	assert ( iLen < 16+3*SPH_MAX_WORD_LEN );
+
 	if ( !iLen )
 		return 0;
 
@@ -8503,6 +8572,8 @@ SphWordID_t	CSphDictStarV8::GetWordID ( BYTE * pWord )
 		m_pDict->ApplyStemmers ( pWord );
 
 	iLen = strlen ( (const char*)pWord );
+	assert ( iLen < 16+3*SPH_MAX_WORD_LEN - 2 );
+
 	if ( !iLen )
 		return 0;
 
@@ -8581,7 +8652,12 @@ struct CSphSimpleQueryParser
 		int				m_iQueryPos;
 	} m_dWords [ SPH_MAX_QUERY_WORDS ];
 
-	CSphSimpleQueryParser () {}
+	bool m_bUseStarDict;
+
+	CSphSimpleQueryParser ( bool bStarDict )
+		: m_bUseStarDict ( bStarDict )
+	{
+	}
 
 	int Parse ( const char * sQuery, ISphTokenizer * pTokenizer, CSphDict * pDict )
 	{
@@ -8592,6 +8668,9 @@ struct CSphSimpleQueryParser
 		CSphString sQbuf ( sQuery );
 		pTokenizer->SetBuffer ( (BYTE*)sQbuf.cstr(), strlen(sQuery) );
 
+		if ( m_bUseStarDict )
+			pTokenizer->EnableQueryParserMode ( true );
+
 		int iWords = 0;
 		int iPos = 0;
 
@@ -8613,6 +8692,9 @@ struct CSphSimpleQueryParser
 			}
 		}
 
+		if ( m_bUseStarDict )
+			pTokenizer->EnableQueryParserMode ( false );
+
 		return iWords;
 	}
 };
@@ -9211,6 +9293,10 @@ void CSphIndex_VLN::MatchAll ( const CSphQuery * pQuery, CSphQueryResult * pResu
 		if ( ++i != m_dQueryWords.GetLength() )
 			continue;
 
+		// forcibly break on DOCID_MAX matches
+		if ( docID==DOCID_MAX )
+			break;
+
 		// this is my match
 		CSphMatch & tMatch = m_dQueryWords[0].m_tDoc;
 
@@ -9410,7 +9496,7 @@ void CSphIndex_VLN::MatchAny ( const CSphQuery * pQuery, CSphQueryResult * pResu
 				iMinIndex = i;
 			}
 		}
-		if ( iActive==0 )
+		if ( iActive==0 || iMinID==DOCID_MAX )
 			break;
 
 		iLastMatchID = iMinID;
@@ -9860,6 +9946,10 @@ bool CSphIndex_VLN::MatchBoolean ( const CSphQuery * pQuery, CSphQueryResult * p
 			break;
 		iMinID = 1 + pMatch->m_iDocID;
 
+		// forcibly break on DOCID_MAX matches
+		if ( pMatch->m_iDocID==DOCID_MAX )
+			break;
+
 		// early reject by group id, doc id or timestamp
 		if ( EarlyReject ( *pMatch, pQuery ) )
 			continue;
@@ -10780,6 +10870,10 @@ bool CSphIndex_VLN::MatchExtendedV1 ( const CSphQuery * pQuery, CSphQueryResult
 			break;
 		iMinID = 1 + pAccept->m_iDocID;
 
+		// forcibly break on DOCID_MAX matches
+		if ( pAccept->m_iDocID==DOCID_MAX )
+			break;
+
 		CSphMatch * pReject = tReject.GetNextMatch ( pAccept->m_iDocID, 0, NULL, 0, NULL, pResult->m_sWarning );
 		if ( pReject && pReject->m_iDocID==pAccept->m_iDocID )
 			continue;
@@ -14385,7 +14479,14 @@ bool CSphIndex_VLN::MultiQuery ( CSphQuery * pQuery, CSphQueryResult * pResult,
 	}
 
 	CSphScopedPtr<CSphDict> tDict ( NULL );
-	CSphDict * pDict = SetupStarDict  ( tDict );
+	CSphDict * pDict = m_pDict;
+	CSphDict * pStarDict = SetupStarDict ( tDict );
+	bool bUseStarDict = false;
+	if ( pStarDict != pDict )
+	{
+		bUseStarDict = true;
+		pDict = pStarDict;
+	}
 
 	// setup calculations and result schema
 	if ( !SetupCalc ( pResult, pQuery ) )
@@ -14421,7 +14522,7 @@ bool CSphIndex_VLN::MultiQuery ( CSphQuery * pQuery, CSphQueryResult * pResult,
 		|| pQuery->m_eMode==SPH_MATCH_FULLSCAN );
 	if ( bSimpleQuery && pQuery->m_eMode!=SPH_MATCH_FULLSCAN )
 	{
-		CSphSimpleQueryParser qp;
+		CSphSimpleQueryParser qp ( bUseStarDict ) ;
 		int iRealWords = qp.Parse ( pQuery->m_sQuery.cstr(), m_pTokenizer, pDict );
 		if ( !iRealWords )
 		{
@@ -15301,11 +15402,10 @@ CSphDictCRC::WordformContainer * CSphDictCRC::LoadWordformContainer ( const char
 		if ( !pFrom ) continue; // FIXME! report parsing error
 
 		CSphString sFrom ( (const char*)pFrom );
-		const BYTE * pStart = pMyTokenizer->GetBufferPtr ();
-		const BYTE * pCur = pStart;
+		const char * pCur = pMyTokenizer->GetBufferPtr ();
 		while ( isspace(*pCur) ) pCur++;
 		if ( *pCur!='>' ) continue; // FIXME! report parsing error
-		pMyTokenizer->AdvanceBufferPtr ( pCur+1-pStart ); // FIXME! replace with SetBufferPtr()
+		pMyTokenizer->SetBufferPtr ( pCur+1 );
 
 		BYTE * pTo = pMyTokenizer->GetToken ();
 		if ( !pTo ) continue; // FIXME! report parsing error
@@ -16307,12 +16407,14 @@ void CSphHTMLStripper::Strip ( BYTE * sData )
 				char cEnd = *s;
 				s++;
 				while ( *s && *s!=cEnd ) *d++ = *s++;
+				*d++ = ' ';
 				if ( *s==cEnd ) s++;
 				continue;
 			}
 
 			// handle unquoted value
 			while ( *s && !isspace(*s) && *s!='>' ) *d++ = *s++;
+			*d++ = ' ';
 		}
 		if ( *s=='>' ) s++;
 
@@ -16761,6 +16863,8 @@ CSphSource_SQL::CSphSource_SQL ( const char * sName )
 	, m_iMultiAttr			( -1 )
 	, m_iFieldMVA			( 0 )
 	, m_iFieldMVAIterator	( 0 )
+	, m_bWarnedNull			( false )
+	, m_bWarnedMax			( false )
 {
 }
 
@@ -16982,6 +17086,9 @@ bool CSphSource_SQL::IterateHitsStart ( CSphString & sError )
 {
 	assert ( m_bSqlConnected );
 
+	m_bWarnedNull = false;
+	m_bWarnedMax = false;
+
 	// run pre-queries
 	ARRAY_FOREACH ( i, m_tParams.m_dQueryPre )
 	{
@@ -17163,6 +17270,8 @@ BYTE ** CSphSource_SQL::NextDocument ( CSphString & sError )
 				continue;
 			}
 
+			SqlDismissResult ();
+
 			// ok, we're over
 			ARRAY_FOREACH ( i, m_tParams.m_dQueryPost )
 			{
@@ -17184,7 +17293,19 @@ BYTE ** CSphSource_SQL::NextDocument ( CSphString & sError )
 		m_uMaxFetchedID = Max ( m_uMaxFetchedID, m_tDocInfo.m_iDocID );
 
 		if ( !m_tDocInfo.m_iDocID )
-			sphWarn ( "zero/NULL document_id, skipping" );
+		{
+			if ( !m_bWarnedNull )
+				sphWarn ( "zero/NULL document_id, skipping" );
+			m_bWarnedNull = true;
+		}
+
+		if ( m_tDocInfo.m_iDocID==DOCID_MAX )
+		{
+			if ( !m_bWarnedMax )
+				sphWarn ( "DOCID_MAX document_id, skipping" );
+			m_bWarnedMax = true;
+			m_tDocInfo.m_iDocID = 0;
+		}
 
 	} while ( !m_tDocInfo.m_iDocID );
 
@@ -18269,6 +18390,7 @@ private:
 	void			ConfigureFields ( const CSphVariant * pHead );
 	void			AddFieldToSchema ( const char * szName );
 	void			SetupFieldMatch ( CSphColumnInfo & tCol );
+	void			UnexpectedCharaters ( const char * pCharacters, int iLen, const char * szComment );
 
 #if USE_LIBEXPAT
 	bool			ParseNextChunk ( int iBufferLen, CSphString & sError );
@@ -19167,11 +19289,59 @@ void CSphSource_XMLPipe2::EndElement ( const char * szName )
 }
 
 
+void CSphSource_XMLPipe2::UnexpectedCharaters ( const char * pCharacters, int iLen, const char * szComment )
+{
+	const int MAX_WARNING_LENGTH = 64;
+
+	bool bSpaces = true;
+	for ( int i = 0; i < iLen && bSpaces; i++ )
+		if ( !sphIsSpace ( pCharacters [i] ) )
+			bSpaces = false;
+
+	if ( !bSpaces )
+	{
+		CSphString sWarning;
+#if USE_LIBEXPAT
+		sWarning.SetBinary ( pCharacters, Min ( iLen, MAX_WARNING_LENGTH ) );
+		sphWarn ( "source '%s': unexpected string '%s' (line=%d, pos=%d) %s",
+			m_tSchema.m_sName.cstr(), sWarning.cstr (),
+			XML_GetCurrentLineNumber(m_pParser), XML_GetCurrentColumnNumber(m_pParser), szComment );
+#endif
+
+#if USE_LIBXML
+		int i = 0;
+		for ( i = 0; i < iLen && sphIsSpace ( pCharacters [i] ); i++ );
+		sWarning.SetBinary ( pCharacters + i, Min ( iLen - i, MAX_WARNING_LENGTH ) );
+		for ( i = iLen - i - 1; i >= 0 && sphIsSpace ( sWarning.cstr () [i] ); i-- );
+		if ( i >= 0 )
+			((char *) sWarning.cstr ()) [i+1] = '\0';
+
+		sphWarn ( "source '%s': unexpected string '%s' %s", m_tSchema.m_sName.cstr(), sWarning.cstr(), szComment );
+#endif
+	}
+}
+
+
 void CSphSource_XMLPipe2::Characters ( const char * pCharacters, int iLen )
 {
-	if ( ( m_iCurAttr == -1 && m_iCurField == -1 ) && !m_bInId )
+	if ( !m_bInDocset )
+	{
+		UnexpectedCharaters ( pCharacters, iLen, "outside of <sphinx:docset>" );
 		return;
-	
+	}
+
+	if ( !m_bInSchema && !m_bInDocument )
+	{
+		UnexpectedCharaters ( pCharacters, iLen, "outside of <sphinx:schema> and <sphinx:document>" );
+		return;
+	}
+
+	if ( m_iCurAttr == -1 && m_iCurField == -1 )
+	{
+		UnexpectedCharaters ( pCharacters, iLen, m_bInDocument ? "inside <sphinx:document>" : ( m_bInSchema ? "inside <sphinx:schema>" : "" ) );
+		return;
+	}
+
 	if ( iLen + m_iFieldBufferLen < MAX_FIELD_LENGTH )
 	{
 		memcpy ( m_pFieldBuffer + m_iFieldBufferLen, pCharacters, iLen );
@@ -19369,17 +19539,17 @@ void CSphDoclistRecord::Write ( CSphMergeData * pData )
 	pWriter->ZipOffset ( m_iDocID - pData->m_iLastDocID );
 	pData->m_iLastDocID = m_iDocID;
 
-	assert ( pData->m_pMinRowitems );
-
 	if ( m_iRowitems )
 	{
 		if ( pData->m_eDocinfo == SPH_DOCINFO_INLINE )
 		{
+			assert ( pData->m_pMinRowitems );
 			for ( int i=0; i<m_iRowitems; i++ )
 				pWriter->ZipInt ( m_pRowitems[i] - pData->m_pMinRowitems[i] );
 		}
 		else if ( pData->m_eDocinfo == SPH_DOCINFO_EXTERN )
 		{
+			assert ( pData->m_pMinRowitems );
 			for ( int i=0; i<m_iRowitems; i++ )
 				pWriter->ZipInt ( m_pRowitems[i] );
 		}

+ 13 - 6
src/sphinx.h

@@ -425,6 +425,9 @@ public:
 	/// calc codepoint length
 	virtual int						GetCodepointLength ( int iCode ) const = 0;
 
+	/// allow tokens shorter than min_word_len to pass through when they match the short-token filter
+	inline void						EnableQueryParserMode ( bool bEnable ) { m_bShortTokenFilter = bEnable; }
+
 	/// get last token length, in codepoints
 	inline int						GetLastTokenLen () { return m_iLastTokenLen; }
 
@@ -451,19 +454,19 @@ public:
 	virtual bool					IsUtf8 () const = 0;
 
 	/// start buffer point of last token
-	virtual const BYTE *			GetTokenStart () const = 0;
+	virtual const char *			GetTokenStart () const = 0;
 
 	/// end buffer point of last token
-	virtual const BYTE *			GetTokenEnd () const = 0;
+	virtual const char *			GetTokenEnd () const = 0;
 
 	/// current buffer ptr
-	virtual const BYTE *			GetBufferPtr () const = 0;
+	virtual const char *			GetBufferPtr () const = 0;
 
 	/// buffer end
-	virtual const BYTE *			GetBufferEnd () const = 0;
+	virtual const char *			GetBufferEnd () const = 0;
 
-	/// advance ptr by iOffset bytes
-	virtual void					AdvanceBufferPtr ( int iOffset ) = 0;
+	/// set new buffer ptr (must be within current bounds)
+	virtual void					SetBufferPtr ( const char * sNewPtr ) = 0;
 
 protected:
 	static const int				MAX_SYNONYM_LEN		= 1024;	///< max synonyms map-from length, bytes
@@ -475,6 +478,7 @@ protected:
 	bool							m_bWasSpecial;				///< special token flag
 	bool							m_bEscaped;					///< backslash handling flag
 	int								m_iOvershortCount;			///< skipped overshort tokens count
+	bool							m_bShortTokenFilter;		///< short token filter flag
 
 	CSphTokenizerSettings			m_tSettings;				///< tokenizer settings
 	CSphSavedFile					m_tSynFileInfo;				///< synonyms file info
@@ -1114,6 +1118,9 @@ protected:
 
 	CSphSourceParams_SQL		m_tParams;
 
+	bool				m_bWarnedNull;
+	bool				m_bWarnedMax;
+
 	static const int			MACRO_COUNT = 2;
 	static const char * const	MACRO_VALUES [ MACRO_COUNT ];
 

+ 3 - 3
src/sphinxexcerpt.cpp

@@ -167,11 +167,11 @@ char * ExcerptGen_c::BuildExcerpt ( const ExcerptQuery_t & q, CSphDict * pDict,
 
 	pTokenizer->SetBuffer ( (BYTE*)q.m_sSource.cstr (), strlen ( q.m_sSource.cstr () ) );
 	BYTE * sWord;
-	const BYTE * pStartPtr = pTokenizer->GetBufferPtr ();
-	const BYTE * pLastTokenEnd = pStartPtr;
+	const char * pStartPtr = pTokenizer->GetBufferPtr ();
+	const char * pLastTokenEnd = pStartPtr;
 	while ( ( sWord = pTokenizer->GetToken() ) != NULL )
 	{
-		const BYTE * pTokenStart = pTokenizer->GetTokenStart ();
+		const char * pTokenStart = pTokenizer->GetTokenStart ();
 
 		if ( pTokenStart != pStartPtr )
 			AddJunk ( pLastTokenEnd - pStartPtr, pTokenStart - pLastTokenEnd );

+ 18 - 15
src/sphinxquery.cpp

@@ -220,6 +220,7 @@ CSphBooleanQueryExpr * CSphBooleanQueryParser::Parse ( const char * sQuery, cons
 	CSphScopedPtr<ISphTokenizer> pMyTokenizer ( pTokenizer->Clone ( true ) );
 	pMyTokenizer->AddSpecials ( "&|()-!" );
 	pMyTokenizer->SetBuffer ( (BYTE*)sBuffer.cstr(), strlen ( sBuffer.cstr() ) );
+	pMyTokenizer->EnableQueryParserMode ( true );
 
 	// iterate all tokens
 	const int QUERY_END = -1;
@@ -747,6 +748,8 @@ void CSphExtendedQueryNode::Submit ( CSphExtendedQueryNode * & pNew, bool bAny )
 		( IsPlain() && m_tAtom.m_dWords.GetLength()>1 && m_tAtom.m_iMaxDistance==-1 ) ||
 		( !IsPlain() && m_dChildren.GetLength()>1 ) ) )
 	{
+		assert ( IsPlain() || m_bAny==false );
+
 		// detach last word/child if we can, and build a new sublevel
 		CSphExtendedQueryNode * pChop;
 		if ( IsPlain() )
@@ -761,7 +764,7 @@ void CSphExtendedQueryNode::Submit ( CSphExtendedQueryNode * & pNew, bool bAny )
 			pChop = m_dChildren.Pop ();
 		}
 
-		pChop->Submit ( pNew, true );
+		pChop->Sublevelize ( pNew, true );
 		assert ( !pChop->IsPlain() );
 
 		Sublevelize ( pChop, false );
@@ -887,9 +890,8 @@ bool CSphExtendedQueryParser::ParseFields ( DWORD & uFields, ISphTokenizer * pTo
 	if ( m_dStack.GetLength() )
 		return Error ( "field specification is only allowed at top level" );
 
-	const char * pStart = (const char *)pTokenizer->GetBufferPtr ();
-	const char * pLastPtr = (const char *)pTokenizer->GetBufferEnd ();
-	const char * pPtr = pStart;
+	const char * pPtr = pTokenizer->GetBufferPtr ();
+	const char * pLastPtr = pTokenizer->GetBufferEnd ();
 
 	if ( pPtr==pLastPtr )
 		return true; // silently ignore trailing field operator
@@ -908,7 +910,7 @@ bool CSphExtendedQueryParser::ParseFields ( DWORD & uFields, ISphTokenizer * pTo
 	{
 		// handle @*
 		uFields = 0xFFFFFFFF;
-		pTokenizer->AdvanceBufferPtr ( pPtr+1-pStart );
+		pTokenizer->SetBufferPtr ( pPtr+1 );
 		return true;
 
 	} else if ( *pPtr=='(' )
@@ -921,7 +923,7 @@ bool CSphExtendedQueryParser::ParseFields ( DWORD & uFields, ISphTokenizer * pTo
 	// handle invalid chars
 	if ( !sphIsAlpha(*pPtr) )
 	{
-		pTokenizer->AdvanceBufferPtr ( pPtr-pStart ); // ignore and re-parse (FIXME! maybe warn?)
+		pTokenizer->SetBufferPtr ( pPtr ); // ignore and re-parse (FIXME! maybe warn?)
 		return true;
 	}
 	assert ( sphIsAlpha(*pPtr) ); // i think i'm paranoid
@@ -937,7 +939,7 @@ bool CSphExtendedQueryParser::ParseFields ( DWORD & uFields, ISphTokenizer * pTo
 		if ( !AddField ( uFields, pFieldStart, pPtr-pFieldStart, pSchema ) )
 			return false;
 
-		pTokenizer->AdvanceBufferPtr ( pPtr-pStart );
+		pTokenizer->SetBufferPtr ( pPtr );
 		if ( bNegate && uFields )
 			uFields = ~uFields;
 		return true;
@@ -976,7 +978,7 @@ bool CSphExtendedQueryParser::ParseFields ( DWORD & uFields, ISphTokenizer * pTo
 			if ( !AddField ( uFields, pFieldStart, pPtr-pFieldStart, pSchema ) )
 				return false;
 
-			pTokenizer->AdvanceBufferPtr ( pPtr-pStart+1 );
+			pTokenizer->SetBufferPtr ( pPtr+1 );
 			if ( bNegate && uFields )
 				uFields = ~uFields;
 			return true;
@@ -1039,6 +1041,7 @@ bool CSphExtendedQueryParser::Parse ( CSphExtendedQuery & tParsed, const char *
 	CSphString sBuffer ( sQuery );
 	CSphScopedPtr<ISphTokenizer> pMyTokenizer ( pTokenizer->Clone ( true ) );
 	pMyTokenizer->AddSpecials ( "()|-!@~\"/" );
+	pMyTokenizer->EnableQueryParserMode ( true );
 	pMyTokenizer->SetBuffer ( (BYTE*)sBuffer.cstr(), strlen ( sBuffer.cstr() ) );
 
 	// iterate all tokens
@@ -1077,10 +1080,9 @@ bool CSphExtendedQueryParser::Parse ( CSphExtendedQuery & tParsed, const char *
 			// the tokenizer will *not* return the number as a token!
 			if ( dState.Last()==XQS_PROXIMITY || dState.Last()==XQS_QUORUM )
 			{
-				const char * sStart = (const char*) pMyTokenizer->GetBufferPtr ();
-				const char * sEnd = (const char*) pMyTokenizer->GetBufferEnd ();
+				const char * sEnd = pMyTokenizer->GetBufferEnd ();
 
-				const char * p = sStart;
+				const char * p = pMyTokenizer->GetBufferPtr ();
 				while ( p<sEnd && isspace(*p) ) p++;
 
 				sToken = p;
@@ -1089,7 +1091,7 @@ bool CSphExtendedQueryParser::Parse ( CSphExtendedQuery & tParsed, const char *
 				if ( p>sToken )
 				{
 					// got a number, skip it
-					pMyTokenizer->AdvanceBufferPtr ( p-sStart );
+					pMyTokenizer->SetBufferPtr ( p );
 					bSpecial = false;
 
 				} else
@@ -1221,7 +1223,8 @@ bool CSphExtendedQueryParser::Parse ( CSphExtendedQuery & tParsed, const char *
 			if ( !ParseFields ( uFields, pMyTokenizer.Ptr (), pSchema ) )
 				return false;
 
-			uFields &= ( 1UL << pSchema->m_dFields.GetLength () ) - 1;
+			if ( pSchema->m_dFields.GetLength () != 32 )
+				uFields &=  ( 1UL << pSchema->m_dFields.GetLength () ) - 1;
 			continue;
 		}
 
@@ -1365,8 +1368,8 @@ static void xqDump ( CSphExtendedQueryNode * pNode, const CSphSchema & tSch, int
 	{
 		const CSphExtendedQueryAtom & tAtom = pNode->m_tAtom;
 		xqIndent ( iIndent );
-		printf ( "MATCH(%s,%d):",
-			tAtom.m_iField>=0 ? tSch.m_dFields[tAtom.m_iField].m_sName.cstr() : "-",
+		printf ( "MATCH(%d,%d):",
+			tAtom.m_uFields,
 			tAtom.m_iMaxDistance );
 		ARRAY_FOREACH ( i, tAtom.m_dWords )
 			printf ( " %s (pos %d)", tAtom.m_dWords[i].m_sWord.cstr(), tAtom.m_dWords[i].m_iAtomPos );

+ 14 - 4
src/sphinxsort.cpp

@@ -208,7 +208,7 @@ public:
 	public: \
 		_name ( const CSphAttrLocator & tLoc ) : m_tLocator ( tLoc ) {} \
 		virtual void GetLocator ( CSphAttrLocator & tOut ) const { tOut = m_tLocator; } \
-		virtual DWORD GetResultType () const { return m_tLocator.m_iBitCount>=(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
+		virtual DWORD GetResultType () const { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
 		virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); } \
 		virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const \
 		{
@@ -272,7 +272,7 @@ public:
 	virtual void GetLocator ( CSphAttrLocator & tOut ) const { tOut = m_tLocator; }
 	virtual SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); }
 	virtual SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const { return uValue/m_iDiv; }
-	virtual DWORD GetResultType () const { return m_tLocator.m_iBitCount>=(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; }
+	virtual DWORD GetResultType () const { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; }
 };
 
 //////////////////////////////////////////////////////////////////////////
@@ -497,7 +497,7 @@ int CSphUniqounter::CountNext ( SphGroupKey_t * pOutGroup )
 	*pOutGroup = uGroup;
 
 	int iCount = 1;
-	while ( m_pData[m_iCountPos].m_uGroup==uGroup )
+	while ( m_iCountPos<m_iLength && m_pData[m_iCountPos].m_uGroup==uGroup )
 	{
 		if ( m_pData[m_iCountPos].m_uValue!=uValue )
 			iCount++;
@@ -556,6 +556,7 @@ enum
 {
 	SPH_VATTR_ID			= -1,	///< tells match sorter to use doc id
 	SPH_VATTR_RELEVANCE		= -2,	///< tells match sorter to use match weight
+	SPH_VATTR_FLOAT			= 10000	///< tells match sorter to compare floats
 };
 
 
@@ -755,6 +756,8 @@ public:
 		m_iTotal = 0;
 
 		m_hGroup2Match.Reset ();
+		if ( DISTINCT )
+			m_tUniq.Resize ( 0 );
 	}
 
 	/// get entries count
@@ -1072,6 +1075,13 @@ struct MatchExpr_fn : public ISphMatchComparator
 	{ \
 		case SPH_VATTR_ID:			SPH_TEST_PAIR ( a.m_iDocID, b.m_iDocID, _idx ); break; \
 		case SPH_VATTR_RELEVANCE:	SPH_TEST_PAIR ( a.m_iWeight, b.m_iWeight, _idx ); break; \
+		case SPH_VATTR_FLOAT: \
+		{ \
+			register float aa = a.GetAttrFloat ( t.m_tLocator[_idx] ); \
+			register float bb = b.GetAttrFloat ( t.m_tLocator[_idx] ); \
+			SPH_TEST_PAIR ( aa, bb, _idx ) \
+			break; \
+		} \
 		default: \
 		{ \
 			register SphAttr_t aa = sphGetCompAttr<BITS> ( t, a, _idx ); \
@@ -1387,7 +1397,7 @@ static ESortClauseParseResult sphParseSortClause ( const char * sClause, const C
 				sError.SetSprintf ( "sort-by attribute '%s' not found", pTok );
 				return SORT_CLAUSE_ERROR;
 			}
-			tState.m_iAttr[iField] = iAttr;
+			tState.m_iAttr[iField] = ( tSchema.GetAttr(iAttr).m_eAttrType==SPH_ATTR_FLOAT ) ? SPH_VATTR_FLOAT : iAttr;
 			tState.m_tLocator[iField] = tSchema.GetAttr(iAttr).m_tLocator;
 		}
 	}

+ 13 - 0
src/sphinxstd.h

@@ -996,6 +996,19 @@ public:
 			return false;
 		return strncmp ( m_sValue, sPrefix, strlen(sPrefix) )==0;
 	}
+
+	void Chop ()
+	{
+		if ( m_sValue )
+		{
+			const char * sStart = m_sValue;
+			const char * sEnd = m_sValue + strlen(m_sValue) - 1;
+			while ( sStart<=sEnd && isspace(*sStart) ) sStart++;
+			while ( sStart<=sEnd && isspace(*sEnd) ) sEnd--;
+			memmove ( m_sValue, sStart, sEnd-sStart+1 );
+			m_sValue [ sEnd-sStart+1 ] = '\0';
+		}
+	}
 };
 
 /// string swapper

+ 150 - 3
src/tests.cpp

@@ -14,6 +14,7 @@
 #include "sphinx.h"
 #include "sphinxexpr.h"
 #include "sphinxutils.h"
+#include "sphinxquery.h"
 #include <math.h>
 
 //////////////////////////////////////////////////////////////////////////
@@ -38,7 +39,7 @@ bool CreateSynonymsFile ( const char * sMagic )
 		"MS DOS => MS-DOS\n"
 		"feat. => featuring\n"
 		"U.S. => US\n"
-		"U.S.A => USA\n"
+		"U.S.A. => USA\n"
 		"U.S.B. => USB\n"
 		"U.S.D. => USD\n"
 		"U.S.P. => USP\n"
@@ -119,6 +120,7 @@ void TestTokenizer ( bool bUTF8, bool bEscaped = false )
 			"2", "AT*&*T",						"at", NULL,
 			"2", "# OS/2's system install",		"OS/2", "system", "install", NULL,
 			"2", "IBM-s/OS/2/Merlin",			"ibm-s", "OS/2", "merlin", NULL,
+			"2", "U.S.A",						"US", NULL,
 			"2", "MS DOSS feat.Deskview.MS DOS",			"ms", "doss", "featuring", "deskview", "MS-DOS", NULL,
 			"2", sMagic,									"test", NULL,
 			"2", "U.S. U.S.A. U.S.A.F.",					"US", "USA", "USAF", NULL,
@@ -197,6 +199,44 @@ void TestTokenizer ( bool bUTF8, bool bEscaped = false )
 		assert ( !strcmp ( (char*)pTokenizer->GetToken(), sTok4 ) );
 		assert ( pTokenizer->GetToken()==NULL );
 
+		// test short word callbacks
+		printf ( "%s for short token handling\n", sPrefix );
+		ISphTokenizer * pShortTokenizer = pTokenizer->Clone ( bEscaped );
+
+		CSphRemapRange tStar ( '*', '*', '*' );
+		pShortTokenizer->AddCaseFolding ( tStar );
+
+		CSphTokenizerSettings tSettings = pShortTokenizer->GetSettings();
+		tSettings.m_iMinWordLen = 5;
+		pShortTokenizer->Setup ( tSettings );
+
+		pShortTokenizer->EnableQueryParserMode ( true );
+
+		char * dTestsShort[] =
+		{
+			"ab*",		"ab*",		NULL,
+			"*ab",		"*ab",		NULL,
+			"abcdef",	"abcdef",	NULL,
+			"ab *ab* abc", "*ab*",	NULL,
+			NULL
+		};
+
+		for ( int iCur=0; dTestsShort[iCur]; )
+		{
+			pShortTokenizer->SetBuffer ( (BYTE*)(dTestsShort [iCur]), strlen ( (const char*)dTestsShort [iCur] ) );
+			iCur++;
+			for ( BYTE * pToken=pShortTokenizer->GetToken(); pToken; pToken=pShortTokenizer->GetToken() )
+			{
+				assert ( dTestsShort[iCur] && strcmp ( (const char*)pToken, dTestsShort[iCur] )==0 );
+				iCur++;
+			}
+
+			assert ( !dTestsShort [iCur] );
+			iCur++;
+		}
+
+		SafeDelete ( pShortTokenizer );
+
 		// test uberlong synonym-only tokens
 		if ( iRun==2 )
 		{
@@ -337,7 +377,7 @@ void TestStripper ()
 	{
 		// source-data, index-attrs, remove-elements, expected-results
 		{ "<html>trivial test</html>", "", "", " trivial test " },
-		{ "<html>lets <img src=\"g/smth.jpg\" alt=\"nice picture\"> index attrs</html>", "img=alt", "", " lets nice picture index attrs " },
+		{ "<html>lets <img src=\"g/smth.jpg\" alt=\"nice picture\">index attrs</html>", "img=alt", "", " lets nice picture index attrs " },
 		{ "<html>   lets  also<script> whatever here; a<b</script>remove scripts", "", "script, style", "    lets  also remove scripts" },
 		{ "testing in<b><font color='red'>line</font> ele<em>men</em>ts", "", "", "testing inline elements" },
 		{ "testing non<p>inline</h1>elements", "", "", "testing non inline elements" },
@@ -345,7 +385,8 @@ void TestStripper ()
 		{ "testing &#1040;&#1041;&#1042; utf encoding", "", "", "testing \xD0\x90\xD0\x91\xD0\x92 utf encoding" },
 		{ "testing <1 <\" <\x80 <\xe0 <\xff </3 malformed tags", "", "", "testing <1 <\" <\x80 <\xe0 <\xff </3 malformed tags" },
 		{ "testing comm<!--comm-->ents", "", "", "testing comments" },
-		{ "&lt; &gt; &thetasym; &somethingverylong; &the", "", "", "< > \xCF\x91 &somethingverylong; &the" }
+		{ "&lt; &gt; &thetasym; &somethingverylong; &the", "", "", "< > \xCF\x91 &somethingverylong; &the" },
+		{ "testing <img src=\"g/smth.jpg\" alt=\"nice picture\" rel=anotherattr junk=throwaway>inline tags vs attr indexing", "img=alt,rel", "", "testing nice picture anotherattr inline tags vs attr indexing" }
 	};
 
 	for ( int iTest=0; iTest<(int)(sizeof(sTests)/sizeof(sTests[0])); iTest++ )
@@ -564,6 +605,111 @@ void BenchExpr ()
 
 //////////////////////////////////////////////////////////////////////////
 
+CSphString ReconstructNode ( const CSphExtendedQueryNode * pNode )
+{
+	CSphString sRes ( "" );
+
+	if ( pNode->IsPlain() )
+	{
+		// say just words to me
+		const CSphExtendedQueryAtom & tAtom = pNode->m_tAtom;
+		const CSphVector<CSphExtendedQueryAtomWord> & dWords = tAtom.m_dWords;
+		ARRAY_FOREACH ( i, dWords )
+			sRes.SetSprintf ( "%s %s", sRes.cstr(), dWords[i].m_sWord.cstr() );
+		sRes.Chop ();
+
+		if ( tAtom.m_bQuorum || tAtom.m_iMaxDistance>0 )
+		{
+			sRes.SetSprintf ( "\"%s\"%c%d", sRes.cstr(), tAtom.m_bQuorum ? '/' : '~', tAtom.m_iMaxDistance ); // quorum or proximity
+		
+		} else if ( dWords.GetLength()>1 )
+		{
+			if ( tAtom.m_iMaxDistance==0 )
+				sRes.SetSprintf ( "\"%s\"", sRes.cstr() ); // phrase
+			else
+				sRes.SetSprintf ( "( %s )", sRes.cstr() ); // just bag of words
+		}
+
+	} else
+	{
+		ARRAY_FOREACH ( i, pNode->m_dChildren )
+		{
+			if ( !i )
+				sRes = ReconstructNode ( pNode->m_dChildren[i] );
+			else
+				sRes.SetSprintf ( "%s %s %s", sRes.cstr(), pNode->m_bAny ? "OR" : "AND", ReconstructNode ( pNode->m_dChildren[i] ).cstr() );
+		}
+
+		if ( pNode->m_dChildren.GetLength()>1 )
+			sRes.SetSprintf ( "( %s )", sRes.cstr() );
+	}
+
+	return sRes;
+}
+
+CSphString ReconstructQuery ( const CSphExtendedQuery & tQuery )
+{
+	CSphString sAccept = ReconstructNode ( tQuery.m_pAccept );
+	CSphString sReject = ReconstructNode ( tQuery.m_pReject );
+
+	if ( !sReject.IsEmpty () )
+		sAccept.SetSprintf ( "( %s ) AND NOT ( %s )", sAccept.cstr(), sReject.cstr() );
+
+	return sAccept;
+}
+
+void TestQueryParser ()
+{
+	CSphString sTmp;
+
+	CSphSchema tSchema;
+	CSphColumnInfo tCol;
+	tCol.m_sName = "title"; tSchema.m_dFields.Add ( tCol );
+	tCol.m_sName = "content"; tSchema.m_dFields.Add ( tCol );
+
+	CSphDictSettings tDictSettings;
+	CSphScopedPtr<ISphTokenizer> pTokenizer ( sphCreateSBCSTokenizer () );
+	CSphScopedPtr<CSphDict> pDict ( sphCreateDictionaryCRC ( tDictSettings, pTokenizer.Ptr(), sTmp ) );
+	assert ( pTokenizer.Ptr() );
+	assert ( pDict.Ptr() );
+
+	struct QueryTest_t
+	{
+		const char *	m_sQuery;
+		const char *	m_sReconst;
+	};
+	const QueryTest_t dTest[] =
+	{
+		{ "aaa bbb ccc",					"( aaa AND bbb AND ccc )" },
+		{ "aaa|bbb ccc",					"( ( aaa OR bbb ) AND ccc )" },
+		{ "aaa bbb|ccc",					"( aaa AND ( bbb OR ccc ) )" },
+		{ "aaa (bbb ccc)|ddd",				"( aaa AND ( ( bbb AND ccc ) OR ddd ) )" },
+		{ "aaa bbb|(ccc ddd)",				"( aaa AND ( bbb OR ( ccc AND ddd ) ) )" },
+		{ "aaa bbb|(ccc ddd)|eee|(fff)",	"( aaa AND ( ( ( bbb OR ( ccc AND ddd ) ) OR eee ) OR fff ) )" },
+		{ "aaa bbb|(ccc ddd) eee|(fff)",	"( ( aaa AND ( bbb OR ( ccc AND ddd ) ) ) AND ( eee OR fff ) )" },
+		{ "aaa (ccc ddd)|bbb|eee|(fff)",	"( aaa AND ( ( ( ( ccc AND ddd ) OR bbb ) OR eee ) OR fff ) )" },
+		{ "aaa (ccc ddd)|bbb eee|(fff)",	"( ( aaa AND ( ( ccc AND ddd ) OR bbb ) ) AND ( eee OR fff ) )" },
+		{ "aaa \"bbb ccc\"~5|ddd",			"( aaa AND ( \"bbb ccc\"~5 OR ddd ) )" },
+		{ "aaa bbb|\"ccc ddd\"~5",			"( aaa AND ( bbb OR \"ccc ddd\"~5 ) )" },
+	};
+
+	int nTests = sizeof(dTest)/sizeof(dTest[0]);
+	for ( int i=0; i<nTests; i++ )
+	{
+		printf ( "testing query parser, test %d/%d... ", i+1, nTests );
+
+		CSphExtendedQuery tQuery;
+		sphParseExtendedQuery ( tQuery, dTest[i].m_sQuery, pTokenizer.Ptr(), &tSchema, pDict.Ptr() );
+
+		CSphString sReconst = ReconstructQuery ( tQuery );
+		assert ( sReconst==dTest[i].m_sReconst );
+
+		printf ( "ok\n" );
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
 int main ()
 {
 	printf ( "RUNNING INTERNAL LIBSPHINX TESTS\n\n" );
@@ -574,6 +720,7 @@ int main ()
 	BenchTokenizer ( true );
 	BenchExpr ();
 #else
+	TestQueryParser ();
 	TestStripper ();
 	TestTokenizer ( false );
 	TestTokenizer ( true );

+ 125 - 17
test/helpers.inc

@@ -31,12 +31,12 @@ function CreateDB ( $db_drop, $db_create, $db_insert )
 }
 
 
-function RunIndexer ( &$error )
+function RunIndexer ( &$error, $params )
 {
 	global $indexer_path;
 
 	$retval = 0;
-	exec ( "$indexer_path --quiet --config config.conf --all", $error, $retval );
+	exec ( "$indexer_path --quiet --config config.conf $params", $error, $retval );
 
 	$error = join ( "\n", $error );
 	return ( $retval==0 && !empty($error) ) ? 2 : $retval;
@@ -149,6 +149,7 @@ class SphinxConfig
 	private $_queries;
 	private $_query_settings;
 	private $_query_attributes;
+	private $_indexer_runs;
 	private $_custom_test;
 	private	$_sd_address;
 	private	$_sd_port;
@@ -171,6 +172,7 @@ class SphinxConfig
 		$this->_results			= array ();
 		$this->_results_model	= array ();
 		$this->_query_attributes = array ();
+		$this->_indexer_runs	= array ();
 		$this->_num_agents		= 1;
 		$this->_subtest 		= 0;
 		$this->_subtestcount	= 0;
@@ -314,6 +316,14 @@ class SphinxConfig
 		if ( $custom )
 			$this->_custom_test = $custom->nodeValue;
 
+		// extract indexer run params
+		$indexer_run = GetFirstChild ( $xml, "indexer" );
+		if ( $indexer_run )
+		{
+			foreach ( ChildrenArray ( $indexer_run, "run" ) as $run )
+				$this->_indexer_runs [] = $run->nodeValue;
+		}
+
 		// extract queries
 		$qs = GetFirstChild ( $xml, "queries" );
 		if ( $qs )
@@ -351,6 +361,7 @@ class SphinxConfig
 				switch ( $sortmode_s )
 				{
 					case "":			$sortmode_s = "(default)"; break;
+					case "extended":	$sortmode = SPH_SORT_EXTENDED; break;
 					case "expr":		$sortmode = SPH_SORT_EXPR; break;
 					default:
 						printf ( "$config_file: unknown sorting mode '%s'\n", $sortmode_s );
@@ -360,11 +371,41 @@ class SphinxConfig
 				$res["sortmode_s" ] = $sortmode_s;
 				$res["sortby"] = $q->getAttribute("sortby");
 
+				// groupby
+				$groupfunc = 0;
+				$groupfunc_s = $q->getAttribute("groupfunc");
+				switch ( $groupfunc_s )
+				{
+					case "":			$groupfunc_s = "(default)"; break;
+					case "day":			$groupfunc = SPH_GROUPBY_DAY; break;
+					case "week":		$groupfunc = SPH_GROUPBY_WEEK; break;
+					case "month":		$groupfunc = SPH_GROUPBY_MONTH; break;
+					case "year":		$groupfunc = SPH_GROUPBY_YEAR; break;
+					case "attr":		$groupfunc = SPH_GROUPBY_ATTR; break;
+					case "attrpair":	$groupfunc = SPH_GROUPBY_ATTRPAIR; break;
+					default:
+						printf ( "$config_file: unknown groupby func '%s'\n", $groupfunc_s );
+						return false;
+				}
+
+				$res["groupfunc"] = $groupfunc;
+				$res["groupfunc_s"] = $groupfunc_s;
+				$res["groupattr"] = $q->getAttribute("groupattr");
+				$groupsort = $q->getAttribute("groupsort");
+				if ( $groupsort == "" )
+					$groupsort = "@group desc";
+
+				$res["groupsort"] = $groupsort;
+				$res["groupdistinct"] = $q->getAttribute("groupdistinct");
+
+				$res["resarray"] = $q->getAttribute("resarray");
+				$res["index"] = $q->getAttribute("index");
+
 				// add query
 				$this->_queries[] = $res;
 			}
-
-		} else
+		}
+		else
 		{
 			// legacy
 			$qs = array ();
@@ -397,6 +438,19 @@ class SphinxConfig
 	}
 
 
+	function RunIndexerEx ( &$error )
+	{
+		foreach ( $this->_indexer_runs as $param )
+		{
+			$retval = RunIndexer ( $error, $param );
+			if ( $retval != 0 )
+				return $retval;
+		}
+
+		return 0;
+	}
+
+
 	function RunQuery ( $index, &$error )
 	{
 		global $sd_address, $sd_port, $action_retries, $action_wait_timeout;
@@ -416,12 +470,20 @@ class SphinxConfig
 				$results = 0;
 				if ( empty($this->_query_settings) )
 				{
+					$my_index = $index;
 					if ( @$qinfo["mode"] )		$cl->SetMatchMode ( $qinfo["mode"] );
 					if ( @$qinfo["sortmode"] )	$cl->SetSortMode ( $qinfo["sortmode"], $qinfo["sortby"] );
-					if ( @$qinfo["filter"] )		$cl->SetFilter ( $qinfo["filter"], array ( $qinfo["filtervalue"] ) );
-	            	$results = $cl->Query ( $query, $index );
-
-            	} else
+					if ( @$qinfo["filter"] )	$cl->SetFilter ( $qinfo["filter"], array ( $qinfo["filtervalue"] ) );
+					if ( @$qinfo["groupattr"] )	$cl->SetGroupBy ( $qinfo["groupattr"], $qinfo["groupfunc"], $qinfo["groupsort"] );
+					if ( @$qinfo["groupdistinct"] )	$cl->SetGroupDistinct ( $qinfo["groupdistinct"] );
+					if ( @$qinfo["resarray"] )	$cl->SetArrayResult ( true );
+					if ( @$qinfo["index"] )		$my_index = $qinfo["index"];
+
+	            	$results = $cl->Query ( $query, $my_index );
+					if ( @$qinfo["resarray"] )
+						$results ["resarray"] = 1;
+                }
+				else
 				{
 					$run_func = create_function( '$client, $query, $index, &$results', $this->_query_settings );
 					$run_func ( $cl, $query, $index, $results ); 
@@ -561,11 +623,23 @@ class SphinxConfig
 			return var_export ( $result, true )."\n";
 
 		$qinfo = $this->_queries[$nquery-1];
-		$str = "--- Query $nquery (mode=$qinfo[mode_s]) ---\n";
+		if ( array_key_exists ( "index", $qinfo ) && $qinfo ["index"] != '*' )
+			$str = "--- Query $nquery (mode=$qinfo[mode_s],index=$qinfo[index]) ---\n";
+		else
+			$str = "--- Query $nquery (mode=$qinfo[mode_s]) ---\n";
+
+		if ( @$qinfo["groupattr"] )
+			$str .= "GroupBy: attr: '".$qinfo["groupattr"]."' func: '".$qinfo["groupfunc_s"]."' sort: '".$qinfo["groupsort"]."'\n";
+
+		if ( @$qinfo["sortmode"] == SPH_SORT_EXPR )
+			$str .= "Sort: expr: ".$qinfo["sortby"]."\n";
+
 		$str .= "Query '$result[query]': retrieved $result[total_found] of $result[total] matches in $result[time] sec.\n";
 		if ( $result["warning"] )
 			$str .= "Warning: $result[warning]\n";
 
+		$array_result = @$result["resarray"];
+
 		if ( isset($result["words"]) && is_array($result["words"]) )
 		{
 			$str .= "Word stats:\n";
@@ -584,13 +658,18 @@ class SphinxConfig
 			$str .= "Matches:\n";
 			foreach ( $result ["matches"] as $doc => $docinfo )
 			{
+				if ( $array_result )
+					$doc_id = $docinfo ["id"];
+				else
+					$doc_id = $doc;
+
 				$weight = $docinfo ["weight"];
 
-				$str .= "$n. doc_id=$doc, weight=$weight";
+				$str .= "$n. doc_id=$doc_id, weight=$weight";
 
 				if ( empty ( $this->_query_attributes ) )
 				{
-					$query_res = mysql_query ( "select * from test_table where document_id = $doc" );
+					$query_res = mysql_query ( "select * from test_table where document_id = $doc_id" );
 
 					if ( $query_res === FALSE )
 						$str .= "\n";
@@ -603,9 +682,23 @@ class SphinxConfig
 								if ( array_search ( $col_name, $result ["fields"] ) !== FALSE )
 								   	$str .= " $col_name=\"$col_content\"";
 							}
-
-							$str .= "\n";
 				    	}
+
+						foreach ( $docinfo ["attrs"] as $attr => $value )
+						{
+							if ( is_array ( $value ) )
+							{
+								$str .= " $attr=\"";
+								foreach ( $value as $v )
+									$str .= $v." ";
+
+								$str .= "\"";
+							}
+							else
+						   		$str .= " $attr=\"$value\"";
+						}
+
+						$str .= "\n";
 					}
 				}
 				else
@@ -675,8 +768,8 @@ class SphinxConfig
 				continue;
 			}
 
-			$result_f_cur = $this->FormatResultSet ( $nquery, $this->_results[$nquery] );
-			$result_f_ref = $this->FormatResultSet ( $nquery, $ref );
+			$result_f_cur = $this->FormatResultSet ( $nquery + 1, $this->_results[$nquery] );
+			$result_f_ref = $this->FormatResultSet ( $nquery + 1, $ref );
 
 			file_put_contents ( "current", $result_f_cur );
 			file_put_contents ( "reference", $result_f_ref );
@@ -872,7 +965,7 @@ function RunTest ( $test_dir )
 
 		EraseDirContents ( $indexer_data_path );
 
-		$indexer_ret = RunIndexer ( $error );
+		$indexer_ret = RunIndexer ( $error, "--all" );
 
 		if ( $indexer_ret==1 )
 		{
@@ -881,7 +974,22 @@ function RunTest ( $test_dir )
 
 			continue;
 
-		} else if ( $indexer_ret==2 )
+		}
+		else if ( $indexer_ret==2 )
+		{
+			fwrite ( $report, "$error\n" );
+		}
+
+		$indexer_ret = $config->RunIndexerEx ( $error );
+		if ( $indexer_ret==1 )
+		{
+			if ( !HandleFailure ( $config, $report, $error, $nfailed ) )
+				$log .= "\tsubtest $subtest: error running indexer; see $report_file\n";
+
+			continue;
+
+		}
+		else if ( $indexer_ret==2 )
 		{
 			fwrite ( $report, "$error\n" );
 		}

File diff suppressed because it is too large
+ 0 - 0
test/test_02/model.bin


File diff suppressed because it is too large
+ 0 - 0
test/test_20/model.bin


+ 122 - 0
test/test_20/test.xml

@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>groupby</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query_pre 	= set time_zone='+0:00'
+	sql_query		= SELECT document_id, tag, UNIX_TIMESTAMP(time) as time, mva, body FROM test_table
+	sql_attr_uint	= tag
+	sql_attr_timestamp = time
+	sql_attr_multi	= uint mva from field;
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+	min_word_len	= 1
+	min_prefix_len	= 1
+	enable_star		= 1
+    charset_type 	= utf-8
+}
+
+source srctest2 : srctest
+{
+	sql_query		= SELECT document_id+10, tag+20 AS tag, UNIX_TIMESTAMP(time) as time, mva, body FROM test_table
+}
+
+index test2 : test
+{
+	source			= srctest2
+	path			= <data_path/>/test2
+}
+
+source srcmini1 : srctest
+{
+	sql_query		= SELECT document_id, tag, UNIX_TIMESTAMP(time) as time, mva, body FROM test_table WHERE document_id IN (1,7)
+}
+
+index mini1 : test
+{
+	source			= srcmini1
+	path			= <data_path/>/mini1
+}
+
+source srcmini2 : srctest
+{
+	sql_query		= SELECT document_id, tag, UNIX_TIMESTAMP(time) as time, mva, body FROM test_table WHERE document_id IN (1)
+}
+
+index mini2 : test
+{
+	source			= srcmini2
+	path			= <data_path/>/mini2
+}
+</config>
+
+<queries>
+<query mode="extended2" index="test" groupattr="tag" groupfunc="attr" groupsort="@group desc"></query>
+<query mode="extended2" index="test" groupattr="tag" groupfunc="attr" groupsort="@group asc"></query>
+<query mode="extended2" index="test" groupattr="tag" groupfunc="attr" groupsort="@count desc"></query>
+<query mode="extended2" index="test" groupattr="tag" groupfunc="attr" groupsort="@count asc"></query>
+<query mode="extended2" index="test" groupattr="tag" groupfunc="attr" groupsort="tag desc"></query>
+<query mode="extended2" index="test" groupattr="time" groupfunc="day">test*</query>
+<query mode="extended2" index="test" groupattr="time" groupfunc="week">test*</query>
+<query mode="extended2" index="test" groupattr="time" groupfunc="month">test*</query>
+<query mode="extended2" index="test" groupattr="time" groupfunc="year">test*</query>
+<query mode="extended2" index="test" groupattr="mva" groupfunc="attr" groupsort="@group desc" resarray="1">test*</query>
+<query mode="extended2" index="test" groupattr="mva" groupfunc="attr" groupsort="@group asc" resarray="1">test*</query>
+
+<query mode="extended2" index="test" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
+<query mode="extended2" index="test2" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
+<query mode="extended2" index="test,test2" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
+
+<query mode="extended2" index="mini1,mini2" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`tag` int(11) NOT NULL default '0',
+	`time` datetime,
+	`mva` varchar(255) NOT NULL default '',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1, 1, '2007-12-01 12:00:00', '1,2,3',	'test1' ),
+( 2, 2, '2006-05-02 12:00:00', '3,4,5',	'test2' ),
+( 3, 2, '2005-06-03 12:00:00', '4,5,6',	'test3' ),
+( 4, 3, '2005-07-04 12:00:00', '1,2,3',	'test4' ),
+( 5, 3, '2003-09-05 12:00:00', '3,5',	'test5' ),
+( 6, 3, '2003-09-06 12:00:00', '3,5',	'test6' ),
+( 7, 4, '2007-12-01 12:00:00', '4,5',	'test7' ),
+( 8, 4, '2006-05-02 12:00:00', '4,5,6',	'test8' ),
+( 9, 4, '2005-06-03 12:00:00', '4',		'test9' ),
+( 10, 4,'2005-07-04 12:00:00', '3,4,5',	'test10' )
+</db_insert>
+
+</test>

File diff suppressed because it is too large
+ 0 - 0
test/test_21/model.bin


+ 87 - 0
test/test_21/test.xml

@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>sorting</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT document_id,tag1,tag2,tag3,tag4,body FROM test_table
+	sql_attr_uint	= tag1
+	sql_attr_uint	= tag2:8
+	sql_attr_float	= tag3
+	sql_attr_float	= tag4
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+	min_word_len	= 1
+	min_prefix_len	= 1
+	enable_star		= 1
+    charset_type 	= utf-8
+}
+</config>
+
+<queries>
+<query mode="extended2" sortmode="expr" sortby="tag1/tag2-tag3">test*</query>
+<query mode="extended2" sortmode="expr" sortby="tag1+tag2*tag3">test*</query>
+<query mode="extended2" sortmode="expr" sortby="abs(tag3)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="min(tag1,tag2)*2">test*</query>
+<query mode="extended2" sortmode="expr" sortby="ceil(tag4)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="floor(tag4)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="sin(tag4)+cos(tag4)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="ln(tag1)+log2(tag1)+log10(tag1)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="exp(tag1)+sqrt(tag2)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="if (tag1=tag2, 1, 0)">test*</query>
+<query mode="extended2" sortmode="expr" sortby="tag2">test*</query>
+<query mode="extended2" sortmode="expr" sortby="tag3">test*</query>
+<query mode="extended2" sortmode="extended" sortby="tag2 DESC">test*</query>
+<query mode="extended2" sortmode="extended" sortby="tag3 DESC">test*</query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`tag1` int(11) NOT NULL default '0',
+	`tag2` int(11) NOT NULL default '0',
+	`tag3` FLOAT,
+	`tag4` FLOAT,
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1, 1, 5, -4.0, 1.5,	'test1' ),
+( 2, 2, 7, 12.0, 3.14,	'test2' ),
+( 3, 2, 4, -8.0, 5.2,	'test3' ), 
+( 4, 3, 3, 5.0,  7.6,	'test4' ),
+( 5, 3, 8, 1.0,  45.2, 	'test5' ), 
+( 6, 3, 9, 4.0,  17.2,	'test6' ), 
+( 7, 4, 4, 6.0,  -0.8,	'test7' ), 
+( 8, 4, 1, 7.0,  99.0,	'test8' ), 
+( 9, 4, 3, 2.0,  -16.1,	'test9' ), 
+( 10,4, 2, 12.0, 0.0,	'test10' )
+</db_insert>
+
+</test>

+ 1 - 0
test/test_22/model.bin

@@ -0,0 +1 @@
+a:1:{i:0;a:2:{i:0;a:11:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:7:"matches";a:2:{i:1;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}i:2;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}}s:5:"total";s:1:"2";s:11:"total_found";s:1:"2";s:4:"time";s:5:"0.020";s:5:"words";a:1:{s:7:"wrapped";a:2:{s:4:"docs";s:1:"2";s:4:"hits";s:1:"2";}}s:5:"query";s:6:"FOLDED";}i:1;a:11:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:7:"matches";a:2:{i:1;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}i:2;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}}s:5:"total";s:1:"2";s:11:"total_found";s:1:"2";s:4:"time";s:5:"0.000";s:5:"words";a:1:{s:7:"wrapped";a:2:{s:4:"docs";s:1:"2";s:4:"hits";s:1:"2";}}s:5:"query";s:6:"folded";}}}

+ 57 - 0
test/test_22/test.xml

@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>wordforms vs case folding</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM test_table
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+    charset_type 	= utf-8
+	wordforms		= wordforms.txt
+}
+</config>
+
+<queries>
+<query>FOLDED</query>
+<query>folded</query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1, 'test FoLdEd' ),
+( 2, 'folded' )
+</db_insert>
+
+</test>

File diff suppressed because it is too large
+ 0 - 0
test/test_23/model.bin


+ 114 - 0
test/test_23/test.xml

@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>min_word_len vs queries (part 1)</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM test_table
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+    charset_type 	= utf-8
+	enable_star		= 1
+
+    <Dynamic>
+      <Variant>	min_word_len = 3 </Variant>
+      <Variant>	min_word_len = 4 </Variant>
+      <Variant>	min_word_len = 5 </Variant>
+    </Dynamic>
+
+    <Dynamic>
+      <Variant>	min_prefix_len = 3 </Variant>
+      <Variant>	min_prefix_len = 4 </Variant>
+      <Variant>	min_prefix_len = 5 </Variant>
+    </Dynamic>
+}
+</config>
+
+<queries>
+<query>a</query>
+<query>bb</query>
+<query>ccc</query>
+<query>dddd</query>
+<query>eeeee</query>
+<query>ffffff</query>
+<query>ggggggg</query>
+<query>b*</query>
+<query>cc*</query>
+<query>ddd*</query>
+<query>eeee*</query>
+<query>fffff*</query>
+<query>gggggg*</query>
+<query mode="extended2">a</query>
+<query mode="extended2">bb</query>
+<query mode="extended2">ccc</query>
+<query mode="extended2">dddd</query>
+<query mode="extended2">eeeee</query>
+<query mode="extended2">ffffff</query>
+<query mode="extended2">ggggggg</query>
+<query mode="extended2">b*</query>
+<query mode="extended2">cc*</query>
+<query mode="extended2">ddd*</query>
+<query mode="extended2">eeee*</query>
+<query mode="extended2">fffff*</query>
+<query mode="extended2">gggggg*</query>
+<query mode="phrase">hello world</query>
+<query mode="phrase">hello me world</query>
+<query mode="phrase">hello two world</query>
+<query mode="phrase">hello four world</query>
+<query mode="phrase">hello me* world</query>
+<query mode="phrase">hello two* world</query>
+<query mode="phrase">hello four* world</query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1,  'a' ),
+( 2,  'bb' ),
+( 3,  'ccc' ),
+( 4,  'dddd' ),
+( 5,  'eeeee' ),
+( 6,  'ffffff' ),
+( 7,  'ggggggg' ),
+( 8,  'hello world' ),
+( 9,  'hello a world' ),
+( 10, 'hello aa world' ),
+( 11, 'hello aaa world' ),
+( 12, 'hello aaaa world' ),
+( 13, 'hello aaaaa world' ),
+( 14, 'hello me world' ),
+( 15, 'hello two world' ),
+( 16, 'hello four world' )
+</db_insert>
+
+</test>

File diff suppressed because it is too large
+ 0 - 0
test/test_24/model.bin


+ 113 - 0
test/test_24/test.xml

@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>min_word_len vs queries (part 2)</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM test_table
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+    charset_type 	= sbcs
+	enable_star		= 1
+
+    <Dynamic>
+      <Variant>	min_word_len = 3 </Variant>
+      <Variant>	min_word_len = 4 </Variant>
+      <Variant>	min_word_len = 5 </Variant>
+    </Dynamic>
+
+    <Dynamic>
+      <Variant>	min_prefix_len = 3 </Variant>
+      <Variant>	min_prefix_len = 4 </Variant>
+      <Variant>	min_prefix_len = 5 </Variant>
+    </Dynamic>
+}
+</config>
+
+<queries>
+<query>a</query>
+<query>bb</query>
+<query>ccc</query>
+<query>dddd</query>
+<query>eeeee</query>
+<query>ffffff</query>
+<query>ggggggg</query>
+<query>b*</query>
+<query>cc*</query>
+<query>ddd*</query>
+<query>eeee*</query>
+<query>fffff*</query>
+<query>gggggg*</query>
+<query mode="extended2">a</query>
+<query mode="extended2">bb</query>
+<query mode="extended2">ccc</query>
+<query mode="extended2">dddd</query>
+<query mode="extended2">eeeee</query>
+<query mode="extended2">ffffff</query>
+<query mode="extended2">ggggggg</query>
+<query mode="extended2">b*</query>
+<query mode="extended2">cc*</query>
+<query mode="extended2">ddd*</query>
+<query mode="extended2">eeee*</query>
+<query mode="extended2">fffff*</query>
+<query mode="extended2">gggggg*</query>
+<query mode="phrase">hello me world</query>
+<query mode="phrase">hello two world</query>
+<query mode="phrase">hello four world</query>
+<query mode="phrase">hello me* world</query>
+<query mode="phrase">hello two* world</query>
+<query mode="phrase">hello four* world</query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1,  'a' ),
+( 2,  'bb' ),
+( 3,  'ccc' ),
+( 4,  'dddd' ),
+( 5,  'eeeee' ),
+( 6,  'ffffff' ),
+( 7,  'ggggggg' ),
+( 8,  'hello world' ),
+( 9,  'hello a world' ),
+( 10, 'hello aa world' ),
+( 11, 'hello aaa world' ),
+( 12, 'hello aaaa world' ),
+( 13, 'hello aaaaa world' ),
+( 14, 'hello me world' ),
+( 15, 'hello two world' ),
+( 16, 'hello four world' )
+</db_insert>
+
+</test>

File diff suppressed because it is too large
+ 0 - 0
test/test_25/model.bin


+ 114 - 0
test/test_25/test.xml

@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>min_word_len vs queries (part 3)</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srctest
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM test_table
+}
+
+index test
+{
+	source			= srctest
+	path			= <data_path/>/test
+    charset_type 	= utf-8
+	exceptions		= synonyms.txt
+	enable_star		= 1
+
+    <Dynamic>
+      <Variant>	min_word_len = 3 </Variant>
+      <Variant>	min_word_len = 4 </Variant>
+      <Variant>	min_word_len = 5 </Variant>
+    </Dynamic>
+
+    <Dynamic>
+      <Variant>	min_prefix_len = 3 </Variant>
+      <Variant>	min_prefix_len = 4 </Variant>
+      <Variant>	min_prefix_len = 5 </Variant>
+    </Dynamic>
+}
+</config>
+
+<queries>
+<query>a</query>
+<query>bb</query>
+<query>ccc</query>
+<query>dddd</query>
+<query>eeeee</query>
+<query>ffffff</query>
+<query>ggggggg</query>
+<query>b*</query>
+<query>cc*</query>
+<query>ddd*</query>
+<query>eeee*</query>
+<query>fffff*</query>
+<query>gggggg*</query>
+<query mode="extended2">a</query>
+<query mode="extended2">bb</query>
+<query mode="extended2">ccc</query>
+<query mode="extended2">dddd</query>
+<query mode="extended2">eeeee</query>
+<query mode="extended2">ffffff</query>
+<query mode="extended2">ggggggg</query>
+<query mode="extended2">b*</query>
+<query mode="extended2">cc*</query>
+<query mode="extended2">ddd*</query>
+<query mode="extended2">eeee*</query>
+<query mode="extended2">fffff*</query>
+<query mode="extended2">gggggg*</query>
+<query mode="phrase">hello me world</query>
+<query mode="phrase">hello two world</query>
+<query mode="phrase">hello four world</query>
+<query mode="phrase">hello me* world</query>
+<query mode="phrase">hello two* world</query>
+<query mode="phrase">hello four* world</query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1,  'a' ),
+( 2,  'bb' ),
+( 3,  'ccc' ),
+( 4,  'dddd' ),
+( 5,  'eeeee' ),
+( 6,  'ffffff' ),
+( 7,  'ggggggg' ),
+( 8,  'hello world' ),
+( 9,  'hello a world' ),
+( 10, 'hello aa world' ),
+( 11, 'hello aaa world' ),
+( 12, 'hello aaaa world' ),
+( 13, 'hello aaaaa world' ),
+( 14, 'hello me world' ),
+( 15, 'hello two world' ),
+( 16, 'hello four world' )
+</db_insert>
+
+</test>

+ 1 - 0
test/test_26/model.bin

@@ -0,0 +1 @@
+a:1:{i:0;a:5:{i:0;a:11:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:7:"matches";a:1:{i:1;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}}s:5:"total";s:1:"1";s:11:"total_found";s:1:"1";s:4:"time";s:5:"0.000";s:5:"words";a:1:{s:5:"main1";a:2:{s:4:"docs";s:1:"1";s:4:"hits";s:1:"1";}}s:5:"query";s:5:"main1";}i:1;a:11:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:7:"matches";a:1:{i:2;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}}s:5:"total";s:1:"1";s:11:"total_found";s:1:"1";s:4:"time";s:5:"0.000";s:5:"words";a:1:{s:5:"main2";a:2:{s:4:"docs";s:1:"1";s:4:"hits";s:1:"1";}}s:5:"query";s:5:"main2";}i:2;a:11:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:7:"matches";a:1:{i:3;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}}s:5:"total";s:1:"1";s:11:"total_found";s:1:"1";s:4:"time";s:5:"0.000";s:5:"words";a:1:{s:6:"delta1";a:2:{s:4:"docs";s:1:"1";s:4:"hits";s:1:"1";}}s:5:"query";s:6:"delta1";}i:3;a:11:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:7:"matches";a:1:{i:4;a:2:{s:6:"weight";s:1:"1";s:5:"attrs";a:0:{}}}s:5:"total";s:1:"1";s:11:"total_found";s:1:"1";s:4:"time";s:5:"0.000";s:5:"words";a:1:{s:6:"delta2";a:2:{s:4:"docs";s:1:"1";s:4:"hits";s:1:"1";}}s:5:"query";s:6:"delta2";}i:4;a:10:{s:5:"error";s:0:"";s:7:"warning";s:0:"";s:6:"status";i:0;s:6:"fields";a:1:{i:0;s:4:"body";}s:5:"attrs";a:0:{}s:5:"total";s:1:"0";s:11:"total_found";s:1:"0";s:4:"time";s:5:"0.000";s:5:"words";a:1:{s:6:"gamma0";a:2:{s:4:"docs";s:1:"0";s:4:"hits";s:1:"0";}}s:5:"query";s:6:"gamma0";}}}

+ 80 - 0
test/test_26/test.xml

@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="utf-8"?>
+<test>
+
+<name>basic main/delta merge</name>
+
+<config>
+indexer
+{
+	mem_limit		= 16M
+}
+
+searchd
+{
+	<searchd_settings/>
+}
+
+source srcmain
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM test_table WHERE document_id in (1,2)
+}
+
+source srcdelta
+{
+	type			= mysql
+	<sql_settings/>
+
+	sql_query		= SELECT * FROM test_table WHERE document_id in (3,4)
+}
+
+index main
+{
+	source			= srcmain
+	path			= <data_path/>/main
+    charset_type 	= utf-8
+}
+
+index delta
+{
+	source			= srcdelta
+	path			= <data_path/>/delta
+    charset_type 	= utf-8
+}
+</config>
+
+<indexer>
+<run>--merge main delta</run>
+</indexer>
+
+<queries>
+<query index="main">main1</query>
+<query index="main">main2</query>
+<query index="main">delta1</query>
+<query index="main">delta2</query>
+<query index="main">gamma0</query>
+</queries>
+
+<db_create>
+CREATE TABLE `test_table`
+(
+	`document_id` int(11) NOT NULL default '0',
+	`body` varchar(255) NOT NULL default ''
+)
+</db_create>
+
+<db_drop>
+DROP TABLE IF EXISTS `test_table`
+</db_drop>
+
+<db_insert>
+INSERT INTO `test_table` VALUES
+( 1, 'main1' ),
+( 2, 'main2' ),
+( 3, 'delta1' ),
+( 4, 'delta2' )
+</db_insert>
+
+</test>

File diff suppressed because it is too large
+ 0 - 0
test/test_27/model.bin


Some files were not shown because too many files changed in this diff