client.rb 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153
  1. # = client.rb - Sphinx Client API
  2. #
  3. # Author:: Dmytro Shteflyuk <mailto:[email protected]>.
  4. # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
  5. # License:: Distributes under the same terms as Ruby
  6. # Version:: 0.9.9-r1299
  7. # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
  8. #
  9. # This library is distributed under the terms of the Ruby license.
  10. # You can freely distribute/modify this library.
  11. # ==Sphinx Client API
  12. #
  13. # The Sphinx Client API is used to communicate with <tt>searchd</tt>
  14. # daemon and get search results from Sphinx.
  15. #
  16. # ===Usage
  17. #
  18. # sphinx = Sphinx::Client.new
  19. # result = sphinx.Query('test')
  20. # ids = result['matches'].map { |match| match['id'] }.join(',')
  21. # posts = Post.find :all, :conditions => "id IN (#{ids})"
  22. #
  23. # docs = posts.map(&:body)
  24. # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
  25. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  26. # WARNING
  27. # We strongly recommend you to use SphinxQL instead of the API
  28. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  29. require 'socket'
  30. module Sphinx
  31. # :stopdoc:
  # Root of the exception hierarchy used by the Sphinx client.
  class SphinxError < StandardError
  end

  # The specialised errors below all derive directly from SphinxError.
  class SphinxArgumentError < SphinxError
  end

  class SphinxConnectError < SphinxError
  end

  class SphinxResponseError < SphinxError
  end

  class SphinxInternalError < SphinxError
  end

  class SphinxTemporaryError < SphinxError
  end

  class SphinxUnknownError < SphinxError
  end
  39. # :startdoc:
  40. class Client
  41. # :stopdoc:
  # Known searchd commands
  SEARCHD_COMMAND_SEARCH   = 0 # search command
  SEARCHD_COMMAND_EXCERPT  = 1 # excerpt command
  SEARCHD_COMMAND_UPDATE   = 2 # update command
  SEARCHD_COMMAND_KEYWORDS = 3 # keywords command

  # Current client-side command implementation versions
  VER_COMMAND_SEARCH   = 0x119 # search command version
  VER_COMMAND_EXCERPT  = 0x102 # excerpt command version
  VER_COMMAND_UPDATE   = 0x103 # update command version
  VER_COMMAND_KEYWORDS = 0x100 # keywords command version

  # Known searchd status codes
  SEARCHD_OK      = 0 # general success, command-specific reply follows
  SEARCHD_ERROR   = 1 # general failure, command-specific reply may follow
  SEARCHD_RETRY   = 2 # temporary failure, client should retry later
  SEARCHD_WARNING = 3 # general success, warning message and command-specific reply follow
  # :startdoc:

  # Known match modes

  # match all query words
  SPH_MATCH_ALL       = 0
  # match any query word
  SPH_MATCH_ANY       = 1
  # match this exact phrase
  SPH_MATCH_PHRASE    = 2
  # match this boolean query
  SPH_MATCH_BOOLEAN   = 3
  # match this extended query
  SPH_MATCH_EXTENDED  = 4
  # match all document IDs w/o fulltext query, apply filters
  SPH_MATCH_FULLSCAN  = 5
  # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
  SPH_MATCH_EXTENDED2 = 6

  # Known ranking modes (ext2 only)

  # default mode, phrase proximity major factor and BM25 minor one
  SPH_RANK_PROXIMITY_BM25 = 0
  # statistical mode, BM25 ranking only (faster but worse quality)
  SPH_RANK_BM25           = 1
  # no ranking, all matches get a weight of 1
  SPH_RANK_NONE           = 2
  # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
  SPH_RANK_WORDCOUNT      = 3
  # phrase proximity
  SPH_RANK_PROXIMITY      = 4
  SPH_RANK_MATCHANY       = 5
  SPH_RANK_FIELDMASK      = 6
  SPH_RANK_SPH04          = 7
  # expression ranker; the expression itself is supplied via SetRankingMode
  SPH_RANK_EXPR           = 8

  # Known sort modes

  # sort by document relevance desc, then by date
  SPH_SORT_RELEVANCE     = 0
  # sort by document date desc, then by relevance desc
  SPH_SORT_ATTR_DESC     = 1
  # sort by document date asc, then by relevance desc
  SPH_SORT_ATTR_ASC      = 2
  # sort by time segments (hour/day/week/etc) desc, then by relevance desc
  SPH_SORT_TIME_SEGMENTS = 3
  # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
  SPH_SORT_EXTENDED      = 4
  # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
  SPH_SORT_EXPR          = 5

  # Known filter types

  # filter by integer values set
  SPH_FILTER_VALUES     = 0
  # filter by integer range
  SPH_FILTER_RANGE      = 1
  # filter by float range
  SPH_FILTER_FLOATRANGE = 2

  # Known attribute types

  # this attr is just an integer
  SPH_ATTR_INTEGER   = 1
  # this attr is a timestamp
  SPH_ATTR_TIMESTAMP = 2
  # this attr is an ordinal string number (integer at search time,
  # specially handled at indexing time)
  SPH_ATTR_ORDINAL   = 3
  # this attr is a boolean bit field
  SPH_ATTR_BOOL      = 4
  # this attr is a float
  SPH_ATTR_FLOAT     = 5
  # signed 64-bit integer
  SPH_ATTR_BIGINT    = 6
  # string
  SPH_ATTR_STRING    = 7
  # this attr has multiple values (0 or more)
  SPH_ATTR_MULTI     = 0x40000001
  # multiple values, each read from the reply as a 64-bit integer
  SPH_ATTR_MULTI64   = 0x40000002

  # Known grouping functions

  # group by day
  SPH_GROUPBY_DAY      = 0
  # group by week
  SPH_GROUPBY_WEEK     = 1
  # group by month
  SPH_GROUPBY_MONTH    = 2
  # group by year
  SPH_GROUPBY_YEAR     = 3
  # group by attribute value
  SPH_GROUPBY_ATTR     = 4
  # group by sequential attrs pair
  SPH_GROUPBY_ATTRPAIR = 5
  152. # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
  def initialize
    # Puts every client setting into its default state.

    # --- per-client-object settings ---
    # searchd host (default is "localhost")
    @host = 'localhost'
    # searchd port (default is 9312)
    @port = 9312

    # --- per-query settings ---
    # how many records to seek from result-set start (default is 0)
    @offset = 0
    # how many records to return from result-set starting at offset (default is 20)
    @limit = 20
    # query matching mode (default is SPH_MATCH_EXTENDED2)
    @mode = SPH_MATCH_EXTENDED2
    # per-field weights (default is 1 for all fields)
    @weights = []
    # match sorting mode (default is SPH_SORT_RELEVANCE)
    @sort = SPH_SORT_RELEVANCE
    # attribute to sort by (default is "")
    @sortby = ''
    # min/max ID to match (default is 0, which means no limit)
    @min_id = @max_id = 0
    # search filters
    @filters = []
    # group-by attribute name
    @groupby = ''
    # function to pre-process group-by attribute value with
    @groupfunc = SPH_GROUPBY_DAY
    # group-by sorting clause (to sort groups in result set with)
    @groupsort = '@group desc'
    # group-by count-distinct attribute
    @groupdistinct = ''
    # max matches to retrieve
    @maxmatches = 1000
    # cutoff to stop searching at (default is 0)
    @cutoff = 0
    # distributed retries count and delay
    @retrycount = @retrydelay = 0
    # geographical anchor point
    @anchor = []
    # per-index weights
    @indexweights = []
    # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
    @ranker = SPH_RANK_PROXIMITY_BM25
    # ranker expression for SPH_RANK_EXPR
    @rankexpr = ''
    # max query time, milliseconds (default is 0, do not limit)
    @maxquerytime = 0
    # per-field-name weights
    @fieldweights = {}
    # per-query attribute values overrides
    @overrides = []
    # select-list (attributes or expressions, with optional aliases)
    @select = '*'

    # --- per-reply fields (for single-query case) ---
    # last error message
    @error = ''
    # last warning message
    @warning = ''
    # requests storage (for multi-query case)
    @reqs = []
    # stored mbstring encoding
    @mbenc = ''
  end
  189. # Get last error message.
  def GetLastError
    # Plain reader: hands back whatever is currently stored in @error
    # (an empty string until a query stores a message there).
    @error
  end
  193. # Get last warning message.
  def GetLastWarning
    # Plain reader: hands back whatever is currently stored in @warning
    # (an empty string until a query stores a message there).
    @warning
  end
  197. # Set searchd host name (string) and port (integer).
  def SetServer(host, port)
    # Stores the searchd endpoint in @host / @port.
    #
    # host -- searchd host name, must be a String
    # port -- searchd port, must be an Integer
    #
    # Both arguments are checked through the assert helper (defined outside
    # this block). Fixnum was folded into Integer in Ruby 2.4 and removed in
    # Ruby 3.2, so the port is checked with is_a?(Integer), which works on
    # both old and current Rubies.
    assert { host.instance_of? String }
    assert { port.is_a? Integer }
    @host = host
    @port = port
  end
  204. # Set offset and count into result set,
  205. # and optionally set max-matches and cutoff limits.
  def SetLimits(offset, limit, max = 0, cutoff = 0)
    # Sets the result-set window and, optionally, max-matches and cutoff.
    #
    # offset -- Integer >= 0, stored in @offset
    # limit  -- Integer > 0, stored in @limit
    # max    -- Integer >= 0; only replaces @maxmatches when positive
    # cutoff -- Integer >= 0; only replaces @cutoff when positive
    #
    # Arguments are checked through the assert helper (defined outside this
    # block). Integer is used instead of Fixnum, which no longer exists in
    # Ruby 3.2+. cutoff is now validated like the other arguments; it was
    # previously compared against 0 without any type check.
    assert { offset.is_a? Integer }
    assert { limit.is_a? Integer }
    assert { max.is_a? Integer }
    assert { cutoff.is_a? Integer }
    assert { offset >= 0 }
    assert { limit > 0 }
    assert { max >= 0 }
    assert { cutoff >= 0 }
    @offset = offset
    @limit = limit
    # zero means "leave the current value alone"
    @maxmatches = max if max > 0
    @cutoff = cutoff if cutoff > 0
  end
  218. # Set maximum query time, in milliseconds, per-index,
  219. # integer, 0 means "do not limit"
  def SetMaxQueryTime(max)
    # Stores the maximum query time in @maxquerytime.
    #
    # max -- Integer >= 0 (milliseconds; 0 means "do not limit")
    #
    # Checked through the assert helper (defined outside this block).
    # Integer replaces Fixnum, which was removed in Ruby 3.2.
    assert { max.is_a? Integer }
    assert { max >= 0 }
    @maxquerytime = max
  end
  225. # Set matching mode. DEPRECATED
  def SetMatchMode(mode)
    # DEPRECATED. Stores the matching mode in @mode.
    #
    # mode -- one of the SPH_MATCH_* constants; anything else fails the
    #         assert helper (defined outside this block).
    assert do
      [
        SPH_MATCH_ALL,
        SPH_MATCH_ANY,
        SPH_MATCH_PHRASE,
        SPH_MATCH_BOOLEAN,
        SPH_MATCH_EXTENDED,
        SPH_MATCH_FULLSCAN,
        SPH_MATCH_EXTENDED2
      ].include?(mode)
    end
    @mode = mode
  end
  237. # Set ranking mode.
  def SetRankingMode(ranker, rankexpr = '')
    # Stores the ranking mode in @ranker and the ranker expression in
    # @rankexpr.
    #
    # ranker   -- one of the SPH_RANK_* constants; anything else fails the
    #             assert helper (defined outside this block)
    # rankexpr -- expression stored as-is (not validated here)
    assert do
      [
        SPH_RANK_PROXIMITY_BM25,
        SPH_RANK_BM25,
        SPH_RANK_NONE,
        SPH_RANK_WORDCOUNT,
        SPH_RANK_PROXIMITY,
        SPH_RANK_MATCHANY,
        SPH_RANK_FIELDMASK,
        SPH_RANK_SPH04,
        SPH_RANK_EXPR
      ].include?(ranker)
    end
    @ranker = ranker
    @rankexpr = rankexpr
  end
  251. # Set matches sorting mode.
  def SetSortMode(mode, sortby = '')
    # Stores the sorting mode in @sort and the sort clause in @sortby.
    #
    # mode   -- one of the SPH_SORT_* constants
    # sortby -- String; may only be empty when mode is SPH_SORT_RELEVANCE
    #
    # All checks go through the assert helper (defined outside this block).
    assert do
      [
        SPH_SORT_RELEVANCE,
        SPH_SORT_ATTR_DESC,
        SPH_SORT_ATTR_ASC,
        SPH_SORT_TIME_SEGMENTS,
        SPH_SORT_EXTENDED,
        SPH_SORT_EXPR
      ].include?(mode)
    end
    assert { sortby.instance_of? String }
    assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
    @sort = mode
    @sortby = sortby
  end
  264. # Bind per-field weights by order.
  265. #
  266. # DEPRECATED; use SetFieldWeights() instead.
  def SetWeights(weights)
    # DEPRECATED in favour of SetFieldWeights. Stores positional per-field
    # weights in @weights.
    #
    # weights -- Array of Integer weights
    #
    # Checked through the assert helper (defined outside this block).
    # Integer replaces Fixnum, which was removed in Ruby 3.2.
    assert { weights.instance_of? Array }
    weights.each do |weight|
      assert { weight.is_a? Integer }
    end
    @weights = weights
  end
  274. # Bind per-field weights by name.
  275. #
  276. # Takes string (field name) to integer name (field weight) hash as an argument.
  277. # * Takes precedence over SetWeights().
  278. # * Unknown names will be silently ignored.
  279. # * Unbound fields will be silently given a weight of 1.
  def SetFieldWeights(weights)
    # Stores per-field-name weights in @fieldweights.
    #
    # weights -- Hash mapping String field names to Integer weights
    #
    # Checked through the assert helper (defined outside this block).
    # Integer replaces Fixnum, which was removed in Ruby 3.2.
    assert { weights.instance_of? Hash }
    weights.each do |name, weight|
      assert { name.instance_of? String }
      assert { weight.is_a? Integer }
    end
    @fieldweights = weights
  end
  288. # Bind per-index weights by name.
  def SetIndexWeights(weights)
    # Stores per-index weights in @indexweights.
    #
    # weights -- Hash mapping String index names to Integer weights
    #
    # Checked through the assert helper (defined outside this block).
    # Integer replaces Fixnum, which was removed in Ruby 3.2.
    assert { weights.instance_of? Hash }
    weights.each do |index, weight|
      assert { index.instance_of? String }
      assert { weight.is_a? Integer }
    end
    @indexweights = weights
  end
  297. # Set IDs range to match.
  298. #
  299. # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
  def SetIDRange(min, max)
    # Stores the document ID range in @min_id / @max_id.
    #
    # min, max -- Integers with min <= max
    #
    # Checked through the assert helper (defined outside this block).
    # Fixnum and Bignum were merged into Integer in Ruby 2.4 and removed in
    # Ruby 3.2; is_a?(Integer) accepts both on every Ruby version.
    assert { min.is_a?(Integer) }
    assert { max.is_a?(Integer) }
    assert { min <= max }
    @min_id = min
    @max_id = max
  end
  307. # Set values filter.
  308. #
  309. # Only match those records where <tt>attribute</tt> column values
  310. # are in specified set.
  def SetFilter(attribute, values, exclude = false)
    # Appends a values filter (type SPH_FILTER_VALUES) to @filters.
    #
    # attribute -- String attribute name
    # values    -- non-empty Array of Integers
    # exclude   -- stored with the filter as its 'exclude' flag
    #
    # Checked through the assert helper (defined outside this block).
    # Integer replaces Fixnum (removed in Ruby 3.2).
    assert { attribute.instance_of? String }
    assert { values.instance_of? Array }
    assert { !values.empty? }

    # Kept from the original: nothing is appended for a non-Array or empty
    # values argument, independently of how the assert helper behaves.
    return unless values.instance_of?(Array) && !values.empty?

    values.each do |value|
      assert { value.is_a? Integer }
    end
    @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
  end
  322. # Set range filter.
  323. #
  324. # Only match those records where <tt>attribute</tt> column value
  325. # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
  def SetFilterRange(attribute, min, max, exclude = false)
    # Appends an integer range filter (type SPH_FILTER_RANGE) to @filters.
    #
    # attribute -- String attribute name
    # min, max  -- Integers with min <= max
    # exclude   -- stored with the filter as its 'exclude' flag
    #
    # Checked through the assert helper (defined outside this block).
    # is_a?(Integer) replaces the Fixnum/Bignum checks; both constants were
    # removed in Ruby 3.2.
    assert { attribute.instance_of? String }
    assert { min.is_a? Integer }
    assert { max.is_a? Integer }
    assert { min <= max }
    @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
  end
  333. # Set float range filter.
  334. #
  335. # Only match those records where <tt>attribute</tt> column value
  336. # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
  def SetFilterFloatRange(attribute, min, max, exclude = false)
    # Appends a float range filter (type SPH_FILTER_FLOATRANGE) to @filters.
    #
    # attribute -- String attribute name
    # min, max  -- Floats with min <= max
    # exclude   -- stored with the filter as its 'exclude' flag
    #
    # All checks go through the assert helper (defined outside this block).
    assert { attribute.instance_of? String }
    [min, max].each do |bound|
      assert { bound.instance_of? Float }
    end
    assert { min <= max }

    filter = {
      'type' => SPH_FILTER_FLOATRANGE,
      'attr' => attribute,
      'exclude' => exclude,
      'min' => min,
      'max' => max
    }
    @filters.push(filter)
  end
  344. # Setup anchor point for geosphere distance calculations.
  345. #
  346. # Required to use <tt>@geodist</tt> in filters and sorting
  347. # distance will be computed to this point. Latitude and longitude
  348. # must be in radians.
  349. #
  350. # * <tt>attrlat</tt> -- is the name of latitude attribute
  351. # * <tt>attrlong</tt> -- is the name of longitude attribute
  352. # * <tt>lat</tt> -- is anchor point latitude, in radians
  353. # * <tt>long</tt> -- is anchor point longitude, in radians
  def SetGeoAnchor(attrlat, attrlong, lat, long)
    # Replaces @anchor with a hash describing the geo anchor point.
    #
    # attrlat, attrlong -- String attribute names
    # lat, long         -- Float coordinates
    #
    # All checks go through the assert helper (defined outside this block).
    [attrlat, attrlong].each do |attr_name|
      assert { attr_name.instance_of? String }
    end
    [lat, long].each do |coordinate|
      assert { coordinate.instance_of? Float }
    end

    @anchor = {
      'attrlat' => attrlat,
      'attrlong' => attrlong,
      'lat' => lat,
      'long' => long
    }
  end
  361. # Set grouping attribute and function.
  362. #
  363. # In grouping mode, all matches are assigned to different groups
  364. # based on grouping function value.
  365. #
  366. # Each group keeps track of the total match count, and the best match
  367. # (in this group) according to current sorting function.
  368. #
  369. # The final result set contains one best match per group, with
  370. # grouping function value and matches count attached.
  371. #
  372. # Groups in result set could be sorted by any sorting clause,
  373. # including both document attributes and the following special
  374. # internal Sphinx attributes:
  375. #
  376. # * @id - match document ID;
  377. # * @weight, @rank, @relevance - match weight;
  378. # * @group - groupby function value;
  379. # * @count - amount of matches in group.
  380. #
  381. # the default mode is to sort by groupby value in descending order,
  382. # ie. by '@group desc'.
  383. #
  384. # 'total_found' would contain total amount of matching groups over
  385. # the whole index.
  386. #
  387. # WARNING: grouping is done in fixed memory and thus its results
  388. # are only approximate; so there might be more groups reported
  389. # in total_found than actually present. @count might also
  390. # be underestimated.
  391. #
  392. # For example, if sorting by relevance and grouping by "published"
  393. # attribute with SPH_GROUPBY_DAY function, then the result set will
  394. # contain one most relevant match per each day when there were any
  395. # matches published, with day number and per-day match count attached,
  396. # and sorted by day number in descending order (ie. recent days first).
  def SetGroupBy(attribute, func, groupsort = '@group desc')
    # Stores the grouping attribute, function and group sorting clause in
    # @groupby, @groupfunc and @groupsort.
    #
    # attribute -- String attribute name
    # func      -- one of the SPH_GROUPBY_* constants
    # groupsort -- String sorting clause
    #
    # All checks go through the assert helper (defined outside this block).
    [attribute, groupsort].each do |text|
      assert { text.instance_of? String }
    end
    assert do
      [
        SPH_GROUPBY_DAY,
        SPH_GROUPBY_WEEK,
        SPH_GROUPBY_MONTH,
        SPH_GROUPBY_YEAR,
        SPH_GROUPBY_ATTR,
        SPH_GROUPBY_ATTRPAIR
      ].include?(func)
    end
    @groupby = attribute
    @groupfunc = func
    @groupsort = groupsort
  end
  410. # Set count-distinct attribute for group-by queries.
  def SetGroupDistinct(attribute)
    # Stores the count-distinct attribute name in @groupdistinct.
    #
    # attribute -- must be a String (checked through the assert helper,
    #              which is defined outside this block)
    assert { attribute.instance_of?(String) }
    @groupdistinct = attribute
  end
  415. # Set distributed retries count and delay.
  def SetRetries(count, delay = 0)
    # Stores the distributed retry count and delay in @retrycount and
    # @retrydelay.
    #
    # count -- Integer
    # delay -- Integer (defaults to 0)
    #
    # Checked through the assert helper (defined outside this block).
    # Integer replaces Fixnum, which was removed in Ruby 3.2.
    assert { count.is_a? Integer }
    assert { delay.is_a? Integer }
    @retrycount = count
    @retrydelay = delay
  end
  422. # DEPRECATED: Set attribute values override
  423. #
  424. # There can be only one override per attribute.
  425. # +values+ must be a hash that maps document IDs to attribute values.
  def SetOverride(attrname, attrtype, values)
    # DEPRECATED. Appends an attribute override entry to @overrides.
    #
    # attrname -- String attribute name
    # attrtype -- one of SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL,
    #             SPH_ATTR_FLOAT or SPH_ATTR_BIGINT
    # values   -- Hash (document IDs to attribute values)
    #
    # All checks go through the assert helper (defined outside this block).
    assert { attrname.instance_of? String }
    assert do
      [
        SPH_ATTR_INTEGER,
        SPH_ATTR_TIMESTAMP,
        SPH_ATTR_BOOL,
        SPH_ATTR_FLOAT,
        SPH_ATTR_BIGINT
      ].include?(attrtype)
    end
    assert { values.instance_of? Hash }

    override = { 'attr' => attrname, 'type' => attrtype, 'values' => values }
    @overrides.push(override)
  end
  433. # Set select-list (attributes or expressions), SQL-like syntax.
  def SetSelect(select)
    # Stores the select-list in @select.
    #
    # select -- must be a String (checked through the assert helper, which
    #           is defined outside this block)
    assert { select.instance_of?(String) }
    @select = select
  end
  438. # Clear all filters (for multi-queries).
  def ResetFilters
    # Drops every stored filter and the geo anchor by replacing both
    # @filters and @anchor with fresh empty arrays.
    @filters = []
    @anchor = []
  end
  443. # Clear groupby settings (for multi-queries).
  def ResetGroupBy
    # Restores the group-by settings to the values used by initialize:
    # no attribute, SPH_GROUPBY_DAY, '@group desc', no distinct attribute.
    @groupby = ''
    @groupfunc = SPH_GROUPBY_DAY
    @groupsort = '@group desc'
    @groupdistinct = ''
  end
  450. # Clear all attribute value overrides (for multi-queries).
  def ResetOverrides
    # Forgets all attribute overrides by replacing @overrides with a fresh
    # empty array.
    @overrides = []
  end
  454. # Connect to searchd server and run given search query.
  455. #
  456. # <tt>query</tt> is query string
  457. # <tt>index</tt> is index name (or names) to query. default value is "*" which means
  458. # to query all indexes. Accepted characters for index names are letters, numbers,
  459. # dash, and underscore; everything else is considered a separator. Therefore,
  460. # all the following calls are valid and will search two indexes:
  461. #
  462. # sphinx.Query('test query', 'main delta')
  463. # sphinx.Query('test query', 'main;delta')
  464. # sphinx.Query('test query', 'main, delta')
  465. #
  466. # Index order matters. If identical IDs are found in two or more indexes,
  467. # weight and attribute values from the very last matching index will be used
  468. # for sorting and returning to client. Therefore, in the example above,
  469. # matches from "delta" index will always "win" over matches from "main".
  470. #
  471. # Returns false on failure.
  472. # Returns hash which has the following keys on success:
  473. #
  474. # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
  475. # * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
  476. # * <tt>'total_found'</tt> -- total amount of matching documents in index
  477. # * <tt>'time'</tt> -- search time
  478. # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
  def Query(query, index = '*', comment = '')
    # Runs a single query: queues it with AddQuery, executes the batch with
    # RunQueries and unwraps the first result.
    #
    # Returns false when RunQueries does not hand back an Array or when the
    # result status is SEARCHD_ERROR; otherwise returns the result hash.
    # @error and @warning are updated from the result in both of the latter
    # cases.
    assert { @reqs.empty? }
    @reqs = []

    AddQuery(query, index, comment)
    batch = RunQueries()

    # probably network error; error message should be already filled
    return false unless batch.instance_of?(Array)

    first = batch[0]
    @error = first['error']
    @warning = first['warning']
    first['status'] == SEARCHD_ERROR ? false : first
  end
  491. # Add query to batch.
  492. #
  493. # Batch queries enable searchd to perform internal optimizations,
  494. # if possible; and reduce network connection overheads in all cases.
  495. #
  496. # For instance, running exactly the same query with different
  497. # groupby settings will enable searchd to perform expensive
  498. # full-text search and ranking operation only once, but compute
  499. # multiple groupby results from its output.
  500. #
  501. # Parameters are exactly the same as in <tt>Query</tt> call.
  502. # Returns index to results array returned by <tt>RunQueries</tt> call.
  def AddQuery(query, index = '*', comment = '')
    # Serialises the current per-query settings plus the given query into a
    # Request, appends its string form to @reqs and returns the position of
    # the new entry in @reqs.
    #
    # query   -- query string
    # index   -- index name(s), defaults to '*'
    # comment -- query comment, defaults to ''
    #
    # Raises SphinxInternalError when a stored filter has an unknown type.
    # Override IDs and values are checked through the assert helper
    # (defined outside this block); Integer replaces the Fixnum/Bignum
    # constants, which were removed in Ruby 3.2.
    #
    # The order of the put_* calls below defines the layout of the request
    # and must not be changed.
    request = Request.new

    # mode and limits
    request.put_int @offset, @limit, @mode, @ranker
    # the ranker expression is only sent for the 'expr' ranker
    request.put_string @rankexpr if @ranker == SPH_RANK_EXPR
    request.put_int @sort
    request.put_string @sortby
    # query itself
    request.put_string query
    # weights
    request.put_int_array @weights
    # indexes
    request.put_string index
    # id64 range marker
    request.put_int 1
    # id64 range
    request.put_int64 @min_id.to_i, @max_id.to_i

    # filters
    request.put_int @filters.length
    @filters.each do |filter|
      request.put_string filter['attr']
      request.put_int filter['type']
      case filter['type']
      when SPH_FILTER_VALUES
        request.put_int64_array filter['values']
      when SPH_FILTER_RANGE
        request.put_int64 filter['min'], filter['max']
      when SPH_FILTER_FLOATRANGE
        request.put_float filter['min'], filter['max']
      else
        raise SphinxInternalError, 'Internal error: unhandled filter type'
      end
      request.put_int filter['exclude'] ? 1 : 0
    end

    # group-by clause, max-matches count, group-sort clause, cutoff count
    request.put_int @groupfunc
    request.put_string @groupby
    request.put_int @maxmatches
    request.put_string @groupsort
    request.put_int @cutoff, @retrycount, @retrydelay
    request.put_string @groupdistinct

    # anchor point (@anchor is an empty array until SetGeoAnchor stores a hash)
    if @anchor.empty?
      request.put_int 0
    else
      request.put_int 1
      request.put_string @anchor['attrlat'], @anchor['attrlong']
      request.put_float @anchor['lat'], @anchor['long']
    end

    # per-index weights
    request.put_int @indexweights.length
    @indexweights.each do |idx, weight|
      request.put_string idx
      request.put_int weight
    end

    # max query time
    request.put_int @maxquerytime

    # per-field weights
    request.put_int @fieldweights.length
    @fieldweights.each do |field, weight|
      request.put_string field
      request.put_int weight
    end

    # comment
    request.put_string comment

    # attribute overrides
    request.put_int @overrides.length
    @overrides.each do |entry|
      request.put_string entry['attr']
      request.put_int entry['type'], entry['values'].size
      entry['values'].each do |id, val|
        assert { id.is_a?(Integer) }
        assert { val.is_a?(Integer) || val.instance_of?(Float) }
        request.put_int64 id
        # the value encoding depends on the declared attribute type
        case entry['type']
        when SPH_ATTR_FLOAT
          request.put_float val
        when SPH_ATTR_BIGINT
          request.put_int64 val
        else
          request.put_int val
        end
      end
    end

    # select-list
    request.put_string @select

    # store request to requests array
    @reqs << request.to_s
    @reqs.length - 1
  end
  597. # Run queries batch.
  598. #
  599. # Returns an array of result sets on success.
  600. # Returns false on network IO failure.
  601. #
  602. # Each result set in returned array is a hash which contains
  603. # the same keys as the hash returned by <tt>Query</tt>, plus:
  604. #
  605. # * <tt>'error'</tt> -- search error for this query
  606. # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
  def RunQueries
    # Sends every request queued in @reqs through PerformRequest (defined
    # outside this block) and decodes one result hash per queued request.
    #
    # Returns false (with @error set) when nothing is queued; otherwise
    # returns the array of result hashes. @reqs is emptied before the
    # request is performed.
    #
    # NOTE: there is no rescue clause here, so anything raised while the
    # response is being read propagates to the caller.
    if @reqs.empty?
      @error = 'No queries defined, issue AddQuery() first'
      return false
    end

    payload = @reqs.join('')
    query_count = @reqs.length
    @reqs = []
    response = PerformRequest(:search, payload, query_count)

    # parse response; the order of the reads below follows the reply layout
    results = []
    query_count.times do
      result = { 'error' => '', 'warning' => '' }

      # extract status
      status = response.get_int
      result['status'] = status
      unless status == SEARCHD_OK
        message = response.get_string
        if status == SEARCHD_WARNING
          # a warning is followed by a regular result set
          result['warning'] = message
        else
          # any other status carries no result set
          result['error'] = message
          results << result
          next
        end
      end

      # read schema: field names first, then (attribute name, type) pairs
      fields = []
      response.get_int.times { fields << response.get_string }
      result['fields'] = fields

      attr_types = {}
      attr_names = []
      response.get_int.times do
        attr_name = response.get_string
        attr_types[attr_name] = response.get_int
        attr_names << attr_name
      end
      result['attrs'] = attr_types

      # read match count and the 64-bit document ID flag
      match_count = response.get_int
      id64 = response.get_int

      # read matches
      result['matches'] = []
      match_count.times do
        if id64 == 0
          doc, weight = response.get_ints(2)
        else
          doc = response.get_int64
          weight = response.get_int
        end

        # one entry of result['matches']; 'attrs' only appears when the
        # schema has at least one attribute
        match = { 'id' => doc, 'weight' => weight }
        attr_names.each do |attr_name|
          values = (match['attrs'] ||= {})
          values[attr_name] =
            case attr_types[attr_name]
            when SPH_ATTR_BIGINT
              # handle 64-bit ints
              response.get_int64
            when SPH_ATTR_FLOAT
              # handle floats
              response.get_float
            when SPH_ATTR_STRING
              # handle string
              response.get_string
            when SPH_ATTR_MULTI
              # a count followed by that many ints
              list = []
              response.get_int.times { list << response.get_int }
              list
            when SPH_ATTR_MULTI64
              # the leading count is halved; each value is read as an int64
              list = []
              (response.get_int / 2).times { list << response.get_int64 }
              list
            else
              # handle everything else as unsigned ints
              response.get_int
            end
        end
        result['matches'] << match
      end

      total, total_found, msecs, word_count = response.get_ints(4)
      result['total'] = total
      result['total_found'] = total_found
      result['time'] = format('%.3f', msecs / 1000.0)

      result['words'] = {}
      word_count.times do
        word = response.get_string
        docs, hits = response.get_ints(2)
        result['words'][word] = { 'docs' => docs, 'hits' => hits }
      end

      results << result
    end

    results
  end
  722. # Connect to searchd server and generate excerpts from given documents.
  723. #
  724. # * <tt>docs</tt> -- an array of strings which represent the documents' contents
  725. # * <tt>index</tt> -- a string specifying the index whose settings will be used
  726. # for stemming, lexing and case folding
  727. # * <tt>words</tt> -- a string which contains the words to highlight
  728. # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
  729. #
  730. # You can use following parameters:
  731. # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
  732. # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "<b>"
  733. # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
  734. # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
  735. # * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
  736. # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
  737. # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
  738. # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
  739. # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
  740. #
  741. # Returns false on failure.
  742. # Returns an array of string excerpts on success.
  743. def BuildExcerpts(docs, index, words, opts = {})
  744. assert { docs.instance_of? Array }
  745. assert { index.instance_of? String }
  746. assert { words.instance_of? String }
  747. assert { opts.instance_of? Hash }
  748. # fixup options
  749. opts['before_match'] ||= '<b>';
  750. opts['after_match'] ||= '</b>';
  751. opts['chunk_separator'] ||= ' ... ';
  752. opts['html_strip_mode'] ||= 'index';
  753. opts['limit'] ||= 256;
  754. opts['limit_passages'] ||= 0;
  755. opts['limit_words'] ||= 0;
  756. opts['around'] ||= 5;
  757. opts['start_passage_id'] ||= 1;
  758. opts['exact_phrase'] ||= false
  759. opts['single_passage'] ||= false
  760. opts['use_boundaries'] ||= false
  761. opts['weight_order'] ||= false
  762. opts['load_files'] ||= false
  763. opts['allow_empty'] ||= false
  764. # build request
  765. # v.1.0 req
  766. flags = 1
  767. flags |= 2 if opts['exact_phrase']
  768. flags |= 4 if opts['single_passage']
  769. flags |= 8 if opts['use_boundaries']
  770. flags |= 16 if opts['weight_order']
  771. flags |= 32 if opts['query_mode']
  772. flags |= 64 if opts['force_all_words']
  773. flags |= 128 if opts['load_files']
  774. flags |= 256 if opts['allow_empty']
  775. request = Request.new
  776. request.put_int 0, flags # mode=0, flags=1 (remove spaces)
  777. # req index
  778. request.put_string index
  779. # req words
  780. request.put_string words
  781. # options
  782. request.put_string opts['before_match']
  783. request.put_string opts['after_match']
  784. request.put_string opts['chunk_separator']
  785. request.put_int opts['limit'].to_i, opts['around'].to_i
  786. # options v1.2
  787. request.put_int opts['limit_passages'].to_i
  788. request.put_int opts['limit_words'].to_i
  789. request.put_int opts['start_passage_id'].to_i
  790. request.put_string opts['html_strip_mode']
  791. # documents
  792. request.put_int docs.size
  793. docs.each do |doc|
  794. assert { doc.instance_of? String }
  795. request.put_string doc
  796. end
  797. response = PerformRequest(:excerpt, request)
  798. # parse response
  799. begin
  800. res = []
  801. docs.each do |doc|
  802. res << response.get_string
  803. end
  804. rescue EOFError
  805. @error = 'incomplete reply'
  806. raise SphinxResponseError, @error
  807. end
  808. return res
  809. end
  # Connect to searchd server, and generate keyword list for a given query.
  #
  # * +query+ -- the query string to extract keywords from
  # * +index+ -- the index name sent along with the query
  # * +hits+ -- +true+ or +false+; when +true+, per-keyword statistics are
  #   requested and read back
  #
  # Returns an array of hashes, one per keyword, in reply order. Each hash has
  # the keys <tt>'tokenized'</tt> and <tt>'normalized'</tt>; when +hits+ is
  # +true+ it also has <tt>'docs'</tt> and <tt>'hits'</tt>.
  # Raises SphinxResponseError (message also stored in <tt>@error</tt>) when the
  # reply ends prematurely.
  def BuildKeywords(query, index, hits)
    assert { query.instance_of? String }
    assert { index.instance_of? String }
    assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

    # build request
    request = Request.new
    # v.1.0 req
    request.put_string query # req query
    request.put_string index # req index
    request.put_int hits ? 1 : 0

    response = PerformRequest(:keywords, request)

    # parse response: a keyword count followed by that many entries
    begin
      nwords = response.get_int
      nwords.times.map do
        tokenized = response.get_string
        normalized = response.get_string
        entry = { 'tokenized' => tokenized, 'normalized' => normalized }
        entry['docs'], entry['hits'] = response.get_ints(2) if hits
        entry
      end
    rescue EOFError
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
  end
  # Batch update given attributes in given rows in given indexes.
  #
  # * +index+ is a name of the index to be updated
  # * +attrs+ is an array of attribute name strings.
  # * +values+ is a hash where key is document id, and value is an array of
  #   new attribute values (one per entry of +attrs+, in the same order)
  # * +mva+ identifies whether update MVA; when +true+ every new value must
  #   itself be an array of integers
  # * +ignoreexistent+ identifies whether silently ignore updating of non-existent columns
  #
  # Returns number of actually updated documents (0 or more) as read from the reply.
  # Raises SphinxResponseError (message also stored in <tt>@error</tt>) when the
  # reply ends prematurely. Errors raised while performing the request are not
  # rescued here.
  #
  # Usage example:
  #   sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
  def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false)
    # verify everything
    assert { index.instance_of? String }
    assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
    assert { ignoreexistent.instance_of?(TrueClass) || ignoreexistent.instance_of?(FalseClass) }

    assert { attrs.instance_of? Array }
    attrs.each { |attr| assert { attr.instance_of? String } }

    assert { values.instance_of? Hash }
    values.each do |id, entry|
      # is_a?(Integer) instead of instance_of?(Fixnum): the Fixnum constant no
      # longer exists on current Rubies, and on old ones it rejected large
      # (Bignum) document ids even though ids are sent as 64-bit values.
      assert { id.is_a? Integer }
      assert { entry.instance_of? Array }
      assert { entry.length == attrs.length }
      entry.each do |v|
        if mva
          assert { v.instance_of? Array }
          v.each { |vv| assert { vv.is_a? Integer } }
        else
          assert { v.is_a? Integer }
        end
      end
    end

    # build request
    request = Request.new
    request.put_string index

    request.put_int attrs.length
    request.put_int ignoreexistent ? 1 : 0
    attrs.each do |attr|
      request.put_string attr
      request.put_int mva ? 1 : 0
    end

    request.put_int values.length
    values.each do |id, entry|
      request.put_int64 id
      if mva
        entry.each { |v| request.put_int_array v }
      else
        request.put_int(*entry)
      end
    end

    response = PerformRequest(:update, request)

    # parse response
    begin
      response.get_int
    rescue EOFError
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
  end
  905. protected
  # Connect to searchd server.
  #
  # Opens a UNIX domain socket when <tt>@host</tt> starts with "/", otherwise a
  # TCP connection to <tt>@host</tt>:<tt>@port</tt>. Then performs the version
  # handshake: reads the server's 4-byte big-endian protocol version and
  # answers with client version 1.
  #
  # Returns the connected socket.
  # Raises SphinxConnectError (message also stored in <tt>@error</tt>) when the
  # connection cannot be opened or the server does not announce version 1 or
  # higher; in the latter case the socket is closed first.
  def Connect
    begin
      sock = @host[0, 1] == '/' ? UNIXSocket.new(@host) : TCPSocket.new(@host, @port)
    rescue => err
      @error = "connection to #{@host}:#{@port} failed (error=#{err})"
      raise SphinxConnectError, @error
    end

    # A peer that closes right away yields "" (or nil on newer Rubies) and a
    # short read unpacks to nothing; treat all of these as version 0 so they
    # are reported as a handshake failure instead of crashing on nil.
    v = sock.recv(4).to_s.unpack('N*').first.to_i
    if v < 1
      sock.close
      @error = "expected searchd protocol version 1+, got version '#{v}'"
      raise SphinxConnectError, @error
    end

    sock.send([1].pack('N'), 0)
    sock
  end
  # Get and check response packet from searchd server.
  #
  # * +sock+ -- the connected socket to read from; it is always closed before
  #   this method returns or raises
  # * +client_version+ -- the command version spoken by the client, compared
  #   with the version found in the reply header
  #
  # The reply starts with an 8-byte header (status and version as 16-bit
  # big-endian integers, body length as a 32-bit big-endian integer) followed
  # by the body.
  #
  # Returns the body for SEARCHD_OK. For SEARCHD_WARNING the warning text is
  # stored in <tt>@warning</tt> and the rest of the body is returned. A reply
  # version older than +client_version+ also sets <tt>@warning</tt>.
  #
  # Raises (message also stored in <tt>@error</tt>):
  # * SphinxResponseError -- nothing was received, or the body is shorter than announced
  # * SphinxInternalError -- status is SEARCHD_ERROR
  # * SphinxTemporaryError -- status is SEARCHD_RETRY
  # * SphinxUnknownError -- any other status
  def GetResponse(sock, client_version)
    response = String.new # binary, mutable receive buffer
    status = ver = nil
    len = 0

    begin
      header = sock.recv(8).to_s # recv may hand back nil once the peer is gone
      if header.bytesize == 8
        status, ver, len = header.unpack('n2N')
        left = len
        while left > 0
          chunk = begin
            sock.recv(left)
          rescue EOFError
            nil
          end
          # An empty (or nil) chunk means the peer closed the connection:
          # stop reading, otherwise this loop would never terminate.
          break if chunk.nil? || chunk.empty?
          response << chunk
          left -= chunk.bytesize
        end
      end
    ensure
      # close even when reading raised, so the descriptor is not leaked
      sock.close
    end

    # check response
    read = response.bytesize
    if response.empty? || read != len
      @error = if response.empty?
        'received zero-sized searchd response'
      else
        "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
      end
      raise SphinxResponseError, @error
    end

    # check status
    case status
    when SEARCHD_WARNING
      # body layout: 4-byte warning length, warning text, actual payload
      wlen = response[0, 4].unpack('N*').first
      @warning = response[4, wlen]
      return response[4 + wlen, response.length - 4 - wlen]
    when SEARCHD_ERROR
      # the message follows a 4-byte prefix; .to_s covers bodies shorter than that
      @error = 'searchd error: ' + response[4..-1].to_s
      raise SphinxInternalError, @error
    when SEARCHD_RETRY
      @error = 'temporary searchd error: ' + response[4..-1].to_s
      raise SphinxTemporaryError, @error
    when SEARCHD_OK
      # fall through to the version check
    else
      @error = "unknown status code: '#{status}'"
      raise SphinxUnknownError, @error
    end

    # check version
    if ver < client_version
      @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
        "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
    end

    response
  end
  # Connect, send query, get response.
  #
  # * +command+ -- name of the command (symbol or string); its upper-cased form
  #   selects the <tt>SEARCHD_COMMAND_*</tt> and <tt>VER_COMMAND_*</tt>
  #   constants of Sphinx::Client
  # * +request+ -- the request; its +to_s+ is sent as the packet body
  # * +additional+ -- optional integer; when given, two extra 32-bit integers
  #   (0 and this value) are sent between the header and the body and are
  #   counted in the announced length
  #
  # Returns a Response built from the reply body.
  def PerformRequest(command, request, additional = nil)
    cmd = command.to_s.upcase
    command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
    command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)

    body = request.to_s
    # Treat the body as raw bytes so it can always be appended to the packed
    # (binary) header, whatever encoding the request string carries.
    body = body.dup.force_encoding('BINARY') if body.respond_to?(:force_encoding)

    # The header announces the size in bytes, not in characters.
    len = body.bytesize + (additional.nil? ? 0 : 8)
    header = [command_id, command_ver, len].pack('nnN')
    header << [0, additional].pack('NN') unless additional.nil?

    sock = Connect()
    sock.send(header + body, 0)
    Response.new(GetResponse(sock, command_ver))
  end
  994. # :stopdoc:
  # Debug-only sanity check: when Ruby runs with $DEBUG enabled, the given
  # block is evaluated and a falsy result raises 'Assertion failed!'.
  # Without $DEBUG the block is not evaluated at all.
  def assert
    return unless $DEBUG

    raise 'Assertion failed!' unless yield
  end
  998. # :startdoc:
  999. end
  1000. end