# sphinxapi.py
#
# $Id$
#
# Python version of Sphinx searchd client (Python API)
#
# Copyright (c) 2006-2008, Andrew Aksyonoff
# Copyright (c) 2006, Mike Osadnik
# All rights reserved
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License. You should have
# received a copy of the GPL license along with this program; if you
# did not, you can find it at http://www.gnu.org/
#
import sys
import select
import socket
from struct import *

# known searchd commands
SEARCHD_COMMAND_SEARCH = 0
SEARCHD_COMMAND_EXCERPT = 1
SEARCHD_COMMAND_UPDATE = 2
SEARCHD_COMMAND_KEYWORDS = 3

# current client-side command implementation versions
VER_COMMAND_SEARCH = 0x113
VER_COMMAND_EXCERPT = 0x100
VER_COMMAND_UPDATE = 0x101
VER_COMMAND_KEYWORDS = 0x100

# known searchd status codes
SEARCHD_OK = 0
SEARCHD_ERROR = 1
SEARCHD_RETRY = 2
SEARCHD_WARNING = 3

# known match modes
SPH_MATCH_ALL = 0
SPH_MATCH_ANY = 1
SPH_MATCH_PHRASE = 2
SPH_MATCH_BOOLEAN = 3
SPH_MATCH_EXTENDED = 4
SPH_MATCH_FULLSCAN = 5
SPH_MATCH_EXTENDED2 = 6

# known ranking modes (extended2 mode only)
SPH_RANK_PROXIMITY_BM25 = 0 # default mode, phrase proximity major factor and BM25 minor one
SPH_RANK_BM25 = 1 # statistical mode, BM25 ranking only (faster but worse quality)
SPH_RANK_NONE = 2 # no ranking, all matches get a weight of 1
SPH_RANK_WORDCOUNT = 3 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts

# known sort modes
SPH_SORT_RELEVANCE = 0
SPH_SORT_ATTR_DESC = 1
SPH_SORT_ATTR_ASC = 2
SPH_SORT_TIME_SEGMENTS = 3
SPH_SORT_EXTENDED = 4
SPH_SORT_EXPR = 5

# known filter types
SPH_FILTER_VALUES = 0
SPH_FILTER_RANGE = 1
SPH_FILTER_FLOATRANGE = 2

# known attribute types
SPH_ATTR_NONE = 0
SPH_ATTR_INTEGER = 1
SPH_ATTR_TIMESTAMP = 2
SPH_ATTR_ORDINAL = 3
SPH_ATTR_BOOL = 4
SPH_ATTR_FLOAT = 5
SPH_ATTR_MULTI = 0X40000000L # flag bit, OR-ed with the element type (checked in RunQueries match parsing)

# known grouping functions
SPH_GROUPBY_DAY = 0
SPH_GROUPBY_WEEK = 1
SPH_GROUPBY_MONTH = 2
SPH_GROUPBY_YEAR = 3
SPH_GROUPBY_ATTR = 4
class SphinxClient:
    """
    Python (2.x) client for the Sphinx searchd search daemon.

    Typical usage: configure the client through the Set*() methods, then call
    Query() (or AddQuery()/RunQueries() for batches), and check
    GetLastError()/GetLastWarning() afterwards.
    """
    def __init__ (self):
        """
        Create a new client object, and fill defaults.
        """
        self._host = 'localhost' # searchd host (default is "localhost")
        self._port = 3312 # searchd port (default is 3312)
        self._offset = 0 # how much records to seek from result-set start (default is 0)
        self._limit = 20 # how much records to return from result-set starting at offset (default is 20)
        self._mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
        self._weights = [] # per-field weights (default is 1 for all fields)
        self._sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
        self._sortby = '' # attribute to sort by (defualt is "")
        self._min_id = 0 # min ID to match (default is 0)
        self._max_id = 0xFFFFFFFF # max ID to match (default is UINT_MAX)
        self._filters = [] # search filters
        self._groupby = '' # group-by attribute name
        self._groupfunc = SPH_GROUPBY_DAY # group-by function (to pre-process group-by attribute value with)
        self._groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
        self._groupdistinct = '' # group-by count-distinct attribute
        self._maxmatches = 1000 # max matches to retrieve
        self._cutoff = 0 # cutoff to stop searching at
        self._retrycount = 0 # distributed retry count
        self._retrydelay = 0 # distributed retry delay
        self._anchor = {} # geographical anchor point
        self._indexweights = {} # per-index weights
        self._ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode
        self._maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
        self._fieldweights = {} # per-field-name weights
        self._error = '' # last error message
        self._warning = '' # last warning message
        self._reqs = [] # requests array for multi-query
        return
    def GetLastError (self):
        """
        Get last error message (string).

        Empty until a failure is recorded; updated by _Connect(),
        _GetResponse(), Query() and RunQueries().
        """
        return self._error
    def GetLastWarning (self):
        """
        Get last warning message (string).

        Empty until a warning is recorded; updated by _GetResponse() and Query().
        """
        return self._warning
  115. def SetServer (self, host, port):
  116. """
  117. Set searchd server host and port.
  118. """
  119. assert(isinstance(host, str))
  120. assert(isinstance(port, int))
  121. self._host = host
  122. self._port = port
  123. def _Connect (self):
  124. """
  125. INTERNAL METHOD, DO NOT CALL. Connects to searchd server.
  126. """
  127. try:
  128. sock = socket.socket ( socket.AF_INET, socket.SOCK_STREAM )
  129. sock.connect ( ( self._host, self._port ) )
  130. except socket.error, msg:
  131. if sock:
  132. sock.close()
  133. self._error = 'connection to %s:%s failed (%s)' % ( self._host, self._port, msg )
  134. return 0
  135. v = unpack('>L', sock.recv(4))
  136. if v<1:
  137. sock.close()
  138. self._error = 'expected searchd protocol version, got %s' % v
  139. return 0
  140. # all ok, send my version
  141. sock.send(pack('>L', 1))
  142. return sock
    def _GetResponse (self, sock, client_ver):
        """
        INTERNAL METHOD, DO NOT CALL. Gets and checks response packet from searchd server.

        Reads the 8-byte reply header (status, version, body length), then the
        whole body, and closes the socket. Returns the raw body string on
        success or None on failure (with self._error set). On SEARCHD_WARNING
        the warning text is saved to self._warning and the remainder of the
        body is returned.
        """
        (status, ver, length) = unpack('>2HL', sock.recv(8))
        response = ''
        left = length
        while left>0:
            chunk = sock.recv(left)
            if chunk:
                response += chunk
                left -= len(chunk)
            else:
                # peer closed the connection before sending the full body
                break

        sock.close()

        # check response
        read = len(response)
        if not response or read!=length:
            if length:
                self._error = 'failed to read searchd response (status=%s, ver=%s, len=%s, read=%s)' \
                    % (status, ver, length, read)
            else:
                self._error = 'received zero-sized searchd response'
            return None

        # check status
        if status==SEARCHD_WARNING:
            # body starts with a length-prefixed warning string; return the rest
            wend = 4 + unpack ( '>L', response[0:4] )[0]
            self._warning = response[4:wend]
            return response[wend:]

        if status==SEARCHD_ERROR:
            # skip the 4-byte message length prefix
            self._error = 'searchd error: '+response[4:]
            return None

        if status==SEARCHD_RETRY:
            self._error = 'temporary searchd error: '+response[4:]
            return None

        if status!=SEARCHD_OK:
            self._error = 'unknown status code %d' % status
            return None

        # check version; an older daemon still works, but warn the caller
        if ver<client_ver:
            self._warning = 'searchd command v.%d.%d older than client\'s v.%d.%d, some options might not work' \
                % (ver>>8, ver&0xff, client_ver>>8, client_ver&0xff)

        return response
  186. def SetLimits (self, offset, limit, maxmatches=0, cutoff=0):
  187. """
  188. Set offset and count into result set, and optionally set max-matches and cutoff limits.
  189. """
  190. assert(isinstance(offset, int) and offset>=0)
  191. assert(isinstance(limit, int) and limit>0)
  192. assert(maxmatches>=0)
  193. self._offset = offset
  194. self._limit = limit
  195. if maxmatches>0:
  196. self._maxmatches = maxmatches
  197. if cutoff>=0:
  198. self._cutoff = cutoff
  199. def SetMaxQueryTime (self, maxquerytime):
  200. """
  201. Set maximum query time, in milliseconds, per-index. 0 means 'do not limit'.
  202. """
  203. assert(isinstance(maxquerytime,int) and maxquerytime>0)
  204. self._maxquerytime = maxquerytime
  205. def SetMatchMode (self, mode):
  206. """
  207. Set matching mode.
  208. """
  209. assert(mode in [SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN, SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2])
  210. self._mode = mode
  211. def SetRankingMode (self, ranker):
  212. """
  213. Set ranking mode.
  214. """
  215. assert(ranker in [SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE, SPH_RANK_WORDCOUNT])
  216. self._ranker = ranker
  217. def SetSortMode ( self, mode, clause='' ):
  218. """
  219. Set sorting mode.
  220. """
  221. assert ( mode in [SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC, SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR] )
  222. assert ( isinstance ( clause, str ) )
  223. self._sort = mode
  224. self._sortby = clause
  225. def SetWeights (self, weights):
  226. """
  227. Set per-field weights.
  228. WARNING, DEPRECATED; do not use it! use SetFieldWeights() instead
  229. """
  230. assert(isinstance(weights, list))
  231. for w in weights:
  232. assert(isinstance(w, int))
  233. self._weights = weights
  234. def SetFieldWeights (self, weights):
  235. """
  236. Bind per-field weights by name; expects (name,field_weight) dictionary as argument.
  237. """
  238. assert(isinstance(weights,dict))
  239. for key,val in weights.items():
  240. assert(isinstance(key,str))
  241. assert(isinstance(val,int))
  242. self._fieldweights = weights
  243. def SetIndexWeights (self, weights):
  244. """
  245. Bind per-index weights by name; expects (name,index_weight) dictionary as argument.
  246. """
  247. assert(isinstance(weights,dict))
  248. for key,val in weights.items():
  249. assert(isinstance(key,str))
  250. assert(isinstance(val,int))
  251. self._indexweights = weights
  252. def SetIDRange (self, minid, maxid):
  253. """
  254. Set IDs range to match.
  255. Only match records if document ID is beetwen $min and $max (inclusive).
  256. """
  257. assert(isinstance(minid, int))
  258. assert(isinstance(maxid, int))
  259. assert(minid<=maxid)
  260. self._min_id = minid
  261. self._max_id = maxid
  262. def SetFilter ( self, attribute, values, exclude=0 ):
  263. """
  264. Set values set filter.
  265. Only match records where 'attribute' value is in given 'values' set.
  266. """
  267. assert(isinstance(attribute, str))
  268. assert(isinstance(values, list))
  269. assert(values)
  270. for value in values:
  271. assert(isinstance(value, int))
  272. self._filters.append ( { 'type':SPH_FILTER_VALUES, 'attr':attribute, 'exclude':exclude, 'values':values } )
  273. def SetFilterRange (self, attribute, min_, max_, exclude=0 ):
  274. """
  275. Set range filter.
  276. Only match records if 'attribute' value is beetwen 'min_' and 'max_' (inclusive).
  277. """
  278. assert(isinstance(attribute, str))
  279. assert(isinstance(min_, int))
  280. assert(isinstance(max_, int))
  281. assert(min_<=max_)
  282. self._filters.append ( { 'type':SPH_FILTER_RANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_ } )
    def SetFilterFloatRange (self, attribute, min_, max_, exclude=0 ):
        """
        Set float range filter.
        Only match records if 'attribute' value is between 'min_' and 'max_' (inclusive).
        Set exclude=1 to invert the filter.
        """
        assert(isinstance(attribute,str))
        assert(isinstance(min_,float))
        assert(isinstance(max_,float))
        assert(min_ <= max_)
        self._filters.append ( {'type':SPH_FILTER_FLOATRANGE, 'attr':attribute, 'exclude':exclude, 'min':min_, 'max':max_} )
    def SetGeoAnchor (self, attrlat, attrlong, latitude, longitude):
        """
        Set geographical anchor point.

        'attrlat' and 'attrlong' name the latitude/longitude attributes;
        'latitude' and 'longitude' are the anchor coordinates as floats
        (presumably in radians, matching searchd's geodist convention --
        TODO confirm against the searchd documentation).
        """
        assert(isinstance(attrlat,str))
        assert(isinstance(attrlong,str))
        assert(isinstance(latitude,float))
        assert(isinstance(longitude,float))
        self._anchor['attrlat'] = attrlat
        self._anchor['attrlong'] = attrlong
        self._anchor['lat'] = latitude
        self._anchor['long'] = longitude
  298. def SetGroupBy ( self, attribute, func, groupsort='@group desc' ):
  299. """
  300. Set grouping attribute and function.
  301. """
  302. assert(isinstance(attribute, str))
  303. assert(func in [SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH, SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR] )
  304. assert(isinstance(groupsort, str))
  305. self._groupby = attribute
  306. self._groupfunc = func
  307. self._groupsort = groupsort
    def SetGroupDistinct (self, attribute):
        """
        Set count-distinct attribute name for group-by queries.
        """
        assert(isinstance(attribute,str))
        self._groupdistinct = attribute
    def SetRetries (self, count, delay=0):
        """
        Set distributed retry count and delay.
        """
        assert(isinstance(count,int) and count>=0)
        assert(isinstance(delay,int) and delay>=0)
        self._retrycount = count
        self._retrydelay = delay
    def ResetFilters (self):
        """
        Clear all filters (for multi-queries).

        Also clears the geo-anchor point set by SetGeoAnchor().
        """
        self._filters = []
        self._anchor = {}
    def ResetGroupBy (self):
        """
        Clear groupby settings (for multi-queries).

        Restores the constructor defaults for all group-by related fields.
        """
        self._groupby = ''
        self._groupfunc = SPH_GROUPBY_DAY
        self._groupsort = '@group desc'
        self._groupdistinct = ''
  330. def Query (self, query, index='*', comment=''):
  331. """
  332. Connect to searchd server and run given search query.
  333. Returns None on failure; result set hash on success (see documentation for details).
  334. """
  335. assert(len(self._reqs)==0)
  336. self.AddQuery(query,index,comment)
  337. results = self.RunQueries()
  338. if len(results)==0:
  339. return None
  340. self._error = results[0]['error']
  341. self._warning = results[0]['warning']
  342. if results[0]['status'] == SEARCHD_ERROR:
  343. return None
  344. return results[0]
    def AddQuery (self, query, index='*', comment=''):
        """
        Add query to batch.

        Serializes the query together with all current client settings into
        the searchd wire format and appends it to self._reqs; the batch is
        sent by RunQueries(). Does not reset the settings afterwards.
        """
        # build request
        req = [pack('>5L', self._offset, self._limit, self._mode, self._ranker, self._sort)]

        # sort-by clause (length-prefixed string, as all strings below)
        req.append(pack('>L', len(self._sortby)))
        req.append(self._sortby)

        if isinstance(query,unicode):
            query = query.encode('utf-8')
        assert(isinstance(query,str))

        req.append(pack('>L', len(query)))
        req.append(query)

        # legacy positional per-field weights
        req.append(pack('>L', len(self._weights)))
        for w in self._weights:
            req.append(pack('>L', w))

        req.append(pack('>L', len(index)))
        req.append(index)

        req.append(pack('>L',0)) # id64 range marker FIXME! IMPLEMENT!
        req.append(pack('>L', self._min_id))
        req.append(pack('>L', self._max_id))

        # filters: count, then per-filter attr name, type, payload, exclude flag
        req.append ( pack ( '>L', len(self._filters) ) )
        for f in self._filters:
            req.append ( pack ( '>L', len(f['attr'])) + f['attr'])
            filtertype = f['type']
            req.append ( pack ( '>L', filtertype))
            if filtertype == SPH_FILTER_VALUES:
                req.append ( pack ('>L', len(f['values'])))
                for val in f['values']:
                    req.append ( pack ('>L', val))
            elif filtertype == SPH_FILTER_RANGE:
                req.append ( pack ('>2L', f['min'], f['max']))
            elif filtertype == SPH_FILTER_FLOATRANGE:
                req.append ( pack ('>2f', f['min'], f['max']))
            req.append ( pack ( '>L', f['exclude'] ) )

        # group-by, max-matches, group-sort
        req.append ( pack ( '>2L', self._groupfunc, len(self._groupby) ) )
        req.append ( self._groupby )
        req.append ( pack ( '>2L', self._maxmatches, len(self._groupsort) ) )
        req.append ( self._groupsort )
        req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay))
        req.append ( pack ( '>L', len(self._groupdistinct)))
        req.append ( self._groupdistinct)

        # anchor point (0/1 presence flag, then attr names and coordinates)
        if len(self._anchor) == 0:
            req.append ( pack ('>L', 0))
        else:
            attrlat, attrlong = self._anchor['attrlat'], self._anchor['attrlong']
            latitude, longitude = self._anchor['lat'], self._anchor['long']
            req.append ( pack ('>L', 1))
            req.append ( pack ('>L', len(attrlat)) + attrlat)
            req.append ( pack ('>L', len(attrlong)) + attrlong)
            req.append ( pack ('>f', latitude) + pack ('>f', longitude))

        # per-index weights
        req.append ( pack ('>L',len(self._indexweights)))
        for indx,weight in self._indexweights.items():
            req.append ( pack ('>L',len(indx)) + indx + pack ('>L',weight))

        # max query time
        req.append ( pack ('>L', self._maxquerytime) )

        # per-field weights
        req.append ( pack ('>L',len(self._fieldweights) ) )
        for field,weight in self._fieldweights.items():
            req.append ( pack ('>L',len(field)) + field + pack ('>L',weight) )

        # comment
        req.append ( pack('>L',len(comment)) + comment )

        # send query, get response
        req = ''.join(req)

        self._reqs.append(req)
        return
    def RunQueries (self):
        """
        Run queries batch.
        Returns None on network IO failure; or an array of result set hashes on success.

        Each result hash carries 'error', 'warning', 'status', and on success
        'fields', 'attrs', 'matches', 'total', 'total_found', 'time' and
        'words'. The pending request list (self._reqs) is cleared on success.
        """
        if len(self._reqs)==0:
            self._error = 'no queries defined, issue AddQuery() first'
            return None

        sock = self._Connect()
        if not sock:
            return None

        # batch header: command, version, total body length, query count
        req = ''.join(self._reqs)
        length = len(req)+4
        req = pack('>HHLL', SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, length, len(self._reqs))+req
        sock.send(req)

        response = self._GetResponse(sock, VER_COMMAND_SEARCH)
        if not response:
            return None

        nreqs = len(self._reqs)

        # parse response; 'p' walks the byte offset through the body
        max_ = len(response)
        p = 0

        results = []
        for i in range(0,nreqs,1):
            result = {}
            result['error'] = ''
            result['warning'] = ''

            # per-query status; non-OK statuses carry a length-prefixed message
            status = unpack('>L', response[p:p+4])[0]
            p += 4
            result['status'] = status
            if status != SEARCHD_OK:
                length = unpack('>L', response[p:p+4])[0]
                p += 4
                message = response[p:p+length]
                p += length

                if status == SEARCHD_WARNING:
                    # warnings still carry a full result set; keep parsing
                    result['warning'] = message
                else:
                    result['error'] = message
                    continue

            # read schema
            fields = []
            attrs = []

            nfields = unpack('>L', response[p:p+4])[0]
            p += 4
            while nfields>0 and p<max_:
                nfields -= 1
                length = unpack('>L', response[p:p+4])[0]
                p += 4
                fields.append(response[p:p+length])
                p += length

            result['fields'] = fields

            nattrs = unpack('>L', response[p:p+4])[0]
            p += 4
            while nattrs>0 and p<max_:
                nattrs -= 1
                length = unpack('>L', response[p:p+4])[0]
                p += 4
                attr = response[p:p+length]
                p += length
                type_ = unpack('>L', response[p:p+4])[0]
                p += 4
                attrs.append([attr,type_])

            result['attrs'] = attrs

            # read match count and 64-bit-id flag
            count = unpack('>L', response[p:p+4])[0]
            p += 4
            id64 = unpack('>L', response[p:p+4])[0]
            p += 4

            # read matches
            result['matches'] = []
            while count>0 and p<max_:
                count -= 1
                if id64:
                    # 64-bit doc id arrives as two 32-bit words; the first
                    # word is the low half (see the shift below)
                    doc, dochi, weight = unpack('>3L', response[p:p+12])
                    doc += (dochi<<32)
                    p += 12
                else:
                    doc, weight = unpack('>2L', response[p:p+8])
                    p += 8

                match = { 'id':doc, 'weight':weight, 'attrs':{} }
                # NOTE(review): reuses loop variable 'i' from the outer query
                # loop; harmless, as 'for' rebinds 'i' on each outer iteration
                for i in range(len(attrs)):
                    if attrs[i][1] == SPH_ATTR_FLOAT:
                        match['attrs'][attrs[i][0]] = unpack('>f', response[p:p+4])[0]
                    elif attrs[i][1] == (SPH_ATTR_MULTI | SPH_ATTR_INTEGER):
                        match['attrs'][attrs[i][0]] = []
                        nvals = unpack('>L', response[p:p+4])[0]
                        p += 4
                        for n in range(0,nvals,1):
                            match['attrs'][attrs[i][0]].append(unpack('>L', response[p:p+4])[0])
                            p += 4
                        # back off one slot: the shared 'p += 4' below
                        # accounts for the last value read
                        p -= 4
                    else:
                        match['attrs'][attrs[i][0]] = unpack('>L', response[p:p+4])[0]
                    # advance past the (single) 32-bit value; the FLOAT and
                    # plain-int branches above deliberately do not advance
                    p += 4

                result['matches'].append ( match )

            result['total'], result['total_found'], result['time'], words = unpack('>4L', response[p:p+16])
            # query time arrives in milliseconds; reformat as seconds string
            result['time'] = '%.3f' % (result['time']/1000.0)
            p += 16

            # per-word statistics
            result['words'] = []
            while words>0:
                words -= 1
                length = unpack('>L', response[p:p+4])[0]
                p += 4
                word = response[p:p+length]
                p += length
                docs, hits = unpack('>2L', response[p:p+8])
                p += 8

                result['words'].append({'word':word, 'docs':docs, 'hits':hits})

            results.append(result)

        self._reqs = []
        sock.close()
        return results
    def BuildExcerpts (self, docs, index, words, opts=None):
        """
        Connect to searchd server and generate excerpts from given documents.

        'docs' is a list of document body strings, 'index' is an index name,
        'words' is a string of keywords to highlight, and 'opts' is an
        optional dict of formatting options ('before_match', 'after_match',
        'chunk_separator', 'limit', 'around'). Returns a list of excerpt
        strings, or [] on failure.
        """
        if not opts:
            opts = {}
        if isinstance(words,unicode):
            words = words.encode('utf-8')

        assert(isinstance(docs, list))
        assert(isinstance(index, str))
        assert(isinstance(words, str))
        assert(isinstance(opts, dict))

        sock = self._Connect()
        if not sock:
            # NOTE(review): connection failure returns None, unlike the []
            # returned on later failures
            return None

        # fixup options
        opts.setdefault('before_match', '<b>')
        opts.setdefault('after_match', '</b>')
        opts.setdefault('chunk_separator', ' ... ')
        opts.setdefault('limit', 256)
        opts.setdefault('around', 5)

        # build request
        # v.1.0 req

        # mode=0, flags=1 (remove spaces)
        req = [pack('>2L', 0, 1)]

        # req index
        req.append(pack('>L', len(index)))
        req.append(index)

        # req words
        req.append(pack('>L', len(words)))
        req.append(words)

        # options (each string is length-prefixed)
        req.append(pack('>L', len(opts['before_match'])))
        req.append(opts['before_match'])

        req.append(pack('>L', len(opts['after_match'])))
        req.append(opts['after_match'])

        req.append(pack('>L', len(opts['chunk_separator'])))
        req.append(opts['chunk_separator'])

        req.append(pack('>L', int(opts['limit'])))
        req.append(pack('>L', int(opts['around'])))

        # documents
        req.append(pack('>L', len(docs)))
        for doc in docs:
            if isinstance(doc,unicode):
                doc = doc.encode('utf-8')
            assert(isinstance(doc, str))

            req.append(pack('>L', len(doc)))
            req.append(doc)

        req = ''.join(req)

        # send query, get response
        length = len(req)

        # add header
        req = pack('>2HL', SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, length)+req
        wrote = sock.send(req)

        response = self._GetResponse(sock, VER_COMMAND_EXCERPT )
        if not response:
            return []

        # parse response: one length-prefixed excerpt per input document
        pos = 0
        res = []
        rlen = len(response)

        for i in range(len(docs)):
            length = unpack('>L', response[pos:pos+4])[0]
            pos += 4

            if pos+length > rlen:
                self._error = 'incomplete reply'
                return []

            res.append(response[pos:pos+length])
            pos += length

        return res
  598. def UpdateAttributes ( self, index, attrs, values ):
  599. """
  600. Update given attribute values on given documents in given indexes.
  601. Returns amount of updated documents (0 or more) on success, or -1 on failure.
  602. 'attrs' must be a list of strings.
  603. 'values' must be a dict with int key (document ID) and list of int values (new attribute values).
  604. Example:
  605. res = cl.UpdateAttributes ( 'test1', [ 'group_id', 'date_added' ], { 2:[123,1000000000], 4:[456,1234567890] } )
  606. """
  607. assert ( isinstance ( index, str ) )
  608. assert ( isinstance ( attrs, list ) )
  609. assert ( isinstance ( values, dict ) )
  610. for attr in attrs:
  611. assert ( isinstance ( attr, str ) )
  612. for docid, entry in values.items():
  613. assert ( isinstance ( docid, int ) )
  614. assert ( isinstance ( entry, list ) )
  615. assert ( len(attrs)==len(entry) )
  616. for val in entry:
  617. assert ( isinstance ( val, int ) )
  618. # build request
  619. req = [ pack('>L',len(index)), index ]
  620. req.append ( pack('>L',len(attrs)) )
  621. for attr in attrs:
  622. req.append ( pack('>L',len(attr)) + attr )
  623. req.append ( pack('>L',len(values)) )
  624. for docid, entry in values.items():
  625. req.append ( pack('>q',docid) )
  626. for val in entry:
  627. req.append ( pack('>L',val) )
  628. # connect, send query, get response
  629. sock = self._Connect()
  630. if not sock:
  631. return None
  632. req = ''.join(req)
  633. length = len(req)
  634. req = pack ( '>2HL', SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, length ) + req
  635. wrote = sock.send ( req )
  636. response = self._GetResponse ( sock, VER_COMMAND_UPDATE )
  637. if not response:
  638. return -1
  639. # parse response
  640. updated = unpack ( '>L', response[0:4] )[0]
  641. return updated
    def BuildKeywords ( self, query, index, hits ):
        """
        Connect to searchd server, and generate keywords list for a given query.
        Returns None on failure, or a list of keywords on success.

        Each list entry is a dict with 'tokenized' and 'normalized' keys;
        when 'hits' is non-zero, per-keyword 'docs' and 'hits' statistics
        are included as well.
        """
        assert ( isinstance ( query, str ) )
        assert ( isinstance ( index, str ) )
        assert ( isinstance ( hits, int ) )

        # build request
        req = [ pack ( '>L', len(query) ) + query ]
        req.append ( pack ( '>L', len(index) ) + index )
        req.append ( pack ( '>L', hits ) )

        # connect, send query, get response
        sock = self._Connect()
        if not sock:
            return None

        req = ''.join(req)
        length = len(req)
        req = pack ( '>2HL', SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, length ) + req
        wrote = sock.send ( req )

        response = self._GetResponse ( sock, VER_COMMAND_KEYWORDS )
        if not response:
            return None

        # parse response: keyword count, then per-keyword records
        res = []

        nwords = unpack ( '>L', response[0:4] )[0]
        p = 4
        max_ = len(response)

        while nwords>0 and p<max_:
            nwords -= 1

            length = unpack ( '>L', response[p:p+4] )[0]
            p += 4
            tokenized = response[p:p+length]
            p += length

            length = unpack ( '>L', response[p:p+4] )[0]
            p += 4
            normalized = response[p:p+length]
            p += length

            entry = { 'tokenized':tokenized, 'normalized':normalized }
            if hits:
                entry['docs'], entry['hits'] = unpack ( '>2L', response[p:p+8] )
                p += 8

            res.append ( entry )

        # loop ended early or overran the buffer: reply was truncated
        if nwords>0 or p>max_:
            self._error = 'incomplete reply'
            return None

        return res
#
# $Id$
#