sphinxapi.php 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181
  1. <?php
  2. //
  3. // $Id$
  4. //
  5. //
  6. // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
  7. //
  8. // This program is free software; you can redistribute it and/or modify
  9. // it under the terms of the GNU General Public License. You should have
  10. // received a copy of the GPL license along with this program; if you
  11. // did not, you can find it at http://www.gnu.org/
  12. //
  13. /////////////////////////////////////////////////////////////////////////////
  14. // PHP version of Sphinx searchd client (PHP API)
  15. /////////////////////////////////////////////////////////////////////////////
  /// searchd command identifiers (first 16-bit word of a request header)
  define ( 'SEARCHD_COMMAND_SEARCH',   0 );
  define ( 'SEARCHD_COMMAND_EXCERPT',  1 );
  define ( 'SEARCHD_COMMAND_UPDATE',   2 );
  define ( 'SEARCHD_COMMAND_KEYWORDS', 3 );

  /// client-side implementation version of each command
  define ( 'VER_COMMAND_SEARCH',   0x113 );
  define ( 'VER_COMMAND_EXCERPT',  0x100 );
  define ( 'VER_COMMAND_UPDATE',   0x101 );
  define ( 'VER_COMMAND_KEYWORDS', 0x100 );

  /// status codes returned by searchd
  define ( 'SEARCHD_OK',      0 );
  define ( 'SEARCHD_ERROR',   1 );
  define ( 'SEARCHD_RETRY',   2 );
  define ( 'SEARCHD_WARNING', 3 );

  /// query matching modes
  define ( 'SPH_MATCH_ALL',       0 );
  define ( 'SPH_MATCH_ANY',       1 );
  define ( 'SPH_MATCH_PHRASE',    2 );
  define ( 'SPH_MATCH_BOOLEAN',   3 );
  define ( 'SPH_MATCH_EXTENDED',  4 );
  define ( 'SPH_MATCH_FULLSCAN',  5 );
  define ( 'SPH_MATCH_EXTENDED2', 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

  /// ranking modes (ext2 only)
  define ( 'SPH_RANK_PROXIMITY_BM25', 0 ); ///< default: phrase proximity is the major factor, BM25 the minor one
  define ( 'SPH_RANK_BM25',           1 ); ///< statistical mode, BM25 only (faster but worse quality)
  define ( 'SPH_RANK_NONE',           2 ); ///< no ranking; every match gets a weight of 1
  define ( 'SPH_RANK_WORDCOUNT',      3 ); ///< word-count weighting: weighted sum of per-field keyword occurrence counts

  /// result sorting modes
  define ( 'SPH_SORT_RELEVANCE',     0 );
  define ( 'SPH_SORT_ATTR_DESC',     1 );
  define ( 'SPH_SORT_ATTR_ASC',      2 );
  define ( 'SPH_SORT_TIME_SEGMENTS', 3 );
  define ( 'SPH_SORT_EXTENDED',      4 );
  define ( 'SPH_SORT_EXPR',          5 );

  /// filter types
  define ( 'SPH_FILTER_VALUES',     0 );
  define ( 'SPH_FILTER_RANGE',      1 );
  define ( 'SPH_FILTER_FLOATRANGE', 2 );

  /// attribute types (SPH_ATTR_MULTI is a flag bit tested with "&")
  define ( 'SPH_ATTR_INTEGER',   1 );
  define ( 'SPH_ATTR_TIMESTAMP', 2 );
  define ( 'SPH_ATTR_ORDINAL',   3 );
  define ( 'SPH_ATTR_BOOL',      4 );
  define ( 'SPH_ATTR_FLOAT',     5 );
  define ( 'SPH_ATTR_MULTI',     0x40000000 );

  /// grouping functions
  define ( 'SPH_GROUPBY_DAY',      0 );
  define ( 'SPH_GROUPBY_WEEK',     1 );
  define ( 'SPH_GROUPBY_MONTH',    2 );
  define ( 'SPH_GROUPBY_YEAR',     3 );
  define ( 'SPH_GROUPBY_ATTR',     4 );
  define ( 'SPH_GROUPBY_ATTRPAIR', 5 );
  69. /// portably pack numeric to 64 unsigned bits, network order
  /// Pack a numeric value into 8 bytes: a 64-bit integer in network
  /// (big-endian) byte order, as two 32-bit words (high, low).
  ///
  /// $v may be an int, a float or a decimal numeric string.
  /// Returns the 8-byte binary string.
  ///
  /// On 64-bit PHP, unsigned decimal strings of 19 or more significant digits
  /// (which may exceed PHP_INT_MAX) are split arithmetically instead of being
  /// cast to int, because the cast would clamp them to PHP_INT_MAX.
  /// On 32-bit PHP without bcmath, values longer than 15 digits still
  /// terminate the script via die().
  function sphPack64 ( $v )
  {
      assert ( is_numeric($v) );

      // x64 route
      if ( PHP_INT_SIZE>=8 )
      {
          if ( is_string($v) && strspn ( $v, "0123456789" )==strlen($v) )
          {
              $digits = ltrim ( $v, "0" );
              if ( strlen($digits)>=19 )
              {
                  // value = hi*10^13 + lo, and 10^13 = 2328*2^32 + 1316134912,
                  // so every intermediate stays inside the signed 64-bit range
                  $split = strlen($digits) - 13;
                  $lo = (int)substr ( $digits, $split );
                  $hi = (int)substr ( $digits, 0, $split );
                  $m = $lo + $hi*1316134912;
                  $l = $m & 0xFFFFFFFF;
                  $h = $hi*2328 + ( $m>>32 );
                  return pack ( "NN", $h, $l );
              }
          }

          $i = (int)$v;
          return pack ( "NN", $i>>32, $i & 0xFFFFFFFF );
      }

      // x32 route, bcmath
      $x = "4294967296";
      if ( function_exists("bcmul") )
      {
          $h = bcdiv ( $v, $x, 0 );
          $l = bcmod ( $v, $x );
          return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
      }

      // x32 route, 15 or less decimal digits
      // float is actually a double with 52 precision bits, enough for 15 digits
      if ( strlen($v)<=15 )
      {
          $f = (float)$v;
          $h = (int)($f/$x);
          $l = (int)($f-$x*$h);
          return pack ( "NN", $h, $l );
      }

      // x32 route, 16 or more decimal digits
      // well, let me know if you *really* need this
      die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
  }
  100. /// portably unpack 64 unsigned bits, network order to numeric
  /// Unpack 8 bytes (64-bit unsigned integer, network byte order) to a numeric.
  ///
  /// $v is the 8-byte binary string.
  /// On 64-bit PHP the result is an int while the value fits the signed range
  /// (high word below 2^31); larger values are returned as an unsigned decimal
  /// string, because int arithmetic would wrap them to a negative number.
  /// On 32-bit PHP the result is always a decimal string; without bcmath,
  /// values whose high word is 2^20 or more terminate the script via die().
  function sphUnpack64 ( $v )
  {
      list ( $h, $l ) = array_values ( unpack ( "N*N*", $v ) );

      // x64 route
      if ( PHP_INT_SIZE>=8 )
      {
          if ( $h<0 ) $h += (1<<32); // unpack() in php 5.2.2 to 5.2.5 may return negative words
          if ( $l<0 ) $l += (1<<32);

          // fits signed 64-bit, plain int arithmetic is exact
          if ( $h<=2147483647 )
              return ($h<<32) + $l;

          // value >= 2^63: split both words by 10^5 and assemble the decimal string
          $c = 100000;
          $hi = ( (int)($h/$c) << 32 ) + (int)($l/$c);
          $lo = ( ($h%$c) << 32 ) + ($l%$c);
          if ( $lo>=$c )
          {
              $rem = $lo % $c;
              $hi += (int)( ($lo-$rem)/$c );
              $lo = $rem;
          }
          return sprintf ( "%d%05d", $hi, $lo );
      }

      // x32 route
      $h = sprintf ( "%u", $h );
      $l = sprintf ( "%u", $l );
      $x = "4294967296";

      // bcmath
      if ( function_exists("bcmul") )
          return bcadd ( $l, bcmul ( $x, $h ) );

      // no bcmath, 15 or less decimal digits
      // float is actually a double with 52 precision bits, enough for 15 digits
      if ( $h<1048576 )
      {
          $f = ((float)$h)*$x + (float)$l;
          return sprintf ( "%.0f", $f ); // builtin conversion is only about 39-40 bits precise!
      }

      // x32 route, 16 or more decimal digits
      // well, let me know if you *really* need this
      die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
  }
  129. /// sphinx searchd client class
  130. class SphinxClient
  131. {
  // per-client-object settings
  var $_host;           ///< searchd host (default is "localhost")
  var $_port;           ///< searchd port (default is 3312)

  // per-query settings
  var $_offset;         ///< number of records to skip from the start of the result set (default is 0)
  var $_limit;          ///< number of records to return, starting at offset (default is 20)
  var $_mode;           ///< query matching mode (default is SPH_MATCH_ALL)
  var $_weights;        ///< per-field weights (default is 1 for all fields)
  var $_sort;           ///< match sorting mode (default is SPH_SORT_RELEVANCE)
  var $_sortby;         ///< attribute to sort by (default is "")
  var $_min_id;         ///< min ID to match (default is 0, which means no limit)
  var $_max_id;         ///< max ID to match (default is 0, which means no limit)
  var $_filters;        ///< search filters
  var $_groupby;        ///< group-by attribute name
  var $_groupfunc;      ///< group-by function (pre-processes the group-by attribute value)
  var $_groupsort;      ///< group-by sorting clause (orders groups in the result set)
  var $_groupdistinct;  ///< group-by count-distinct attribute
  var $_maxmatches;     ///< max matches to retrieve
  var $_cutoff;         ///< cutoff to stop searching at (default is 0)
  var $_retrycount;     ///< distributed retries count
  var $_retrydelay;     ///< distributed retries delay
  var $_anchor;         ///< geographical anchor point
  var $_indexweights;   ///< per-index weights
  var $_ranker;         ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
  var $_maxquerytime;   ///< max query time, milliseconds (default is 0, do not limit)
  var $_fieldweights;   ///< per-field-name weights

  // per-reply fields and internal state
  var $_error;          ///< last error message
  var $_warning;        ///< last warning message
  var $_reqs;           ///< requests array for multi-query
  var $_mbenc;          ///< stored mbstring encoding
  var $_arrayresult;    ///< whether $result["matches"] should be a hash or an array
  161. /////////////////////////////////////////////////////////////////////////////
  162. // common stuff
  163. /////////////////////////////////////////////////////////////////////////////
  164. /// create a new client object and fill defaults
  /// PHP 4 style constructor, kept so existing code that calls
  /// SphinxClient() explicitly (e.g. from a subclass) keeps working.
  /// PHP 8 no longer treats a method named after its class as a constructor,
  /// so the real initialization lives in __construct().
  function SphinxClient ()
  {
      $this->__construct ();
  }

  /// create a new client object and fill defaults
  function __construct ()
  {
      // per-client-object settings
      $this->_host = "localhost";
      $this->_port = 3312;

      // per-query settings
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct = "";
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;
      $this->_anchor = array ();
      $this->_indexweights = array ();
      $this->_ranker = SPH_RANK_PROXIMITY_BM25;
      $this->_maxquerytime = 0;
      $this->_fieldweights = array ();

      // per-reply fields (for single-query case)
      $this->_error = "";
      $this->_warning = "";

      // requests storage (for multi-query case)
      $this->_reqs = array ();

      // mbstring workaround state and result format
      $this->_mbenc = "";
      $this->_arrayresult = false;
  }
  199. /// get last error message (string)
  /// Return the most recent error message stored on this client (string;
  /// empty until an error has been recorded).
  function GetLastError ()
  {
      $message = $this->_error;
      return $message;
  }
  204. /// get last warning message (string)
  /// Return the most recent warning message stored on this client (string;
  /// empty until a warning has been recorded).
  function GetLastWarning ()
  {
      $message = $this->_warning;
      return $message;
  }
  209. /// set searchd host name (string) and port (integer)
  /// Remember which searchd instance to talk to.
  /// $host must be a string, $port must be an integer.
  function SetServer ( $host, $port )
  {
      assert ( is_string ( $host ) );
      assert ( is_int ( $port ) );

      $this->_host = $host;
      $this->_port = $port;
  }
  217. /////////////////////////////////////////////////////////////////////////////
  218. /// enter mbstring workaround mode
  /// Enter mbstring workaround mode.
  /// When the "mbstring.func_overload" setting has bit 2 set, the current
  /// internal encoding is saved in $_mbenc and the encoding is switched to
  /// latin1; otherwise $_mbenc is just reset to "".
  function _MBPush ()
  {
      $this->_mbenc = "";

      $overload = ini_get ( "mbstring.func_overload" );
      if ( !( $overload & 2 ) )
          return;

      $this->_mbenc = mb_internal_encoding();
      mb_internal_encoding ( "latin1" );
  }
  228. /// leave mbstring workaround mode
  /// Leave mbstring workaround mode: restore the encoding saved by
  /// _MBPush(), if one was saved.
  function _MBPop ()
  {
      if ( !$this->_mbenc )
          return;

      mb_internal_encoding ( $this->_mbenc );
  }
  234. /// connect to searchd server
  /// Open the stream that requests are written to.
  /// This implementation returns a write handle on php://stdout rather than
  /// a socket; the socket-based handshake lives in _OldConnect().
  /// NOTE(review): looks like a deliberate debugging/dump stub - confirm
  /// before shipping.
  function _Connect ()
  {
      $stream = fopen ( 'php://stdout', 'w' );
      return $stream;
  }
  /// Connect to searchd over TCP and perform the version handshake.
  ///
  /// Returns the connected socket on success. On failure stores a message
  /// in $_error and returns false. The socket error code and text reported
  /// by fsockopen() are appended to the connection-failure message.
  function _OldConnect()
  {
      $errno = 0;
      $errstr = "";
      $fp = @fsockopen ( $this->_host, $this->_port, $errno, $errstr );
      if ( !$fp )
      {
          $this->_error = "connection to {$this->_host}:{$this->_port} failed (errno=$errno, msg=$errstr)";
          return false;
      }

      // check version; a short or failed read is treated as version 0
      // instead of being handed to unpack()
      $v = 0;
      $hello = fread ( $fp, 4 );
      if ( is_string($hello) && strlen($hello)==4 )
      {
          list(,$v) = unpack ( "N*", $hello );
          $v = (int)$v;
      }
      if ( $v<1 )
      {
          fclose ( $fp );
          $this->_error = "expected searchd protocol version 1+, got version '$v'";
          return false;
      }

      // all ok, send my version
      fwrite ( $fp, pack ( "N", 1 ) );
      return $fp;
  }
  259. /// get and check response packet from searchd server
  /// Get and check response packet from searchd server.
  ///
  /// This implementation is a stub: it never reads a response and always
  /// returns false (the reading logic lives in _OldGetResponse()).
  /// Like _OldGetResponse() it closes $fp, and it records a message in
  /// $_error so that callers returning false do not expose a stale or empty
  /// error string.
  /// NOTE(review): looks like a deliberate debugging stub paired with
  /// _Connect() - confirm before shipping.
  function _GetResponse ( $fp, $client_ver )
  {
      if ( is_resource ( $fp ) )
          fclose ( $fp );

      $this->_error = "searchd response reading is disabled (stubbed _GetResponse)";
      return false;
  }
  /// Read one response packet from $fp, close $fp, and validate the packet.
  ///
  /// The packet starts with an 8-byte header: 16-bit status, 16-bit command
  /// version, 32-bit body length, followed by the body.
  /// $client_ver is the client-side version of the command that was sent;
  /// if the server reports an older version, a warning is stored in $_warning.
  ///
  /// Returns the response body (with the warning text stripped when status is
  /// SEARCHD_WARNING), or false with $_error set on any failure.
  function _OldGetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;

      $header = fread ( $fp, 8 );
      if ( strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, $left );

              // read failure: stop instead of spinning forever
              if ( $chunk===false )
                  break;

              if ( $chunk==="" )
              {
                  // nothing arrived; give up only if the stream reports a timeout
                  $meta = stream_get_meta_data ( $fp );
                  if ( !empty($meta["timed_out"]) )
                      break;
                  continue;
              }

              // compare against "" above rather than testing truthiness:
              // a chunk consisting of the single byte "0" is valid data
              $response .= $chunk;
              $left -= strlen($chunk);
          }
      }
      fclose ( $fp );

      // check response
      $read = strlen ( $response );
      if ( $response==="" || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body is: 32-bit warning length, warning text, actual payload
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }

      return $response;
  }
  323. /////////////////////////////////////////////////////////////////////////////
  324. // searching
  325. /////////////////////////////////////////////////////////////////////////////
  326. /// set offset and count into result set,
  327. /// and optionally set max-matches and cutoff limits
  /// Set the result window: skip $offset matches, return up to $limit.
  /// $max and $cutoff are optional; each one replaces the stored
  /// max-matches / cutoff setting only when it is greater than zero.
  function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
  {
      assert ( is_int ( $offset ) );
      assert ( is_int ( $limit ) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;

      if ( $max>0 )
      {
          $this->_maxmatches = $max;
      }
      if ( $cutoff>0 )
      {
          $this->_cutoff = $cutoff;
      }
  }
  342. /// set maximum query time, in milliseconds, per-index
  343. /// integer, 0 means "do not limit"
  /// Store the maximum query time in milliseconds.
  /// $max must be a non-negative integer; 0 means "do not limit".
  function SetMaxQueryTime ( $max )
  {
      assert ( is_int ( $max ) );
      assert ( $max>=0 );

      $this->_maxquerytime = $max;
  }
  350. /// set matching mode
  /// Select the query matching mode; $mode must be one of the
  /// SPH_MATCH_xxx constants.
  function SetMatchMode ( $mode )
  {
      $known = array (
          SPH_MATCH_ALL,
          SPH_MATCH_ANY,
          SPH_MATCH_PHRASE,
          SPH_MATCH_BOOLEAN,
          SPH_MATCH_EXTENDED,
          SPH_MATCH_FULLSCAN,
          SPH_MATCH_EXTENDED2 );

      // loose comparison, same as the "==" checks it replaces
      assert ( in_array ( $mode, $known ) );

      $this->_mode = $mode;
  }
  362. /// set ranking mode
  /// Select the ranking mode; $ranker must be one of the
  /// SPH_RANK_xxx constants.
  function SetRankingMode ( $ranker )
  {
      $known = array (
          SPH_RANK_PROXIMITY_BM25,
          SPH_RANK_BM25,
          SPH_RANK_NONE,
          SPH_RANK_WORDCOUNT );

      // loose comparison, same as the "==" checks it replaces
      assert ( in_array ( $ranker, $known ) );

      $this->_ranker = $ranker;
  }
  371. /// set matches sorting mode
  /// Select how matches are sorted.
  /// $mode must be one of the SPH_SORT_xxx constants; $sortby is the sort-by
  /// string and must be non-empty for every mode except SPH_SORT_RELEVANCE.
  function SetSortMode ( $mode, $sortby="" )
  {
      $known = array (
          SPH_SORT_RELEVANCE,
          SPH_SORT_ATTR_DESC,
          SPH_SORT_ATTR_ASC,
          SPH_SORT_TIME_SEGMENTS,
          SPH_SORT_EXTENDED,
          SPH_SORT_EXPR );

      // loose comparison, same as the "==" checks it replaces
      assert ( in_array ( $mode, $known ) );
      assert ( is_string ( $sortby ) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sort = $mode;
      $this->_sortby = $sortby;
  }
  386. /// bind per-field weights by order
  387. /// DEPRECATED; use SetFieldWeights() instead
  /// Bind per-field weights by field order.
  /// $weights must be an array of integers.
  /// DEPRECATED; use SetFieldWeights() instead.
  function SetWeights ( $weights )
  {
      assert ( is_array ( $weights ) );
      foreach ( $weights as $w )
      {
          assert ( is_int ( $w ) );
      }

      $this->_weights = $weights;
  }
  395. /// bind per-field weights by name
  /// Bind per-field weights by field name.
  /// $weights must be an array mapping string field names to integer weights.
  function SetFieldWeights ( $weights )
  {
      assert ( is_array ( $weights ) );
      foreach ( $weights as $field=>$w )
      {
          assert ( is_string ( $field ) );
          assert ( is_int ( $w ) );
      }

      $this->_fieldweights = $weights;
  }
  406. /// bind per-index weights by name
  /// Bind per-index weights by index name.
  /// $weights must be an array mapping string index names to integer weights.
  function SetIndexWeights ( $weights )
  {
      assert ( is_array ( $weights ) );
      foreach ( $weights as $name=>$w )
      {
          assert ( is_string ( $name ) );
          assert ( is_int ( $w ) );
      }

      $this->_indexweights = $weights;
  }
  417. /// set IDs range to match
  418. /// only match records if document ID is between $min and $max (inclusive)
  /// Restrict matches to document IDs between $min and $max (inclusive).
  /// Both bounds must be numeric and $min must not exceed $max.
  function SetIDRange ( $min, $max )
  {
      assert ( is_numeric ( $min ) );
      assert ( is_numeric ( $max ) );
      assert ( $min<=$max );

      $this->_min_id = $min;
      $this->_max_id = $max;
  }
  427. /// set values set filter
  428. /// only match records where $attribute value is in given set
  /// Add a value-set filter: only match records whose $attribute value is in
  /// $values (a non-empty array of numerics). $exclude is stored with the
  /// filter and sent as its exclude flag.
  /// Nothing is added when $values is not a non-empty array.
  function SetFilter ( $attribute, $values, $exclude=false )
  {
      assert ( is_string ( $attribute ) );
      assert ( is_array ( $values ) );
      assert ( count ( $values ) );

      // with assertions disabled, silently ignore unusable input
      if ( !is_array ( $values ) || !count ( $values ) )
          return;

      foreach ( $values as $v )
      {
          assert ( is_numeric ( $v ) );
      }

      $this->_filters[] = array (
          "type"    => SPH_FILTER_VALUES,
          "attr"    => $attribute,
          "exclude" => $exclude,
          "values"  => $values );
  }
  441. /// set range filter
  442. /// only match records if $attribute value is between $min and $max (inclusive)
  /// Add an integer range filter: only match records whose $attribute value
  /// is between $min and $max (inclusive). $exclude is stored with the filter
  /// and sent as its exclude flag.
  function SetFilterRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string ( $attribute ) );
      assert ( is_int ( $min ) );
      assert ( is_int ( $max ) );
      assert ( $min<=$max );

      $this->_filters[] = array (
          "type"    => SPH_FILTER_RANGE,
          "attr"    => $attribute,
          "exclude" => $exclude,
          "min"     => $min,
          "max"     => $max );
  }
  451. /// set float range filter
  452. /// only match records if $attribute value is between $min and $max (inclusive)
  /// Add a float range filter: only match records whose $attribute value is
  /// between $min and $max (inclusive). Both bounds must be floats.
  /// $exclude is stored with the filter and sent as its exclude flag.
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string ( $attribute ) );
      assert ( is_float ( $min ) );
      assert ( is_float ( $max ) );
      assert ( $min<=$max );

      $this->_filters[] = array (
          "type"    => SPH_FILTER_FLOATRANGE,
          "attr"    => $attribute,
          "exclude" => $exclude,
          "min"     => $min,
          "max"     => $max );
  }
  461. /// setup anchor point for geosphere distance calculations
  462. /// required to use @geodist in filters and sorting
  463. /// latitude and longitude must be in radians
  /// Set the anchor point for geosphere distance calculations
  /// (required to use @geodist in filters and sorting).
  /// $attrlat / $attrlong name the latitude / longitude attributes;
  /// $lat / $long are floats and must be in radians.
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      assert ( is_string ( $attrlat ) );
      assert ( is_string ( $attrlong ) );
      assert ( is_float ( $lat ) );
      assert ( is_float ( $long ) );

      $this->_anchor = array (
          "attrlat"  => $attrlat,
          "attrlong" => $attrlong,
          "lat"      => $lat,
          "long"     => $long );
  }
  472. /// set grouping attribute and function
  /// Configure grouping: $attribute is the group-by attribute, $func one of
  /// the SPH_GROUPBY_xxx constants, $groupsort the clause used to sort groups.
  function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
  {
      $known = array (
          SPH_GROUPBY_DAY,
          SPH_GROUPBY_WEEK,
          SPH_GROUPBY_MONTH,
          SPH_GROUPBY_YEAR,
          SPH_GROUPBY_ATTR,
          SPH_GROUPBY_ATTRPAIR );

      assert ( is_string ( $attribute ) );
      assert ( is_string ( $groupsort ) );
      // loose comparison, same as the "==" checks it replaces
      assert ( in_array ( $func, $known ) );

      $this->_groupby = $attribute;
      $this->_groupfunc = $func;
      $this->_groupsort = $groupsort;
  }
  487. /// set count-distinct attribute for group-by queries
  /// Store the count-distinct attribute name for group-by queries.
  /// $attribute must be a string.
  function SetGroupDistinct ( $attribute )
  {
      assert ( is_string ( $attribute ) );

      $this->_groupdistinct = $attribute;
  }
  493. /// set distributed retries count and delay
  /// Store the distributed retries count and delay.
  /// Both values must be non-negative integers.
  function SetRetries ( $count, $delay=0 )
  {
      assert ( is_int ( $count ) && $count>=0 );
      assert ( is_int ( $delay ) && $delay>=0 );

      $this->_retrycount = $count;
      $this->_retrydelay = $delay;
  }
  501. /// set result set format (hash or array; hash by default)
  502. /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
  /// Choose the format of $result["matches"]: true for a plain array of
  /// matches, false for a hash keyed by document ID.
  /// PHP specific; needed for group-by-MVA result sets that may contain
  /// duplicate IDs.
  function SetArrayResult ( $arrayresult )
  {
      assert ( is_bool ( $arrayresult ) );

      $this->_arrayresult = $arrayresult;
  }
  508. //////////////////////////////////////////////////////////////////////////////
  509. /// clear all filters (for multi-queries)
  /// Drop every stored filter and the geo anchor (for multi-queries).
  function ResetFilters ()
  {
      $this->_anchor = array ();
      $this->_filters = array ();
  }
  515. /// clear groupby settings (for multi-queries)
  /// Restore the group-by settings to the values the constructor assigns
  /// (for multi-queries).
  function ResetGroupBy ()
  {
      $this->_groupby       = "";
      $this->_groupfunc     = SPH_GROUPBY_DAY;
      $this->_groupsort     = "@group desc";
      $this->_groupdistinct = "";
  }
  523. //////////////////////////////////////////////////////////////////////////////
  524. /// connect to searchd server, run given search query through given indexes,
  525. /// and return the search results
  /// Run a single search query and return its result set.
  ///
  /// Queues the query with AddQuery(), executes it with RunQueries(), and
  /// copies the first result's error/warning text into $_error/$_warning.
  /// Returns false if RunQueries() did not return an array or if the result
  /// status is SEARCHD_ERROR; otherwise returns the result array.
  /// No other queries may be pending when this is called.
  function Query ( $query, $index="*", $comment="" )
  {
      assert ( empty($this->_reqs) );

      $this->AddQuery ( $query, $index, $comment );
      $batch = $this->RunQueries ();

      // probably network error; error message should be already filled
      if ( !is_array ( $batch ) )
          return false;

      $first = $batch[0];
      $this->_error = $first["error"];
      $this->_warning = $first["warning"];

      return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
  }
  540. /// helper to pack floats in network byte order
  /// Pack a float as 4 bytes in network byte order.
  /// The value is packed in machine order, reread as a machine-order
  /// 32-bit integer, and that integer is repacked big-endian.
  function _PackFloat ( $f )
  {
      $machine = pack ( "f", $f );
      list(,$bits) = unpack ( "L*", $machine );
      return pack ( "N", $bits );
  }
  547. /// add query to multi-query batch
  548. /// returns index into results array from RunQueries() call
  /// Serialize the current query settings plus $query into a binary request
  /// and append it to the multi-query batch.
  ///
  /// $query is the query text, $index the index list string, $comment a
  /// comment string sent along with the request.
  /// Returns the index of this request in the batch, which is also its index
  /// in the results array returned by RunQueries().
  function AddQuery ( $query, $index="*", $comment="" )
  {
      // mbstring workaround: string functions must count bytes while packing
      $this->_MBPush ();

      $parts = array ();

      // offset, limit, match mode, ranker, sort mode; then sort-by and query text
      $parts[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
      $parts[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
      $parts[] = pack ( "N", strlen($query) ) . $query;

      // per-field weights by order
      $parts[] = pack ( "N", count($this->_weights) );
      foreach ( $this->_weights as $w )
      {
          $parts[] = pack ( "N", (int)$w );
      }

      // indexes
      $parts[] = pack ( "N", strlen($index) ) . $index;

      // id64 range marker, then the 64-bit id range
      $parts[] = pack ( "N", 1 );
      $parts[] = sphPack64 ( $this->_min_id ) . sphPack64 ( $this->_max_id );

      // filters
      $parts[] = pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $flt )
      {
          $parts[] = pack ( "N", strlen($flt["attr"]) ) . $flt["attr"];
          $parts[] = pack ( "N", $flt["type"] );

          if ( $flt["type"]==SPH_FILTER_VALUES )
          {
              $parts[] = pack ( "N", count($flt["values"]) );
              foreach ( $flt["values"] as $v )
              {
                  // floatval() works around the 32bit signed int limit on x32 platforms
                  $parts[] = pack ( "N", floatval($v) );
              }
          }
          elseif ( $flt["type"]==SPH_FILTER_RANGE )
          {
              $parts[] = pack ( "NN", $flt["min"], $flt["max"] );
          }
          elseif ( $flt["type"]==SPH_FILTER_FLOATRANGE )
          {
              $parts[] = $this->_PackFloat ( $flt["min"] ) . $this->_PackFloat ( $flt["max"] );
          }
          else
          {
              assert ( 0 && "internal error: unhandled filter type" );
          }

          $parts[] = pack ( "N", $flt["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $parts[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
      $parts[] = pack ( "N", $this->_maxmatches );
      $parts[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
      $parts[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $parts[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

      // anchor point: presence flag, then attribute names and coordinates
      if ( empty($this->_anchor) )
      {
          $parts[] = pack ( "N", 0 );
      }
      else
      {
          $anchor = $this->_anchor;
          $parts[] = pack ( "N", 1 );
          $parts[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
          $parts[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
          $parts[] = $this->_PackFloat ( $anchor["lat"] ) . $this->_PackFloat ( $anchor["long"] );
      }

      // per-index weights
      $parts[] = pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $name=>$w )
      {
          $parts[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
      }

      // max query time
      $parts[] = pack ( "N", $this->_maxquerytime );

      // per-field weights by name
      $parts[] = pack ( "N", count($this->_fieldweights) );
      foreach ( $this->_fieldweights as $name=>$w )
      {
          $parts[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
      }

      // comment
      $parts[] = pack ( "N", strlen($comment) ) . $comment;

      $req = join ( "", $parts );

      // mbstring workaround
      $this->_MBPop ();

      // store request to requests array
      $this->_reqs[] = $req;
      return count($this->_reqs)-1;
  }
  623. /// connect to searchd, run queries batch, and return an array of result sets
  /// Send every request queued by AddQuery() as one batch and parse the reply.
  ///
  /// Returns false (with $_error set here when the batch is empty) if there
  /// is nothing to send, if _Connect() fails, or if _GetResponse() yields no
  /// response. Otherwise returns an array with one result per parsed query.
  /// Each result holds "error", "warning" and "status"; unless the status is
  /// an error it also holds "fields", "attrs", "total", "total_found" and
  /// "time", plus "matches" and "words" when the response contains any.
  /// The pending request list is cleared only after a response was obtained.
  function RunQueries ()
  {
      if ( empty($this->_reqs) )
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      // mbstring workaround
      $this->_MBPush ();

      $fp = $this->_Connect();
      if ( !$fp )
      {
          $this->_MBPop ();
          return false;
      }

      // build the packet: command header, request count, concatenated requests
      $nreqs = count($this->_reqs);
      $body = join ( "", $this->_reqs );
      $len = 4+strlen($body);
      $packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $body;
      fwrite ( $fp, $packet, $len+8 );

      $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      $this->_reqs = array ();

      // parse response; $end guards every loop against broken responses
      $pos = 0;
      $end = strlen($response);
      $results = array ();

      for ( $ires=0; $ires<$nreqs && $pos<$end; $ires++ )
      {
          $result = array ( "error"=>"", "warning"=>"" );

          // extract status
          list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          $result["status"] = $status;

          if ( $status!=SEARCHD_OK )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $message = substr ( $response, $pos, $len );
              $pos += $len;

              if ( $status!=SEARCHD_WARNING )
              {
                  // an error result carries nothing but the message
                  $result["error"] = $message;
                  $results[] = $result;
                  continue;
              }
              $result["warning"] = $message;
          }

          // read schema: field names
          $fields = array ();
          list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          while ( $nfields-->0 && $pos<$end )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $fields[] = substr ( $response, $pos, $len );
              $pos += $len;
          }
          $result["fields"] = $fields;

          // read schema: attribute name => type
          $attrs = array ();
          list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          while ( $nattrs-->0 && $pos<$end )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $name = substr ( $response, $pos, $len );
              $pos += $len;
              list(,$type) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $attrs[$name] = $type;
          }
          $result["attrs"] = $attrs;

          // read match count and the 64-bit-ids flag
          list(,$count) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          // read matches
          $matchno = 0;
          while ( $count-->0 && $pos<$end )
          {
              // parse document id and weight
              if ( $id64 )
              {
                  $doc = sphUnpack64 ( substr ( $response, $pos, 8 ) );
                  $pos += 8;
                  list(,$weight) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                  $pos += 4;
              }
              else
              {
                  list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                      substr ( $response, $pos, 8 ) ) );
                  $pos += 8;

                  if ( PHP_INT_SIZE>=8 )
                  {
                      // x64 route, workaround broken unpack() in 5.2.2+
                      if ( $doc<0 )
                          $doc += (1<<32);
                  }
                  else
                  {
                      // x32 route, present the signed value as unsigned
                      $doc = sprintf ( "%u", $doc );
                  }
              }
              $weight = sprintf ( "%u", $weight );

              // parse attribute values in schema order
              $attrvals = array ();
              foreach ( $attrs as $name=>$type )
              {
                  list(,$raw) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                  $pos += 4;

                  if ( $type==SPH_ATTR_FLOAT )
                  {
                      // reinterpret the 32 bits as a machine-order float
                      list(,$fval) = unpack ( "f*", pack ( "L", $raw ) );
                      $attrvals[$name] = $fval;
                  }
                  elseif ( $type & SPH_ATTR_MULTI )
                  {
                      // multi-value attribute: $raw is the number of values that follow
                      $attrvals[$name] = array ();
                      $nvalues = $raw;
                      while ( $nvalues-->0 && $pos<$end )
                      {
                          list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                          $pos += 4;
                          $attrvals[$name][] = sprintf ( "%u", $item );
                      }
                  }
                  else
                  {
                      // everything else is an unsigned int
                      $attrvals[$name] = sprintf ( "%u", $raw );
                  }
              }

              // create match entry
              if ( $this->_arrayresult )
              {
                  $result["matches"][$matchno] = array ( "id"=>$doc, "weight"=>$weight, "attrs"=>$attrvals );
              }
              else
              {
                  $result["matches"][$doc]["weight"] = $weight;
                  $result["matches"][$doc]["attrs"] = $attrvals;
              }
              $matchno++;
          }

          // totals, elapsed time and word count
          list ( $total, $total_found, $msecs, $words ) =
              array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
          $result["total"] = sprintf ( "%u", $total );
          $result["total_found"] = sprintf ( "%u", $total_found );
          $result["time"] = sprintf ( "%.3f", $msecs/1000 );
          $pos += 16;

          // per-word statistics
          while ( $words-->0 && $pos<$end )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $word = substr ( $response, $pos, $len );
              $pos += $len;
              list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
              $pos += 8;

              $result["words"][$word] = array (
                  "docs"=>sprintf ( "%u", $docs ),
                  "hits"=>sprintf ( "%u", $hits ) );
          }

          $results[] = $result;
      }

      $this->_MBPop ();
      return $results;
  }
  786. /////////////////////////////////////////////////////////////////////////////
  787. // excerpts generation
  788. /////////////////////////////////////////////////////////////////////////////
  789. /// connect to searchd server, and generate excerpts (snippets)
  790. /// of given documents for given query. returns false on failure,
  791. /// an array of snippets on success
  function BuildExcerpts ( $docs, $index, $words, $opts=array() )
  {
      // argument sanity checks
      assert ( is_array($docs) );
      assert ( is_string($index) );
      assert ( is_string($words) );
      assert ( is_array($opts) );

      $this->_MBPush ();
      $fp = $this->_Connect ();
      if ( !$fp )
      {
          $this->_MBPop ();
          return false;
      }

      // every option the caller left unset (or set to null) gets its default
      $defaults = array (
          "before_match"    => "<b>",
          "after_match"     => "</b>",
          "chunk_separator" => " ... ",
          "limit"           => 256,
          "around"          => 5,
          "exact_phrase"    => false,
          "single_passage"  => false,
          "use_boundaries"  => false,
          "weight_order"    => false );
      foreach ( $defaults as $name=>$value )
      {
          if ( !isset($opts[$name]) )
              $opts[$name] = $value;
      }

      // boolean options map onto request flag bits; bit 1 ("remove spaces") is always set
      $flags = 1;
      $flagbits = array (
          "exact_phrase"   => 2,
          "single_passage" => 4,
          "use_boundaries" => 8,
          "weight_order"   => 16 );
      foreach ( $flagbits as $name=>$bit )
      {
          if ( $opts[$name] )
              $flags |= $bit;
      }

      // request body (v.1.0): mode=0 and flags, then five length-prefixed strings
      // (index, words, before_match, after_match, chunk_separator), then limit and around
      $body = pack ( "NN", 0, $flags );
      $strings = array (
          $index,
          $words,
          $opts["before_match"],
          $opts["after_match"],
          $opts["chunk_separator"] );
      foreach ( $strings as $s )
          $body .= pack ( "N", strlen($s) ) . $s;
      $body .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );

      // document count followed by each document as a length-prefixed string
      $body .= pack ( "N", count($docs) );
      foreach ( $docs as $doc )
      {
          assert ( is_string($doc) );
          $body .= pack ( "N", strlen($doc) ) . $doc;
      }

      // prepend the 8-byte header (command, command version, body length) and send
      $bodylen = strlen ( $body );
      $packet = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $bodylen ) . $body;
      fwrite ( $fp, $packet, $bodylen+8 );

      $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      // the reply holds one length-prefixed excerpt per submitted document
      $excerpts = array ();
      $offset = 0;
      $total = strlen ( $response );
      $expected = count ( $docs );
      for ( $n=0; $n<$expected; $n++ )
      {
          list(,$size) = unpack ( "N*", substr ( $response, $offset, 4 ) );
          $offset += 4;

          // the announced excerpt must fit into what was actually received
          if ( $offset+$size > $total )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }

          if ( $size )
              $excerpts[] = substr ( $response, $offset, $size );
          else
              $excerpts[] = "";
          $offset += $size;
      }

      $this->_MBPop ();
      return $excerpts;
  }
  874. /////////////////////////////////////////////////////////////////////////////
  875. // keyword generation
  876. /////////////////////////////////////////////////////////////////////////////
  877. /// connect to searchd server, and generate keyword list for a given query
  878. /// returns false on failure,
  879. /// an array of words on success
  function BuildKeywords ( $query, $index, $hits )
  {
      // argument sanity checks
      assert ( is_string($query) );
      assert ( is_string($index) );
      assert ( is_bool($hits) );

      $this->_MBPush ();
      $fp = $this->_Connect ();
      if ( !$fp )
      {
          $this->_MBPop ();
          return false;
      }

      // request body (v.1.0): length-prefixed query and index, then the hits flag
      $body = "";
      foreach ( array ( $query, $index ) as $s )
          $body .= pack ( "N", strlen($s) ) . $s;
      $body .= pack ( "N", (int)$hits );

      // prepend the 8-byte header (command, command version, body length) and send
      $bodylen = strlen ( $body );
      $packet = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $bodylen ) . $body;
      fwrite ( $fp, $packet, $bodylen+8 );

      $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS );
      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      // reply layout: keyword count, then per keyword two length-prefixed
      // strings (tokenized, normalized) and, when $hits is set, docs/hits counters
      $keywords = array ();
      $offset = 0;
      $total = strlen ( $response );

      list(,$nwords) = unpack ( "N*", substr ( $response, $offset, 4 ) );
      $offset += 4;

      for ( $i=0; $i<$nwords; $i++ )
      {
          $entry = array ();
          foreach ( array ( "tokenized", "normalized" ) as $field )
          {
              list(,$size) = unpack ( "N*", substr ( $response, $offset, 4 ) );
              $offset += 4;
              $entry[$field] = $size ? substr ( $response, $offset, $size ) : "";
              $offset += $size;
          }

          if ( $hits )
          {
              list ( $ndocs, $nhits ) = array_values ( unpack ( "N*N*", substr ( $response, $offset, 8 ) ) );
              $offset += 8;
              $entry["docs"] = $ndocs;
              $entry["hits"] = $nhits;
          }
          $keywords[] = $entry;

          // having read past the end of the reply means it was truncated
          if ( $offset > $total )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
      }

      $this->_MBPop ();
      return $keywords;
  }
  /// prefix each of the characters ( ) | - ! @ ~ " & in the given string
  /// with a backslash and return the result; all other characters
  /// (including backslashes already present) are passed through unchanged
  function EscapeString ( $string )
  {
      // the double quote must be searched for as a bare '"': inside single
      // quotes PHP does not treat \" as an escape, so '\"' would be the
      // two-character sequence backslash+quote and a plain quote in the
      // input would never be escaped
      $from = array ( '(', ')', '|', '-', '!', '@', '~', '"', '&' );
      $to   = array ( '\\(', '\\)', '\\|', '\\-', '\\!', '\\@', '\\~', '\\"', '\\&' );
      return str_replace ( $from, $to, $string );
  }
  949. /////////////////////////////////////////////////////////////////////////////
  950. // attribute updates
  951. /////////////////////////////////////////////////////////////////////////////
  952. /// update given attribute values on given documents in given indexes
  953. /// returns amount of updated documents (0 or more) on success, or -1 on failure
  function UpdateAttributes ( $index, $attrs, $values )
  {
      // verify everything: $attrs is a list of attribute names, $values maps
      // a numeric document id to a list of integers, one per attribute
      assert ( is_string($index) );
      assert ( is_array($attrs) );
      foreach ( $attrs as $attr )
          assert ( is_string($attr) );
      assert ( is_array($values) );
      foreach ( $values as $id=>$entry )
      {
          assert ( is_numeric($id) );
          assert ( is_array($entry) );
          assert ( count($entry)==count($attrs) );
          foreach ( $entry as $v )
              assert ( is_int($v) );
      }

      // mbstring workaround; it has to be in effect BEFORE the request is
      // built, because the length prefixes below come from strlen()
      // (BuildExcerpts and BuildKeywords likewise push before packing)
      // NOTE(review): presumably _MBPush() makes strlen() byte-oriented when
      // mbstring function overloading is enabled - confirm against its definition
      $this->_MBPush ();

      // build request: index name, attribute names, then for every document
      // its 64-bit id followed by the new attribute values
      $req = pack ( "N", strlen($index) ) . $index;
      $req .= pack ( "N", count($attrs) );
      foreach ( $attrs as $attr )
          $req .= pack ( "N", strlen($attr) ) . $attr;
      $req .= pack ( "N", count($values) );
      foreach ( $values as $id=>$entry )
      {
          $req .= sphPack64 ( $id );
          foreach ( $entry as $v )
              $req .= pack ( "N", $v );
      }

      // connect, send query, get response
      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop ();
          return -1;
      }
      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
      fwrite ( $fp, $req, $len+8 );
      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
      {
          $this->_MBPop ();
          return -1;
      }

      // parse response: a single 32-bit count of updated documents
      list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
      $this->_MBPop ();
      return $updated;
  }
  1003. }
  1004. //
  1005. // $Id$
  1006. //
  1007. ?>