sphinxapi.php 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959
  1. <?php
  2. //
  3. // $Id$
  4. //
  5. //
  6. // Copyright (c) 2001-2007, Andrew Aksyonoff. All rights reserved.
  7. //
  8. // This program is free software; you can redistribute it and/or modify
  9. // it under the terms of the GNU General Public License. You should have
  10. // received a copy of the GPL license along with this program; if you
  11. // did not, you can find it at http://www.gnu.org/
  12. //
  13. /////////////////////////////////////////////////////////////////////////////
  14. // PHP version of Sphinx searchd client (PHP API)
  15. /////////////////////////////////////////////////////////////////////////////
  /// searchd command codes (first field of every request header)
  define('SEARCHD_COMMAND_SEARCH', 0);
  define('SEARCHD_COMMAND_EXCERPT', 1);
  define('SEARCHD_COMMAND_UPDATE', 2);

  /// version of each command as implemented by this client
  define('VER_COMMAND_SEARCH', 0x10F);
  define('VER_COMMAND_EXCERPT', 0x100);
  define('VER_COMMAND_UPDATE', 0x100);

  /// status codes found in searchd replies
  define('SEARCHD_OK', 0);
  define('SEARCHD_ERROR', 1);
  define('SEARCHD_RETRY', 2);
  define('SEARCHD_WARNING', 3);

  /// query matching modes
  define('SPH_MATCH_ALL', 0);
  define('SPH_MATCH_ANY', 1);
  define('SPH_MATCH_PHRASE', 2);
  define('SPH_MATCH_BOOLEAN', 3);
  define('SPH_MATCH_EXTENDED', 4);
  define('SPH_MATCH_FULLSCAN', 5);
  define('SPH_MATCH_EXTENDED2', 6); // extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)

  /// result sorting modes
  define('SPH_SORT_RELEVANCE', 0);
  define('SPH_SORT_ATTR_DESC', 1);
  define('SPH_SORT_ATTR_ASC', 2);
  define('SPH_SORT_TIME_SEGMENTS', 3);
  define('SPH_SORT_EXTENDED', 4);

  /// filter types
  define('SPH_FILTER_VALUES', 0);
  define('SPH_FILTER_RANGE', 1);
  define('SPH_FILTER_FLOATRANGE', 2);

  /// attribute types (SPH_ATTR_MULTI is a flag bit tested with "&")
  define('SPH_ATTR_INTEGER', 1);
  define('SPH_ATTR_TIMESTAMP', 2);
  define('SPH_ATTR_ORDINAL', 3);
  define('SPH_ATTR_BOOL', 4);
  define('SPH_ATTR_FLOAT', 5);
  define('SPH_ATTR_MULTI', 0x40000000);

  /// grouping functions
  define('SPH_GROUPBY_DAY', 0);
  define('SPH_GROUPBY_WEEK', 1);
  define('SPH_GROUPBY_MONTH', 2);
  define('SPH_GROUPBY_YEAR', 3);
  define('SPH_GROUPBY_ATTR', 4);
  define('SPH_GROUPBY_ATTRPAIR', 5);
  61. /// sphinx searchd client class
  62. class SphinxClient
  63. {
  var $_host;          ///< searchd host (default is "localhost")
  var $_port;          ///< searchd port (default is 3312)
  var $_offset;        ///< how many records to seek from result-set start (default is 0)
  var $_limit;         ///< how many records to return from result-set starting at offset (default is 20)
  var $_mode;          ///< query matching mode (default is SPH_MATCH_ALL)
  var $_weights;       ///< per-field weights (default is 1 for all fields)
  var $_sort;          ///< match sorting mode (default is SPH_SORT_RELEVANCE)
  var $_sortby;        ///< attribute to sort by (default is "")
  var $_min_id;        ///< min ID to match (default is 0, which means no limit)
  var $_max_id;        ///< max ID to match (default is 0, which means no limit)
  var $_filters;       ///< search filters
  var $_groupby;       ///< group-by attribute name
  var $_groupfunc;     ///< group-by function (to pre-process group-by attribute value with)
  var $_groupsort;     ///< group-by sorting clause (to sort groups in result set with)
  var $_groupdistinct; ///< group-by count-distinct attribute
  var $_maxmatches;    ///< max matches to retrieve
  var $_cutoff;        ///< cutoff to stop searching at (default is 0)
  var $_retrycount;    ///< distributed retries count
  var $_retrydelay;    ///< distributed retries delay
  var $_anchor;        ///< geographical anchor point
  var $_indexweights;  ///< per-index weights (hash of index name to integer weight)

  var $_error;         ///< last error message
  var $_warning;       ///< last warning message

  var $_reqs;          ///< requests array for multi-query
  87. /////////////////////////////////////////////////////////////////////////////
  88. // common stuff
  89. /////////////////////////////////////////////////////////////////////////////
  /// create a new client object and fill defaults
  ///
  /// this is the real constructor; PHP 8 no longer treats a method named
  /// after the class as a constructor, so the defaults must be filled here
  /// or a freshly created object would have every setting left unset
  function __construct ()
  {
      // per-client-object settings
      $this->_host = "localhost";
      $this->_port = 3312;

      // per-query settings
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct = "";
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;
      $this->_anchor = array ();
      $this->_indexweights = array ();

      // per-reply fields (for single-query case)
      $this->_error = "";
      $this->_warning = "";

      // requests storage (for multi-query case)
      $this->_reqs = array ();
  }

  /// legacy (PHP 4 style) constructor name
  ///
  /// kept so that existing code calling SphinxClient() explicitly, for
  /// instance from a subclass constructor, keeps working; it simply
  /// resets every setting to its default by delegating to __construct()
  function SphinxClient ()
  {
      $this->__construct ();
  }
  /// return the message stored by the most recent failed operation
  /// (an empty string when nothing has failed yet)
  function GetLastError ()
  {
      $message = $this->_error;
      return $message;
  }
  /// return the most recently stored warning message
  /// (an empty string when no warning has been recorded)
  function GetLastWarning ()
  {
      $message = $this->_warning;
      return $message;
  }
  /// choose the searchd instance that later calls will connect to
  ///
  /// $host is the server host name (string)
  /// $port is the server TCP port (integer)
  function SetServer ( $host, $port )
  {
      assert ( is_string($host) );
      assert ( is_int($port) );

      $this->_port = $port;
      $this->_host = $host;
  }
  140. /////////////////////////////////////////////////////////////////////////////
  /// open a socket to the configured searchd and perform the version handshake
  ///
  /// returns the connected socket on success
  /// returns false (and stores the error message) on failure
  function _Connect ()
  {
      $fp = @fsockopen ( $this->_host, $this->_port );
      if ( !$fp )
      {
          $this->_error = "connection to {$this->_host}:{$this->_port} failed";
          return false;
      }

      // the server speaks first: a 4-byte big-endian protocol version
      list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
      $v = (int)$v;

      if ( $v>=1 )
      {
          // handshake accepted, answer with our own protocol version
          fwrite ( $fp, pack ( "N", 1 ) );
          return $fp;
      }

      fclose ( $fp );
      $this->_error = "expected searchd protocol version 1+, got version '$v'";
      return false;
  }
  /// read one response packet from searchd, close the socket, and validate it
  ///
  /// $fp is the connected socket; it is always closed before returning
  /// $client_ver is the client-side version of the command that was sent;
  /// a warning is stored when the server reports an older version
  ///
  /// returns the response body (with the warning text stripped off when the
  /// server replied with SEARCHD_WARNING)
  /// returns false and stores the error message on short reads, empty
  /// replies, and on error, retry or unknown statuses
  function _GetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;
      $status = null; // only meaningful once a full header has been read
      $ver = null;

      // header is status (2 bytes), version (2 bytes), body length (4 bytes)
      $header = fread ( $fp, 8 );
      if ( strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, $left );
              if ( $chunk===false )
                  break; // read error; feof() may never become true, so stop here

              // compare against "" explicitly; a chunk consisting of the
              // single character "0" is falsy and must not be discarded
              if ( $chunk!=="" )
              {
                  $response .= $chunk;
                  $left -= strlen($chunk);
              }
          }
      }
      fclose ( $fp );

      // check response
      $read = strlen ( $response );
      if ( $read==0 || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body starts with a length-prefixed warning, payload follows
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }

      return $response;
  }
  222. /////////////////////////////////////////////////////////////////////////////
  223. // searching
  224. /////////////////////////////////////////////////////////////////////////////
  /// choose the window of the result set to return, and optionally
  /// the max-matches and cutoff values used while searching
  ///
  /// $offset is the number of leading records to skip (0 or more)
  /// $limit is the number of records to return (1 or more)
  /// $max, when positive, replaces the current max-matches setting
  /// $cutoff, when positive, replaces the current cutoff setting
  function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
  {
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;

      // zero (the default) leaves the previous value untouched
      if ( $max>0 )
      {
          $this->_maxmatches = $max;
      }
      if ( $cutoff>0 )
      {
          $this->_cutoff = $cutoff;
      }
  }
  /// set match mode
  ///
  /// $mode must be one of the SPH_MATCH_xxx constants; every mode this
  /// file defines is accepted, including SPH_MATCH_FULLSCAN
  function SetMatchMode ( $mode )
  {
      assert ( $mode==SPH_MATCH_ALL
          || $mode==SPH_MATCH_ANY
          || $mode==SPH_MATCH_PHRASE
          || $mode==SPH_MATCH_BOOLEAN
          || $mode==SPH_MATCH_EXTENDED
          || $mode==SPH_MATCH_FULLSCAN
          || $mode==SPH_MATCH_EXTENDED2 );
      $this->_mode = $mode;
  }
  /// choose how matches are ordered
  ///
  /// $mode is one of the SPH_SORT_xxx constants
  /// $sortby is the sort-by clause; it must be non-empty for every
  /// mode other than SPH_SORT_RELEVANCE
  function SetSortMode ( $mode, $sortby="" )
  {
      $known_modes = array (
          SPH_SORT_RELEVANCE,
          SPH_SORT_ATTR_DESC,
          SPH_SORT_ATTR_ASC,
          SPH_SORT_TIME_SEGMENTS,
          SPH_SORT_EXTENDED );

      assert ( in_array ( $mode, $known_modes ) );
      assert ( is_string($sortby) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sortby = $sortby;
      $this->_sort = $mode;
  }
  /// replace the per-field weights
  ///
  /// $weights is an array of integer weights
  function SetWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $w )
      {
          assert ( is_int($w) );
      }

      $this->_weights = $weights;
  }
  /// replace the per-index weights
  ///
  /// $weights is a hash which maps index names (strings) to integer weights
  function SetIndexWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $name=>$w )
      {
          assert ( is_string($name) );
          assert ( is_int($w) );
      }

      $this->_indexweights = $weights;
  }
  /// restrict matching to a range of document IDs
  ///
  /// only records whose document ID lies between $min and $max
  /// (both bounds included) will match
  function SetIDRange ( $min, $max )
  {
      assert ( is_int($min) );
      assert ( is_int($max) );
      assert ( $min<=$max );

      $this->_max_id = $max;
      $this->_min_id = $min;
  }
  /// add a value-set filter
  ///
  /// only records whose $attribute value is one of $values will match;
  /// $values must be a non-empty array of integers, otherwise no filter
  /// is added; $exclude is stored with the filter and sent to searchd
  function SetFilter ( $attribute, $values, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_array($values) );
      assert ( count($values) );

      // with assertions disabled, silently ignore unusable value lists
      if ( !is_array($values) || !count($values) )
          return;

      foreach ( $values as $v )
      {
          assert ( is_int($v) );
      }

      $this->_filters[] = array (
          "type"=>SPH_FILTER_VALUES,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "values"=>$values );
  }
  /// add an integer range filter
  ///
  /// only records whose $attribute value lies between $min and $max
  /// (both bounds included) will match; $exclude is stored with the
  /// filter and sent to searchd
  function SetFilterRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_int($min) );
      assert ( is_int($max) );
      assert ( $min<=$max );

      $filter = array (
          "type"=>SPH_FILTER_RANGE,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "min"=>$min,
          "max"=>$max );
      $this->_filters[] = $filter;
  }
  /// add a floating-point range filter
  ///
  /// only records whose $attribute value lies between $min and $max
  /// (both bounds included) will match; both bounds must be floats;
  /// $exclude is stored with the filter and sent to searchd
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_float($min) );
      assert ( is_float($max) );
      assert ( $min<=$max );

      $filter = array (
          "type"=>SPH_FILTER_FLOATRANGE,
          "attr"=>$attribute,
          "exclude"=>$exclude,
          "min"=>$min,
          "max"=>$max );
      $this->_filters[] = $filter;
  }
  /// define the geographical anchor point
  ///
  /// an anchor is required to use @geodist in filters and sorting;
  /// the distance is computed to this point
  ///
  /// $attrlat is the name of the latitude attribute
  /// $attrlong is the name of the longitude attribute
  /// $lat is the anchor latitude, in radians (float)
  /// $long is the anchor longitude, in radians (float)
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      assert ( is_string($attrlat) );
      assert ( is_string($attrlong) );
      assert ( is_float($lat) );
      assert ( is_float($long) );

      $anchor = array ();
      $anchor["attrlat"] = $attrlat;
      $anchor["attrlong"] = $attrlong;
      $anchor["lat"] = $lat;
      $anchor["long"] = $long;
      $this->_anchor = $anchor;
  }
  /// enable grouping by the given attribute and function
  ///
  /// when grouping is on, every match is assigned to a group according
  /// to the grouping function value. each group tracks its total match
  /// count and its best match under the current sorting function, and
  /// the final result set holds one best match per group with the
  /// grouping function value and the match count attached.
  ///
  /// $groupsort is the clause used to order the groups in the result set.
  /// it may refer to document attributes and to these special internal
  /// Sphinx attributes:
  ///
  /// - @id - match document ID;
  /// - @weight, @rank, @relevance - match weight;
  /// - @group - groupby function value;
  /// - @count - amount of matches in group.
  ///
  /// the default is "@group desc", ie. groups ordered by groupby value,
  /// descending.
  ///
  /// "total_found" then holds the total amount of matching groups over
  /// the whole index.
  ///
  /// WARNING: grouping is done in fixed memory, so its results are only
  /// approximate; total_found may report more groups than are actually
  /// present, and @count may be underestimated.
  ///
  /// example: sorting by relevance while grouping by a "published"
  /// attribute with SPH_GROUPBY_DAY yields the single most relevant match
  /// for each day that had any matches, with the day number and per-day
  /// match count attached, ordered by day number descending (ie. recent
  /// days first).
  function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
  {
      $known_funcs = array (
          SPH_GROUPBY_DAY,
          SPH_GROUPBY_WEEK,
          SPH_GROUPBY_MONTH,
          SPH_GROUPBY_YEAR,
          SPH_GROUPBY_ATTR,
          SPH_GROUPBY_ATTRPAIR );

      assert ( is_string($attribute) );
      assert ( is_string($groupsort) );
      assert ( in_array ( $func, $known_funcs ) );

      $this->_groupsort = $groupsort;
      $this->_groupfunc = $func;
      $this->_groupby = $attribute;
  }
  /// name the attribute whose distinct values are counted in group-by queries
  function SetGroupDistinct ( $attribute )
  {
      assert ( is_string($attribute) );

      $this->_groupdistinct = $attribute;
  }
  /// configure distributed retries
  ///
  /// $count is the retry count, $delay is the retry delay;
  /// both must be non-negative integers
  function SetRetries ( $count, $delay=0 )
  {
      assert ( is_int($count) && $count>=0 );
      assert ( is_int($delay) && $delay>=0 );

      $this->_retrydelay = $delay;
      $this->_retrycount = $count;
  }
  413. //////////////////////////////////////////////////////////////////////////////
  /// drop every filter and the geographical anchor
  /// (useful between the queries of a multi-query batch)
  function ResetFilters ()
  {
      $this->_anchor = array ();
      $this->_filters = array ();
  }
  /// restore every group-by setting to its default
  /// (no grouping attribute, SPH_GROUPBY_DAY, "@group desc", no count-distinct)
  function ResetGroupBy ()
  {
      $this->_groupdistinct = "";
      $this->_groupsort = "@group desc";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupby = "";
  }
  428. //////////////////////////////////////////////////////////////////////////////
  /// connect to searchd server and run given search query
  ///
  /// $query is query string
  ///
  /// $index is index name (or names) to query. default value is "*" which means
  /// to query all indexes. accepted characters for index names are letters, numbers,
  /// dash, and underscore; everything else is considered a separator. therefore,
  /// all the following calls are valid and will search two indexes:
  ///
  /// $cl->Query ( "test query", "main delta" );
  /// $cl->Query ( "test query", "main;delta" );
  /// $cl->Query ( "test query", "main, delta" );
  ///
  /// index order matters. if identical IDs are found in two or more indexes,
  /// weight and attribute values from the very last matching index will be used
  /// for sorting and returning to client. therefore, in the example above,
  /// matches from "delta" index will always "win" over matches from "main".
  ///
  /// must not be called while a multi-query batch built with AddQuery() is pending
  ///
  /// returns false on failure
  /// returns hash which has the following keys on success:
  /// "matches"
  /// hash which maps found document_id to ( "weight", "attrs" ) hash
  /// "total"
  /// total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
  /// "total_found"
  /// total amount of matching documents in index
  /// "time"
  /// search time
  /// "words"
  /// hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
  function Query ( $query, $index="*" )
  {
      assert ( empty($this->_reqs) );

      $this->AddQuery ( $query, $index );
      $results = $this->RunQueries ();

      if ( !is_array($results) )
      {
          // probably network error; error message should be already filled.
          // the request queued above may still be pending at this point;
          // drop it so that the next Query() call does not resend it and
          // hand back the result of this stale query instead of its own
          $this->_reqs = array ();
          return false;
      }

      $this->_error = $results[0]["error"];
      $this->_warning = $results[0]["warning"];

      if ( $results[0]["status"]==SEARCHD_ERROR )
          return false;

      return $results[0];
  }
  /// queue a query for the next RunQueries() call
  ///
  /// batching lets searchd apply internal optimizations where possible,
  /// and always saves network connection overhead. for instance, running
  /// exactly the same query with different groupby settings lets searchd
  /// perform the expensive full-text search and ranking only once, and
  /// compute multiple groupby results from its output.
  ///
  /// the request is serialized immediately from the current settings, so
  /// later setter calls do not affect queries that are already queued
  ///
  /// parameters are exactly the same as in Query() call
  /// returns index to results array returned by RunQueries() call
  function AddQuery ( $query, $index="*" )
  {
      // the request is assembled as a list of binary chunks, joined at the end
      $chunks = array ();

      // mode and limits, sort-by clause, query text
      $chunks[] = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_sort );
      $chunks[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
      $chunks[] = pack ( "N", strlen($query) ) . $query;

      // per-field weights
      $chunks[] = pack ( "N", count($this->_weights) );
      foreach ( $this->_weights as $w )
      {
          $chunks[] = pack ( "N", (int)$w );
      }

      // indexes, then id32 range
      $chunks[] = pack ( "N", strlen($index) ) . $index;
      $chunks[] = pack ( "NNN", 0, (int)$this->_min_id, (int)$this->_max_id );

      // filters
      $chunks[] = pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $f )
      {
          $kind = $f["type"];
          $chunks[] = pack ( "N", strlen($f["attr"]) ) . $f["attr"];
          $chunks[] = pack ( "N", $kind );

          if ( $kind==SPH_FILTER_VALUES )
          {
              $chunks[] = pack ( "N", count($f["values"]) );
              foreach ( $f["values"] as $v )
              {
                  $chunks[] = pack ( "N", $v );
              }
          } elseif ( $kind==SPH_FILTER_RANGE )
          {
              $chunks[] = pack ( "NN", $f["min"], $f["max"] );
          } elseif ( $kind==SPH_FILTER_FLOATRANGE )
          {
              $chunks[] = pack ( "ff", $f["min"], $f["max"] );
          } else
          {
              assert ( 0 && "internal error: unhandled filter type" );
          }

          $chunks[] = pack ( "N", $f["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $chunks[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
      $chunks[] = pack ( "N", $this->_maxmatches );
      $chunks[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
      $chunks[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $chunks[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

      // anchor point: a presence flag, followed by the anchor when one is set
      if ( !empty($this->_anchor) )
      {
          $anchor = $this->_anchor;
          $chunks[] = pack ( "N", 1 );
          $chunks[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
          $chunks[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
          $chunks[] = pack ( "ff", $anchor["lat"], $anchor["long"] );
      } else
      {
          $chunks[] = pack ( "N", 0 );
      }

      // per-index weights
      $chunks[] = pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $name=>$w )
      {
          $chunks[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );
      }

      // store request to requests array
      $this->_reqs[] = join ( "", $chunks );
      return count($this->_reqs)-1;
  }
  /// run queries batch
  ///
  /// sends every request queued by AddQuery() over a single connection.
  /// the batch is emptied as soon as it has been written to the socket,
  /// so requests of a batch whose response could not be read are never
  /// resent as part of a later batch. when the connection itself cannot
  /// be established the batch is kept.
  ///
  /// returns an array of result sets on success
  /// returns false on network IO failure
  ///
  /// each result set in returned array is a hash which contains
  /// the same keys as the hash returned by Query(), plus:
  /// "status"
  /// searchd status code for this query
  /// "error"
  /// search error for this query
  /// "warning"
  /// search warning for this query
  /// "fields"
  /// array of field names
  /// "attrs"
  /// hash which maps attribute names to attribute types
  /// "words"
  /// hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
  ///
  /// a result set whose query failed only has "error", "warning" and "status"
  function RunQueries ()
  {
      if ( empty($this->_reqs) )
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      if (!( $fp = $this->_Connect() ))
          return false;

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $nreqs = count($this->_reqs);
      $req = join ( "", $this->_reqs );
      $len = 4+strlen($req);
      $req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header

      fwrite ( $fp, $req, $len+8 );

      // the batch is spent once written, whether or not a usable reply follows
      $this->_reqs = array ();

      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ))
          return false;

      //////////////////
      // parse response
      //////////////////

      $p = 0; // current position
      $max = strlen($response); // max position for checks, to protect against broken responses

      $results = array ();
      for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
      {
          $results[] = array();
          $result =& $results[$ires];

          $result["error"] = "";
          $result["warning"] = "";

          // extract status
          list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          $result["status"] = $status;
          if ( $status!=SEARCHD_OK )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $message = substr ( $response, $p, $len ); $p += $len;

              if ( $status==SEARCHD_WARNING )
              {
                  $result["warning"] = $message;
              } else
              {
                  // failed query: nothing but the message follows
                  $result["error"] = $message;
                  continue;
              }
          }

          // read schema
          $fields = array ();
          $attrs = array ();

          list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          while ( $nfields-->0 && $p<$max )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $fields[] = substr ( $response, $p, $len ); $p += $len;
          }
          $result["fields"] = $fields;

          list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          while ( $nattrs-->0 && $p<$max )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $attr = substr ( $response, $p, $len ); $p += $len;
              list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $attrs[$attr] = $type;
          }
          $result["attrs"] = $attrs;

          // read match count, and the flag telling whether document IDs are 64-bit
          list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;

          // read matches
          while ( $count-->0 && $p<$max )
          {
              if ( $id64 )
              {
                  // document ID arrives as two 32-bit halves, high half first
                  list ( $dochi, $doclo, $weight ) = array_values ( unpack ( "N*N*N*",
                      substr ( $response, $p, 12 ) ) );
                  $p += 12;
                  $doc = (((int)$dochi)<<32) + ((int)$doclo);
              } else
              {
                  list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                      substr ( $response, $p, 8 ) ) );
                  $p += 8;
                  $doc = sprintf ( "%u", $doc ); // workaround for php signed/unsigned braindamage
              }
              $weight = sprintf ( "%u", $weight );
              $result["matches"][$doc]["weight"] = $weight;

              $attrvals = array ();
              foreach ( $attrs as $attr=>$type )
              {
                  // handle floats
                  if ( $type==SPH_ATTR_FLOAT )
                  {
                      list(,$val) = unpack ( "f*", substr ( $response, $p, 4 ) ); $p += 4;
                      $attrvals[$attr] = $val;
                      continue;
                  }

                  // handle everything else as unsigned ints
                  list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                  if ( $type & SPH_ATTR_MULTI )
                  {
                      // for multi-value attributes the first dword is the value count
                      $attrvals[$attr] = array ();
                      $nvalues = $val;
                      while ( $nvalues-->0 && $p<$max )
                      {
                          list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
                          $attrvals[$attr][] = sprintf ( "%u", $val );
                      }
                  } else
                  {
                      $attrvals[$attr] = sprintf ( "%u", $val );
                  }
              }
              $result["matches"][$doc]["attrs"] = $attrvals;
          }

          // totals, search time (sent in milliseconds), number of per-word entries
          list ( $total, $total_found, $msecs, $words ) =
              array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
          $result["total"] = sprintf ( "%u", $total );
          $result["total_found"] = sprintf ( "%u", $total_found );
          $result["time"] = sprintf ( "%.3f", $msecs/1000 );
          $p += 16;

          while ( $words-->0 && $p<$max )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
              $word = substr ( $response, $p, $len ); $p += $len;
              list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
              $result["words"][$word] = array (
                  "docs"=>sprintf ( "%u", $docs ),
                  "hits"=>sprintf ( "%u", $hits ) );
          }
      }

      return $results;
  }
  691. /////////////////////////////////////////////////////////////////////////////
  692. // excerpts generation
  693. /////////////////////////////////////////////////////////////////////////////
  /// connect to searchd server and generate excerpts from given documents
  ///
  /// $docs is an array of strings which represent the documents' contents
  /// $index is a string specifying the index which settings will be used
  /// for stemming, lexing and case folding
  /// $words is a string which contains the words to highlight
  /// $opts is a hash which contains additional optional highlighting parameters:
  /// "before_match"
  /// a string to insert before a set of matching words, default is "<b>"
  /// "after_match"
  /// a string to insert after a set of matching words, default is "</b>"
  /// "chunk_separator"
  /// a string to insert between excerpts chunks, default is " ... "
  /// "limit"
  /// max excerpt size in symbols (codepoints), default is 256
  /// "around"
  /// how many words to highlight around each match, default is 5
  ///
  /// returns false on failure
  /// returns an array of string excerpts on success
  function BuildExcerpts ( $docs, $index, $words, $opts=array() )
  {
      assert ( is_array($docs) );
      assert ( is_string($index) );
      assert ( is_string($words) );
      assert ( is_array($opts) );

      if (!( $fp = $this->_Connect() ))
          return false;

      /////////////////
      // fixup options
      /////////////////

      if ( !isset($opts["before_match"]) ) $opts["before_match"] = "<b>";
      if ( !isset($opts["after_match"]) ) $opts["after_match"] = "</b>";
      if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... ";
      if ( !isset($opts["limit"]) ) $opts["limit"] = 256;
      if ( !isset($opts["around"]) ) $opts["around"] = 5;

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $req = pack ( "NN", 0, 1 ); // mode=0, flags=1 (remove spaces)
      $req .= pack ( "N", strlen($index) ) . $index; // req index
      $req .= pack ( "N", strlen($words) ) . $words; // req words

      // options
      $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
      $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
      $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
      $req .= pack ( "N", (int)$opts["limit"] );
      $req .= pack ( "N", (int)$opts["around"] );

      // documents
      $ndocs = count($docs);
      $req .= pack ( "N", $ndocs );
      foreach ( $docs as $doc )
      {
          assert ( is_string($doc) );
          $req .= pack ( "N", strlen($doc) ) . $doc;
      }

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
      fwrite ( $fp, $req, $len+8 );
      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
          return false;

      //////////////////
      // parse response
      //////////////////

      // the reply holds one length-prefixed excerpt per submitted document
      $pos = 0;
      $res = array ();
      $rlen = strlen($response);
      for ( $i=0; $i<$ndocs; $i++ )
      {
          // make sure the 4-byte length prefix itself is present
          // before handing it to unpack()
          if ( $pos+4 > $rlen )
          {
              $this->_error = "incomplete reply";
              return false;
          }
          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          if ( $pos+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              return false;
          }
          $res[] = $len ? substr ( $response, $pos, $len ) : "";
          $pos += $len;
      }

      return $res;
  }
  778. /////////////////////////////////////////////////////////////////////////////
  779. // attribute updates
  780. /////////////////////////////////////////////////////////////////////////////
  /// update specified attributes on specified documents
  ///
  /// $index is a name of the index to be updated
  /// $attrs is an array of attribute name strings
  /// $values is a hash where key is document id, and value is an array of
  /// new attribute values; every such array must hold exactly one integer
  /// per name in $attrs
  ///
  /// returns number of actually updated documents (0 or more) on success
  /// returns -1 on failure
  ///
  /// usage example:
  /// $cl->UpdateAttributes ( "test1", array("group_id"), array(1=>array(456)) );
  function UpdateAttributes ( $index, $attrs, $values )
  {
      // verify everything
      assert ( is_string($index) );

      assert ( is_array($attrs) );
      foreach ( $attrs as $attr )
          assert ( is_string($attr) );

      assert ( is_array($values) );
      foreach ( $values as $id=>$entry )
      {
          assert ( is_int($id) );
          assert ( is_array($entry) );
          assert ( count($entry)==count($attrs) );
          foreach ( $entry as $v )
              assert ( is_int($v) );
      }

      // build request
      $req = pack ( "N", strlen($index) ) . $index;

      $req .= pack ( "N", count($attrs) );
      foreach ( $attrs as $attr )
          $req .= pack ( "N", strlen($attr) ) . $attr;

      $req .= pack ( "N", count($values) );
      foreach ( $values as $id=>$entry )
      {
          $req .= pack ( "N", $id );
          foreach ( $entry as $v )
              $req .= pack ( "N", $v );
      }

      // connect, send query, get response
      if (!( $fp = $this->_Connect() ))
          return -1;

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
      fwrite ( $fp, $req, $len+8 );

      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
          return -1;

      // parse response: the updated-documents count is the first dword of the body
      if ( strlen($response)<4 )
      {
          $this->_error = "incomplete reply";
          return -1;
      }
      list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
      return $updated;
  }
  833. }
  834. //
  835. // $Id$
  836. //
  837. ?>