sphinxapi.php 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998
  1. <?php
  2. //
  3. // $Id$
  4. //
  5. //
  6. // Copyright (c) 2001-2007, Andrew Aksyonoff. All rights reserved.
  7. //
  8. // This program is free software; you can redistribute it and/or modify
  9. // it under the terms of the GNU General Public License. You should have
  10. // received a copy of the GPL license along with this program; if you
  11. // did not, you can find it at http://www.gnu.org/
  12. //
  13. /////////////////////////////////////////////////////////////////////////////
  14. // PHP version of Sphinx searchd client (PHP API)
  15. /////////////////////////////////////////////////////////////////////////////
  16. /// known searchd commands
  // searchd command identifiers
  define('SEARCHD_COMMAND_SEARCH', 0);
  define('SEARCHD_COMMAND_EXCERPT', 1);
  define('SEARCHD_COMMAND_UPDATE', 2);

  // client-side implementation version of each command
  define('VER_COMMAND_SEARCH', 0x110);
  define('VER_COMMAND_EXCERPT', 0x100);
  define('VER_COMMAND_UPDATE', 0x100);

  // status codes carried in searchd replies
  define('SEARCHD_OK', 0);
  define('SEARCHD_ERROR', 1);
  define('SEARCHD_RETRY', 2);
  define('SEARCHD_WARNING', 3);

  // match modes
  define('SPH_MATCH_ALL', 0);
  define('SPH_MATCH_ANY', 1);
  define('SPH_MATCH_PHRASE', 2);
  define('SPH_MATCH_BOOLEAN', 3);
  define('SPH_MATCH_EXTENDED', 4);
  define('SPH_MATCH_FULLSCAN', 5);
  define('SPH_MATCH_EXTENDED2', 6); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

  // ranking modes (ext2 only)
  define('SPH_RANK_PROXIMITY_BM25', 0); // default: phrase proximity is the major factor, BM25 the minor one
  define('SPH_RANK_BM25', 1);           // statistical: BM25 only (faster but worse quality)
  define('SPH_RANK_NONE', 2);           // no ranking: every match gets a weight of 1

  // sort modes
  define('SPH_SORT_RELEVANCE', 0);
  define('SPH_SORT_ATTR_DESC', 1);
  define('SPH_SORT_ATTR_ASC', 2);
  define('SPH_SORT_TIME_SEGMENTS', 3);
  define('SPH_SORT_EXTENDED', 4);

  // filter types
  define('SPH_FILTER_VALUES', 0);
  define('SPH_FILTER_RANGE', 1);
  define('SPH_FILTER_FLOATRANGE', 2);

  // attribute types; SPH_ATTR_MULTI is a flag bit tested with "&" when parsing results
  define('SPH_ATTR_INTEGER', 1);
  define('SPH_ATTR_TIMESTAMP', 2);
  define('SPH_ATTR_ORDINAL', 3);
  define('SPH_ATTR_BOOL', 4);
  define('SPH_ATTR_FLOAT', 5);
  define('SPH_ATTR_MULTI', 0x40000000);

  // grouping functions
  define('SPH_GROUPBY_DAY', 0);
  define('SPH_GROUPBY_WEEK', 1);
  define('SPH_GROUPBY_MONTH', 2);
  define('SPH_GROUPBY_YEAR', 3);
  define('SPH_GROUPBY_ATTR', 4);
  define('SPH_GROUPBY_ATTRPAIR', 5);
  65. /// sphinx searchd client class
  66. class SphinxClient
  67. {
  68. var $_host; ///< searchd host (default is "localhost")
  69. var $_port; ///< searchd port (default is 3312)
  70. var $_offset; ///< how many records to seek from result-set start (default is 0)
  71. var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
  72. var $_mode; ///< query matching mode, one of SPH_MATCH_xxx (default is SPH_MATCH_ALL)
  73. var $_weights; ///< per-field weights, array of ints (default is empty array)
  74. var $_sort; ///< match sorting mode, one of SPH_SORT_xxx (default is SPH_SORT_RELEVANCE)
  75. var $_sortby; ///< attribute to sort by (default is "")
  76. var $_min_id; ///< min ID to match (default is 0, which means no limit)
  77. var $_max_id; ///< max ID to match (default is 0, which means no limit)
  78. var $_filters; ///< search filters, array of hashes appended by the SetFilter*() calls (default is empty array)
  79. var $_groupby; ///< group-by attribute name (default is "")
  80. var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with; default is SPH_GROUPBY_DAY)
  81. var $_groupsort; ///< group-by sorting clause (to sort groups in result set with; default is "@group desc")
  82. var $_groupdistinct;///< group-by count-distinct attribute (default is "")
  83. var $_maxmatches; ///< max matches to retrieve (default is 1000)
  84. var $_cutoff; ///< cutoff to stop searching at (default is 0)
  85. var $_retrycount; ///< distributed retries count (default is 0)
  86. var $_retrydelay; ///< distributed retries delay (default is 0)
  87. var $_anchor; ///< geographical anchor point, hash filled by SetGeoAnchor() (default is empty array)
  88. var $_indexweights; ///< per-index weights, hash of index name => int weight (default is empty array)
  89. var $_ranker; ///< ranking mode, one of SPH_RANK_xxx (default is SPH_RANK_PROXIMITY_BM25)
  90. var $_error; ///< last error message (default is "")
  91. var $_warning; ///< last warning message (default is "")
  92. var $_reqs; ///< requests array for multi-query, filled by AddQuery() and sent by RunQueries()
  93. /////////////////////////////////////////////////////////////////////////////
  94. // common stuff
  95. /////////////////////////////////////////////////////////////////////////////
  96. /// create a new client object and fill defaults
  /// PHP 4 style constructor entry point
  ///
  /// kept so that PHP 4 (where this IS the constructor) and any explicit
  /// SphinxClient() calls keep working; all work is done by __construct()
  function SphinxClient ()
  {
      $this->__construct ();
  }

  /// create a new client object and fill defaults
  ///
  /// PHP 8 no longer treats a method named after its class as the constructor,
  /// so without __construct() a freshly created client would have every field
  /// unset; PHP 5 and 7 also pick __construct() when both methods exist
  function __construct ()
  {
      // per-client-object settings
      $this->_host = "localhost";
      $this->_port = 3312;

      // per-query settings
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct = "";
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;
      $this->_anchor = array ();
      $this->_indexweights = array ();
      $this->_ranker = SPH_RANK_PROXIMITY_BM25;

      // per-reply fields (for single-query case)
      $this->_error = "";
      $this->_warning = "";

      // requests storage (for multi-query case)
      $this->_reqs = array ();
  }
  129. /// get last error message (string)
  /// returns the current contents of the last-error field
  /// (string; "" right after construction)
  function GetLastError ()
  {
      $message = $this->_error;
      return $message;
  }
  134. /// get last warning message (string)
  /// returns the current contents of the last-warning field
  /// (string; "" right after construction)
  function GetLastWarning ()
  {
      $message = $this->_warning;
      return $message;
  }
  139. /// set searchd server
  /// remember which searchd instance later calls should connect to
  ///
  /// $host is the server host (string)
  /// $port is the server TCP port (integer)
  function SetServer ( $host, $port )
  {
      assert ( is_string($host) );
      assert ( is_int($port) );

      $this->_port = $port;
      $this->_host = $host;
  }
  147. /////////////////////////////////////////////////////////////////////////////
  148. /// connect to searchd server
  /// open a socket to the configured host/port and exchange protocol versions
  ///
  /// returns the connected stream on success
  /// returns false (with $this->_error filled) if the connection fails or the
  /// server reports a protocol version below 1
  function _Connect ()
  {
      // "@" silences fsockopen() warnings; failure is reported through _error instead
      $fp = @fsockopen ( $this->_host, $this->_port );
      if ( !$fp )
      {
          $this->_error = "connection to {$this->_host}:{$this->_port} failed";
          return false;
      }

      // the server's version is read first, as a 32-bit big-endian integer
      $greeting = fread ( $fp, 4 );
      list(,$v) = unpack ( "N*", $greeting );
      $v = (int)$v;

      if ( $v>=1 )
      {
          // version is acceptable, reply with the client protocol version (1)
          fwrite ( $fp, pack ( "N", 1 ) );
          return $fp;
      }

      fclose ( $fp );
      $this->_error = "expected searchd protocol version 1+, got version '$v'";
      return false;
  }
  169. /// get and check response packet from searchd server
  /// read one response packet from searchd, close the socket, and validate it
  ///
  /// $fp is the connected stream to read from; it is always closed here
  /// $client_ver is the client-side command version the reply is compared with
  ///
  /// returns the response body (string) on success; for SEARCHD_WARNING replies
  /// the warning text is stored in $this->_warning and stripped from the body
  /// returns false on failure, with the reason stored in $this->_error
  function _GetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;

      // header is 8 bytes: status (16 bit), command version (16 bit), body length (32 bit)
      $header = fread ( $fp, 8 );
      if ( strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, $left );
              if ( $chunk===false )
                  break; // read error; the length check below turns it into a failure

              // compare with "" explicitly: a chunk that is exactly "0" is falsy
              // in PHP, and a plain truthiness test would silently drop that byte
              if ( $chunk!=="" )
              {
                  $response .= $chunk;
                  $left -= strlen($chunk);
              }
          }
      }
      fclose ( $fp );

      // check response; test the byte count rather than truthiness so that
      // a body of "0" is not mistaken for an empty one
      $read = strlen ( $response );
      if ( $read==0 || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body starts with a length-prefixed warning string, payload follows
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          // skip the 4-byte length prefix of the message
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version; an older server is only worth a warning
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }
      return $response;
  }
  229. /////////////////////////////////////////////////////////////////////////////
  230. // searching
  231. /////////////////////////////////////////////////////////////////////////////
  232. /// set offset and count into result set,
  233. /// and max-matches and cutoff to use while searching
  /// configure the result window and the search limits
  ///
  /// $offset and $limit select the slice of the result set to return
  /// $max replaces the max-matches setting only when it is positive
  /// $cutoff replaces the cutoff setting only when it is positive
  function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
  {
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;

      // zero (the default) leaves the previously stored value untouched
      if ( $max>0 )
      {
          $this->_maxmatches = $max;
      }
      if ( $cutoff>0 )
      {
          $this->_cutoff = $cutoff;
      }
  }
  248. /// set match mode
  /// select the query matching mode
  ///
  /// $mode must be one of the SPH_MATCH_xxx constants; SPH_MATCH_FULLSCAN is
  /// accepted too, since it is declared among the known match modes
  function SetMatchMode ( $mode )
  {
      assert ( $mode==SPH_MATCH_ALL
          || $mode==SPH_MATCH_ANY
          || $mode==SPH_MATCH_PHRASE
          || $mode==SPH_MATCH_BOOLEAN
          || $mode==SPH_MATCH_EXTENDED
          || $mode==SPH_MATCH_FULLSCAN
          || $mode==SPH_MATCH_EXTENDED2 );
      $this->_mode = $mode;
  }
  259. /// set ranking mode
  /// select the ranking mode
  ///
  /// $ranker must be one of the SPH_RANK_xxx constants
  function SetRankingMode ( $ranker )
  {
      assert (
          $ranker==SPH_RANK_PROXIMITY_BM25 ||
          $ranker==SPH_RANK_BM25 ||
          $ranker==SPH_RANK_NONE
      );

      $this->_ranker = $ranker;
  }
  267. /// set matches sorting mode
  /// select how matches are sorted
  ///
  /// $mode must be one of the SPH_SORT_xxx constants
  /// $sortby is the sorting clause (string); it may only be left empty
  /// when sorting by relevance
  function SetSortMode ( $mode, $sortby="" )
  {
      assert ( $mode==SPH_SORT_RELEVANCE
          || $mode==SPH_SORT_ATTR_DESC
          || $mode==SPH_SORT_ATTR_ASC
          || $mode==SPH_SORT_TIME_SEGMENTS
          || $mode==SPH_SORT_EXTENDED );
      assert ( is_string($sortby) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sortby = $sortby;
      $this->_sort = $mode;
  }
  281. /// set per-field weights
  /// store per-field weights
  ///
  /// $weights is an array of integers; it replaces any earlier weights
  function SetWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $weight )
      {
          assert ( is_int($weight) );
      }

      $this->_weights = $weights;
  }
  289. /// set per-index weights
  /// store per-index weights
  ///
  /// $weights is a hash which maps index names (strings) to integer weights;
  /// it replaces any earlier per-index weights
  function SetIndexWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $index=>$weight )
      {
          assert ( is_string($index) );
          assert ( is_int($weight) );
      }

      $this->_indexweights = $weights;
  }
  300. /// set IDs range to match
  301. /// only match those records where document ID
  302. /// is between $min and $max (including $min and $max)
  /// store the document ID range to match
  ///
  /// $min and $max are integers with $min<=$max; both bounds are inclusive
  function SetIDRange ( $min, $max )
  {
      assert ( is_int($min) );
      assert ( is_int($max) );
      assert ( $min<=$max );

      $this->_max_id = $max;
      $this->_min_id = $min;
  }
  311. /// set values filter
  312. /// only match those records where $attribute column values
  313. /// are in specified set
  /// append a values filter to the filter list
  ///
  /// $attribute is the attribute name (string)
  /// $values is a non-empty array of numeric values
  /// $exclude is stored with the filter and sent as its "exclude" flag
  ///
  /// nothing is stored when $values is not an array or is empty
  function SetFilter ( $attribute, $values, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_array($values) );
      assert ( count($values) );

      // asserts may be disabled, so bad input is also ignored at run time
      if ( !is_array($values) || !count($values) )
          return;

      foreach ( $values as $value )
      {
          assert ( is_numeric($value) );
      }

      $this->_filters[] = array (
          "type"    => SPH_FILTER_VALUES,
          "attr"    => $attribute,
          "exclude" => $exclude,
          "values"  => $values );
  }
  326. /// set range filter
  327. /// only match those records where $attribute column value
  328. /// is between $min and $max (including $min and $max)
  /// append an integer range filter to the filter list
  ///
  /// $attribute is the attribute name (string)
  /// $min and $max are integers with $min<=$max
  /// $exclude is stored with the filter and sent as its "exclude" flag
  function SetFilterRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_int($min) );
      assert ( is_int($max) );
      assert ( $min<=$max );

      $filter = array (
          "type"    => SPH_FILTER_RANGE,
          "attr"    => $attribute,
          "exclude" => $exclude,
          "min"     => $min,
          "max"     => $max );
      $this->_filters[] = $filter;
  }
  337. /// set float range filter
  338. /// only match those records where $attribute column value
  339. /// is between $min and $max (including $min and $max)
  /// append a float range filter to the filter list
  ///
  /// $attribute is the attribute name (string)
  /// $min and $max are floats with $min<=$max
  /// $exclude is stored with the filter and sent as its "exclude" flag
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_float($min) );
      assert ( is_float($max) );
      assert ( $min<=$max );

      $filter = array (
          "type"    => SPH_FILTER_FLOATRANGE,
          "attr"    => $attribute,
          "exclude" => $exclude,
          "min"     => $min,
          "max"     => $max );
      $this->_filters[] = $filter;
  }
  348. /// setup geographical anchor point
  349. /// required to use @geodist in filters and sorting
  350. /// distance will be computed to this point
  351. ///
  352. /// $attrlat is the name of latitude attribute
  353. /// $attrlong is the name of longitude attribute
  354. /// $lat is anchor point latitude, in radians
  355. /// $long is anchor point longitude, in radians
  /// store the geographical anchor point
  ///
  /// $attrlat and $attrlong are attribute names (strings)
  /// $lat and $long are the anchor coordinates (floats)
  ///
  /// replaces any earlier anchor; ResetFilters() clears it again
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      assert ( is_string($attrlat) );
      assert ( is_string($attrlong) );
      assert ( is_float($lat) );
      assert ( is_float($long) );

      $this->_anchor = array (
          "attrlat"  => $attrlat,
          "attrlong" => $attrlong,
          "lat"      => $lat,
          "long"     => $long );
  }
  364. /// set grouping attribute and function
  365. ///
  366. /// in grouping mode, all matches are assigned to different groups
  367. /// based on grouping function value.
  368. ///
  369. /// each group keeps track of the total match count, and the best match
  370. /// (in this group) according to current sorting function.
  371. ///
  372. /// the final result set contains one best match per group, with
  373. /// grouping function value and matches count attached.
  374. ///
  375. /// groups in result set could be sorted by any sorting clause,
  376. /// including both document attributes and the following special
  377. /// internal Sphinx attributes:
  378. ///
  379. /// - @id - match document ID;
  380. /// - @weight, @rank, @relevance - match weight;
  381. /// - @group - groupby function value;
  382. /// - @count - amount of matches in group.
  383. ///
  384. /// the default mode is to sort by groupby value in descending order,
  385. /// ie. by "@group desc".
  386. ///
  387. /// "total_found" would contain total amount of matching groups over
  388. /// the whole index.
  389. ///
  390. /// WARNING: grouping is done in fixed memory and thus its results
  391. /// are only approximate; so there might be more groups reported
  392. /// in total_found than actually present. @count might also
  393. /// be underestimated.
  394. ///
  395. /// for example, if sorting by relevance and grouping by "published"
  396. /// attribute with SPH_GROUPBY_DAY function, then the result set will
  397. /// contain one most relevant match per each day when there were any
  398. /// matches published, with day number and per-day match count attached,
  399. /// and sorted by day number in descending order (ie. recent days first).
  /// store the grouping attribute, grouping function and group sorting clause
  ///
  /// $attribute is the attribute name (string)
  /// $func must be one of the SPH_GROUPBY_xxx constants
  /// $groupsort is the clause used to sort groups (string)
  function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
  {
      assert ( is_string($attribute) );
      assert ( is_string($groupsort) );
      assert (
          $func==SPH_GROUPBY_DAY ||
          $func==SPH_GROUPBY_WEEK ||
          $func==SPH_GROUPBY_MONTH ||
          $func==SPH_GROUPBY_YEAR ||
          $func==SPH_GROUPBY_ATTR ||
          $func==SPH_GROUPBY_ATTRPAIR
      );

      $this->_groupsort = $groupsort;
      $this->_groupfunc = $func;
      $this->_groupby = $attribute;
  }
  414. /// set count-distinct attribute for group-by queries
  /// store the count-distinct attribute used with group-by queries
  ///
  /// $attribute is the attribute name (string)
  function SetGroupDistinct ( $attribute )
  {
      assert ( is_string($attribute) );

      $this->_groupdistinct = $attribute;
  }
  420. /// set distributed retries count and delay
  /// store the distributed retry settings
  ///
  /// $count is the retries count (non-negative integer)
  /// $delay is the retries delay (non-negative integer)
  function SetRetries ( $count, $delay=0 )
  {
      assert ( is_int($count) && $count>=0 );
      assert ( is_int($delay) && $delay>=0 );

      $this->_retrydelay = $delay;
      $this->_retrycount = $count;
  }
  428. //////////////////////////////////////////////////////////////////////////////
  429. /// clear all filters (for multi-queries)
  /// drop every stored filter and the geographical anchor point
  function ResetFilters ()
  {
      $this->_anchor = array ();
      $this->_filters = array ();
  }
  435. /// clear groupby settings
  /// put all group-by settings back to the values the constructor assigns
  function ResetGroupBy ()
  {
      $this->_groupdistinct = "";
      $this->_groupsort = "@group desc";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupby = "";
  }
  443. //////////////////////////////////////////////////////////////////////////////
  444. /// connect to searchd server and run given search query
  445. ///
  446. /// $query is query string
  447. ///
  448. /// $index is index name (or names) to query. default value is "*" which means
  449. /// to query all indexes. accepted characters for index names are letters, numbers,
  450. /// dash, and underscore; everything else is considered a separator. therefore,
  451. /// all the following calls are valid and will search two indexes:
  452. ///
  453. /// $cl->Query ( "test query", "main delta" );
  454. /// $cl->Query ( "test query", "main;delta" );
  455. /// $cl->Query ( "test query", "main, delta" );
  456. ///
  457. /// index order matters. if identical IDs are found in two or more indexes,
  458. /// weight and attribute values from the very last matching index will be used
  459. /// for sorting and returning to client. therefore, in the example above,
  460. /// matches from "delta" index will always "win" over matches from "main".
  461. ///
  462. /// returns false on failure
  463. /// returns hash which has the following keys on success:
  464. /// "matches"
  465. /// hash which maps found document_id to ( "weight", "group" ) hash
  466. /// "total"
  467. /// total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
  468. /// "total_found"
  469. /// total amount of matching documents in index
  470. /// "time"
  471. /// search time
  472. /// "words"
  473. /// hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
  /// run a single query: queue it with AddQuery(), send it with RunQueries()
  ///
  /// the "error" and "warning" entries of the result are copied into the
  /// client's last-error and last-warning fields
  ///
  /// returns false when RunQueries() fails or the query status is SEARCHD_ERROR
  /// returns the result hash otherwise
  function Query ( $query, $index="*" )
  {
      // a pending batch would be sent along with this query
      assert ( empty($this->_reqs) );

      $this->AddQuery ( $query, $index );
      $results = $this->RunQueries ();
      if ( !is_array($results) )
      {
          // probably network error; error message should be already filled
          return false;
      }

      $first = $results[0];
      $this->_error = $first["error"];
      $this->_warning = $first["warning"];

      return ( $first["status"]==SEARCHD_ERROR ) ? false : $first;
  }
  488. /// helper to pack floats in network byte order
  /// encode a float as 4 bytes in network (big-endian) byte order
  ///
  /// the float is packed in machine order, re-read as a machine-order 32-bit
  /// integer, and that integer is then written out big-endian
  function _PackFloat ( $f )
  {
      $machine = unpack ( "L*", pack ( "f", $f ) );
      return pack ( "N", $machine[1] ); // unpack() numbers its elements from 1
  }
  495. /// add query to batch
  496. ///
  497. /// batch queries enable searchd to perform internal optimizations,
  498. /// if possible; and reduce network connection overheads in all cases.
  499. ///
  500. /// for instance, running exactly the same query with different
  501. /// groupby settings will enable searchd to perform expensive
  502. /// full-text search and ranking operation only once, but compute
  503. /// multiple groupby results from its output.
  504. ///
  505. /// parameters are exactly the same as in Query() call
  506. /// returns index to results array returned by RunQueries() call
  /// serialize the current settings plus the given query and queue the result
  ///
  /// $query is the query string
  /// $index is the index name (or names) to search
  ///
  /// strings are written as a 32-bit length followed by the bytes; integers
  /// are written as 32-bit big-endian values
  ///
  /// returns the position of the new request in the batch (0-based)
  function AddQuery ( $query, $index="*" )
  {
      $parts = array ();

      // mode and limits
      $parts[] = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort );
      $parts[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;

      // query itself
      $parts[] = pack ( "N", strlen($query) ) . $query;

      // per-field weights
      $parts[] = pack ( "N", count($this->_weights) );
      foreach ( $this->_weights as $weight )
      {
          $parts[] = pack ( "N", (int)$weight );
      }

      // indexes
      $parts[] = pack ( "N", strlen($index) ) . $index;

      // id32 range
      $parts[] = pack ( "NNN", 0, (int)$this->_min_id, (int)$this->_max_id );

      // filters
      $parts[] = pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $filter )
      {
          $parts[] = pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
          $parts[] = pack ( "N", $filter["type"] );
          switch ( $filter["type"] )
          {
              case SPH_FILTER_VALUES:
                  $parts[] = pack ( "N", count($filter["values"]) );
                  foreach ( $filter["values"] as $value )
                  {
                      // this uberhack is to workaround 32bit signed int limit on x32 platforms
                      $parts[] = pack ( "N", floatval($value) );
                  }
                  break;

              case SPH_FILTER_RANGE:
                  $parts[] = pack ( "NN", $filter["min"], $filter["max"] );
                  break;

              case SPH_FILTER_FLOATRANGE:
                  $parts[] = $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
                  break;

              default:
                  assert ( 0 && "internal error: unhandled filter type" );
          }
          $parts[] = pack ( "N", $filter["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $parts[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
      $parts[] = pack ( "N", $this->_maxmatches );
      $parts[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
      $parts[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $parts[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

      // anchor point: a 0/1 presence flag, then names and coordinates if present
      if ( empty($this->_anchor) )
      {
          $parts[] = pack ( "N", 0 );
      } else
      {
          $anchor = $this->_anchor;
          $parts[] = pack ( "N", 1 );
          $parts[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
          $parts[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
          $parts[] = $this->_PackFloat ( $anchor["lat"] ) . $this->_PackFloat ( $anchor["long"] );
      }

      // per-index weights
      $parts[] = pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $idx=>$weight )
      {
          $parts[] = pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );
      }

      // store request to requests array
      $this->_reqs[] = implode ( "", $parts );
      return count($this->_reqs)-1;
  }
  568. /// run queries batch
  569. ///
  570. /// returns an array of result sets on success
  571. /// returns false on network IO failure
  572. ///
  573. /// each result set in returned array is a hash which contains
  574. /// the same keys as the hash returned by Query(), plus:
  575. /// "error"
  576. /// search error for this query
  577. /// "words"
  578. /// hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
  /// send every queued request in one packet and parse the reply
  ///
  /// returns an array with one result hash per parsed query on success
  /// returns false (with $this->_error filled) when nothing is queued, the
  /// connection fails, or no valid response is received; in the last two
  /// cases the queued requests are left in place
  function RunQueries ()
  {
      if ( empty($this->_reqs) )
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      $fp = $this->_Connect();
      if ( !$fp )
          return false;

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      // body is the query count followed by the queued requests;
      // the header adds command id, command version and body length
      $nreqs = count($this->_reqs);
      $payload = implode ( "", $this->_reqs );
      $len = 4+strlen($payload);
      $packet = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $payload;
      fwrite ( $fp, $packet, $len+8 );

      $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH );
      if ( !$response )
          return false;

      // the batch is only cleared once a response has arrived
      $this->_reqs = array ();

      //////////////////
      // parse response
      //////////////////

      $pos = 0;                   // current read position
      $end = strlen($response);   // upper bound for checks, to protect against broken responses
      $results = array ();

      for ( $ires=0; $ires<$nreqs && $pos<$end; $ires++ )
      {
          $result = array ( "error"=>"", "warning"=>"" );

          // extract status
          list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          $result["status"] = $status;

          if ( $status!=SEARCHD_OK )
          {
              // any non-OK status is followed by a length-prefixed message
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $message = substr ( $response, $pos, $len );
              $pos += $len;

              if ( $status!=SEARCHD_WARNING )
              {
                  // not a warning: this result carries only the error text
                  $result["error"] = $message;
                  $results[] = $result;
                  continue;
              }
              $result["warning"] = $message;
          }

          // read schema: field names first
          $fields = array ();
          $attrs = array ();

          list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          while ( $nfields-->0 && $pos<$end )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $fields[] = substr ( $response, $pos, $len );
              $pos += $len;
          }
          $result["fields"] = $fields;

          // then attributes, each a name followed by its type
          list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          while ( $nattrs-->0 && $pos<$end )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $attrname = substr ( $response, $pos, $len );
              $pos += $len;
              list(,$attrtype) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $attrs[$attrname] = $attrtype;
          }
          $result["attrs"] = $attrs;

          // read match count and the flag telling whether document IDs are 64-bit
          list(,$count) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          // read matches
          while ( $count-->0 && $pos<$end )
          {
              if ( $id64 )
              {
                  // 64-bit ID as high and low halves, then the weight
                  list ( $dochi, $doclo, $weight ) = array_values ( unpack ( "N*N*N*",
                      substr ( $response, $pos, 12 ) ) );
                  $pos += 12;
                  $doc = (((int)$dochi)<<32) + ((int)$doclo);
              } else
              {
                  list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                      substr ( $response, $pos, 8 ) ) );
                  $pos += 8;
                  $doc = sprintf ( "%u", $doc ); // workaround for php signed/unsigned braindamage
              }
              $weight = sprintf ( "%u", $weight );
              $result["matches"][$doc]["weight"] = $weight;

              $attrvals = array ();
              foreach ( $attrs as $attr=>$type )
              {
                  // every attribute starts with one 32-bit value
                  list(,$raw) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                  $pos += 4;

                  if ( $type==SPH_ATTR_FLOAT )
                  {
                      // reinterpret the 32 bits as a machine-order float
                      list(,$fval) = unpack ( "f*", pack ( "L", $raw ) );
                      $attrvals[$attr] = $fval;
                  } else if ( $type & SPH_ATTR_MULTI )
                  {
                      // for multi-value attributes the first value is the element count
                      $attrvals[$attr] = array ();
                      $nvalues = $raw;
                      while ( $nvalues-->0 && $pos<$end )
                      {
                          list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                          $pos += 4;
                          $attrvals[$attr][] = sprintf ( "%u", $item );
                      }
                  } else
                  {
                      // everything else is handled as an unsigned int
                      $attrvals[$attr] = sprintf ( "%u", $raw );
                  }
              }
              $result["matches"][$doc]["attrs"] = $attrvals;
          }

          // totals, elapsed time and the number of per-word statistics entries
          list ( $total, $total_found, $msecs, $words ) =
              array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
          $result["total"] = sprintf ( "%u", $total );
          $result["total_found"] = sprintf ( "%u", $total_found );
          $result["time"] = sprintf ( "%.3f", $msecs/1000 );
          $pos += 16;

          while ( $words-->0 && $pos<$end )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $word = substr ( $response, $pos, $len );
              $pos += $len;
              list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
              $pos += 8;
              $result["words"][$word] = array (
                  "docs"=>sprintf ( "%u", $docs ),
                  "hits"=>sprintf ( "%u", $hits ) );
          }

          $results[] = $result;
      }

      return $results;
  }
  714. /////////////////////////////////////////////////////////////////////////////
  715. // excerpts generation
  716. /////////////////////////////////////////////////////////////////////////////
  717. /// connect to searchd server and generate excerpts from given documents
  718. ///
  719. /// $docs is an array of strings which represent the documents' contents
  720. /// $index is a string specifying the index whose settings will be used
  721. /// for stemming, lexing and case folding
  722. /// $words is a string which contains the words to highlight
  723. /// $opts is a hash which contains additional optional highlighting parameters:
  724. /// "before_match"
  725. /// a string to insert before a set of matching words, default is "<b>"
  726. /// "after_match"
  727. /// a string to insert after a set of matching words, default is "</b>"
  728. /// "chunk_separator"
  729. /// a string to insert between excerpts chunks, default is " ... "
  730. /// "limit"
  731. /// max excerpt size in symbols (codepoints), default is 256
  732. /// "around"
  733. /// how many words to highlight around each match, default is 5
  734. /// "exact_phrase"
  735. /// whether to highlight exact phrase matches only, default is false
  736. /// "single_passage"
  737. /// whether to extract single best passage only, default is false
  738. ///
  739. /// returns false on failure
  740. /// returns an array of string excerpts on success
  function BuildExcerpts ( $docs, $index, $words, $opts=array() )
  {
      // Builds a single EXCERPT request out of $docs, $index, $words and
      // $opts, sends it over the socket returned by _Connect(), and splits
      // the reply into one excerpt string per input document.
      //
      // Besides the options listed in the header comment, two more boolean
      // options are accepted, both defaulting to false:
      //   "use_boundaries"  sets request flag bit 8
      //   "weight_order"    sets request flag bit 16
      //
      // Returns false (with $this->_error set where this method detects the
      // problem itself) when the request can not be sent completely or when
      // the reply is shorter than its own length prefixes claim.

      assert ( is_array($docs) );
      assert ( is_string($index) );
      assert ( is_string($words) );
      assert ( is_array($opts) );

      if (!( $fp = $this->_Connect() ))
          return false;

      /////////////////
      // fixup options
      /////////////////

      // isset() is used instead of an array union on purpose: an option that
      // was explicitly passed as null must still be replaced by its default
      $defaults = array (
          "before_match"    => "<b>",
          "after_match"     => "</b>",
          "chunk_separator" => " ... ",
          "limit"           => 256,
          "around"          => 5,
          "exact_phrase"    => false,
          "single_passage"  => false,
          "use_boundaries"  => false,
          "weight_order"    => false );
      foreach ( $defaults as $name=>$default )
          if ( !isset($opts[$name]) )
              $opts[$name] = $default;

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $flags = 1; // remove spaces
      if ( $opts["exact_phrase"] )    $flags |= 2;
      if ( $opts["single_passage"] )  $flags |= 4;
      if ( $opts["use_boundaries"] )  $flags |= 8;
      if ( $opts["weight_order"] )    $flags |= 16;

      $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
      $req .= pack ( "N", strlen($index) ) . $index; // req index
      $req .= pack ( "N", strlen($words) ) . $words; // req words

      // options; every string goes out as a 4-byte length followed by its bytes
      $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
      $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
      $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
      $req .= pack ( "N", (int)$opts["limit"] );
      $req .= pack ( "N", (int)$opts["around"] );

      // documents
      $ndocs = count($docs);
      $req .= pack ( "N", $ndocs );
      foreach ( $docs as $doc )
      {
          assert ( is_string($doc) );
          $req .= pack ( "N", strlen($doc) ) . $doc;
      }

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header

      // the 8-byte header is not counted in $len, hence $len+8;
      // a failed or short write must not be followed by a read
      if ( fwrite ( $fp, $req, $len+8 )!==$len+8 )
      {
          $this->_error = "failed to send excerpts request";
          return false;
      }

      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
          return false;

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $res = array ();
      $rlen = strlen($response);
      for ( $i=0; $i<$ndocs; $i++ )
      {
          // the 4-byte length prefix itself must be present before unpacking it
          if ( $pos+4 > $rlen )
          {
              $this->_error = "incomplete reply";
              return false;
          }
          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          // $len<0 guards against a huge length showing up as a negative
          // number where PHP integers are 32-bit
          if ( $len<0 || $pos+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              return false;
          }
          $res[] = $len ? substr ( $response, $pos, $len ) : "";
          $pos += $len;
      }
      return $res;
  }
  814. /////////////////////////////////////////////////////////////////////////////
  815. // attribute updates
  816. /////////////////////////////////////////////////////////////////////////////
  817. /// update specified attributes on specified documents
  818. ///
  819. /// $index is a name of the index to be updated
  820. /// $attrs is an array of attribute name strings
  821. /// $values is a hash where key is document id, and value is an array of
  822. /// new attribute values
  823. ///
  824. /// returns number of actually updated documents (0 or more) on success
  825. /// returns -1 on failure
  826. ///
  827. /// usage example:
  828. /// $cl->UpdateAttributes ( "test1", array("group_id"), array(1=>array(456)) );
  function UpdateAttributes ( $index, $attrs, $values )
  {
      // Packs the index name, the attribute names and one row of new values
      // per document id into a single UPDATE request, sends it over the
      // socket returned by _Connect(), and returns the counter found in the
      // first 4 bytes of the reply.
      //
      // Returns -1 when the connection fails, when the request can not be
      // sent completely, when _GetResponse() reports a failure, or when the
      // reply is too short to hold the counter.

      // verify everything
      assert ( is_string($index) );

      assert ( is_array($attrs) );
      foreach ( $attrs as $attr )
          assert ( is_string($attr) );

      assert ( is_array($values) );
      foreach ( $values as $id=>$entry )
      {
          assert ( is_int($id) );
          assert ( is_array($entry) );
          // every row must carry exactly one value per listed attribute
          assert ( count($entry)==count($attrs) );
          foreach ( $entry as $v )
              assert ( is_int($v) );
      }

      // build request
      $req = pack ( "N", strlen($index) ) . $index;

      $req .= pack ( "N", count($attrs) );
      foreach ( $attrs as $attr )
          $req .= pack ( "N", strlen($attr) ) . $attr;

      $req .= pack ( "N", count($values) );
      foreach ( $values as $id=>$entry )
      {
          // document id first, then its new values in $attrs order
          $req .= pack ( "N", $id );
          foreach ( $entry as $v )
              $req .= pack ( "N", $v );
      }

      // connect, send query, get response
      if (!( $fp = $this->_Connect() ))
          return -1;

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header

      // the 8-byte header is not counted in $len, hence $len+8;
      // a failed or short write must not be followed by a read
      if ( fwrite ( $fp, $req, $len+8 )!==$len+8 )
      {
          $this->_error = "failed to send update request";
          return -1;
      }

      if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
          return -1;

      // parse response; a reply shorter than 4 bytes can not hold the counter
      if ( strlen($response)<4 )
      {
          $this->_error = "incomplete reply";
          return -1;
      }
      list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
      return $updated;
  }
  869. }
  870. //
  871. // $Id$
  872. //
  873. ?>