sphinxapi.php 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811
  1. <?php
  2. //
  3. // $Id$
  4. //
  5. //
  6. // Copyright (c) 2001-2012, Andrew Aksyonoff
  7. // Copyright (c) 2008-2012, Sphinx Technologies Inc
  8. // All rights reserved
  9. //
  10. // This program is free software; you can redistribute it and/or modify
  11. // it under the terms of the GNU General Public License. You should have
  12. // received a copy of the GPL license along with this program; if you
  13. // did not, you can find it at http://www.gnu.org/
  14. //
  15. /////////////////////////////////////////////////////////////////////////////
  16. // PHP version of Sphinx searchd client (PHP API)
  17. /////////////////////////////////////////////////////////////////////////////
  18. /// known searchd commands
  /// searchd command identifiers
  define ( 'SEARCHD_COMMAND_SEARCH',     0 );
  define ( 'SEARCHD_COMMAND_EXCERPT',    1 );
  define ( 'SEARCHD_COMMAND_UPDATE',     2 );
  define ( 'SEARCHD_COMMAND_KEYWORDS',   3 );
  define ( 'SEARCHD_COMMAND_PERSIST',    4 );
  define ( 'SEARCHD_COMMAND_STATUS',     5 );
  define ( 'SEARCHD_COMMAND_FLUSHATTRS', 7 );

  /// command implementation versions spoken by this client
  define ( 'VER_COMMAND_SEARCH',     0x11D );
  define ( 'VER_COMMAND_EXCERPT',    0x104 );
  define ( 'VER_COMMAND_UPDATE',     0x103 );
  define ( 'VER_COMMAND_KEYWORDS',   0x100 );
  define ( 'VER_COMMAND_STATUS',     0x100 );
  define ( 'VER_COMMAND_QUERY',      0x100 );
  define ( 'VER_COMMAND_FLUSHATTRS', 0x100 );

  /// status codes returned by searchd
  define ( 'SEARCHD_OK',      0 );
  define ( 'SEARCHD_ERROR',   1 );
  define ( 'SEARCHD_RETRY',   2 );
  define ( 'SEARCHD_WARNING', 3 );

  /// match modes
  define ( 'SPH_MATCH_ALL',       0 );
  define ( 'SPH_MATCH_ANY',       1 );
  define ( 'SPH_MATCH_PHRASE',    2 );
  define ( 'SPH_MATCH_BOOLEAN',   3 );
  define ( 'SPH_MATCH_EXTENDED',  4 );
  define ( 'SPH_MATCH_FULLSCAN',  5 );
  define ( 'SPH_MATCH_EXTENDED2', 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

  /// ranking modes (ext2 only)
  define ( 'SPH_RANK_PROXIMITY_BM25', 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
  define ( 'SPH_RANK_BM25',           1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
  define ( 'SPH_RANK_NONE',           2 ); ///< no ranking, all matches get a weight of 1
  define ( 'SPH_RANK_WORDCOUNT',      3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
  define ( 'SPH_RANK_PROXIMITY',      4 );
  define ( 'SPH_RANK_MATCHANY',       5 );
  define ( 'SPH_RANK_FIELDMASK',      6 );
  define ( 'SPH_RANK_SPH04',          7 );
  define ( 'SPH_RANK_EXPR',           8 );
  define ( 'SPH_RANK_TOTAL',          9 );

  /// sort modes
  define ( 'SPH_SORT_RELEVANCE',     0 );
  define ( 'SPH_SORT_ATTR_DESC',     1 );
  define ( 'SPH_SORT_ATTR_ASC',      2 );
  define ( 'SPH_SORT_TIME_SEGMENTS', 3 );
  define ( 'SPH_SORT_EXTENDED',      4 );
  define ( 'SPH_SORT_EXPR',          5 );

  /// filter types
  define ( 'SPH_FILTER_VALUES',     0 );
  define ( 'SPH_FILTER_RANGE',      1 );
  define ( 'SPH_FILTER_FLOATRANGE', 2 );

  /// attribute types
  define ( 'SPH_ATTR_INTEGER',   1 );
  define ( 'SPH_ATTR_TIMESTAMP', 2 );
  define ( 'SPH_ATTR_ORDINAL',   3 );
  define ( 'SPH_ATTR_BOOL',      4 );
  define ( 'SPH_ATTR_FLOAT',     5 );
  define ( 'SPH_ATTR_BIGINT',    6 );
  define ( 'SPH_ATTR_STRING',    7 );
  define ( 'SPH_ATTR_FACTORS',   1001 );
  define ( 'SPH_ATTR_MULTI',     0x40000001 );
  define ( 'SPH_ATTR_MULTI64',   0x40000002 );

  /// grouping functions
  define ( 'SPH_GROUPBY_DAY',      0 );
  define ( 'SPH_GROUPBY_WEEK',     1 );
  define ( 'SPH_GROUPBY_MONTH',    2 );
  define ( 'SPH_GROUPBY_YEAR',     3 );
  define ( 'SPH_GROUPBY_ATTR',     4 );
  define ( 'SPH_GROUPBY_ATTRPAIR', 5 );
  87. // important properties of PHP's integers:
  88. // - always signed (one bit short of PHP_INT_SIZE)
  89. // - conversion from string to int is saturated
  90. // - float is double
  91. // - div converts arguments to floats
  92. // - mod converts arguments to ints
  93. // the packing code below works as follows:
  94. // - when we got an int, just pack it
  95. // if performance is a problem, this is the branch users should aim for
  96. //
  97. // - otherwise, we got a number in string form
  98. // this might be due to different reasons, but we assume that this is
  99. // because it didn't fit into PHP int
  100. //
  101. // - factor the string into high and low ints for packing
  102. // - if we have bcmath, then it is used
  103. // - if we don't, we have to do it manually (this is the fun part)
  104. //
  105. // - x64 branch does factoring using ints
  106. // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
  107. //
  108. // unpacking routines are pretty much the same.
  109. // - return ints if we can
  110. // - otherwise format number into a string
  111. /// pack 64-bit signed
  /// pack a signed 64-bit value as two big-endian 32-bit words (high, low)
  /// $v is either a native int or a number in string form
  function sphPackI64 ( $v )
  {
      assert ( is_numeric($v) );

      // 64-bit PHP: a native int holds the whole value
      if ( PHP_INT_SIZE>=8 )
      {
          $v = (int)$v;
          $high = $v>>32;
          $low = $v&0xFFFFFFFF;
          return pack ( "NN", $high, $low );
      }

      // 32-bit PHP, native int: the high word is -1 for negatives, 0 otherwise
      if ( is_int($v) )
      {
          $sign = ( $v < 0 ) ? -1 : 0;
          return pack ( "NN", $sign, $v );
      }

      // 32-bit PHP, bcmath available
      if ( function_exists("bcmul") )
      {
          // negative values get 2^64 added before splitting
          if ( bccomp ( $v, 0 ) == -1 )
              $v = bcadd ( "18446744073709551616", $v );
          $high = bcdiv ( $v, "4294967296", 0 );
          $low = bcmod ( $v, "4294967296" );
          // conversion to float is intentional; int would lose 31st bit
          return pack ( "NN", (float)$high, (float)$low );
      }

      // 32-bit PHP, no bcmath: split the decimal string 13 digits from the end
      // and factor both parts with float arithmetic
      $split = max ( 0, strlen($v) - 13 );
      $tail = abs ( (float)substr ( $v, $split ) );
      $head = abs ( (float)substr ( $v, 0, $split ) );

      $mix = $tail + $head*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
      $carry = floor ( $mix/4294967296.0 );
      $low = $mix - ( $carry*4294967296.0 );
      $high = $head*2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

      if ( $v<0 )
      {
          // convert the magnitude computed above into the negative encoding
          if ( $low!=0 )
          {
              $high = 4294967295.0 - $high;
              $low = 4294967296.0 - $low;
          }
          else
          {
              $high = 4294967296.0 - $high;
          }
      }
      return pack ( "NN", $high, $low );
  }
  153. /// pack 64-bit unsigned
  /// pack an unsigned 64-bit value as two big-endian 32-bit words (high, low)
  /// $v is either a native int or a number in string form
  function sphPackU64 ( $v )
  {
      assert ( is_numeric($v) );

      if ( PHP_INT_SIZE>=8 )
      {
          assert ( $v>=0 );

          // 64-bit PHP, native int
          if ( is_int($v) )
          {
              $high = $v>>32;
              $low = $v&0xFFFFFFFF;
              return pack ( "NN", $high, $low );
          }

          // 64-bit PHP, bcmath available
          if ( function_exists("bcmul") )
          {
              $high = bcdiv ( $v, 4294967296, 0 );
              $low = bcmod ( $v, 4294967296 );
              return pack ( "NN", $high, $low );
          }

          // 64-bit PHP, no bcmath: split the decimal string 13 digits from
          // the end and factor both parts with integer arithmetic
          $split = max ( 0, strlen($v) - 13 );
          $tail = (int)substr ( $v, $split );
          $head = (int)substr ( $v, 0, $split );

          $mix = $tail + $head*1316134912;
          $low = $mix % 4294967296;
          $high = $head*2328 + (int)($mix/4294967296);
          return pack ( "NN", $high, $low );
      }

      // 32-bit PHP, native int: high word is zero
      if ( is_int($v) )
          return pack ( "NN", 0, $v );

      // 32-bit PHP, bcmath available
      if ( function_exists("bcmul") )
      {
          $high = bcdiv ( $v, "4294967296", 0 );
          $low = bcmod ( $v, "4294967296" );
          // conversion to float is intentional; int would lose 31st bit
          return pack ( "NN", (float)$high, (float)$low );
      }

      // 32-bit PHP, no bcmath: same factoring as above, done in floats
      $split = max ( 0, strlen($v) - 13 );
      $tail = (float)substr ( $v, $split );
      $head = (float)substr ( $v, 0, $split );

      $mix = $tail + $head*1316134912.0;
      $carry = floor ( $mix / 4294967296.0 );
      $low = $mix - ( $carry * 4294967296.0 );
      $high = $head*2328.0 + $carry;
      return pack ( "NN", $high, $low );
  }
  200. // unpack 64-bit unsigned
  /// unpack an unsigned 64-bit value from two big-endian 32-bit words
  /// returns a native int when the value fits, otherwise a decimal string
  function sphUnpackU64 ( $v )
  {
      list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

      if ( PHP_INT_SIZE>=8 )
      {
          // some PHP builds (5.2.2 to 5.2.5) hand back negative words here; make them unsigned
          if ( $hi<0 )
              $hi += (1<<32);
          if ( $lo<0 )
              $lo += (1<<32);

          // 64-bit PHP: the result fits a signed native int
          if ( $hi<=2147483647 )
              return ($hi<<32) + $lo;

          // 64-bit PHP, bcmath available
          if ( function_exists("bcmul") )
              return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

          // 64-bit PHP, no bcmath: assemble the decimal string in two
          // pieces around a 10^5 base
          $base = 100000;
          $upper = ((int)($hi / $base) << 32) + (int)($lo / $base);
          $lower = (($hi % $base) << 32) + ($lo % $base);
          if ( $lower>$base )
          {
              $upper += (int)($lower / $base);
              $lower = $lower % $base;
          }

          if ( $upper==0 )
              return $lower;
          return sprintf ( "%d%05d", $upper, $lower );
      }

      // 32-bit PHP: value fits in the low word
      if ( $hi==0 )
      {
          if ( $lo>0 )
              return $lo;
          return sprintf ( "%u", $lo );
      }

      $hi = sprintf ( "%u", $hi );
      $lo = sprintf ( "%u", $lo );

      // 32-bit PHP, bcmath available
      if ( function_exists("bcmul") )
          return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

      // 32-bit PHP, no bcmath: float arithmetic around a 10^7 base
      $hi = (float)$hi;
      $lo = (float)$lo;

      $hiq = floor ( $hi/10000000.0 );
      $hir = $hi - $hiq*10000000.0;
      $mix = $lo + $hir*4967296.0;
      $mixq = floor ( $mix/10000000.0 );
      $lower = $mix - $mixq*10000000.0;
      $upper = $hiq*4294967296.0 + $hir*429.0 + $mixq;

      $upper = sprintf ( "%.0f", $upper );
      $lower = sprintf ( "%07.0f", $lower );
      if ( $upper=="0" )
          return sprintf( "%.0f", (float)$lower );
      return $upper . $lower;
  }
  254. // unpack 64-bit signed
  /// unpack a signed 64-bit value from two big-endian 32-bit words
  /// returns a native int when the value fits, otherwise a decimal string
  function sphUnpackI64 ( $v )
  {
      list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

      // 64-bit PHP: recombine the words into a native int
      if ( PHP_INT_SIZE>=8 )
      {
          // some PHP builds (5.2.2 to 5.2.5) hand back negative words here; make them unsigned
          if ( $hi<0 )
              $hi += (1<<32);
          if ( $lo<0 )
              $lo += (1<<32);
          return ($hi<<32) + $lo;
      }

      // 32-bit PHP, high word is zero
      if ( $hi==0 )
      {
          if ( $lo>0 )
              return $lo;
          return sprintf ( "%u", $lo );
      }

      // 32-bit PHP, high word is all ones
      if ( $hi==-1 )
      {
          if ( $lo<0 )
              return $lo;
          return sprintf ( "%.0f", $lo - 4294967296.0 );
      }

      // general case: for a negative high word, invert both words and
      // remember to add one back and to prefix the sign
      $sign = "";
      $carry = 0;
      if ( $hi<0 )
      {
          $hi = ~$hi;
          $lo = ~$lo;
          $carry = 1;
          $sign = "-";
      }

      $hi = sprintf ( "%u", $hi );
      $lo = sprintf ( "%u", $lo );

      // 32-bit PHP, bcmath available
      if ( function_exists("bcmul") )
          return $sign . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $carry );

      // 32-bit PHP, no bcmath: float arithmetic around a 10^7 base
      $hi = (float)$hi;
      $lo = (float)$lo;

      $hiq = floor ( $hi/10000000.0 );
      $hir = $hi - $hiq*10000000.0;
      $mix = $lo + $hir*4967296.0;
      $mixq = floor ( $mix/10000000.0 );
      $lower = $mix - $mixq*10000000.0 + $carry;
      $upper = $hiq*4294967296.0 + $hir*429.0 + $mixq;
      if ( $lower==10000000 )
      {
          // adding the carry overflowed the low part
          $lower = 0;
          $upper += 1;
      }

      $upper = sprintf ( "%.0f", $upper );
      $lower = sprintf ( "%07.0f", $lower );
      if ( $upper=="0" )
          return $sign . sprintf( "%.0f", (float)$lower );
      return $sign . $upper . $lower;
  }
  /// normalize a 32-bit word that may have come back negative into its unsigned value
  /// returns an int on 64-bit PHP and a "%u"-formatted string on 32-bit PHP
  function sphFixUint ( $value )
  {
      // x32 route: an unsigned 32-bit value does not fit an int, format it as a string
      if ( PHP_INT_SIZE<8 )
          return sprintf ( "%u", $value );

      // x64 route: shift a negative word back into unsigned range
      if ( $value<0 )
          $value += (1<<32);
      return $value;
  }
  /// set or clear a single bit in an integer flag word
  /// $flag is the current flags value, $bit the zero-based bit index,
  /// $on selects between setting (true) and clearing (false)
  /// returns the updated flags value; all other bits are left untouched
  function SetBit ( $flag, $bit, $on )
  {
      $mask = 1<<$bit;

      // OR rather than add, so setting a bit that is already set is a no-op
      if ( $on )
          return $flag | $mask;

      // clear with the complement, so bits above the low byte survive
      return $flag & ~$mask;
  }
  339. /// sphinx searchd client class
  340. class SphinxClient
  341. {
  342. var $_host; ///< searchd host (default is "localhost")
  343. var $_port; ///< searchd port (default is 9312)
  344. var $_offset; ///< how many records to seek from result-set start (default is 0)
  345. var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
  346. var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
  347. var $_weights; ///< per-field weights (default is 1 for all fields)
  348. var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
  349. var $_sortby; ///< attribute to sort by (default is "")
  350. var $_min_id; ///< min ID to match (default is 0, which means no limit)
  351. var $_max_id; ///< max ID to match (default is 0, which means no limit)
  352. var $_filters; ///< search filters
  353. var $_groupby; ///< group-by attribute name
  354. var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
  355. var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
  356. var $_groupdistinct;///< group-by count-distinct attribute
  357. var $_maxmatches; ///< max matches to retrieve
  358. var $_cutoff; ///< cutoff to stop searching at (default is 0)
  359. var $_retrycount; ///< distributed retries count
  360. var $_retrydelay; ///< distributed retries delay
  361. var $_anchor; ///< geographical anchor point
  362. var $_indexweights; ///< per-index weights
  363. var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
  364. var $_rankexpr; ///< ranking mode expression (for SPH_RANK_EXPR)
  365. var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
  366. var $_fieldweights; ///< per-field-name weights
  367. var $_overrides; ///< per-query attribute values overrides
  368. var $_select; ///< select-list (attributes or expressions, with optional aliases)
  369. var $_query_flags; ///< per-query various flags
  370. var $_predictedtime; ///< per-query max_predicted_time
  371. var $_outerorderby; ///< outer match sort by
  372. var $_outeroffset; ///< outer offset
  373. var $_outerlimit; ///< outer limit
  374. var $_hasouter;
  375. var $_error; ///< last error message
  376. var $_warning; ///< last warning message
  377. var $_connerror; ///< connection error vs remote error flag
  378. var $_reqs; ///< requests array for multi-query
  379. var $_mbenc; ///< stored mbstring encoding
  380. var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
  381. var $_timeout; ///< connect timeout
  382. /////////////////////////////////////////////////////////////////////////////
  383. // common stuff
  384. /////////////////////////////////////////////////////////////////////////////
  385. /// create a new client object and fill defaults
  var $_path; ///< unix socket path (false or "" when a TCP host/port is used)
  var $_socket; ///< persistent connection stream (false when there is none)

  /// create a new client object and fill defaults
  function __construct ()
  {
      // per-client-object settings
      $this->_host = "localhost";
      $this->_port = 9312;
      $this->_path = false;
      $this->_socket = false;

      // per-query settings
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct= "";
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;
      $this->_anchor = array ();
      $this->_indexweights= array ();
      $this->_ranker = SPH_RANK_PROXIMITY_BM25;
      $this->_rankexpr = "";
      $this->_maxquerytime= 0;
      $this->_fieldweights= array();
      $this->_overrides = array();
      $this->_select = "*";
      $this->_query_flags = 0;
      $this->_predictedtime = 0;
      $this->_outerorderby = "";
      $this->_outeroffset = 0;
      $this->_outerlimit = 0;
      $this->_hasouter = false;

      // per-reply fields (for single-query case)
      $this->_error = "";
      $this->_warning = "";
      $this->_connerror = false;

      // requests storage (for multi-query case)
      $this->_reqs = array ();
      $this->_mbenc = "";
      $this->_arrayresult = false;
      $this->_timeout = 0;
  }

  /// legacy PHP 4 style constructor name, kept so that existing explicit calls
  /// (e.g. from subclasses) keep working; PHP 8 no longer treats a method named
  /// after the class as a constructor, so the real work lives in __construct()
  function SphinxClient ()
  {
      $this->__construct ();
  }
  /// close the persistent connection stream, if one is still open
  function __destruct()
  {
      // only a live stream resource can be closed; false, null or an
      // already-closed handle must be skipped (fclose() throws on those in PHP 8)
      if ( isset ( $this->_socket ) && is_resource ( $this->_socket ) )
          fclose ( $this->_socket );
  }
  438. /// get last error message (string)
  /// return the most recently stored error message (string)
  function GetLastError ()
  {
      $message = $this->_error;
      return $message;
  }
  443. /// get last warning message (string)
  /// return the most recently stored warning message (string)
  function GetLastWarning ()
  {
      $message = $this->_warning;
      return $message;
  }
  448. /// get last error flag (to tell network connection errors from searchd errors or broken responses)
  /// return the connection-error flag stored alongside the last error
  function IsConnectError()
  {
      $flag = $this->_connerror;
      return $flag;
  }
  453. /// set searchd host name (string) and port (integer)
  /// set searchd host name (string) and port (integer)
  /// a host starting with "/" or "unix://" selects a unix socket path instead;
  /// a port of 0 (the default) or an invalid port leaves the current port unchanged
  function SetServer ( $host, $port = 0 )
  {
      assert ( is_string($host) );

      // absolute filesystem path: connect over a unix socket
      // (substr() instead of $host[0], so an empty host does not raise a warning)
      if ( substr ( $host, 0, 1 )==="/" )
      {
          $this->_path = 'unix://' . $host;
          return;
      }

      // explicit unix socket URL
      if ( substr ( $host, 0, 7 )=="unix://" )
      {
          $this->_path = $host;
          return;
      }

      $this->_host = $host;

      // accept ports given as digit-only strings (e.g. read from a config file)
      if ( is_string($port) && preg_match ( '/^\d+\z/', $port ) )
          $port = (int)$port;

      // only a valid TCP port number replaces the current one
      if ( is_int($port) && $port>0 && $port<65536 )
          $this->_port = $port;

      $this->_path = '';
  }
  473. /// set server connection timeout (0 to remove)
  /// store the connect timeout used when opening the searchd connection
  /// (values of 0 or less make the connect call run without an explicit timeout)
  function SetConnectTimeout ( $timeout )
  {
      assert ( is_numeric($timeout) );

      $this->_timeout = $timeout;
  }
  /// write $length bytes of $data to the stream $handle
  /// returns true when everything was written; otherwise records a
  /// connection error on the client and returns false
  function _Send ( $handle, $data, $length )
  {
      // the write is only attempted while the stream is not at EOF
      $sent = !feof($handle) && fwrite ( $handle, $data, $length ) === $length;
      if ( $sent )
          return true;

      $this->_error = 'connection unexpectedly closed (timed out?)';
      $this->_connerror = true;
      return false;
  }
  489. /////////////////////////////////////////////////////////////////////////////
  490. /// enter mbstring workaround mode
  /// enter mbstring workaround mode: when bit 2 of mbstring.func_overload is set,
  /// remember the current internal encoding and switch to latin1
  function _MBPush ()
  {
      $this->_mbenc = "";

      $overload = ini_get ( "mbstring.func_overload" );
      if ( !( $overload & 2 ) )
          return;

      $this->_mbenc = mb_internal_encoding();
      mb_internal_encoding ( "latin1" );
  }
  500. /// leave mbstring workaround mode
  /// leave mbstring workaround mode: restore the internal encoding saved by _MBPush(), if any
  function _MBPop ()
  {
      if ( !$this->_mbenc )
          return;

      mb_internal_encoding ( $this->_mbenc );
  }
  506. /// connect to searchd server
  /// connect to searchd server and exchange protocol versions
  /// returns the open stream on success; on failure stores a message in
  /// $this->_error and returns false
  function _Connect ()
  {
      if ( $this->_socket!==false )
      {
          // we are in persistent connection mode, so we have a socket
          // however, need to check whether it's still alive
          if ( !@feof ( $this->_socket ) )
              return $this->_socket;

          // dead socket: release it before forcing a reopen, so the stream is not leaked
          if ( is_resource ( $this->_socket ) )
              @fclose ( $this->_socket );
          $this->_socket = false;
      }

      $errno = 0;
      $errstr = "";
      $this->_connerror = false;

      // a unix socket path takes precedence over host/port
      if ( $this->_path )
      {
          $host = $this->_path;
          $port = 0;
      }
      else
      {
          $host = $this->_host;
          $port = $this->_port;
      }

      if ( $this->_timeout<=0 )
          $fp = @fsockopen ( $host, $port, $errno, $errstr );
      else
          $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

      if ( !$fp )
      {
          if ( $this->_path )
              $location = $this->_path;
          else
              $location = "{$this->_host}:{$this->_port}";

          $errstr = trim ( $errstr );
          $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
          $this->_connerror = true;
          return false;
      }

      // send my version
      // this is a subtle part. we must do it before (!) reading back from searchd.
      // because otherwise under some conditions (reported on FreeBSD for instance)
      // TCP stack could throttle write-write-read pattern because of Nagle.
      if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
      {
          fclose ( $fp );
          $this->_error = "failed to send client protocol version";
          return false;
      }

      // check version; a short or failed read is treated as version 0
      // instead of being handed to unpack()
      $reply = fread ( $fp, 4 );
      $v = 0;
      if ( is_string($reply) && strlen($reply)==4 )
      {
          list(,$v) = unpack ( "N*", $reply );
          $v = (int)$v;
      }
      if ( $v<1 )
      {
          fclose ( $fp );
          $this->_error = "expected searchd protocol version 1+, got version '$v'";
          return false;
      }

      return $fp;
  }
  567. /// get and check response packet from searchd server
  /// get and check response packet from searchd server
  /// $fp is the connected stream, $client_ver the command version this client implements
  /// returns the response body (string) on success; on failure stores a
  /// message in $this->_error and returns false
  function _GetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;
      $status = -1; // stays at -1 when no complete header was read
      $ver = 0;

      $header = fread ( $fp, 8 );
      if ( is_string($header) && strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, min ( 8192, $left ) );

              // hard read failure: stop instead of spinning
              if ( $chunk===false )
                  break;

              if ( $chunk==="" )
              {
                  // nothing arrived; give up if the stream reports a timeout,
                  // since feof() stays false in that state
                  $meta = stream_get_meta_data ( $fp );
                  if ( !empty ( $meta["timed_out"] ) )
                      break;
                  continue;
              }

              // compare against "" above rather than testing truthiness,
              // so a chunk that happens to be the string "0" is not dropped
              $response .= $chunk;
              $left -= strlen($chunk);
          }
      }

      // non-persistent mode: the connection is done after one response
      if ( $this->_socket === false )
          fclose ( $fp );

      // check response
      $read = strlen ( $response );
      if ( $read==0 || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body starts with a length-prefixed warning message
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }

      return $response;
  }
  628. /////////////////////////////////////////////////////////////////////////////
  629. // searching
  630. /////////////////////////////////////////////////////////////////////////////
  631. /// set offset and count into result set,
  632. /// and optionally set max-matches and cutoff limits
  /// store the result-set window ($offset, $limit)
  /// $max and $cutoff replace the stored max-matches / cutoff only when positive
  function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
  {
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;

      // non-positive values leave the current settings alone
      if ( $max>0 )
      {
          $this->_maxmatches = $max;
      }
      if ( $cutoff>0 )
      {
          $this->_cutoff = $cutoff;
      }
  }
  647. /// set maximum query time, in milliseconds, per-index
  648. /// integer, 0 means "do not limit"
  /// store the maximum query time; $max must be a non-negative integer
  function SetMaxQueryTime ( $max )
  {
      assert ( is_int($max) );
      assert ( $max>=0 );

      $this->_maxquerytime = $max;
  }
  655. /// set matching mode
  /// store the matching mode; $mode must equal one of the SPH_MATCH_xxx constants
  function SetMatchMode ( $mode )
  {
      // non-strict in_array() performs the same loose == comparison against each constant
      $known = array (
          SPH_MATCH_ALL,
          SPH_MATCH_ANY,
          SPH_MATCH_PHRASE,
          SPH_MATCH_BOOLEAN,
          SPH_MATCH_EXTENDED,
          SPH_MATCH_FULLSCAN,
          SPH_MATCH_EXTENDED2 );
      assert ( in_array ( $mode, $known ) );

      $this->_mode = $mode;
  }
  667. /// set ranking mode
  /// store the ranking mode and its optional ranking expression string
  /// $ranker must be exactly 0 or lie in the range 1 .. SPH_RANK_TOTAL-1
  function SetRankingMode ( $ranker, $rankexpr="" )
  {
      assert ( $ranker===0 || ( $ranker>=1 && $ranker<SPH_RANK_TOTAL ) );
      assert ( is_string($rankexpr) );

      $this->_rankexpr = $rankexpr;
      $this->_ranker = $ranker;
  }
  675. /// set matches sorting mode
  /// store the sorting mode and sort-by clause
  /// $sortby may be empty only for SPH_SORT_RELEVANCE
  function SetSortMode ( $mode, $sortby="" )
  {
      // non-strict in_array() performs the same loose == comparison against each constant
      $known = array (
          SPH_SORT_RELEVANCE,
          SPH_SORT_ATTR_DESC,
          SPH_SORT_ATTR_ASC,
          SPH_SORT_TIME_SEGMENTS,
          SPH_SORT_EXTENDED,
          SPH_SORT_EXPR );
      assert ( in_array ( $mode, $known ) );
      assert ( is_string($sortby) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sortby = $sortby;
      $this->_sort = $mode;
  }
  690. /// bind per-field weights by order
  691. /// DEPRECATED; use SetFieldWeights() instead
  /// store per-field weights given as an array of integers
  function SetWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $w )
      {
          assert ( is_int($w) );
      }

      $this->_weights = $weights;
  }
  699. /// bind per-field weights by name
  /// store per-field weights given as a (string name => integer weight) array
  function SetFieldWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $field=>$w )
      {
          assert ( is_string($field) );
          assert ( is_int($w) );
      }

      $this->_fieldweights = $weights;
  }
  710. /// bind per-index weights by name
  /// store per-index weights given as a (string index name => integer weight) array
  function SetIndexWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $name=>$w )
      {
          assert ( is_string($name) );
          assert ( is_int($w) );
      }

      $this->_indexweights = $weights;
  }
  721. /// set IDs range to match
  722. /// only match records if document ID is between $min and $max (inclusive)
  /// store the document ID range; both bounds must be numeric and $min must not exceed $max
  function SetIDRange ( $min, $max )
  {
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $this->_max_id = $max;
      $this->_min_id = $min;
  }
  731. /// set values set filter
  732. /// only match records where $attribute value is in given set
  /// append a values-set filter on $attribute
  /// $values must be a non-empty array of numeric values, otherwise nothing is added;
  /// $exclude is stored with the filter as-is
  function SetFilter ( $attribute, $values, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_array($values) );
      assert ( count($values) );

      // with assertions disabled, silently ignore an unusable values list
      if ( !is_array($values) || !count($values) )
          return;

      foreach ( $values as $candidate )
      {
          assert ( is_numeric($candidate) );
      }

      $filter = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
      $this->_filters[] = $filter;
  }
  /// add a range filter
  /// only match records if $attribute value is between $min and $max (inclusive)
  function SetFilterRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $filter = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
      $this->_filters[] = $filter;
  }
  /// add a float range filter
  /// only match records if $attribute value is between $min and $max (inclusive)
  /// both bounds must be PHP floats
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
  {
      assert ( is_string($attribute) );
      assert ( is_float($min) );
      assert ( is_float($max) );
      assert ( $min<=$max );

      $filter = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
      $this->_filters[] = $filter;
  }
  /// set up the anchor point for geosphere distance calculations
  /// required to use @geodist in filters and sorting
  /// latitude and longitude must be floats, in radians
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      // attribute names
      assert ( is_string($attrlat) );
      assert ( is_string($attrlong) );
      // anchor coordinates
      assert ( is_float($lat) );
      assert ( is_float($long) );

      $anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
      $this->_anchor = $anchor;
  }
  /// set grouping attribute, grouping function, and the group sorting clause
  function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
  {
      assert ( is_string($attribute) );
      assert ( is_string($groupsort) );

      // $func must be one of the known grouping functions (loose comparison, as before)
      $known_funcs = array (
          SPH_GROUPBY_DAY,
          SPH_GROUPBY_WEEK,
          SPH_GROUPBY_MONTH,
          SPH_GROUPBY_YEAR,
          SPH_GROUPBY_ATTR,
          SPH_GROUPBY_ATTRPAIR );
      assert ( in_array ( $func, $known_funcs ) );

      $this->_groupby = $attribute;
      $this->_groupfunc = $func;
      $this->_groupsort = $groupsort;
  }
  /// set the attribute used for count-distinct in group-by queries
  function SetGroupDistinct ( $attribute )
  {
      // attribute is referenced by name
      assert ( is_string($attribute) );

      $this->_groupdistinct = $attribute;
  }
  /// set distributed retries count and delay
  /// both values must be non-negative integers
  function SetRetries ( $count, $delay=0 )
  {
      assert ( is_int($count) && $count>=0 );
      assert ( is_int($delay) && $delay>=0 );

      $this->_retrycount = $count;
      $this->_retrydelay = $delay;
  }
  /// choose the result set format (hash or array; hash by default)
  /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
  function SetArrayResult ( $arrayresult )
  {
      // strictly a boolean switch
      assert ( is_bool($arrayresult) );

      $this->_arrayresult = $arrayresult;
  }
  /// set attribute values override
  /// there can be only one override per attribute (a later call for the same name replaces the earlier one)
  /// $values must be a hash that maps document IDs to attribute values
  function SetOverride ( $attrname, $attrtype, $values )
  {
      // attribute types that may be overridden
      $overridable = array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT );

      assert ( is_string ( $attrname ) );
      assert ( in_array ( $attrtype, $overridable ) );
      assert ( is_array ( $values ) );

      $override = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values );
      $this->_overrides[$attrname] = $override;
  }
  /// set the select-list (attributes or expressions), SQL-like syntax
  function SetSelect ( $select )
  {
      // the clause is passed through as a plain string
      assert ( is_string ( $select ) );

      $this->_select = $select;
  }
  /// set a named query flag
  /// known flags and their accepted values:
  ///   "reverse_scan"       0 or 1
  ///   "sort_method"        "pq" or "kbuffer"
  ///   "max_predicted_time" non-negative integer
  ///   "boolean_simplify"   true or false
  ///   "idf"                "normalized" or "plain"
  function SetQueryFlag ( $flag_name, $flag_value )
  {
      // accepted values per flag; the keys double as the list of known flag names
      $flags = array (
          "reverse_scan" => array ( 0, 1 ),
          "sort_method" => array ( "pq", "kbuffer" ),
          "max_predicted_time" => array ( 0 ),
          "boolean_simplify" => array ( true, false ),
          "idf" => array ("normalized", "plain" )
      );

      // the flag name must be a known one; isset() with two arguments would merely
      // check that both variables exist, so test membership in the table instead
      assert ( isset ( $flags[$flag_name] ) );

      // the value must be one of the listed ones, or any non-negative integer for max_predicted_time;
      // the isset() guard keeps an unknown name from triggering an undefined index lookup
      assert ( ( isset ( $flags[$flag_name] ) && in_array( $flag_value, $flags[$flag_name], true ) )
          || ( $flag_name=="max_predicted_time" && is_int ( $flag_value ) && $flag_value>=0 ) );

      // each flag owns one bit of the query flags mask
      if ( $flag_name=="reverse_scan" ) $this->_query_flags = SetBit ( $this->_query_flags, 0, $flag_value==1 );
      if ( $flag_name=="sort_method" ) $this->_query_flags = SetBit ( $this->_query_flags, 1, $flag_value=="kbuffer" );
      if ( $flag_name=="max_predicted_time" )
      {
          // bit 2 is raised only for a positive limit; the limit itself is stored separately
          $this->_query_flags = SetBit ( $this->_query_flags, 2, $flag_value>0 );
          $this->_predictedtime = (int)$flag_value;
      }
      if ( $flag_name=="boolean_simplify" ) $this->_query_flags = SetBit ( $this->_query_flags, 3, $flag_value );
      if ( $flag_name=="idf" ) $this->_query_flags = SetBit ( $this->_query_flags, 4, $flag_value=="plain" );
  }
  /// set outer select parameters (order-by clause, offset, limit)
  /// and mark the outer select as enabled
  function SetOuterSelect ( $orderby, $offset, $limit )
  {
      // type checks
      assert ( is_string($orderby) );
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      // range checks
      assert ( $offset>=0 );
      assert ( $limit>0 );

      $this->_outerorderby = $orderby;
      $this->_outeroffset = $offset;
      $this->_outerlimit = $limit;
      $this->_hasouter = true;
  }
  863. //////////////////////////////////////////////////////////////////////////////
  /// drop all filters and the geo anchor (for multi-queries)
  function ResetFilters ()
  {
      $this->_filters = array ();
      $this->_anchor = array ();
  }
  /// restore group-by settings to their defaults (for multi-queries)
  function ResetGroupBy ()
  {
      // no grouping attribute, no count-distinct attribute
      $this->_groupby = "";
      $this->_groupdistinct = "";

      // default grouping function and group sorting clause
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
  }
  /// drop all attribute value overrides (for multi-queries)
  function ResetOverrides ()
  {
      $this->_overrides = array();
  }
  /// clear all query flags and the predicted time limit
  function ResetQueryFlag ()
  {
      $this->_predictedtime = 0;
      $this->_query_flags = 0;
  }
  /// clear outer select parameters and mark the outer select as disabled
  function ResetOuterSelect ()
  {
      $this->_hasouter = false;
      $this->_outerorderby = '';
      $this->_outeroffset = 0;
      $this->_outerlimit = 0;
  }
  895. //////////////////////////////////////////////////////////////////////////////
  /// connect to searchd server, run the given search query through the given indexes,
  /// and return the search result set (or false on failure)
  /// the result's error and warning messages are copied into the client error/warning state
  function Query ( $query, $index="*", $comment="" )
  {
      // must not be mixed with a pending multi-query batch
      assert ( empty($this->_reqs) );

      // run a batch that consists of this single query
      $this->AddQuery ( $query, $index, $comment );
      $results = $this->RunQueries ();

      // just in case it failed too early
      $this->_reqs = array ();

      if ( !is_array($results) )
      {
          // probably network error; error message should be already filled
          return false;
      }

      $this->_error = $results[0]["error"];
      $this->_warning = $results[0]["warning"];

      return ( $results[0]["status"]==SEARCHD_ERROR ) ? false : $results[0];
  }
  /// helper to pack a float as 4 bytes in network (big-endian) byte order
  function _PackFloat ( $f )
  {
      // float bytes in machine order
      $native = pack ( "f", $f );

      // reinterpret the very same bytes as a machine order 32-bit integer
      $words = unpack ( "L*", $native );

      // and emit that integer in network order
      return pack ( "N", $words[1] );
  }
  /// add query to multi-query batch
  /// serializes the current client settings together with the given query, index list
  /// and comment into one binary request, and appends it to the pending requests
  /// returns index into results array from RunQueries() call
  function AddQuery ( $query, $index="*", $comment="" )
  {
      // mbstring workaround
      $this->_MBPush ();

      // the request is collected as a list of binary chunks, glued together at the very end;
      // chunk order is the wire order and must not be changed
      $chunks = array ();

      // query flags, offset, limit, match mode, ranker (plus ranking expression, if any)
      $chunks[] = pack ( "NNNNN", $this->_query_flags, $this->_offset, $this->_limit, $this->_mode, $this->_ranker );
      if ( $this->_ranker==SPH_RANK_EXPR )
          $chunks[] = pack ( "N", strlen($this->_rankexpr) ) . $this->_rankexpr;

      // (deprecated) sort mode, sort-by clause
      $chunks[] = pack ( "N", $this->_sort );
      $chunks[] = pack ( "N", strlen($this->_sortby) ) . $this->_sortby;

      // query itself
      $chunks[] = pack ( "N", strlen($query) ) . $query;

      // positional weights
      $chunks[] = pack ( "N", count($this->_weights) );
      foreach ( $this->_weights as $w )
          $chunks[] = pack ( "N", (int)$w );

      // indexes
      $chunks[] = pack ( "N", strlen($index) ) . $index;

      // id64 range marker, then the id64 range
      $chunks[] = pack ( "N", 1 );
      $chunks[] = sphPackU64 ( $this->_min_id );
      $chunks[] = sphPackU64 ( $this->_max_id );

      // filters
      $chunks[] = pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $flt )
      {
          $chunks[] = pack ( "N", strlen($flt["attr"]) ) . $flt["attr"];
          $chunks[] = pack ( "N", $flt["type"] );
          switch ( $flt["type"] )
          {
              case SPH_FILTER_VALUES:
                  $chunks[] = pack ( "N", count($flt["values"]) );
                  foreach ( $flt["values"] as $v )
                      $chunks[] = sphPackI64 ( $v );
                  break;

              case SPH_FILTER_RANGE:
                  $chunks[] = sphPackI64 ( $flt["min"] );
                  $chunks[] = sphPackI64 ( $flt["max"] );
                  break;

              case SPH_FILTER_FLOATRANGE:
                  $chunks[] = $this->_PackFloat ( $flt["min"] );
                  $chunks[] = $this->_PackFloat ( $flt["max"] );
                  break;

              default:
                  assert ( 0 && "internal error: unhandled filter type" );
          }
          $chunks[] = pack ( "N", $flt["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $chunks[] = pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
      $chunks[] = pack ( "N", $this->_maxmatches );
      $chunks[] = pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
      $chunks[] = pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $chunks[] = pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

      // anchor point: a presence marker, followed by the anchor data when one is set
      if ( empty($this->_anchor) )
      {
          $chunks[] = pack ( "N", 0 );
      } else
      {
          $anchor = $this->_anchor;
          $chunks[] = pack ( "N", 1 );
          $chunks[] = pack ( "N", strlen($anchor["attrlat"]) ) . $anchor["attrlat"];
          $chunks[] = pack ( "N", strlen($anchor["attrlong"]) ) . $anchor["attrlong"];
          $chunks[] = $this->_PackFloat ( $anchor["lat"] );
          $chunks[] = $this->_PackFloat ( $anchor["long"] );
      }

      // per-index weights
      $chunks[] = pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $name=>$w )
          $chunks[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );

      // max query time
      $chunks[] = pack ( "N", $this->_maxquerytime );

      // per-field weights
      $chunks[] = pack ( "N", count($this->_fieldweights) );
      foreach ( $this->_fieldweights as $name=>$w )
          $chunks[] = pack ( "N", strlen($name) ) . $name . pack ( "N", $w );

      // comment
      $chunks[] = pack ( "N", strlen($comment) ) . $comment;

      // attribute overrides
      $chunks[] = pack ( "N", count($this->_overrides) );
      foreach ( $this->_overrides as $override )
      {
          $chunks[] = pack ( "N", strlen($override["attr"]) ) . $override["attr"];
          $chunks[] = pack ( "NN", $override["type"], count($override["values"]) );
          foreach ( $override["values"] as $docid=>$value )
          {
              assert ( is_numeric($docid) );
              assert ( is_numeric($value) );

              $chunks[] = sphPackU64 ( $docid );

              // value encoding depends on the overridden attribute type
              if ( $override["type"]==SPH_ATTR_FLOAT )
                  $chunks[] = $this->_PackFloat ( $value );
              elseif ( $override["type"]==SPH_ATTR_BIGINT )
                  $chunks[] = sphPackI64 ( $value );
              else
                  $chunks[] = pack ( "N", $value );
          }
      }

      // select-list
      $chunks[] = pack ( "N", strlen($this->_select) ) . $this->_select;

      // max_predicted_time; only sent when a positive limit is set
      if ( $this->_predictedtime>0 )
          $chunks[] = pack ( "N", (int)$this->_predictedtime );

      // outer select: order-by clause, offset, limit, and the enabled marker
      $chunks[] = pack ( "N", strlen($this->_outerorderby) ) . $this->_outerorderby;
      $chunks[] = pack ( "NN", $this->_outeroffset, $this->_outerlimit );
      $chunks[] = pack ( "N", $this->_hasouter ? 1 : 0 );

      // mbstring workaround
      $this->_MBPop ();

      // store request to requests array
      $this->_reqs[] = implode ( "", $chunks );
      return count($this->_reqs)-1;
  }
  /// connect to searchd, run queries batch, and return an array of result sets
  /// returns false (with the error message set) when there is nothing to run,
  /// or when connecting, sending, or receiving fails
  function RunQueries ()
  {
      if ( empty($this->_reqs) )
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      // mbstring workaround
      $this->_MBPush ();

      $conn = $this->_Connect ();
      if ( !$conn )
      {
          $this->_MBPop ();
          return false;
      }

      // glue the batch together and prepend the command header;
      // the declared body length covers the two extra integers that follow it
      $nqueries = count($this->_reqs);
      $batch = join ( "", $this->_reqs );
      $bodylen = 8+strlen($batch);
      $header = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $bodylen, 0, $nqueries );
      $packet = $header . $batch;

      // send query, get response; the response is only read when the send went through
      $response = false;
      if ( $this->_Send ( $conn, $packet, $bodylen+8 ) )
          $response = $this->_GetResponse ( $conn, VER_COMMAND_SEARCH );

      if ( !$response )
      {
          $this->_MBPop ();
          return false;
      }

      // query sent ok; we can reset reqs now
      $this->_reqs = array ();

      // parse and return response
      return $this->_ParseSearchResponse ( $response, $nqueries );
  }
  /// parse and return search query (or queries) response
  /// decodes up to $nreqs result sets from the binary $response; every result set carries
  /// "error", "warning" and "status", and (unless it is an error) the schema, matches,
  /// totals and per-word statistics
  function _ParseSearchResponse ( $response, $nreqs )
  {
      $pos = 0; // current read position
      $end = strlen($response); // max position for checks, to protect against broken responses

      $results = array ();
      for ( $qi=0; $qi<$nreqs && $pos<$end; $qi++ )
      {
          $results[] = array ( "error"=>"", "warning"=>"" );
          $res =& $results[$qi];

          // extract status
          list(,$status) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          $res["status"] = $status;

          if ( $status!=SEARCHD_OK )
          {
              // any non-OK status is followed by a message
              list(,$msglen) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $message = substr ( $response, $pos, $msglen );
              $pos += $msglen;

              if ( $status!=SEARCHD_WARNING )
              {
                  // an error carries nothing else; move on to the next result set
                  $res["error"] = $message;
                  continue;
              }
              $res["warning"] = $message;
          }

          // read schema: field names first
          $fields = array ();
          $attrs = array ();

          list(,$nfields) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          for ( ; $nfields>0 && $pos<$end; $nfields-- )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $fields[] = substr ( $response, $pos, $len );
              $pos += $len;
          }
          $res["fields"] = $fields;

          // then attribute names along with their types
          list(,$nattrs) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          for ( ; $nattrs>0 && $pos<$end; $nattrs-- )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $attrname = substr ( $response, $pos, $len );
              $pos += $len;
              list(,$attrtype) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $attrs[$attrname] = $attrtype;
          }
          $res["attrs"] = $attrs;

          // read match count, and the 64-bit document IDs marker
          list(,$count) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
          list(,$id64) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          // read matches; $idx is the index into result array
          for ( $idx=0; $idx<$count && $pos<$end; $idx++ )
          {
              // parse document id and weight
              if ( $id64 )
              {
                  $doc = sphUnpackU64 ( substr ( $response, $pos, 8 ) );
                  $pos += 8;
                  list(,$weight) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                  $pos += 4;
              } else
              {
                  list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
                      substr ( $response, $pos, 8 ) ) );
                  $pos += 8;
                  $doc = sphFixUint($doc);
              }
              $weight = sprintf ( "%u", $weight );

              // create match entry: keyed by position in array mode, by document id in hash mode
              $slot = $this->_arrayresult ? $idx : $doc;
              if ( $this->_arrayresult )
                  $res["matches"][$slot] = array ( "id"=>$doc, "weight"=>$weight );
              else
                  $res["matches"][$slot]["weight"] = $weight;

              // parse and create attributes
              $attrvals = array ();
              foreach ( $attrs as $attr=>$type )
              {
                  if ( $type==SPH_ATTR_BIGINT )
                  {
                      // handle 64bit ints
                      $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $pos, 8 ) );
                      $pos += 8;
                  } elseif ( $type==SPH_ATTR_FLOAT )
                  {
                      // handle floats: read the raw 32 bits, then reinterpret them as a float
                      list(,$bits) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                      $pos += 4;
                      list(,$fval) = unpack ( "f*", pack ( "L", $bits ) );
                      $attrvals[$attr] = $fval;
                  } else
                  {
                      // everything else starts with an unsigned int: either the value itself,
                      // or the size of the data that follows
                      list(,$val) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                      $pos += 4;

                      switch ( $type )
                      {
                          case SPH_ATTR_MULTI:
                              // $val 32-bit values follow
                              $attrvals[$attr] = array ();
                              for ( $left=$val; $left>0 && $pos<$end; $left-- )
                              {
                                  list(,$item) = unpack ( "N*", substr ( $response, $pos, 4 ) );
                                  $pos += 4;
                                  $attrvals[$attr][] = sphFixUint($item);
                              }
                              break;

                          case SPH_ATTR_MULTI64:
                              // 64-bit values follow; each one consumes 2 units of the counter
                              $attrvals[$attr] = array ();
                              for ( $left=$val; $left>0 && $pos<$end; $left-=2 )
                              {
                                  $attrvals[$attr][] = sphUnpackI64 ( substr ( $response, $pos, 8 ) );
                                  $pos += 8;
                              }
                              break;

                          case SPH_ATTR_STRING:
                              // $val bytes of string data follow
                              $attrvals[$attr] = substr ( $response, $pos, $val );
                              $pos += $val;
                              break;

                          case SPH_ATTR_FACTORS:
                              // $val-4 bytes of data follow
                              $attrvals[$attr] = substr ( $response, $pos, $val-4 );
                              $pos += $val-4;
                              break;

                          default:
                              // plain unsigned int value
                              $attrvals[$attr] = sphFixUint($val);
                              break;
                      }
                  }
              }

              $res["matches"][$slot]["attrs"] = $attrvals;
          }

          // totals, query time, and the number of per-word statistics entries
          list ( $total, $total_found, $msecs, $nwords ) =
              array_values ( unpack ( "N*N*N*N*", substr ( $response, $pos, 16 ) ) );
          $res["total"] = sprintf ( "%u", $total );
          $res["total_found"] = sprintf ( "%u", $total_found );
          $res["time"] = sprintf ( "%.3f", $msecs/1000 );
          $pos += 16;

          // per-word statistics
          for ( ; $nwords>0 && $pos<$end; $nwords-- )
          {
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;
              $word = substr ( $response, $pos, $len );
              $pos += $len;
              list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
              $pos += 8;

              $res["words"][$word] = array (
                  "docs"=>sprintf ( "%u", $docs ),
                  "hits"=>sprintf ( "%u", $hits ) );
          }
      }

      $this->_MBPop ();
      return $results;
  }
  1210. /////////////////////////////////////////////////////////////////////////////
  1211. // excerpts generation
  1212. /////////////////////////////////////////////////////////////////////////////
  /// connect to searchd server, and generate excerpts (snippets)
  /// of given documents for given query. returns false on failure,
  /// an array of snippets (one per document, in the order given) on success
  /// $docs must be an array of strings; $opts may override any of the option defaults below
  function BuildExcerpts ( $docs, $index, $words, $opts=array() )
  {
      assert ( is_array($docs) );
      assert ( is_string($index) );
      assert ( is_string($words) );
      assert ( is_array($opts) );

      $this->_MBPush ();

      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // fixup options
      /////////////////

      // every option the caller left unset (or set to null) gets its default
      $defaults = array (
          "before_match" => "<b>",
          "after_match" => "</b>",
          "chunk_separator" => " ... ",
          "limit" => 256,
          "limit_passages" => 0,
          "limit_words" => 0,
          "around" => 5,
          "exact_phrase" => false,
          "single_passage" => false,
          "use_boundaries" => false,
          "weight_order" => false,
          "query_mode" => false,
          "force_all_words" => false,
          "start_passage_id" => 1,
          "load_files" => false,
          "html_strip_mode" => "index",
          "allow_empty" => false,
          "passage_boundary" => "none",
          "emit_zones" => false,
          "load_files_scattered" => false );
      foreach ( $defaults as $name=>$value )
          if ( !isset($opts[$name]) )
              $opts[$name] = $value;

      /////////////////
      // build request
      /////////////////

      // v.1.2 req
      // boolean options, each mapped to its bit in the flags mask
      $flagbits = array (
          "exact_phrase" => 2,
          "single_passage" => 4,
          "use_boundaries" => 8,
          "weight_order" => 16,
          "query_mode" => 32,
          "force_all_words" => 64,
          "load_files" => 128,
          "allow_empty" => 256,
          "emit_zones" => 512,
          "load_files_scattered" => 1024 );
      $flags = 1; // remove spaces
      foreach ( $flagbits as $name=>$bit )
          if ( $opts[$name] )
              $flags |= $bit;

      $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
      $req .= pack ( "N", strlen($index) ) . $index; // req index
      $req .= pack ( "N", strlen($words) ) . $words; // req words

      // options
      $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
      $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
      $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
      $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );
      $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2
      $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"];
      $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"];

      // documents
      $ndocs = count($docs);
      $req .= pack ( "N", $ndocs );
      foreach ( $docs as $doc )
      {
          assert ( is_string($doc) );
          $req .= pack ( "N", strlen($doc) ) . $doc;
      }

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
      if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
          !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $res = array ();
      $rlen = strlen($response);
      for ( $i=0; $i<$ndocs; $i++ )
      {
          // make sure the length prefix itself is there before decoding it;
          // unpack() on a short buffer would raise a warning and yield no length
          if ( $pos+4 > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;

          // and that the snippet body is completely there, too
          if ( $pos+$len > $rlen )
          {
              $this->_error = "incomplete reply";
              $this->_MBPop ();
              return false;
          }
          $res[] = $len ? substr ( $response, $pos, $len ) : "";
          $pos += $len;
      }

      $this->_MBPop ();
      return $res;
  }
  1317. /////////////////////////////////////////////////////////////////////////////
  1318. // keyword generation
  1319. /////////////////////////////////////////////////////////////////////////////
  /// connect to searchd server, and generate keyword list for a given query
  /// returns false on failure,
  /// an array of words on success; every entry has "tokenized" and "normalized" keys,
  /// plus "docs" and "hits" when $hits is true
  function BuildKeywords ( $query, $index, $hits )
  {
      assert ( is_string($query) );
      assert ( is_string($index) );
      assert ( is_bool($hits) );

      $this->_MBPush ();

      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $req = pack ( "N", strlen($query) ) . $query; // req query
      $req .= pack ( "N", strlen($index) ) . $index; // req index
      $req .= pack ( "N", (int)$hits );

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
      if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
          !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $res = array ();
      $rlen = strlen($response);

      // every read below is bounds-checked BEFORE decoding, so that a truncated reply
      // is reported as such instead of being fed to unpack()
      $truncated = ( $rlen<4 );
      $nwords = 0;
      if ( !$truncated )
      {
          list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
          $pos += 4;
      }

      for ( $i=0; $i<$nwords && !$truncated; $i++ )
      {
          $entry = array ();

          // two length-prefixed strings per word: the tokenized form, then the normalized one
          foreach ( array ( "tokenized", "normalized" ) as $form )
          {
              if ( $pos+4 > $rlen )
              {
                  $truncated = true;
                  break;
              }
              list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
              $pos += 4;

              if ( $pos+$len > $rlen )
              {
                  $truncated = true;
                  break;
              }
              $entry[$form] = $len ? substr ( $response, $pos, $len ) : "";
              $pos += $len;
          }

          // optional per-word statistics
          if ( !$truncated && $hits )
          {
              if ( $pos+8 > $rlen )
              {
                  $truncated = true;
              } else
              {
                  list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
                  $pos += 8;
                  $entry["docs"] = $ndocs;
                  $entry["hits"] = $nhits;
              }
          }

          if ( !$truncated )
              $res[] = $entry;
      }

      if ( $truncated )
      {
          $this->_error = "incomplete reply";
          $this->_MBPop ();
          return false;
      }

      $this->_MBPop ();
      return $res;
  }
  /// escape the characters listed below by putting a backslash in front of each of them
  function EscapeString ( $string )
  {
      // backslash must go first: replacements are applied in list order, and the
      // backslashes inserted by the later entries must not get escaped again
      $specials = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' );

      $escaped = array ();
      foreach ( $specials as $ch )
          $escaped[] = '\\' . $ch;

      return str_replace ( $specials, $escaped, $string );
  }
  1392. /////////////////////////////////////////////////////////////////////////////
  1393. // attribute updates
  1394. /////////////////////////////////////////////////////////////////////////////
  /// batch update given attributes in given rows in given indexes
  /// $values maps document IDs to arrays of new values, one value per entry of $attrs;
  /// with $mva set, every value is itself an array of integers
  /// returns amount of updated documents (0 or more) on success, or -1 on failure
  function UpdateAttributes ( $index, $attrs, $values, $mva=false, $ignorenonexistent=false )
  {
      // verify everything
      assert ( is_string($index) );
      assert ( is_bool($mva) );
      assert ( is_bool($ignorenonexistent) );

      assert ( is_array($attrs) );
      foreach ( $attrs as $name )
          assert ( is_string($name) );

      assert ( is_array($values) );
      foreach ( $values as $docid=>$row )
      {
          assert ( is_numeric($docid) );
          assert ( is_array($row) );
          assert ( count($row)==count($attrs) );
          foreach ( $row as $cell )
          {
              if ( !$mva )
              {
                  assert ( is_int($cell) );
                  continue;
              }
              assert ( is_array($cell) );
              foreach ( $cell as $item )
                  assert ( is_int($item) );
          }
      }

      // build request; collected as a list of binary pieces, glued together below
      $this->_MBPush ();
      $parts = array ();
      $parts[] = pack ( "N", strlen($index) ) . $index;

      $parts[] = pack ( "N", count($attrs) );
      $parts[] = pack ( "N", $ignorenonexistent ? 1 : 0 );
      foreach ( $attrs as $name )
      {
          $parts[] = pack ( "N", strlen($name) ) . $name;
          $parts[] = pack ( "N", $mva ? 1 : 0 );
      }

      $parts[] = pack ( "N", count($values) );
      foreach ( $values as $docid=>$row )
      {
          $parts[] = sphPackU64 ( $docid );
          foreach ( $row as $cell )
          {
              if ( $mva )
              {
                  // values count, then the values themselves
                  $parts[] = pack ( "N", count($cell) );
                  foreach ( $cell as $item )
                      $parts[] = pack ( "N", $item );
              } else
              {
                  $parts[] = pack ( "N", $cell );
              }
          }
      }
      $body = implode ( "", $parts );

      // connect, send query, get response
      $fp = $this->_Connect ();
      if ( !$fp )
      {
          $this->_MBPop ();
          return -1;
      }

      $len = strlen($body);
      $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $body; // add header
      if ( !$this->_Send ( $fp, $req, $len+8 ) ||
          !( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ) )
      {
          $this->_MBPop ();
          return -1;
      }

      // parse response
      list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
      $this->_MBPop ();
      return $updated;
  }
  1468. /////////////////////////////////////////////////////////////////////////////
  1469. // persistent connections
  1470. /////////////////////////////////////////////////////////////////////////////
  /// open a persistent connection
  /// returns true on success; returns false when a connection is already open,
  /// or when connecting or sending the persist command fails
  function Open()
  {
      if ( $this->_socket !== false )
      {
          $this->_error = 'already connected';
          return false;
      }

      $conn = $this->_Connect ();
      if ( !$conn )
          return false;

      // command, command version = 0, body length = 4, body = 1
      $packet = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
      $sent = $this->_Send ( $conn, $packet, 12 );
      if ( !$sent )
          return false;

      // keep the socket around for the subsequent calls
      $this->_socket = $conn;
      return true;
  }
  /// close the persistent connection
  /// returns true on success, false (with the error message set) when there is none open
  function Close()
  {
      $connected = ( $this->_socket !== false );
      if ( !$connected )
      {
          $this->_error = 'not connected';
          return false;
      }

      fclose ( $this->_socket );
      $this->_socket = false;

      return true;
  }
  1498. //////////////////////////////////////////////////////////////////////////
  1499. // status
  1500. //////////////////////////////////////////////////////////////////////////
  /// query searchd status
  /// returns false on failure; on success, returns an array of rows,
  /// where every row is an array of column strings
  function Status ()
  {
      $this->_MBPush ();
      if (!( $fp = $this->_Connect() ))
      {
          $this->_MBPop();
          return false;
      }

      $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
      if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
          !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
      {
          $this->_MBPop ();
          return false;
      }

      // the response starts with the row and column counts,
      // followed by rows*cols length-prefixed strings; no error handling is attempted here
      $p = 0;
      list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;

      $res = array();
      for ( $i=0; $i<$rows; $i++ )
          for ( $j=0; $j<$cols; $j++ )
      {
          list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
          $res[$i][] = substr ( $response, $p, $len ); $p += $len;
      }

      $this->_MBPop ();
      return $res;
  }
  1529. //////////////////////////////////////////////////////////////////////////
  1530. // flush
  1531. //////////////////////////////////////////////////////////////////////////
  /// send the flush attributes command to searchd
  /// returns the tag read from the response, or -1 on failure
  function FlushAttributes ()
  {
      $this->_MBPush ();

      $conn = $this->_Connect ();
      if ( !$conn )
      {
          $this->_MBPop();
          return -1;
      }

      // header only, len=0
      $packet = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 );

      // the response is only read when the send went through
      $response = false;
      if ( $this->_Send ( $conn, $packet, 8 ) )
          $response = $this->_GetResponse ( $conn, VER_COMMAND_FLUSHATTRS );

      if ( !$response )
      {
          $this->_MBPop ();
          return -1;
      }

      // a well-formed response is exactly one 32-bit integer
      if ( strlen($response)!=4 )
      {
          $this->_error = "unexpected response length";
          $tag = -1;
      } else
      {
          list(,$tag) = unpack ( "N*", $response );
      }

      $this->_MBPop ();
      return $tag;
  }
  1555. }
  1556. //
  1557. // $Id$
  1558. //