trie-fd.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004
  1. /*
  2. * libwebsockets - small server side websockets and web server implementation
  3. *
  4. * Copyright (C) 2010 - 2019 Andy Green <[email protected]>
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to
  8. * deal in the Software without restriction, including without limitation the
  9. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10. * sell copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. * IN THE SOFTWARE.
  23. */
  24. #include "private-lib-core.h"
  25. #include "private-lib-misc-fts.h"
  26. #include <stdio.h>
  27. #include <string.h>
  28. #include <assert.h>
  29. #include <fcntl.h>
  30. #include <sys/types.h>
  31. #include <sys/stat.h>
  32. #define AC_COUNT_STASHED_CHILDREN 8
  33. struct ch {
  34. jg2_file_offset ofs;
  35. char name[64];
  36. int inst;
  37. int child_agg;
  38. int name_length;
  39. int effpos;
  40. int descendents;
  41. };
  42. struct wac {
  43. struct ch ch[AC_COUNT_STASHED_CHILDREN];
  44. jg2_file_offset self;
  45. jg2_file_offset tifs;
  46. int child_count;
  47. int child;
  48. int agg;
  49. int desc;
  50. char done_children;
  51. char once;
  52. };
  53. struct linetable {
  54. struct linetable *next;
  55. int chunk_line_number_start;
  56. int chunk_line_number_count;
  57. off_t chunk_filepos_start;
  58. off_t vli_ofs_in_index;
  59. };
  60. static uint32_t
  61. b32(unsigned char *b)
  62. {
  63. return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
  64. }
  65. static uint16_t
  66. b16(unsigned char *b)
  67. {
  68. return (b[0] << 8) | b[1];
  69. }
  70. static int
  71. lws_fts_filepath(struct lws_fts_file *jtf, int filepath_index, char *result,
  72. size_t len, uint32_t *ofs_linetable, uint32_t *lines)
  73. {
  74. unsigned char buf[256 + 15];
  75. uint32_t flen;
  76. int ra, bp = 0;
  77. size_t m;
  78. off_t o;
  79. if (filepath_index > jtf->filepaths)
  80. return 1;
  81. if (lseek(jtf->fd, jtf->filepath_table + (4 * filepath_index),
  82. SEEK_SET) < 0) {
  83. lwsl_err("%s: unable to seek\n", __func__);
  84. return 1;
  85. }
  86. ra = read(jtf->fd, buf, 4);
  87. if (ra < 0)
  88. return 1;
  89. o = (unsigned int)b32(buf);
  90. if (lseek(jtf->fd, o, SEEK_SET) < 0) {
  91. lwsl_err("%s: unable to seek\n", __func__);
  92. return 1;
  93. }
  94. ra = read(jtf->fd, buf, sizeof(buf));
  95. if (ra < 0)
  96. return 1;
  97. if (ofs_linetable)
  98. bp += rq32(&buf[bp], ofs_linetable);
  99. else
  100. bp += rq32(&buf[bp], &flen);
  101. if (lines)
  102. bp += rq32(&buf[bp], lines);
  103. else
  104. bp += rq32(&buf[bp], &flen);
  105. bp += rq32(&buf[bp], &flen);
  106. m = flen;
  107. if (len - 1 < m)
  108. m = flen - 1;
  109. strncpy(result, (char *)&buf[bp], m);
  110. result[m] = '\0';
  111. result[len - 1] = '\0';
  112. return 0;
  113. }
  114. /*
  115. * returns -1 for fail or fd open on the trie file.
  116. *
  117. * *root is set to the position of the root trie entry.
  118. * *flen is set to the length of the whole file
  119. */
  120. int
  121. lws_fts_adopt(struct lws_fts_file *jtf)
  122. {
  123. unsigned char buf[256];
  124. off_t ot;
  125. if (read(jtf->fd, buf, TRIE_FILE_HDR_SIZE) != TRIE_FILE_HDR_SIZE) {
  126. lwsl_err("%s: unable to read file header\n", __func__);
  127. goto bail;
  128. }
  129. if (buf[0] != 0xca || buf[1] != 0x7a ||
  130. buf[2] != 0x5f || buf[3] != 0x75) {
  131. lwsl_err("%s: bad magic %02X %02X %02X %02X\n", __func__,
  132. buf[0], buf[1], buf[2], buf[3]);
  133. goto bail;
  134. }
  135. jtf->root = b32(&buf[4]);
  136. ot = lseek(jtf->fd, 0, SEEK_END);
  137. if (ot < 0) {
  138. lwsl_err("%s: unable to seek\n", __func__);
  139. goto bail;
  140. }
  141. jtf->flen = ot;
  142. if (jtf->flen != b32(&buf[8])) {
  143. lwsl_err("%s: file size doesn't match expected\n", __func__);
  144. goto bail;
  145. }
  146. jtf->filepath_table = b32(&buf[12]);
  147. jtf->filepaths = b32(&buf[16]);
  148. return jtf->fd;
  149. bail:
  150. return -1;
  151. }
  152. struct lws_fts_file *
  153. lws_fts_open(const char *filepath)
  154. {
  155. struct lws_fts_file *jtf;
  156. jtf = lws_malloc(sizeof(*jtf), "fts open");
  157. if (!jtf)
  158. goto bail1;
  159. jtf->fd = open(filepath, O_RDONLY);
  160. if (jtf->fd < 0) {
  161. lwsl_err("%s: unable to open %s\n", __func__, filepath);
  162. goto bail2;
  163. }
  164. if (lws_fts_adopt(jtf) < 0)
  165. goto bail3;
  166. return jtf;
  167. bail3:
  168. close(jtf->fd);
  169. bail2:
  170. lws_free(jtf);
  171. bail1:
  172. return NULL;
  173. }
  174. void
  175. lws_fts_close(struct lws_fts_file *jtf)
  176. {
  177. close(jtf->fd);
  178. lws_free(jtf);
  179. }
  180. #define grab(_pos, _size) { \
  181. bp = 0; \
  182. if (lseek(jtf->fd, _pos, SEEK_SET) < 0) { \
  183. lwsl_err("%s: unable to seek\n", __func__); \
  184. \
  185. goto bail; \
  186. } \
  187. \
  188. ra = read(jtf->fd, buf, _size); \
  189. if (ra < 0) \
  190. goto bail; \
  191. }
  192. static struct linetable *
  193. lws_fts_cache_chunktable(struct lws_fts_file *jtf, uint32_t ofs_linetable,
  194. struct lwsac **linetable_head)
  195. {
  196. struct linetable *lt, *first = NULL, **prev = NULL;
  197. unsigned char buf[8];
  198. int line = 1, bp, ra;
  199. off_t cfs = 0;
  200. *linetable_head = NULL;
  201. do {
  202. grab(ofs_linetable, sizeof(buf));
  203. lt = lwsac_use(linetable_head, sizeof(*lt), 0);
  204. if (!lt)
  205. goto bail;
  206. if (!first)
  207. first = lt;
  208. lt->next = NULL;
  209. if (prev)
  210. *prev = lt;
  211. prev = &lt->next;
  212. lt->chunk_line_number_start = line;
  213. lt->chunk_line_number_count = b16(&buf[bp + 2]);
  214. lt->vli_ofs_in_index = ofs_linetable + 8;
  215. lt->chunk_filepos_start = cfs;
  216. line += lt->chunk_line_number_count;
  217. cfs += b32(&buf[bp + 4]);
  218. ofs_linetable += b16(&buf[bp]);
  219. } while (b16(&buf[bp]));
  220. return first;
  221. bail:
  222. lwsac_free(linetable_head);
  223. return NULL;
  224. }
  225. static int
  226. lws_fts_getfileoffset(struct lws_fts_file *jtf, struct linetable *ltstart,
  227. int line, off_t *_ofs)
  228. {
  229. struct linetable *lt = ltstart;
  230. unsigned char buf[LWS_FTS_LINES_PER_CHUNK * 5];
  231. uint32_t ll;
  232. off_t ofs;
  233. int bp, ra;
  234. /* first figure out which chunk */
  235. do {
  236. if (line >= lt->chunk_line_number_start &&
  237. line < lt->chunk_line_number_start +
  238. lt->chunk_line_number_count)
  239. break;
  240. lt = lt->next;
  241. } while (lt);
  242. if (!lt)
  243. goto bail;
  244. /* we know it's in this chunk */
  245. ofs = lt->chunk_filepos_start;
  246. line -= lt->chunk_line_number_start;
  247. grab(lt->vli_ofs_in_index, sizeof(buf));
  248. bp = 0;
  249. while (line) {
  250. bp += rq32(&buf[bp], &ll);
  251. ofs += ll;
  252. line--;
  253. }
  254. /* we know the offset it is at in the original file */
  255. *_ofs = ofs;
  256. return 0;
  257. bail:
  258. lwsl_info("%s: bail %d\n", __func__, line);
  259. return 1;
  260. }
  261. static int
  262. ac_record(struct lws_fts_file *jtf, struct lwsac **results_head,
  263. const char *needle, int pos, struct wac *s, int sp,
  264. uint32_t instances, uint32_t agg_instances, uint32_t children,
  265. struct lws_fts_result_autocomplete ***ppac)
  266. {
  267. struct lws_fts_result_autocomplete *ac;
  268. int n, m;
  269. char *p;
  270. if (!instances && !agg_instances)
  271. return 1;
  272. m = pos;
  273. for (n = 1; n <= sp; n++)
  274. m += s[n].ch[s[n].child - 1].name_length;
  275. ac = lwsac_use(results_head, sizeof(*ac) + m + 1, 0);
  276. if (!ac)
  277. return -1;
  278. p = (char *)(ac + 1);
  279. **ppac = ac;
  280. ac->next = NULL;
  281. *ppac = &ac->next;
  282. ac->instances = instances;
  283. ac->agg_instances = agg_instances;
  284. ac->ac_length = m;
  285. ac->has_children = !!children;
  286. ac->elided = 0;
  287. memcpy(p, needle, pos);
  288. p += pos;
  289. for (n = 1; n <= sp; n++) {
  290. int w = s[n].child - 1;
  291. memcpy(p, s[n].ch[w].name, s[n].ch[w].name_length);
  292. p += s[n].ch[w].name_length;
  293. }
  294. p = (char *)(ac + 1);
  295. p[m] = '\0';
  296. /*
  297. * deduct this child's instance weight from his antecdents to track
  298. * relative path attractiveness dynamically, after we already used its
  299. * best results (children are sorted best-first)
  300. */
  301. for (n = sp; n >= 0; n--) {
  302. s[n].ch[s[n].child - 1].child_agg -= instances;
  303. s[n].agg -= instances;
  304. }
  305. return 0;
  306. }
  307. struct lws_fts_result *
  308. lws_fts_search(struct lws_fts_file *jtf, struct lws_fts_search_params *ftsp)
  309. {
  310. uint32_t children, instances, co, sl, agg, slt, chunk,
  311. fileofs_tif_start, desc, agg_instances;
  312. int pos = 0, n, m, nl, bp, base = 0, ra, palm, budget, sp, ofd = -1;
  313. unsigned long long tf = lws_now_usecs();
  314. struct lws_fts_result_autocomplete **pac = NULL;
  315. char stasis, nac = 0, credible, needle[32];
  316. struct lws_fts_result_filepath *fp;
  317. struct lws_fts_result *result;
  318. unsigned char buf[4096];
  319. off_t o, child_ofs;
  320. struct wac s[128];
  321. ftsp->results_head = NULL;
  322. if (!ftsp->needle)
  323. return NULL;
  324. nl = (int)strlen(ftsp->needle);
  325. if ((size_t)nl > sizeof(needle) - 2)
  326. return NULL;
  327. result = lwsac_use(&ftsp->results_head, sizeof(*result), 0);
  328. if (!result)
  329. return NULL;
  330. /* start with no results... */
  331. result->autocomplete_head = NULL;
  332. pac = &result->autocomplete_head;
  333. result->filepath_head = NULL;
  334. result->duration_ms = 0;
  335. result->effective_flags = ftsp->flags;
  336. palm = 0;
  337. for (n = 0; n < nl; n++)
  338. needle[n] = tolower(ftsp->needle[n]);
  339. needle[nl] = '\0';
  340. o = jtf->root;
  341. do {
  342. bp = 0;
  343. base = 0;
  344. grab(o, sizeof(buf));
  345. child_ofs = o + bp;
  346. bp += rq32(&buf[bp], &fileofs_tif_start);
  347. bp += rq32(&buf[bp], &children);
  348. bp += rq32(&buf[bp], &instances);
  349. bp += rq32(&buf[bp], &agg_instances);
  350. palm = pos;
  351. /* the children follow here */
  352. if (pos == nl) {
  353. nac = 0;
  354. if (!fileofs_tif_start)
  355. /*
  356. * we matched, but there are no instances of
  357. * this, it's actually an intermediate
  358. */
  359. goto autocomp;
  360. /* we leave with bp positioned at the instance list */
  361. o = fileofs_tif_start;
  362. grab(o, sizeof(buf));
  363. break;
  364. }
  365. if (ra - bp < 1024) {
  366. /*
  367. * We don't have enough. So reload the buffer starting
  368. * at where we got to.
  369. */
  370. base += bp;
  371. grab(o + base, sizeof(buf));
  372. }
  373. /* gets set if any child COULD match needle if it went on */
  374. credible = 0;
  375. for (n = 0; (uint32_t)n < children; n++) {
  376. uint32_t inst;
  377. bp += rq32(&buf[bp], &co);
  378. bp += rq32(&buf[bp], &inst);
  379. bp += rq32(&buf[bp], &agg);
  380. bp += rq32(&buf[bp], &desc);
  381. bp += rq32(&buf[bp], &sl);
  382. if (sl > (uint32_t)(nl - pos)) {
  383. /*
  384. * it can't be a match because it's longer than
  385. * our needle string (but that leaves it as a
  386. * perfectly fine autocomplete candidate)
  387. */
  388. size_t g = nl - pos;
  389. /*
  390. * "credible" means at least one child matches
  391. * all the chars in needle up to as many as it
  392. * has. If not "credible" this path cannot
  393. * match.
  394. */
  395. if (!strncmp((char *)&buf[bp], &needle[pos], g))
  396. credible = 1;
  397. else
  398. /*
  399. * deflate the parent agg using the
  400. * knowledge this child is not on the
  401. * path shown by the remainder of needle
  402. */
  403. agg_instances -= agg;
  404. nac = 0;
  405. bp += sl;
  406. slt = 0;
  407. pos = palm;
  408. goto ensure;
  409. }
  410. /* the comparison string potentially has huge length */
  411. slt = sl;
  412. while (slt) {
  413. /*
  414. * the strategy is to compare whatever we have
  415. * lying around, then bring in more if it didn't
  416. * fail to match yet. That way we don't bring
  417. * in anything we could already have known was
  418. * not needed due to a match fail.
  419. */
  420. chunk = ra - bp;
  421. if (chunk > slt)
  422. chunk = slt;
  423. if ((chunk == 1 && needle[pos] != buf[bp]) ||
  424. (chunk != 1 &&
  425. memcmp(&needle[pos], &buf[bp], chunk))) {
  426. /*
  427. * it doesn't match... so nothing can
  428. * autocomplete this...
  429. */
  430. bp += slt;
  431. slt = 0;
  432. nac = 1;
  433. goto ensure;
  434. }
  435. slt -= chunk;
  436. pos += chunk;
  437. bp += chunk;
  438. /* so far, it matches */
  439. if (!slt) {
  440. /* we matched the whole thing */
  441. o = co;
  442. if (!co)
  443. goto bail;
  444. n = (int)children;
  445. credible = 1;
  446. }
  447. ensure:
  448. /*
  449. * do we have at least buf more to match, or the
  450. * remainder of the string, whichever is less?
  451. *
  452. * bp may exceed sizeof(buf) on no match path
  453. */
  454. chunk = sizeof(buf);
  455. if (slt < chunk)
  456. chunk = slt;
  457. if (ra - bp >= (int)chunk)
  458. continue;
  459. /*
  460. * We don't have enough. So reload buf starting
  461. * at where we got to.
  462. */
  463. base += bp;
  464. grab(o + base, sizeof(buf));
  465. } /* while we are still comparing */
  466. } /* for each child */
  467. if ((uint32_t)n == children) {
  468. if (!credible)
  469. goto bail;
  470. nac = 0;
  471. goto autocomp;
  472. }
  473. } while(1);
  474. result->duration_ms = (int)((lws_now_usecs() - tf) / 1000);
  475. if (!instances && !children)
  476. return result;
  477. /* the match list may easily exceed one read buffer load ... */
  478. o += bp;
  479. /*
  480. * Only do the file match list if it was requested in the search flags
  481. */
  482. if (!(ftsp->flags & LWSFTS_F_QUERY_FILES))
  483. goto autocomp;
  484. do {
  485. uint32_t fi, tot, line, ro, ofs_linetable, lines, fplen,
  486. *u, _o;
  487. struct lwsac *lt_head = NULL;
  488. struct linetable *ltst;
  489. char path[256], *pp;
  490. int footprint;
  491. off_t fo;
  492. ofd = -1;
  493. grab(o, sizeof(buf));
  494. ro = o;
  495. bp += rq32(&buf[bp], &_o);
  496. o = _o;
  497. assert(!o || o > TRIE_FILE_HDR_SIZE);
  498. bp += rq32(&buf[bp], &fi);
  499. bp += rq32(&buf[bp], &tot);
  500. if (lws_fts_filepath(jtf, fi, path, sizeof(path) - 1,
  501. &ofs_linetable, &lines)) {
  502. lwsl_err("can't get filepath index %d\n", fi);
  503. goto bail;
  504. }
  505. if (ftsp->only_filepath && strcmp(path, ftsp->only_filepath))
  506. continue;
  507. ltst = lws_fts_cache_chunktable(jtf, ofs_linetable, &lt_head);
  508. if (!ltst)
  509. goto bail;
  510. if (ftsp->flags & LWSFTS_F_QUERY_QUOTE_LINE) {
  511. ofd = open(path, O_RDONLY);
  512. if (ofd < 0) {
  513. lwsac_free(&lt_head);
  514. goto bail;
  515. }
  516. }
  517. fplen = (int)strlen(path);
  518. footprint = sizeof(*fp) + fplen + 1;
  519. if (ftsp->flags & LWSFTS_F_QUERY_FILE_LINES) {
  520. /* line number and offset in file */
  521. footprint += 2 * sizeof(uint32_t) * tot;
  522. if (ftsp->flags & LWSFTS_F_QUERY_QUOTE_LINE)
  523. /* pointer to quote string */
  524. footprint += sizeof(void *) * tot;
  525. }
  526. fp = lwsac_use(&ftsp->results_head, footprint, 0);
  527. if (!fp) {
  528. lwsac_free(&lt_head);
  529. goto bail;
  530. }
  531. fp->filepath_length = fplen;
  532. fp->lines_in_file = lines;
  533. fp->matches = tot;
  534. fp->matches_length = footprint - sizeof(*fp) - (fplen + 1);
  535. fp->next = result->filepath_head;
  536. result->filepath_head = fp;
  537. /* line table first so it can be aligned */
  538. u = (uint32_t*)(fp + 1);
  539. if (ftsp->flags & LWSFTS_F_QUERY_FILE_LINES) {
  540. /* for each line number */
  541. for (n = 0; (uint32_t)n < tot; n++) {
  542. unsigned char lbuf[256], *p;
  543. char ebuf[384];
  544. const char **v;
  545. int m;
  546. if ((ra - bp) < 8) {
  547. base += bp;
  548. grab(ro + base, sizeof(buf));
  549. }
  550. bp += rq32(&buf[bp], &line);
  551. *u++ = line;
  552. if (lws_fts_getfileoffset(jtf, ltst, line, &fo))
  553. continue;
  554. *u++ = (uint32_t)fo;
  555. if (!(ftsp->flags & LWSFTS_F_QUERY_QUOTE_LINE))
  556. continue;
  557. if (lseek(ofd, fo, SEEK_SET) < 0)
  558. continue;
  559. m = read(ofd, lbuf, sizeof(lbuf) - 1);
  560. if (m < 0)
  561. continue;
  562. lbuf[sizeof(lbuf) - 1] = '\0';
  563. p = (unsigned char *)strchr((char *)lbuf, '\n');
  564. if (p)
  565. m = lws_ptr_diff(p, lbuf);
  566. lbuf[m] = '\0';
  567. p = (unsigned char *)strchr((char *)lbuf, '\r');
  568. if (p)
  569. m = lws_ptr_diff(p, lbuf);
  570. lbuf[m] = '\0';
  571. lws_json_purify(ebuf, (const char *)lbuf,
  572. sizeof(ebuf) - 1, NULL);
  573. m = (int)strlen(ebuf);
  574. p = lwsac_use(&ftsp->results_head, m + 1, 0);
  575. if (!p) {
  576. lwsac_free(&lt_head);
  577. goto bail;
  578. }
  579. memcpy(p, ebuf, m);
  580. p[m] = '\0';
  581. v = (const char **)u;
  582. *v = (const char *)p;
  583. u += sizeof(const char *) / sizeof(uint32_t);
  584. }
  585. }
  586. pp = ((char *)&fp[1]) + fp->matches_length;
  587. memcpy(pp, path, fplen);
  588. pp[fplen] = '\0';
  589. if (ofd >= 0) {
  590. close(ofd);
  591. ofd = -1;
  592. }
  593. lwsac_free(&lt_head);
  594. if (ftsp->only_filepath)
  595. break;
  596. } while (o);
  597. /* sort the instance file list by results density */
  598. do {
  599. struct lws_fts_result_filepath **prf, *rf1, *rf2;
  600. stasis = 1;
  601. /* bubble sort keeps going until nothing changed */
  602. prf = &result->filepath_head;
  603. while (*prf) {
  604. rf1 = *prf;
  605. rf2 = rf1->next;
  606. if (rf2 && rf1->lines_in_file && rf2->lines_in_file &&
  607. ((rf1->matches * 1000) / rf1->lines_in_file) <
  608. ((rf2->matches * 1000) / rf2->lines_in_file)) {
  609. stasis = 0;
  610. *prf = rf2;
  611. rf1->next = rf2->next;
  612. rf2->next = rf1;
  613. }
  614. prf = &(*prf)->next;
  615. }
  616. } while (!stasis);
  617. autocomp:
  618. if (!(ftsp->flags & LWSFTS_F_QUERY_AUTOCOMPLETE) || nac)
  619. return result;
  620. /*
  621. * autocomplete (ie, the descendent paths that yield the most hits)
  622. *
  623. * We actually need to spider the earliest terminal descendents from
  624. * the child we definitely got past, and present the first n terminal
  625. * strings. The descendents are already sorted in order of highest
  626. * aggregated hits in their descendents first, so simply collecting n
  627. * earliest leaf children is enough.
  628. *
  629. * The leaf children may be quite deep down in a stack however. So we
  630. * have to go through all the walking motions collecting and retaining
  631. * child into for when we come back up the walk.
  632. *
  633. * We can completely ignore file instances for this, we just need the
  634. * earliest children. And we can restrict how many children we stash
  635. * in each stack level to eg, 5.
  636. *
  637. * child_ofs comes in pointing at the start of the trie entry that is
  638. * to be the starting point for making suggestions.
  639. */
  640. budget = ftsp->max_autocomplete;
  641. base = 0;
  642. bp = 0;
  643. pac = &result->autocomplete_head;
  644. sp = 0;
  645. if (pos > (int)sizeof(s[sp].ch[0].name) - 1)
  646. pos = (int)sizeof(s[sp].ch[0].name) - 1;
  647. memset(&s[sp], 0, sizeof(s[sp]));
  648. s[sp].child = 1;
  649. s[sp].tifs = fileofs_tif_start;
  650. s[sp].self = child_ofs;
  651. s[sp].ch[0].effpos = pos;
  652. if (pos == nl)
  653. n = ac_record(jtf, &ftsp->results_head, needle, pos, s, 0,
  654. instances, agg_instances, children, &pac);
  655. while (sp >= 0 && budget) {
  656. int nobump = 0;
  657. struct ch *tch = &s[sp].ch[s[sp].child - 1];
  658. grab(child_ofs, sizeof(buf));
  659. bp += rq32(&buf[bp], &fileofs_tif_start);
  660. bp += rq32(&buf[bp], &children);
  661. bp += rq32(&buf[bp], &instances);
  662. bp += rq32(&buf[bp], &agg_instances);
  663. if (sp > 0 && s[sp - 1].done_children &&
  664. tch->effpos + tch->name_length >= nl &&
  665. tch->inst && fileofs_tif_start) {
  666. n = ac_record(jtf, &ftsp->results_head, needle, pos, s,
  667. sp, tch->inst, tch->child_agg,
  668. tch->descendents, &pac);
  669. if (n < 0)
  670. goto bail;
  671. if (!n)
  672. if (--budget == 0)
  673. break;
  674. }
  675. if (!s[sp].done_children && children) {
  676. s[sp].done_children = 1;
  677. sp++;
  678. memset(&s[sp], 0, sizeof(s[sp]));
  679. s[sp].tifs = fileofs_tif_start;
  680. s[sp].self = child_ofs;
  681. for (n = 0; n < (int)children && s[sp].child_count <
  682. (int)LWS_ARRAY_SIZE(s[0].ch); n++) {
  683. uint32_t slen, cho, agg, inst;
  684. int i = s[sp].child_count;
  685. struct ch *ch = &s[sp].ch[i];
  686. size_t max;
  687. bp += rq32(&buf[bp], &cho);
  688. bp += rq32(&buf[bp], &inst);
  689. bp += rq32(&buf[bp], &agg);
  690. bp += rq32(&buf[bp], &desc);
  691. bp += rq32(&buf[bp], &slen);
  692. max = slen;
  693. if (max > sizeof(ch->name) - 1)
  694. max = sizeof(ch->name) - 1;
  695. strncpy(ch->name, (char *)&buf[bp], max);
  696. bp += slen;
  697. ch->name_length = (int)max;
  698. ch->name[sizeof(ch->name) - 1] = '\0';
  699. ch->inst = inst;
  700. ch->effpos =
  701. s[sp - 1].ch[s[sp - 1].child - 1].effpos;
  702. ch->child_agg = agg;
  703. ch->descendents = desc;
  704. /*
  705. * if we have more needle chars than we matched
  706. * to get this far, we can only allow potential
  707. * matches that are consistent with the
  708. * additional unmatched character(s)...
  709. */
  710. m = nl - ch->effpos;
  711. if (m > ch->name_length)
  712. m = ch->name_length;
  713. if (m > 0 &&
  714. strncmp(&needle[ch->effpos], ch->name, m))
  715. continue;
  716. ch->effpos += m;
  717. s[sp].ch[s[sp].child_count++].ofs = cho;
  718. }
  719. }
  720. while (sp >= 0 && s[sp].child >= s[sp].child_count) {
  721. s[sp].done_children = 0;
  722. sp--;
  723. }
  724. /*
  725. * Compare parent remaining agg vs parent's next siblings' still
  726. * intact original agg... if the next sibling has more, abandon
  727. * the parent path and go with the sibling... this keeps the
  728. * autocomplete results related to popularity.
  729. */
  730. nobump = 0;
  731. n = sp - 1;
  732. while (n >= 0) {
  733. struct lws_fts_result_autocomplete *ac =
  734. (struct lws_fts_result_autocomplete *)pac;
  735. if (s[n].child < s[n].child_count &&
  736. s[n].ch[s[n].child - 1].child_agg <
  737. s[n].ch[s[n].child].child_agg) {
  738. if (pac)
  739. /*
  740. * mark the autocomplete result that
  741. * there were more children down his
  742. * path that we skipped in these results
  743. */
  744. ac->elided = 1;
  745. for (m = n; m < sp + 1; m++)
  746. s[m].done_children = 0;
  747. sp = n;
  748. child_ofs = s[sp].ch[s[sp].child++].ofs;
  749. nobump = 1;
  750. }
  751. n--;
  752. }
  753. if (nobump || sp < 0)
  754. continue;
  755. child_ofs = s[sp].ch[s[sp].child++].ofs;
  756. }
  757. /* let's do a final sort into agg order */
  758. do {
  759. struct lws_fts_result_autocomplete *ac1, *ac2;
  760. stasis = 1;
  761. /* bubble sort keeps going until nothing changed */
  762. pac = &result->autocomplete_head;
  763. while (*pac) {
  764. ac1 = *pac;
  765. ac2 = ac1->next;
  766. if (ac2 && ac1->instances < ac2->instances) {
  767. stasis = 0;
  768. *pac = ac2;
  769. ac1->next = ac2->next;
  770. ac2->next = ac1;
  771. }
  772. pac = &(*pac)->next;
  773. }
  774. } while (!stasis);
  775. return result;
  776. bail:
  777. if (ofd >= 0)
  778. close(ofd);
  779. lwsl_info("%s: search ended up at bail\n", __func__);
  780. return result;
  781. }