tree.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. /*-
  26. * This is a new directory-walking system that addresses a number
  27. * of problems I've had with fts(3). In particular, it has no
  28. * pathname-length limits (other than the size of 'int'), handles
  29. * deep logical traversals, uses considerably less memory, and has
  30. * an opaque interface (easier to modify in the future).
  31. *
  32. * Internally, it keeps a single list of "tree_entry" items that
  33. * represent filesystem objects that require further attention.
  34. * Non-directories are not kept in memory: they are pulled from
  35. * readdir(), returned to the client, then freed as soon as possible.
  36. * Any directory entry to be traversed gets pushed onto the stack.
  37. *
  38. * There is surprisingly little information that needs to be kept for
  39. * each item on the stack. Just the name, depth (represented here as the
  40. * string length of the parent directory's pathname), and some markers
  41. * indicating how to get back to the parent (via chdir("..") for a
  42. * regular dir or via fchdir(2) for a symlink).
  43. */
  44. #include "tree_config.h"
  45. #ifdef HAVE_SYS_STAT_H
  46. #include <sys/stat.h>
  47. #endif
  48. #ifdef HAVE_DIRENT_H
  49. #include <dirent.h>
  50. #endif
  51. #ifdef HAVE_ERRNO_H
  52. #include <errno.h>
  53. #endif
  54. #ifdef HAVE_FCNTL_H
  55. #include <fcntl.h>
  56. #endif
  57. #ifdef HAVE_STDLIB_H
  58. #include <stdlib.h>
  59. #endif
  60. #ifdef HAVE_STRING_H
  61. #include <string.h>
  62. #endif
  63. #ifdef HAVE_UNISTD_H
  64. #include <unistd.h>
  65. #endif
  66. #include "tree.h"
  67. /*
  68. * TODO:
  69. * 1) Loop checking.
  70. * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
  71. */
  72. struct tree_entry {
  73. struct tree_entry *next;
  74. struct tree_entry *parent;
  75. char *name;
  76. size_t dirname_length;
  77. dev_t dev;
  78. ino_t ino;
  79. int fd;
  80. int flags;
  81. };
  82. /* Definitions for tree_entry.flags bitmap. */
  83. #define isDir 1 /* This entry is a regular directory. */
  84. #define isDirLink 2 /* This entry is a symbolic link to a directory. */
  85. #define needsPreVisit 4 /* This entry needs to be previsited. */
  86. #define needsPostVisit 8 /* This entry needs to be postvisited. */
  87. /*
  88. * Local data for this package.
  89. */
  90. struct tree {
  91. struct tree_entry *stack;
  92. struct tree_entry *current;
  93. DIR *d;
  94. int initialDirFd;
  95. int flags;
  96. int visit_type;
  97. int tree_errno; /* Error code from last failed operation. */
  98. char *buff;
  99. const char *basename;
  100. size_t buff_length;
  101. size_t path_length;
  102. size_t dirname_length;
  103. int depth;
  104. int openCount;
  105. int maxOpenCount;
  106. struct stat lst;
  107. struct stat st;
  108. };
  109. /* Definitions for tree.flags bitmap. */
  110. #define needsReturn 8 /* Marks first entry as not having been returned yet. */
  111. #define hasStat 16 /* The st entry is set. */
  112. #define hasLstat 32 /* The lst entry is set. */
  113. #ifdef HAVE_DIRENT_D_NAMLEN
  114. /* BSD extension; avoids need for a strlen() call. */
  115. #define D_NAMELEN(dp) (dp)->d_namlen
  116. #else
  117. #define D_NAMELEN(dp) (strlen((dp)->d_name))
  118. #endif
  119. #if 0
  120. #include <stdio.h>
  121. void
  122. tree_dump(struct tree *t, FILE *out)
  123. {
  124. struct tree_entry *te;
  125. fprintf(out, "\tdepth: %d\n", t->depth);
  126. fprintf(out, "\tbuff: %s\n", t->buff);
  127. fprintf(out, "\tpwd: "); fflush(stdout); system("pwd");
  128. fprintf(out, "\taccess: %s\n", t->basename);
  129. fprintf(out, "\tstack:\n");
  130. for (te = t->stack; te != NULL; te = te->next) {
  131. fprintf(out, "\t\tte->name: %s%s%s\n", te->name,
  132. te->flags & needsPreVisit ? "" : " *",
  133. t->current == te ? " (current)" : "");
  134. }
  135. }
  136. #endif
  137. /*
  138. * Add a directory path to the current stack.
  139. */
  140. static void
  141. tree_push(struct tree *t, const char *path)
  142. {
  143. struct tree_entry *te;
  144. te = malloc(sizeof(*te));
  145. memset(te, 0, sizeof(*te));
  146. te->next = t->stack;
  147. t->stack = te;
  148. te->fd = -1;
  149. te->name = strdup(path);
  150. te->flags = needsPreVisit | needsPostVisit;
  151. te->dirname_length = t->dirname_length;
  152. }
  153. /*
  154. * Append a name to the current path.
  155. */
  156. static void
  157. tree_append(struct tree *t, const char *name, size_t name_length)
  158. {
  159. char *p;
  160. if (t->buff != NULL)
  161. t->buff[t->dirname_length] = '\0';
  162. /* Strip trailing '/' from name, unless entire name is "/". */
  163. while (name_length > 1 && name[name_length - 1] == '/')
  164. name_length--;
  165. /* Resize pathname buffer as needed. */
  166. while (name_length + 1 + t->dirname_length >= t->buff_length) {
  167. t->buff_length *= 2;
  168. if (t->buff_length < 1024)
  169. t->buff_length = 1024;
  170. t->buff = realloc(t->buff, t->buff_length);
  171. }
  172. p = t->buff + t->dirname_length;
  173. t->path_length = t->dirname_length + name_length;
  174. /* Add a separating '/' if it's needed. */
  175. if (t->dirname_length > 0 && p[-1] != '/') {
  176. *p++ = '/';
  177. t->path_length ++;
  178. }
  179. strncpy(p, name, name_length);
  180. p[name_length] = '\0';
  181. t->basename = p;
  182. }
  183. /*
  184. * Open a directory tree for traversal.
  185. */
  186. struct tree *
  187. tree_open(const char *path)
  188. {
  189. struct tree *t;
  190. t = malloc(sizeof(*t));
  191. memset(t, 0, sizeof(*t));
  192. tree_append(t, path, strlen(path));
  193. t->initialDirFd = open(".", O_RDONLY);
  194. /*
  195. * During most of the traversal, items are set up and then
  196. * returned immediately from tree_next(). That doesn't work
  197. * for the very first entry, so we set a flag for this special
  198. * case.
  199. */
  200. t->flags = needsReturn;
  201. return (t);
  202. }
  203. /*
  204. * We've finished a directory; ascend back to the parent.
  205. */
  206. static void
  207. tree_ascend(struct tree *t)
  208. {
  209. struct tree_entry *te;
  210. te = t->stack;
  211. t->depth--;
  212. if (te->flags & isDirLink) {
  213. fchdir(te->fd);
  214. close(te->fd);
  215. t->openCount--;
  216. } else {
  217. chdir("..");
  218. }
  219. }
  220. /*
  221. * Pop the working stack.
  222. */
  223. static void
  224. tree_pop(struct tree *t)
  225. {
  226. struct tree_entry *te;
  227. t->buff[t->dirname_length] = '\0';
  228. if (t->stack == t->current && t->current != NULL)
  229. t->current = t->current->parent;
  230. te = t->stack;
  231. t->stack = te->next;
  232. t->dirname_length = te->dirname_length;
  233. t->basename = t->buff + t->dirname_length;
  234. /* Special case: starting dir doesn't skip leading '/'. */
  235. if (t->dirname_length > 0)
  236. t->basename++;
  237. free(te->name);
  238. free(te);
  239. }
  240. /*
  241. * Get the next item in the tree traversal.
  242. */
  243. int
  244. tree_next(struct tree *t)
  245. {
  246. struct dirent *de = NULL;
  247. /* Handle the startup case by returning the initial entry. */
  248. if (t->flags & needsReturn) {
  249. t->flags &= ~needsReturn;
  250. return (t->visit_type = TREE_REGULAR);
  251. }
  252. while (t->stack != NULL) {
  253. /* If there's an open dir, get the next entry from there. */
  254. while (t->d != NULL) {
  255. de = readdir(t->d);
  256. if (de == NULL) {
  257. closedir(t->d);
  258. t->d = NULL;
  259. } else if (de->d_name[0] == '.'
  260. && de->d_name[1] == '\0') {
  261. /* Skip '.' */
  262. } else if (de->d_name[0] == '.'
  263. && de->d_name[1] == '.'
  264. && de->d_name[2] == '\0') {
  265. /* Skip '..' */
  266. } else {
  267. /*
  268. * Append the path to the current path
  269. * and return it.
  270. */
  271. tree_append(t, de->d_name, D_NAMELEN(de));
  272. t->flags &= ~hasLstat;
  273. t->flags &= ~hasStat;
  274. return (t->visit_type = TREE_REGULAR);
  275. }
  276. }
  277. /* If the current dir needs to be visited, set it up. */
  278. if (t->stack->flags & needsPreVisit) {
  279. t->current = t->stack;
  280. tree_append(t, t->stack->name, strlen(t->stack->name));
  281. t->stack->flags &= ~needsPreVisit;
  282. /* If it is a link, set up fd for the ascent. */
  283. if (t->stack->flags & isDirLink) {
  284. t->stack->fd = open(".", O_RDONLY);
  285. t->openCount++;
  286. if (t->openCount > t->maxOpenCount)
  287. t->maxOpenCount = t->openCount;
  288. }
  289. t->dirname_length = t->path_length;
  290. if (chdir(t->stack->name) != 0) {
  291. /* chdir() failed; return error */
  292. tree_pop(t);
  293. t->tree_errno = errno;
  294. return (t->visit_type = TREE_ERROR_DIR);
  295. }
  296. t->depth++;
  297. t->d = opendir(".");
  298. if (t->d == NULL) {
  299. tree_ascend(t); /* Undo "chdir" */
  300. tree_pop(t);
  301. t->tree_errno = errno;
  302. return (t->visit_type = TREE_ERROR_DIR);
  303. }
  304. t->flags &= ~hasLstat;
  305. t->flags &= ~hasStat;
  306. t->basename = ".";
  307. return (t->visit_type = TREE_POSTDESCENT);
  308. }
  309. /* We've done everything necessary for the top stack entry. */
  310. if (t->stack->flags & needsPostVisit) {
  311. tree_ascend(t);
  312. tree_pop(t);
  313. t->flags &= ~hasLstat;
  314. t->flags &= ~hasStat;
  315. return (t->visit_type = TREE_POSTASCENT);
  316. }
  317. }
  318. return (t->visit_type = 0);
  319. }
  320. /*
  321. * Return error code.
  322. */
  323. int
  324. tree_errno(struct tree *t)
  325. {
  326. return (t->tree_errno);
  327. }
  328. /*
  329. * Called by the client to mark the directory just returned from
  330. * tree_next() as needing to be visited.
  331. */
  332. void
  333. tree_descend(struct tree *t)
  334. {
  335. if (t->visit_type != TREE_REGULAR)
  336. return;
  337. if (tree_current_is_physical_dir(t)) {
  338. tree_push(t, t->basename);
  339. t->stack->flags |= isDir;
  340. } else if (tree_current_is_dir(t)) {
  341. tree_push(t, t->basename);
  342. t->stack->flags |= isDirLink;
  343. }
  344. }
  345. /*
  346. * Get the stat() data for the entry just returned from tree_next().
  347. */
  348. const struct stat *
  349. tree_current_stat(struct tree *t)
  350. {
  351. if (!(t->flags & hasStat)) {
  352. if (stat(t->basename, &t->st) != 0)
  353. return NULL;
  354. t->flags |= hasStat;
  355. }
  356. return (&t->st);
  357. }
  358. /*
  359. * Get the lstat() data for the entry just returned from tree_next().
  360. */
  361. const struct stat *
  362. tree_current_lstat(struct tree *t)
  363. {
  364. if (!(t->flags & hasLstat)) {
  365. if (lstat(t->basename, &t->lst) != 0)
  366. return NULL;
  367. t->flags |= hasLstat;
  368. }
  369. return (&t->lst);
  370. }
  371. /*
  372. * Test whether current entry is a dir or link to a dir.
  373. */
  374. int
  375. tree_current_is_dir(struct tree *t)
  376. {
  377. const struct stat *st;
  378. /*
  379. * If we already have lstat() info, then try some
  380. * cheap tests to determine if this is a dir.
  381. */
  382. if (t->flags & hasLstat) {
  383. /* If lstat() says it's a dir, it must be a dir. */
  384. if (S_ISDIR(tree_current_lstat(t)->st_mode))
  385. return 1;
  386. /* Not a dir; might be a link to a dir. */
  387. /* If it's not a link, then it's not a link to a dir. */
  388. if (!S_ISLNK(tree_current_lstat(t)->st_mode))
  389. return 0;
  390. /*
  391. * It's a link, but we don't know what it's a link to,
  392. * so we'll have to use stat().
  393. */
  394. }
  395. st = tree_current_stat(t);
  396. /* If we can't stat it, it's not a dir. */
  397. if (st == NULL)
  398. return 0;
  399. /* Use the definitive test. Hopefully this is cached. */
  400. return (S_ISDIR(st->st_mode));
  401. }
  402. /*
  403. * Test whether current entry is a physical directory. Usually, we
  404. * already have at least one of stat() or lstat() in memory, so we
  405. * use tricks to try to avoid an extra trip to the disk.
  406. */
  407. int
  408. tree_current_is_physical_dir(struct tree *t)
  409. {
  410. const struct stat *st;
  411. /*
  412. * If stat() says it isn't a dir, then it's not a dir.
  413. * If stat() data is cached, this check is free, so do it first.
  414. */
  415. if ((t->flags & hasStat)
  416. && (!S_ISDIR(tree_current_stat(t)->st_mode)))
  417. return 0;
  418. /*
  419. * Either stat() said it was a dir (in which case, we have
  420. * to determine whether it's really a link to a dir) or
  421. * stat() info wasn't available. So we use lstat(), which
  422. * hopefully is already cached.
  423. */
  424. st = tree_current_lstat(t);
  425. /* If we can't stat it, it's not a dir. */
  426. if (st == NULL)
  427. return 0;
  428. /* Use the definitive test. Hopefully this is cached. */
  429. return (S_ISDIR(st->st_mode));
  430. }
  431. /*
  432. * Test whether current entry is a symbolic link.
  433. */
  434. int
  435. tree_current_is_physical_link(struct tree *t)
  436. {
  437. const struct stat *st = tree_current_lstat(t);
  438. if (st == NULL)
  439. return 0;
  440. return (S_ISLNK(st->st_mode));
  441. }
  442. /*
  443. * Return the access path for the entry just returned from tree_next().
  444. */
  445. const char *
  446. tree_current_access_path(struct tree *t)
  447. {
  448. return (t->basename);
  449. }
  450. /*
  451. * Return the full path for the entry just returned from tree_next().
  452. */
  453. const char *
  454. tree_current_path(struct tree *t)
  455. {
  456. return (t->buff);
  457. }
  458. /*
  459. * Return the length of the path for the entry just returned from tree_next().
  460. */
  461. size_t
  462. tree_current_pathlen(struct tree *t)
  463. {
  464. return (t->path_length);
  465. }
  466. /*
  467. * Return the nesting depth of the entry just returned from tree_next().
  468. */
  469. int
  470. tree_current_depth(struct tree *t)
  471. {
  472. return (t->depth);
  473. }
  474. /*
  475. * Terminate the traversal and release any resources.
  476. */
  477. void
  478. tree_close(struct tree *t)
  479. {
  480. /* Release anything remaining in the stack. */
  481. while (t->stack != NULL)
  482. tree_pop(t);
  483. free(t->buff);
  484. /* chdir() back to where we started. */
  485. if (t->initialDirFd >= 0) {
  486. fchdir(t->initialDirFd);
  487. close(t->initialDirFd);
  488. t->initialDirFd = -1;
  489. }
  490. free(t);
  491. }