tree.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. /*-
  2. * Copyright (c) 2003-2007 Tim Kientzle
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. /*-
  26. * This is a new directory-walking system that addresses a number
  27. * of problems I've had with fts(3). In particular, it has no
  28. * pathname-length limits (other than the size of 'int'), handles
  29. * deep logical traversals, uses considerably less memory, and has
  30. * an opaque interface (easier to modify in the future).
  31. *
  32. * Internally, it keeps a single list of "tree_entry" items that
  33. * represent filesystem objects that require further attention.
  34. * Non-directories are not kept in memory: they are pulled from
  35. * readdir(), returned to the client, then freed as soon as possible.
  36. * Any directory entry to be traversed gets pushed onto the stack.
  37. *
  38. * There is surprisingly little information that needs to be kept for
  39. * each item on the stack. Just the name, depth (represented here as the
  40. * string length of the parent directory's pathname), and some markers
  41. * indicating how to get back to the parent (via chdir("..") for a
  42. * regular dir or via fchdir(2) for a symlink).
  43. */
  44. #include "tree_config.h"
  45. __FBSDID("$FreeBSD$");
  46. #ifdef HAVE_SYS_STAT_H
  47. #include <sys/stat.h>
  48. #endif
  49. #ifdef HAVE_DIRENT_H
  50. #include <dirent.h>
  51. #endif
  52. #ifdef HAVE_ERRNO_H
  53. #include <errno.h>
  54. #endif
  55. #ifdef HAVE_FCNTL_H
  56. #include <fcntl.h>
  57. #endif
  58. #ifdef HAVE_STDLIB_H
  59. #include <stdlib.h>
  60. #endif
  61. #ifdef HAVE_STRING_H
  62. #include <string.h>
  63. #endif
  64. #ifdef HAVE_UNISTD_H
  65. #include <unistd.h>
  66. #endif
  67. #include "tree.h"
  68. /*
  69. * TODO:
  70. * 1) Loop checking.
  71. * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
  72. */
  73. struct tree_entry {
  74. struct tree_entry *next;
  75. struct tree_entry *parent;
  76. char *name;
  77. size_t dirname_length;
  78. dev_t dev;
  79. ino_t ino;
  80. int fd;
  81. int flags;
  82. };
  83. /* Definitions for tree_entry.flags bitmap. */
  84. #define isDir 1 /* This entry is a regular directory. */
  85. #define isDirLink 2 /* This entry is a symbolic link to a directory. */
  86. #define needsPreVisit 4 /* This entry needs to be previsited. */
  87. #define needsPostVisit 8 /* This entry needs to be postvisited. */
  88. /*
  89. * Local data for this package.
  90. */
  91. struct tree {
  92. struct tree_entry *stack;
  93. struct tree_entry *current;
  94. DIR *d;
  95. int initialDirFd;
  96. int flags;
  97. int visit_type;
  98. int tree_errno; /* Error code from last failed operation. */
  99. char *buff;
  100. const char *basename;
  101. size_t buff_length;
  102. size_t path_length;
  103. size_t dirname_length;
  104. int depth;
  105. int openCount;
  106. int maxOpenCount;
  107. struct stat lst;
  108. struct stat st;
  109. };
  110. /* Definitions for tree.flags bitmap. */
  111. #define needsReturn 8 /* Marks first entry as not having been returned yet. */
  112. #define hasStat 16 /* The st entry is set. */
  113. #define hasLstat 32 /* The lst entry is set. */
  114. #ifdef HAVE_DIRENT_D_NAMLEN
  115. /* BSD extension; avoids need for a strlen() call. */
  116. #define D_NAMELEN(dp) (dp)->d_namlen
  117. #else
  118. #define D_NAMELEN(dp) (strlen((dp)->d_name))
  119. #endif
  120. #if 0
  121. #include <stdio.h>
  122. void
  123. tree_dump(struct tree *t, FILE *out)
  124. {
  125. struct tree_entry *te;
  126. fprintf(out, "\tdepth: %d\n", t->depth);
  127. fprintf(out, "\tbuff: %s\n", t->buff);
  128. fprintf(out, "\tpwd: "); fflush(stdout); system("pwd");
  129. fprintf(out, "\taccess: %s\n", t->basename);
  130. fprintf(out, "\tstack:\n");
  131. for (te = t->stack; te != NULL; te = te->next) {
  132. fprintf(out, "\t\tte->name: %s%s%s\n", te->name,
  133. te->flags & needsPreVisit ? "" : " *",
  134. t->current == te ? " (current)" : "");
  135. }
  136. }
  137. #endif
  138. /*
  139. * Add a directory path to the current stack.
  140. */
  141. static void
  142. tree_push(struct tree *t, const char *path)
  143. {
  144. struct tree_entry *te;
  145. te = malloc(sizeof(*te));
  146. memset(te, 0, sizeof(*te));
  147. te->next = t->stack;
  148. t->stack = te;
  149. te->fd = -1;
  150. te->name = strdup(path);
  151. te->flags = needsPreVisit | needsPostVisit;
  152. te->dirname_length = t->dirname_length;
  153. }
  154. /*
  155. * Append a name to the current path.
  156. */
  157. static void
  158. tree_append(struct tree *t, const char *name, size_t name_length)
  159. {
  160. char *p;
  161. if (t->buff != NULL)
  162. t->buff[t->dirname_length] = '\0';
  163. /* Strip trailing '/' from name, unless entire name is "/". */
  164. while (name_length > 1 && name[name_length - 1] == '/')
  165. name_length--;
  166. /* Resize pathname buffer as needed. */
  167. while (name_length + 1 + t->dirname_length >= t->buff_length) {
  168. t->buff_length *= 2;
  169. if (t->buff_length < 1024)
  170. t->buff_length = 1024;
  171. t->buff = realloc(t->buff, t->buff_length);
  172. }
  173. p = t->buff + t->dirname_length;
  174. t->path_length = t->dirname_length + name_length;
  175. /* Add a separating '/' if it's needed. */
  176. if (t->dirname_length > 0 && p[-1] != '/') {
  177. *p++ = '/';
  178. t->path_length ++;
  179. }
  180. strncpy(p, name, name_length);
  181. p[name_length] = '\0';
  182. t->basename = p;
  183. }
  184. /*
  185. * Open a directory tree for traversal.
  186. */
  187. struct tree *
  188. tree_open(const char *path)
  189. {
  190. struct tree *t;
  191. t = malloc(sizeof(*t));
  192. memset(t, 0, sizeof(*t));
  193. tree_append(t, path, strlen(path));
  194. t->initialDirFd = open(".", O_RDONLY);
  195. /*
  196. * During most of the traversal, items are set up and then
  197. * returned immediately from tree_next(). That doesn't work
  198. * for the very first entry, so we set a flag for this special
  199. * case.
  200. */
  201. t->flags = needsReturn;
  202. return (t);
  203. }
  204. /*
  205. * We've finished a directory; ascend back to the parent.
  206. */
  207. static void
  208. tree_ascend(struct tree *t)
  209. {
  210. struct tree_entry *te;
  211. te = t->stack;
  212. t->depth--;
  213. if (te->flags & isDirLink) {
  214. fchdir(te->fd);
  215. close(te->fd);
  216. t->openCount--;
  217. } else {
  218. chdir("..");
  219. }
  220. }
  221. /*
  222. * Pop the working stack.
  223. */
  224. static void
  225. tree_pop(struct tree *t)
  226. {
  227. struct tree_entry *te;
  228. t->buff[t->dirname_length] = '\0';
  229. if (t->stack == t->current && t->current != NULL)
  230. t->current = t->current->parent;
  231. te = t->stack;
  232. t->stack = te->next;
  233. t->dirname_length = te->dirname_length;
  234. t->basename = t->buff + t->dirname_length;
  235. /* Special case: starting dir doesn't skip leading '/'. */
  236. if (t->dirname_length > 0)
  237. t->basename++;
  238. free(te->name);
  239. free(te);
  240. }
  241. /*
  242. * Get the next item in the tree traversal.
  243. */
  244. int
  245. tree_next(struct tree *t)
  246. {
  247. struct dirent *de = NULL;
  248. /* Handle the startup case by returning the initial entry. */
  249. if (t->flags & needsReturn) {
  250. t->flags &= ~needsReturn;
  251. return (t->visit_type = TREE_REGULAR);
  252. }
  253. while (t->stack != NULL) {
  254. /* If there's an open dir, get the next entry from there. */
  255. while (t->d != NULL) {
  256. de = readdir(t->d);
  257. if (de == NULL) {
  258. closedir(t->d);
  259. t->d = NULL;
  260. } else if (de->d_name[0] == '.'
  261. && de->d_name[1] == '\0') {
  262. /* Skip '.' */
  263. } else if (de->d_name[0] == '.'
  264. && de->d_name[1] == '.'
  265. && de->d_name[2] == '\0') {
  266. /* Skip '..' */
  267. } else {
  268. /*
  269. * Append the path to the current path
  270. * and return it.
  271. */
  272. tree_append(t, de->d_name, D_NAMELEN(de));
  273. t->flags &= ~hasLstat;
  274. t->flags &= ~hasStat;
  275. return (t->visit_type = TREE_REGULAR);
  276. }
  277. }
  278. /* If the current dir needs to be visited, set it up. */
  279. if (t->stack->flags & needsPreVisit) {
  280. t->current = t->stack;
  281. tree_append(t, t->stack->name, strlen(t->stack->name));
  282. t->stack->flags &= ~needsPreVisit;
  283. /* If it is a link, set up fd for the ascent. */
  284. if (t->stack->flags & isDirLink) {
  285. t->stack->fd = open(".", O_RDONLY);
  286. t->openCount++;
  287. if (t->openCount > t->maxOpenCount)
  288. t->maxOpenCount = t->openCount;
  289. }
  290. t->dirname_length = t->path_length;
  291. if (chdir(t->stack->name) != 0) {
  292. /* chdir() failed; return error */
  293. tree_pop(t);
  294. t->tree_errno = errno;
  295. return (t->visit_type = TREE_ERROR_DIR);
  296. }
  297. t->depth++;
  298. t->d = opendir(".");
  299. if (t->d == NULL) {
  300. tree_ascend(t); /* Undo "chdir" */
  301. tree_pop(t);
  302. t->tree_errno = errno;
  303. return (t->visit_type = TREE_ERROR_DIR);
  304. }
  305. t->flags &= ~hasLstat;
  306. t->flags &= ~hasStat;
  307. t->basename = ".";
  308. return (t->visit_type = TREE_POSTDESCENT);
  309. }
  310. /* We've done everything necessary for the top stack entry. */
  311. if (t->stack->flags & needsPostVisit) {
  312. tree_ascend(t);
  313. tree_pop(t);
  314. t->flags &= ~hasLstat;
  315. t->flags &= ~hasStat;
  316. return (t->visit_type = TREE_POSTASCENT);
  317. }
  318. }
  319. return (t->visit_type = 0);
  320. }
  321. /*
  322. * Return error code.
  323. */
  324. int
  325. tree_errno(struct tree *t)
  326. {
  327. return (t->tree_errno);
  328. }
  329. /*
  330. * Called by the client to mark the directory just returned from
  331. * tree_next() as needing to be visited.
  332. */
  333. void
  334. tree_descend(struct tree *t)
  335. {
  336. if (t->visit_type != TREE_REGULAR)
  337. return;
  338. if (tree_current_is_physical_dir(t)) {
  339. tree_push(t, t->basename);
  340. t->stack->flags |= isDir;
  341. } else if (tree_current_is_dir(t)) {
  342. tree_push(t, t->basename);
  343. t->stack->flags |= isDirLink;
  344. }
  345. }
  346. /*
  347. * Get the stat() data for the entry just returned from tree_next().
  348. */
  349. const struct stat *
  350. tree_current_stat(struct tree *t)
  351. {
  352. if (!(t->flags & hasStat)) {
  353. if (stat(t->basename, &t->st) != 0)
  354. return NULL;
  355. t->flags |= hasStat;
  356. }
  357. return (&t->st);
  358. }
  359. /*
  360. * Get the lstat() data for the entry just returned from tree_next().
  361. */
  362. const struct stat *
  363. tree_current_lstat(struct tree *t)
  364. {
  365. if (!(t->flags & hasLstat)) {
  366. if (lstat(t->basename, &t->lst) != 0)
  367. return NULL;
  368. t->flags |= hasLstat;
  369. }
  370. return (&t->lst);
  371. }
  372. /*
  373. * Test whether current entry is a dir or link to a dir.
  374. */
  375. int
  376. tree_current_is_dir(struct tree *t)
  377. {
  378. const struct stat *st;
  379. /*
  380. * If we already have lstat() info, then try some
  381. * cheap tests to determine if this is a dir.
  382. */
  383. if (t->flags & hasLstat) {
  384. /* If lstat() says it's a dir, it must be a dir. */
  385. if (S_ISDIR(tree_current_lstat(t)->st_mode))
  386. return 1;
  387. /* Not a dir; might be a link to a dir. */
  388. /* If it's not a link, then it's not a link to a dir. */
  389. if (!S_ISLNK(tree_current_lstat(t)->st_mode))
  390. return 0;
  391. /*
  392. * It's a link, but we don't know what it's a link to,
  393. * so we'll have to use stat().
  394. */
  395. }
  396. st = tree_current_stat(t);
  397. /* If we can't stat it, it's not a dir. */
  398. if (st == NULL)
  399. return 0;
  400. /* Use the definitive test. Hopefully this is cached. */
  401. return (S_ISDIR(st->st_mode));
  402. }
  403. /*
  404. * Test whether current entry is a physical directory. Usually, we
  405. * already have at least one of stat() or lstat() in memory, so we
  406. * use tricks to try to avoid an extra trip to the disk.
  407. */
  408. int
  409. tree_current_is_physical_dir(struct tree *t)
  410. {
  411. const struct stat *st;
  412. /*
  413. * If stat() says it isn't a dir, then it's not a dir.
  414. * If stat() data is cached, this check is free, so do it first.
  415. */
  416. if ((t->flags & hasStat)
  417. && (!S_ISDIR(tree_current_stat(t)->st_mode)))
  418. return 0;
  419. /*
  420. * Either stat() said it was a dir (in which case, we have
  421. * to determine whether it's really a link to a dir) or
  422. * stat() info wasn't available. So we use lstat(), which
  423. * hopefully is already cached.
  424. */
  425. st = tree_current_lstat(t);
  426. /* If we can't stat it, it's not a dir. */
  427. if (st == NULL)
  428. return 0;
  429. /* Use the definitive test. Hopefully this is cached. */
  430. return (S_ISDIR(st->st_mode));
  431. }
  432. /*
  433. * Test whether current entry is a symbolic link.
  434. */
  435. int
  436. tree_current_is_physical_link(struct tree *t)
  437. {
  438. const struct stat *st = tree_current_lstat(t);
  439. if (st == NULL)
  440. return 0;
  441. return (S_ISLNK(st->st_mode));
  442. }
  443. /*
  444. * Return the access path for the entry just returned from tree_next().
  445. */
  446. const char *
  447. tree_current_access_path(struct tree *t)
  448. {
  449. return (t->basename);
  450. }
  451. /*
  452. * Return the full path for the entry just returned from tree_next().
  453. */
  454. const char *
  455. tree_current_path(struct tree *t)
  456. {
  457. return (t->buff);
  458. }
  459. /*
  460. * Return the length of the path for the entry just returned from tree_next().
  461. */
  462. size_t
  463. tree_current_pathlen(struct tree *t)
  464. {
  465. return (t->path_length);
  466. }
  467. /*
  468. * Return the nesting depth of the entry just returned from tree_next().
  469. */
  470. int
  471. tree_current_depth(struct tree *t)
  472. {
  473. return (t->depth);
  474. }
  475. /*
  476. * Terminate the traversal and release any resources.
  477. */
  478. void
  479. tree_close(struct tree *t)
  480. {
  481. /* Release anything remaining in the stack. */
  482. while (t->stack != NULL)
  483. tree_pop(t);
  484. free(t->buff);
  485. /* chdir() back to where we started. */
  486. if (t->initialDirFd >= 0) {
  487. fchdir(t->initialDirFd);
  488. close(t->initialDirFd);
  489. t->initialDirFd = -1;
  490. }
  491. free(t);
  492. }