untar.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. /*
  2. * This file is in the public domain.
  3. * Use it as you wish.
  4. */
  5. /*
  6. * This is a compact tar extraction program using libarchive whose
  7. * primary goal is small executable size. Statically linked, it can
  8. * be very small, depending in large part on how cleanly factored your
  9. * system libraries are. Note that this uses the standard libarchive,
  10. * without any special recompilation. The only functional concession
  11. * is that this program uses the uid/gid from the archive instead of
  12. * doing uname/gname lookups. (Add a call to
  13. * archive_write_disk_set_standard_lookup() to enable uname/gname
  14. * lookups, but be aware that this can add 500k or more to a static
  15. * executable, depending on the system libraries, since user/group
  16. * lookups frequently pull in password, YP/LDAP, networking, and DNS
  17. * resolver libraries.)
  18. *
  19. * To build:
  20. * $ gcc -static -Wall -o untar untar.c -larchive
  21. * $ strip untar
  22. *
  23. * NOTE: On some systems, you may need to add additional flags
  24. * to ensure that untar.c is compiled the same way as libarchive
  25. * was compiled. In particular, Linux users will probably
  26. * have to add -D_FILE_OFFSET_BITS=64 to the command line above.
  27. *
  28. * For fun, statically compile the following simple hello.c program
  29. * using the same flags as for untar and compare the size:
  30. *
  31. * #include <stdio.h>
  32. * int main(int argc, char **argv) {
  33. * printf("hello, world\n");
  34. * return(0);
  35. * }
  36. *
  37. * You may be even more surprised by the compiled size of true.c listed here:
  38. *
  39. * int main(int argc, char **argv) {
  40. * return (0);
  41. * }
  42. *
  43. * On a slightly customized FreeBSD 5 system that I used around
  44. * 2005, hello above compiled to 89k compared to untar of 69k. So at
  45. * that time, libarchive's tar reader and extract-to-disk routines
  46. * compiled to less code than printf().
  47. *
  48. * On my FreeBSD development system today (August, 2009):
  49. * hello: 195024 bytes
  50. * true: 194912 bytes
  51. * untar: 259924 bytes
  52. */
  53. #include <sys/types.h>
  54. #include <sys/stat.h>
  55. #include <archive.h>
  56. #include <archive_entry.h>
  57. #include <fcntl.h>
  58. #include <stdio.h>
  59. #include <stdlib.h>
  60. #include <string.h>
  61. #include <unistd.h>
  62. static void errmsg(const char *);
  63. static void extract(const char *filename, int do_extract, int flags);
  64. static void fail(const char *, const char *, int);
  65. static int copy_data(struct archive *, struct archive *);
  66. static void msg(const char *);
  67. static void usage(void);
  68. static void warn(const char *, const char *);
  69. static int verbose = 0;
  70. int
  71. main(int argc, const char **argv)
  72. {
  73. const char *filename = NULL;
  74. int compress, flags, mode, opt;
  75. (void)argc;
  76. mode = 'x';
  77. verbose = 0;
  78. compress = '\0';
  79. flags = ARCHIVE_EXTRACT_TIME;
  80. /* Among other sins, getopt(3) pulls in printf(3). */
  81. while (*++argv != NULL && **argv == '-') {
  82. const char *p = *argv + 1;
  83. while ((opt = *p++) != '\0') {
  84. switch (opt) {
  85. case 'f':
  86. if (*p != '\0')
  87. filename = p;
  88. else
  89. filename = *++argv;
  90. p += strlen(p);
  91. break;
  92. case 'p':
  93. flags |= ARCHIVE_EXTRACT_PERM;
  94. flags |= ARCHIVE_EXTRACT_ACL;
  95. flags |= ARCHIVE_EXTRACT_FFLAGS;
  96. break;
  97. case 't':
  98. mode = opt;
  99. break;
  100. case 'v':
  101. verbose++;
  102. break;
  103. case 'x':
  104. mode = opt;
  105. break;
  106. default:
  107. usage();
  108. }
  109. }
  110. }
  111. switch (mode) {
  112. case 't':
  113. extract(filename, 0, flags);
  114. break;
  115. case 'x':
  116. extract(filename, 1, flags);
  117. break;
  118. }
  119. return (0);
  120. }
  121. static void
  122. extract(const char *filename, int do_extract, int flags)
  123. {
  124. struct archive *a;
  125. struct archive *ext;
  126. struct archive_entry *entry;
  127. int r;
  128. a = archive_read_new();
  129. ext = archive_write_disk_new();
  130. archive_write_disk_set_options(ext, flags);
  131. /*
  132. * Note: archive_write_disk_set_standard_lookup() is useful
  133. * here, but it requires library routines that can add 500k or
  134. * more to a static executable.
  135. */
  136. archive_read_support_format_tar(a);
  137. /*
  138. * On my system, enabling other archive formats adds 20k-30k
  139. * each. Enabling gzip decompression adds about 20k.
  140. * Enabling bzip2 is more expensive because the libbz2 library
  141. * isn't very well factored.
  142. */
  143. if (filename != NULL && strcmp(filename, "-") == 0)
  144. filename = NULL;
  145. if ((r = archive_read_open_filename(a, filename, 10240)))
  146. fail("archive_read_open_filename()",
  147. archive_error_string(a), r);
  148. for (;;) {
  149. r = archive_read_next_header(a, &entry);
  150. if (r == ARCHIVE_EOF)
  151. break;
  152. if (r != ARCHIVE_OK)
  153. fail("archive_read_next_header()",
  154. archive_error_string(a), 1);
  155. if (verbose && do_extract)
  156. msg("x ");
  157. if (verbose || !do_extract)
  158. msg(archive_entry_pathname(entry));
  159. if (do_extract) {
  160. r = archive_write_header(ext, entry);
  161. if (r != ARCHIVE_OK)
  162. warn("archive_write_header()",
  163. archive_error_string(ext));
  164. else {
  165. copy_data(a, ext);
  166. r = archive_write_finish_entry(ext);
  167. if (r != ARCHIVE_OK)
  168. fail("archive_write_finish_entry()",
  169. archive_error_string(ext), 1);
  170. }
  171. }
  172. if (verbose || !do_extract)
  173. msg("\n");
  174. }
  175. archive_read_close(a);
  176. archive_read_free(a);
  177. archive_write_close(ext);
  178. archive_write_free(ext);
  179. exit(0);
  180. }
  181. static int
  182. copy_data(struct archive *ar, struct archive *aw)
  183. {
  184. int r;
  185. const void *buff;
  186. size_t size;
  187. #if ARCHIVE_VERSION_NUMBER >= 3000000
  188. int64_t offset;
  189. #else
  190. off_t offset;
  191. #endif
  192. for (;;) {
  193. r = archive_read_data_block(ar, &buff, &size, &offset);
  194. if (r == ARCHIVE_EOF)
  195. return (ARCHIVE_OK);
  196. if (r != ARCHIVE_OK)
  197. return (r);
  198. r = archive_write_data_block(aw, buff, size, offset);
  199. if (r != ARCHIVE_OK) {
  200. warn("archive_write_data_block()",
  201. archive_error_string(aw));
  202. return (r);
  203. }
  204. }
  205. }
  206. /*
  207. * These reporting functions use low-level I/O; on some systems, this
  208. * is a significant code reduction. Of course, on many server and
  209. * desktop operating systems, malloc() and even crt rely on printf(),
  210. * which in turn pulls in most of the rest of stdio, so this is not an
  211. * optimization at all there. (If you're going to pay 100k or more
  212. * for printf() anyway, you may as well use it!)
  213. */
  214. static void
  215. msg(const char *m)
  216. {
  217. write(1, m, strlen(m));
  218. }
  219. static void
  220. errmsg(const char *m)
  221. {
  222. write(2, m, strlen(m));
  223. }
  224. static void
  225. warn(const char *f, const char *m)
  226. {
  227. errmsg(f);
  228. errmsg(" failed: ");
  229. errmsg(m);
  230. errmsg("\n");
  231. }
  232. static void
  233. fail(const char *f, const char *m, int r)
  234. {
  235. warn(f, m);
  236. exit(r);
  237. }
  238. static void
  239. usage(void)
  240. {
  241. const char *m = "Usage: untar [-tvx] [-f file] [file]\n";
  242. errmsg(m);
  243. exit(1);
  244. }