archivebox_oneshot.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. #!/usr/bin/env python3
# Pin the package name explicitly; presumably so the relative imports in this
# module (`from ..logging_util ...`, `from ..main ...`) still resolve when the
# file is executed directly as a script -- TODO confirm.
__package__ = 'archivebox.cli'
# Full command name; handed to argparse as `prog` in main().
__command__ = 'archivebox oneshot'
  4. import sys
  5. import argparse
  6. from pathlib import Path
  7. from typing import List, Optional, IO
  8. from archivebox.misc.util import docstring
  9. from archivebox.config import DATA_DIR
  10. from ..logging_util import SmartFormatter, accept_stdin, stderr
  11. from ..main import oneshot
  12. @docstring(oneshot.__doc__)
  13. def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
  14. parser = argparse.ArgumentParser(
  15. prog=__command__,
  16. description=oneshot.__doc__,
  17. add_help=True,
  18. formatter_class=SmartFormatter,
  19. )
  20. parser.add_argument(
  21. 'url',
  22. type=str,
  23. default=None,
  24. help=(
  25. 'URLs or paths to archive e.g.:\n'
  26. ' https://getpocket.com/users/USERNAME/feed/all\n'
  27. ' https://example.com/some/rss/feed.xml\n'
  28. ' https://example.com\n'
  29. ' ~/Downloads/firefox_bookmarks_export.html\n'
  30. ' ~/Desktop/sites_list.csv\n'
  31. )
  32. )
  33. parser.add_argument(
  34. "--extract",
  35. type=str,
  36. help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
  37. This does not take precedence over the configuration",
  38. default=""
  39. )
  40. parser.add_argument(
  41. '--out-dir',
  42. type=str,
  43. default=DATA_DIR,
  44. help= "Path to save the single archive folder to, e.g. ./example.com_archive"
  45. )
  46. command = parser.parse_args(args or ())
  47. stdin_url = None
  48. url = command.url
  49. if not url:
  50. stdin_url = accept_stdin(stdin)
  51. if (stdin_url and url) or (not stdin and not url):
  52. stderr(
  53. '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
  54. color='red',
  55. )
  56. raise SystemExit(2)
  57. oneshot(
  58. url=stdin_url or url,
  59. out_dir=Path(command.out_dir).resolve(),
  60. extractors=command.extract,
  61. )
  62. if __name__ == '__main__':
  63. main(args=sys.argv[1:], stdin=sys.stdin)