archivebox_oneshot.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. __command__ = 'archivebox oneshot'
  4. import sys
  5. import argparse
  6. from pathlib import Path
  7. from typing import List, Optional, IO
  8. from ..main import oneshot
  9. from ..util import docstring
  10. from ..config import OUTPUT_DIR
  11. from ..logging_util import SmartFormatter, accept_stdin, stderr
  12. @docstring(oneshot.__doc__)
  13. def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
  14. parser = argparse.ArgumentParser(
  15. prog=__command__,
  16. description=oneshot.__doc__,
  17. add_help=True,
  18. formatter_class=SmartFormatter,
  19. )
  20. parser.add_argument(
  21. 'url',
  22. type=str,
  23. default=None,
  24. help=(
  25. 'URLs or paths to archive e.g.:\n'
  26. ' https://getpocket.com/users/USERNAME/feed/all\n'
  27. ' https://example.com/some/rss/feed.xml\n'
  28. ' https://example.com\n'
  29. ' ~/Downloads/firefox_bookmarks_export.html\n'
  30. ' ~/Desktop/sites_list.csv\n'
  31. )
  32. )
  33. parser.add_argument(
  34. "--extract",
  35. type=str,
  36. help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
  37. This does not take precedence over the configuration",
  38. default=""
  39. )
  40. parser.add_argument(
  41. '--out-dir',
  42. type=str,
  43. default=OUTPUT_DIR,
  44. help= "Path to save the single archive folder to, e.g. ./example.com_archive"
  45. )
  46. command = parser.parse_args(args or ())
  47. stdin_url = None
  48. url = command.url
  49. if not url:
  50. stdin_url = accept_stdin(stdin)
  51. if (stdin_url and url) or (not stdin and not url):
  52. stderr(
  53. '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
  54. color='red',
  55. )
  56. raise SystemExit(2)
  57. oneshot(
  58. url=stdin_url or url,
  59. out_dir=Path(command.out_dir).resolve(),
  60. extractors=command.extract,
  61. )
  62. if __name__ == '__main__':
  63. main(args=sys.argv[1:], stdin=sys.stdin)