archivebox_add.py

#!/usr/bin/env python3

__package__ = 'archivebox.cli'
__command__ = 'archivebox add'

import sys
import argparse

from typing import List, Optional, IO

from ..main import add, docstring
from ..config import OUTPUT_DIR, ONLY_NEW
from ..logging import SmartFormatter, accept_stdin, stderr
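

# `archivebox add` subcommand: collect URLs/paths from CLI arguments or stdin
# and hand them off to main.add() for indexing and (optional) archiving.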
@docstring(add.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    parser = argparse.ArgumentParser(
        prog=__command__,
        description=add.__doc__,
        add_help=True,
        formatter_class=SmartFormatter,
    )
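
    # SmartFormatter presumably preserves the explicit newlines in the
    # multi-line help text for the `urls` argument below.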
    parser.add_argument(
        '--update-all', #'-n',
        action='store_true',
        default=not ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
        help="Also retry previously skipped/failed links when adding new links",
    )
    parser.add_argument(
        '--index-only', #'-o',
        action='store_true',
        help="Add the links to the main index without archiving them",
    )
    parser.add_argument(
        'urls',
        nargs='*',
        type=str,
        default=None,
        help=(
            'URLs or paths to archive e.g.:\n'
            '    https://getpocket.com/users/USERNAME/feed/all\n'
            '    https://example.com/some/rss/feed.xml\n'
            '    https://example.com\n'
            '    ~/Downloads/firefox_bookmarks_export.html\n'
            '    ~/Desktop/sites_list.csv\n'
        ),
    )
    parser.add_argument(
        '--depth',
        action='store',
        default=0,
        choices=[0, 1],
        type=int,
        help="Recursively archive all linked pages up to this many hops away",
    )
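
    # URLs may come from argv or be piped via stdin, but not both at once,
    # and at least one source must provide something to add.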
    command = parser.parse_args(args or ())
    urls = command.urls
    stdin_urls = accept_stdin(stdin)
    if (stdin_urls and urls) or (not stdin_urls and not urls):
        stderr(
            '[X] You must pass URLs/paths to add via stdin or CLI arguments.\n',
            color='red',
        )
        raise SystemExit(2)
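
    # Hand everything off to the library-level add() function; a pwd passed in
    # by the caller takes precedence over the configured OUTPUT_DIR.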
    add(
        urls=stdin_urls or urls,
        depth=command.depth,
        update_all=command.update_all,
        index_only=command.index_only,
        out_dir=pwd or OUTPUT_DIR,
    )


if __name__ == '__main__':
    main(args=sys.argv[1:], stdin=sys.stdin)
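

# Example invocations (assuming ArchiveBox is installed and the current
# directory is a collection created with `archivebox init`):
#
#   archivebox add 'https://example.com'
#   archivebox add --depth=1 'https://example.com/some/rss/feed.xml'
#   echo 'https://example.com' | archivebox add
#   archivebox add < ~/Downloads/firefox_bookmarks_export.html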


# TODO: Implement these
#
# parser.add_argument(
#     '--mirror', #'-m',
#     action='store_true',
#     help='Archive an entire site (finding all linked pages below it on the same domain)',
# )
# parser.add_argument(
#     '--crawler', #'-r',
#     choices=('depth_first', 'breadth_first'),
#     help='Controls which crawler to use in order to find outlinks in a given page',
#     default=None,
# )