#!/usr/bin/env python3

__package__ = 'archivebox.cli'
__command__ = 'archivebox add'

import sys
import argparse

from typing import List, Optional, IO

from ..main import add, docstring
from ..config import OUTPUT_DIR, ONLY_NEW
from ..logging import SmartFormatter, accept_stdin, stderr


@docstring(add.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    parser = argparse.ArgumentParser(
        prog=__command__,
        description=add.__doc__,
        add_help=True,
        formatter_class=SmartFormatter,
    )
    parser.add_argument(
        '--update-all', #'-n',
        action='store_true',
        default=not ONLY_NEW,  # when ONLY_NEW=True we skip updating old links
        help="Also retry previously skipped/failed links when adding new links",
    )
    parser.add_argument(
        '--index-only', #'-o',
        action='store_true',
        help="Add the links to the main index without archiving them",
    )
    parser.add_argument(
        'urls',
        nargs='*',
        type=str,
        default=None,
        help=(
            'URLs or paths to archive e.g.:\n'
            '    https://getpocket.com/users/USERNAME/feed/all\n'
            '    https://example.com/some/rss/feed.xml\n'
            '    https://example.com\n'
            '    ~/Downloads/firefox_bookmarks_export.html\n'
            '    ~/Desktop/sites_list.csv\n'
        ),
    )
    parser.add_argument(
        "--depth",
        action="store",
        default=0,
        choices=[0, 1],
        type=int,
        help="Recursively archive all linked pages up to this many hops away",
    )
    command = parser.parse_args(args or ())
    urls = command.urls
    stdin_urls = accept_stdin(stdin)  # any URLs piped in via stdin

    # Require input from exactly one source: either piped stdin or CLI args.
    if (stdin_urls and urls) or (not stdin_urls and not urls):
        stderr(
            '[X] You must pass URLs/paths to add via stdin or CLI arguments (not both).\n',
            color='red',
        )
        raise SystemExit(2)
    add(
        urls=stdin_urls or urls,
        depth=command.depth,
        update_all=command.update_all,
        index_only=command.index_only,
        out_dir=pwd or OUTPUT_DIR,
    )


if __name__ == '__main__':
    main(args=sys.argv[1:], stdin=sys.stdin)

# TODO: Implement these
#
# parser.add_argument(
#     '--mirror', #'-m',
#     action='store_true',
#     help='Archive an entire site (finding all linked pages below it on the same domain)',
# )
# parser.add_argument(
#     '--crawler', #'-r',
#     choices=('depth_first', 'breadth_first'),
#     help='Controls which crawler to use in order to find outlinks in a given page',
#     default=None,
# )
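
# Example invocations, for illustration only (every flag used here is defined
# by the parser above; assumes the installed `archivebox` entrypoint dispatches
# the `add` subcommand to this module's main()):
#
#   echo 'https://example.com' | archivebox add
#   archivebox add --depth=1 'https://example.com'
#   archivebox add --index-only ~/Downloads/firefox_bookmarks_export.html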