archivebox_oneshot.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # #!/usr/bin/env python3
  2. ################## DEPRECATED IN FAVOR OF abx-dl #####################
  3. # Use abx-dl instead: https://github.com/ArchiveBox/abx-dl (the legacy `archivebox oneshot` code below is left commented out)
  4. # __package__ = 'archivebox.cli'
  5. # __command__ = 'archivebox oneshot'
  6. # import sys
  7. # import argparse
  8. # from pathlib import Path
  9. # from typing import List, Optional, IO
  10. # from archivebox.misc.util import docstring
  11. # from archivebox.config import DATA_DIR
  12. # from archivebox.misc.logging_util import SmartFormatter, accept_stdin, stderr
  13. # @enforce_types
  14. # def oneshot(url: str, extractors: str="", out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> List[Link]:
  15. # """
  16. # Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
  17. # You can run this to archive single pages without needing to create a whole collection with archivebox init.
  18. # """
  19. # oneshot_link, _ = parse_links_memory([url])
  20. # if len(oneshot_link) > 1:
  21. # stderr(
  22. # '[X] You should pass a single url to the oneshot command',
  23. # color='red'
  24. # )
  25. # raise SystemExit(2)
  26. # methods = extractors.split(",") if extractors else ignore_methods(['title'])
  27. # archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, created_by_id=created_by_id)
  28. # return oneshot_link
  29. # @docstring(oneshot.__doc__)
  30. # def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
  31. # parser = argparse.ArgumentParser(
  32. # prog=__command__,
  33. # description=oneshot.__doc__,
  34. # add_help=True,
  35. # formatter_class=SmartFormatter,
  36. # )
  37. # parser.add_argument(
  38. # 'url',
  39. # type=str,
  40. # default=None,
  41. # help=(
  42. # 'URLs or paths to archive e.g.:\n'
  43. # ' https://getpocket.com/users/USERNAME/feed/all\n'
  44. # ' https://example.com/some/rss/feed.xml\n'
  45. # ' https://example.com\n'
  46. # ' ~/Downloads/firefox_bookmarks_export.html\n'
  47. # ' ~/Desktop/sites_list.csv\n'
  48. # )
  49. # )
  50. # parser.add_argument(
  51. # "--extract",
  52. # type=str,
  53. # help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
  54. # This does not take precedence over the configuration",
  55. # default=""
  56. # )
  57. # parser.add_argument(
  58. # '--out-dir',
  59. # type=str,
  60. # default=DATA_DIR,
  61. # help= "Path to save the single archive folder to, e.g. ./example.com_archive"
  62. # )
  63. # command = parser.parse_args(args or ())
  64. # stdin_url = None
  65. # url = command.url
  66. # if not url:
  67. # stdin_url = accept_stdin(stdin)
  68. # if (stdin_url and url) or (not stdin and not url):
  69. # stderr(
  70. # '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
  71. # color='red',
  72. # )
  73. # raise SystemExit(2)
  74. # oneshot(
  75. # url=stdin_url or url,
  76. # out_dir=Path(command.out_dir).resolve(),
  77. # extractors=command.extract,
  78. # )
  79. # if __name__ == '__main__':
  80. # main(args=sys.argv[1:], stdin=sys.stdin)