archivebox_schedule.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. __command__ = 'archivebox schedule'
  4. import sys
  5. import argparse
  6. from pathlib import Path
  7. from typing import Optional, List, IO
  8. from archivebox.misc.util import docstring
  9. from archivebox.config import DATA_DIR
  10. from ..logging_util import SmartFormatter, reject_stdin
  11. from ..main import schedule
  12. @docstring(schedule.__doc__)
  13. def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
  14. parser = argparse.ArgumentParser(
  15. prog=__command__,
  16. description=schedule.__doc__,
  17. add_help=True,
  18. formatter_class=SmartFormatter,
  19. )
  20. parser.add_argument(
  21. '--quiet', '-q',
  22. action='store_true',
  23. help=("Don't warn about storage space."),
  24. )
  25. group = parser.add_mutually_exclusive_group()
  26. group.add_argument(
  27. '--add', # '-a',
  28. action='store_true',
  29. help='Add a new scheduled ArchiveBox update job to cron',
  30. )
  31. parser.add_argument(
  32. '--every', # '-e',
  33. type=str,
  34. default=None,
  35. help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")',
  36. )
  37. parser.add_argument(
  38. '--tag', '-t',
  39. type=str,
  40. default='',
  41. help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
  42. )
  43. parser.add_argument(
  44. '--depth', # '-d',
  45. type=int,
  46. choices=[0, 1],
  47. default=0,
  48. help='Depth to archive to [0] or 1, see "add" command help for more info',
  49. )
  50. parser.add_argument(
  51. '--overwrite',
  52. action='store_true',
  53. help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots',
  54. )
  55. parser.add_argument(
  56. '--update',
  57. action='store_true',
  58. help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults',
  59. )
  60. group.add_argument(
  61. '--clear', # '-c'
  62. action='store_true',
  63. help=("Stop all ArchiveBox scheduled runs (remove cron jobs)"),
  64. )
  65. group.add_argument(
  66. '--show', # '-s'
  67. action='store_true',
  68. help=("Print a list of currently active ArchiveBox cron jobs"),
  69. )
  70. group.add_argument(
  71. '--foreground', '-f',
  72. action='store_true',
  73. help=("Launch ArchiveBox scheduler as a long-running foreground task "
  74. "instead of using cron."),
  75. )
  76. group.add_argument(
  77. '--run-all', # '-a',
  78. action='store_true',
  79. help=("Run all the scheduled jobs once immediately, independent of "
  80. "their configured schedules, can be used together with --foreground"),
  81. )
  82. parser.add_argument(
  83. 'import_path',
  84. nargs='?',
  85. type=str,
  86. default=None,
  87. help=("Check this path and import any new links on every run "
  88. "(can be either local file or remote URL)"),
  89. )
  90. command = parser.parse_args(args or ())
  91. reject_stdin(__command__, stdin)
  92. schedule(
  93. add=command.add,
  94. show=command.show,
  95. clear=command.clear,
  96. foreground=command.foreground,
  97. run_all=command.run_all,
  98. quiet=command.quiet,
  99. every=command.every,
  100. tag=command.tag,
  101. depth=command.depth,
  102. overwrite=command.overwrite,
  103. update=command.update,
  104. import_path=command.import_path,
  105. out_dir=Path(pwd) if pwd else DATA_DIR,
  106. )
  107. if __name__ == '__main__':
  108. main(args=sys.argv[1:], stdin=sys.stdin)