|
|
@@ -1,38 +1,43 @@
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
__package__ = 'archivebox.cli'
|
|
|
-__command__ = 'archivebox schedule'
|
|
|
|
|
|
import sys
|
|
|
-import argparse
|
|
|
from pathlib import Path
|
|
|
-from typing import Optional, List, IO
|
|
|
|
|
|
-from archivebox.misc.util import docstring
|
|
|
-from archivebox.config import DATA_DIR
|
|
|
-from archivebox.misc.logging_util import SmartFormatter, reject_stdin
|
|
|
-from archivebox.config.common import ARCHIVING_CONFIG
|
|
|
+import rich_click as click
|
|
|
|
|
|
+from archivebox.misc.util import enforce_types, docstring
|
|
|
+from archivebox.config import DATA_DIR, CONSTANTS
|
|
|
+from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
|
|
|
+from archivebox.misc.logging_util import stderr
|
|
|
+from archivebox.config.permissions import USER
|
|
|
|
|
|
-# @enforce_types
|
|
|
+
|
|
|
+CRON_COMMENT = 'ArchiveBox'
|
|
|
+
|
|
|
+
|
|
|
+@enforce_types
|
|
|
def schedule(add: bool=False,
|
|
|
- show: bool=False,
|
|
|
- clear: bool=False,
|
|
|
- foreground: bool=False,
|
|
|
- run_all: bool=False,
|
|
|
- quiet: bool=False,
|
|
|
- every: Optional[str]=None,
|
|
|
- tag: str='',
|
|
|
- depth: int=0,
|
|
|
- overwrite: bool=False,
|
|
|
- update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
|
|
|
- import_path: Optional[str]=None,
|
|
|
- out_dir: Path=DATA_DIR):
|
|
|
+ show: bool=False,
|
|
|
+ clear: bool=False,
|
|
|
+ foreground: bool=False,
|
|
|
+ run_all: bool=False,
|
|
|
+ quiet: bool=False,
|
|
|
+ every: str | None=None,
|
|
|
+ tag: str='',
|
|
|
+ depth: int | str=0,
|
|
|
+ overwrite: bool=False,
|
|
|
+ update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
|
|
|
+ import_path: str | None=None,
|
|
|
+ out_dir: Path=DATA_DIR) -> None:
|
|
|
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
|
|
|
+
|
|
|
+ depth = int(depth)
|
|
|
|
|
|
- check_data_folder()
|
|
|
+ from crontab import CronTab, CronSlices
|
|
|
+ from archivebox.misc.system import dedupe_cron_jobs
|
|
|
from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
|
|
|
- from archivebox.config.permissions import USER
|
|
|
|
|
|
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
|
|
|
|
|
|
@@ -65,7 +70,6 @@ def schedule(add: bool=False,
|
|
|
'>>',
|
|
|
quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'),
|
|
|
'2>&1',
|
|
|
-
|
|
|
]
|
|
|
new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
|
|
|
|
|
|
@@ -83,10 +87,11 @@ def schedule(add: bool=False,
|
|
|
raise SystemExit(1)
|
|
|
|
|
|
cron = dedupe_cron_jobs(cron)
|
|
|
+ print(cron)
|
|
|
cron.write()
|
|
|
|
|
|
total_runs = sum(j.frequency_per_year() for j in cron)
|
|
|
- existing_jobs = list(cron.find_comment(CRON_COMMENT))
|
|
|
+ existing_jobs = list(cron.find_command('archivebox'))
|
|
|
|
|
|
print()
|
|
|
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
|
|
|
@@ -108,10 +113,6 @@ def schedule(add: bool=False,
|
|
|
stderr(' archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
|
|
|
raise SystemExit(0)
|
|
|
|
|
|
- cron = CronTab(user=True)
|
|
|
- cron = dedupe_cron_jobs(cron)
|
|
|
- existing_jobs = list(cron.find_comment(CRON_COMMENT))
|
|
|
-
|
|
|
if foreground or run_all:
|
|
|
if not existing_jobs:
|
|
|
stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI))
|
|
|
@@ -141,108 +142,25 @@ def schedule(add: bool=False,
|
|
|
print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
|
|
|
raise SystemExit(1)
|
|
|
|
|
|
- # if CAN_UPGRADE:
|
|
|
- # hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
|
|
|
-
|
|
|
-
|
|
|
|
|
|
+@click.command()
|
|
|
+@click.option('--quiet', '-q', is_flag=True, help="Don't warn about storage space")
|
|
|
+@click.option('--add', is_flag=True, help='Add a new scheduled ArchiveBox update job to cron')
|
|
|
+@click.option('--every', type=str, help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")')
|
|
|
+@click.option('--tag', '-t', default='', help='Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3')
|
|
|
+@click.option('--depth', type=click.Choice(['0', '1']), default='0', help='Depth to archive to [0] or 1')
|
|
|
+@click.option('--overwrite', is_flag=True, help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots')
|
|
|
+@click.option('--update', is_flag=True, help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults')
|
|
|
+@click.option('--clear', is_flag=True, help='Stop all ArchiveBox scheduled runs (remove cron jobs)')
|
|
|
+@click.option('--show', is_flag=True, help='Print a list of currently active ArchiveBox cron jobs')
|
|
|
+@click.option('--foreground', '-f', is_flag=True, help='Launch ArchiveBox scheduler as a long-running foreground task instead of using cron')
|
|
|
+@click.option('--run-all', is_flag=True, help='Run all the scheduled jobs once immediately, independent of their configured schedules')
|
|
|
+@click.argument('import_path', required=False)
|
|
|
@docstring(schedule.__doc__)
|
|
|
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
|
|
|
- parser = argparse.ArgumentParser(
|
|
|
- prog=__command__,
|
|
|
- description=schedule.__doc__,
|
|
|
- add_help=True,
|
|
|
- formatter_class=SmartFormatter,
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- '--quiet', '-q',
|
|
|
- action='store_true',
|
|
|
- help=("Don't warn about storage space."),
|
|
|
- )
|
|
|
- group = parser.add_mutually_exclusive_group()
|
|
|
- group.add_argument(
|
|
|
- '--add', # '-a',
|
|
|
- action='store_true',
|
|
|
- help='Add a new scheduled ArchiveBox update job to cron',
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- '--every', # '-e',
|
|
|
- type=str,
|
|
|
- default=None,
|
|
|
- help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")',
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- '--tag', '-t',
|
|
|
- type=str,
|
|
|
- default='',
|
|
|
- help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- '--depth', # '-d',
|
|
|
- type=int,
|
|
|
- choices=[0, 1],
|
|
|
- default=0,
|
|
|
- help='Depth to archive to [0] or 1, see "add" command help for more info',
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- '--overwrite',
|
|
|
- action='store_true',
|
|
|
- help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots',
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- '--update',
|
|
|
- action='store_true',
|
|
|
- help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults',
|
|
|
- )
|
|
|
- group.add_argument(
|
|
|
- '--clear', # '-c'
|
|
|
- action='store_true',
|
|
|
- help=("Stop all ArchiveBox scheduled runs (remove cron jobs)"),
|
|
|
- )
|
|
|
- group.add_argument(
|
|
|
- '--show', # '-s'
|
|
|
- action='store_true',
|
|
|
- help=("Print a list of currently active ArchiveBox cron jobs"),
|
|
|
- )
|
|
|
- group.add_argument(
|
|
|
- '--foreground', '-f',
|
|
|
- action='store_true',
|
|
|
- help=("Launch ArchiveBox scheduler as a long-running foreground task "
|
|
|
- "instead of using cron."),
|
|
|
- )
|
|
|
- group.add_argument(
|
|
|
- '--run-all', # '-a',
|
|
|
- action='store_true',
|
|
|
- help=("Run all the scheduled jobs once immediately, independent of "
|
|
|
- "their configured schedules, can be used together with --foreground"),
|
|
|
- )
|
|
|
- parser.add_argument(
|
|
|
- 'import_path',
|
|
|
- nargs='?',
|
|
|
- type=str,
|
|
|
- default=None,
|
|
|
- help=("Check this path and import any new links on every run "
|
|
|
- "(can be either local file or remote URL)"),
|
|
|
- )
|
|
|
- command = parser.parse_args(args or ())
|
|
|
- reject_stdin(__command__, stdin)
|
|
|
-
|
|
|
- schedule(
|
|
|
- add=command.add,
|
|
|
- show=command.show,
|
|
|
- clear=command.clear,
|
|
|
- foreground=command.foreground,
|
|
|
- run_all=command.run_all,
|
|
|
- quiet=command.quiet,
|
|
|
- every=command.every,
|
|
|
- tag=command.tag,
|
|
|
- depth=command.depth,
|
|
|
- overwrite=command.overwrite,
|
|
|
- update=command.update,
|
|
|
- import_path=command.import_path,
|
|
|
- out_dir=Path(pwd) if pwd else DATA_DIR,
|
|
|
- )
|
|
|
+def main(**kwargs):
|
|
|
+ """Set ArchiveBox to regularly import URLs at specific times using cron"""
|
|
|
+ schedule(**kwargs)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
- main(args=sys.argv[1:], stdin=sys.stdin)
|
|
|
+ main()
|