Browse Source

working archivebox_schedule cmd

Nick Sweeting 1 year ago
parent
commit
292730ebad
1 changed file with 46 additions and 128 deletions
  1. 46 128
      archivebox/cli/archivebox_schedule.py

+ 46 - 128
archivebox/cli/archivebox_schedule.py

@@ -1,38 +1,43 @@
 #!/usr/bin/env python3
 #!/usr/bin/env python3
 
 
 __package__ = 'archivebox.cli'
 __package__ = 'archivebox.cli'
-__command__ = 'archivebox schedule'
 
 
 import sys
 import sys
-import argparse
 from pathlib import Path
 from pathlib import Path
-from typing import Optional, List, IO
 
 
-from archivebox.misc.util import docstring
-from archivebox.config import DATA_DIR
-from archivebox.misc.logging_util import SmartFormatter, reject_stdin
-from archivebox.config.common import ARCHIVING_CONFIG
+import rich_click as click
 
 
+from archivebox.misc.util import enforce_types, docstring
+from archivebox.config import DATA_DIR, CONSTANTS
+from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
+from archivebox.misc.logging_util import stderr
+from archivebox.config.permissions import USER
 
 
-# @enforce_types
+
+CRON_COMMENT = 'ArchiveBox'
+
+
+@enforce_types
 def schedule(add: bool=False,
 def schedule(add: bool=False,
-             show: bool=False,
-             clear: bool=False,
-             foreground: bool=False,
-             run_all: bool=False,
-             quiet: bool=False,
-             every: Optional[str]=None,
-             tag: str='',
-             depth: int=0,
-             overwrite: bool=False,
-             update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
-             import_path: Optional[str]=None,
-             out_dir: Path=DATA_DIR):
+            show: bool=False,
+            clear: bool=False,
+            foreground: bool=False,
+            run_all: bool=False,
+            quiet: bool=False,
+            every: str | None=None,
+            tag: str='',
+            depth: int | str=0,
+            overwrite: bool=False,
+            update: bool=not ARCHIVING_CONFIG.ONLY_NEW,
+            import_path: str | None=None,
+            out_dir: Path=DATA_DIR) -> None:
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
     """Set ArchiveBox to regularly import URLs at specific times using cron"""
+ 
+    depth = int(depth)
     
     
-    check_data_folder()
+    from crontab import CronTab, CronSlices
+    from archivebox.misc.system import dedupe_cron_jobs
     from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
     from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
-    from archivebox.config.permissions import USER
 
 
     Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
     Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
 
 
@@ -65,7 +70,6 @@ def schedule(add: bool=False,
             '>>',
             '>>',
             quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'),
             quoted(Path(CONSTANTS.LOGS_DIR) / 'schedule.log'),
             '2>&1',
             '2>&1',
-
         ]
         ]
         new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
         new_job = cron.new(command=' '.join(cmd), comment=CRON_COMMENT)
 
 
@@ -83,10 +87,11 @@ def schedule(add: bool=False,
             raise SystemExit(1)
             raise SystemExit(1)
 
 
         cron = dedupe_cron_jobs(cron)
         cron = dedupe_cron_jobs(cron)
+        print(cron)
         cron.write()
         cron.write()
 
 
         total_runs = sum(j.frequency_per_year() for j in cron)
         total_runs = sum(j.frequency_per_year() for j in cron)
-        existing_jobs = list(cron.find_comment(CRON_COMMENT))
+        existing_jobs = list(cron.find_command('archivebox'))
 
 
         print()
         print()
         print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
         print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
@@ -108,10 +113,6 @@ def schedule(add: bool=False,
             stderr('        archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
             stderr('        archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
         raise SystemExit(0)
         raise SystemExit(0)
 
 
-    cron = CronTab(user=True)
-    cron = dedupe_cron_jobs(cron)
-    existing_jobs = list(cron.find_comment(CRON_COMMENT))
-
     if foreground or run_all:
     if foreground or run_all:
         if not existing_jobs:
         if not existing_jobs:
             stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI))
             stderr('{red}[X] You must schedule some jobs first before running in foreground mode.{reset}'.format(**SHELL_CONFIG.ANSI))
@@ -141,108 +142,25 @@ def schedule(add: bool=False,
                 print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
                 print('\n{green}[√] Stopped.{reset}'.format(**SHELL_CONFIG.ANSI))
                 raise SystemExit(1)
                 raise SystemExit(1)
 
 
-    # if CAN_UPGRADE:
-    #     hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
-
-
 
 
+@click.command()
+@click.option('--quiet', '-q', is_flag=True, help="Don't warn about storage space")
+@click.option('--add', is_flag=True, help='Add a new scheduled ArchiveBox update job to cron')
+@click.option('--every', type=str, help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")')
+@click.option('--tag', '-t', default='', help='Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3')
+@click.option('--depth', type=click.Choice(['0', '1']), default='0', help='Depth to archive to [0] or 1')
+@click.option('--overwrite', is_flag=True, help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots')
+@click.option('--update', is_flag=True, help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults')
+@click.option('--clear', is_flag=True, help='Stop all ArchiveBox scheduled runs (remove cron jobs)')
+@click.option('--show', is_flag=True, help='Print a list of currently active ArchiveBox cron jobs')
+@click.option('--foreground', '-f', is_flag=True, help='Launch ArchiveBox scheduler as a long-running foreground task instead of using cron')
+@click.option('--run-all', is_flag=True, help='Run all the scheduled jobs once immediately, independent of their configured schedules')
+@click.argument('import_path', required=False)
 @docstring(schedule.__doc__)
 @docstring(schedule.__doc__)
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
-    parser = argparse.ArgumentParser(
-        prog=__command__,
-        description=schedule.__doc__,
-        add_help=True,
-        formatter_class=SmartFormatter,
-    )
-    parser.add_argument(
-        '--quiet', '-q',
-        action='store_true',
-        help=("Don't warn about storage space."),
-    )
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument(
-        '--add', # '-a',
-        action='store_true',
-        help='Add a new scheduled ArchiveBox update job to cron',
-    )
-    parser.add_argument(
-        '--every', # '-e',
-        type=str,
-        default=None,
-        help='Run ArchiveBox once every [timeperiod] (hour/day/month/year or cron format e.g. "0 0 * * *")',
-    )
-    parser.add_argument(
-        '--tag', '-t',
-        type=str,
-        default='',
-        help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
-    )
-    parser.add_argument(
-        '--depth', # '-d',
-        type=int,
-        choices=[0, 1],
-        default=0,
-        help='Depth to archive to [0] or 1, see "add" command help for more info',
-    )
-    parser.add_argument(
-        '--overwrite',
-        action='store_true',
-        help='Re-archive any URLs that have been previously archived, overwriting existing Snapshots',
-    )
-    parser.add_argument(
-        '--update',
-        action='store_true',
-        help='Re-pull any URLs that have been previously added, as needed to fill missing ArchiveResults',
-    )
-    group.add_argument(
-        '--clear', # '-c'
-        action='store_true',
-        help=("Stop all ArchiveBox scheduled runs (remove cron jobs)"),
-    )
-    group.add_argument(
-        '--show', # '-s'
-        action='store_true',
-        help=("Print a list of currently active ArchiveBox cron jobs"),
-    )
-    group.add_argument(
-        '--foreground', '-f',
-        action='store_true',
-        help=("Launch ArchiveBox scheduler as a long-running foreground task "
-              "instead of using cron."),
-    )
-    group.add_argument(
-        '--run-all', # '-a',
-        action='store_true',
-        help=("Run all the scheduled jobs once immediately, independent of "
-              "their configured schedules, can be used together with --foreground"),
-    )
-    parser.add_argument(
-        'import_path',
-        nargs='?',
-        type=str,
-        default=None,
-        help=("Check this path and import any new links on every run "
-              "(can be either local file or remote URL)"),
-    )
-    command = parser.parse_args(args or ())
-    reject_stdin(__command__, stdin)
-
-    schedule(
-        add=command.add,
-        show=command.show,
-        clear=command.clear,
-        foreground=command.foreground,
-        run_all=command.run_all,
-        quiet=command.quiet,
-        every=command.every,
-        tag=command.tag,
-        depth=command.depth,
-        overwrite=command.overwrite,
-        update=command.update,
-        import_path=command.import_path,
-        out_dir=Path(pwd) if pwd else DATA_DIR,
-    )
+def main(**kwargs):
+    """Set ArchiveBox to regularly import URLs at specific times using cron"""
+    schedule(**kwargs)
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()