Browse Source

fix archivebox init and archivebox install CLI commands

Nick Sweeting 1 year ago
parent
commit
a0edf218e8
2 changed files with 66 additions and 116 deletions
  1. 42 65
      archivebox/cli/archivebox_init.py
  2. 24 51
      archivebox/cli/archivebox_install.py

+ 42 - 65
archivebox/cli/archivebox_init.py

@@ -1,30 +1,36 @@
 #!/usr/bin/env python3
 
 __package__ = 'archivebox.cli'
-__command__ = 'archivebox init'
 
+import os
 import sys
-import argparse
 from pathlib import Path
-from typing import Optional, List, IO
 
+from rich import print
+import rich_click as click
 
-from archivebox.misc.util import docstring
-from archivebox.config import DATA_DIR
-from archivebox.misc.logging_util import SmartFormatter, reject_stdin
+from archivebox.misc.util import docstring, enforce_types
 
 
-def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Path=DATA_DIR) -> None:
+@enforce_types
+def init(force: bool=False, quick: bool=False, install: bool=False, setup: bool=False) -> None:
     """Initialize a new ArchiveBox collection in the current directory"""
     
-    from core.models import Snapshot
-    from rich import print
+    install = install or setup
+    
+    from archivebox.config import CONSTANTS, VERSION, DATA_DIR
+    from archivebox.config.common import SERVER_CONFIG
+    from archivebox.config.collection import write_config_file
+    from archivebox.index import load_main_index, write_main_index, fix_invalid_folder_locations, get_invalid_folders
+    from archivebox.index.schema import Link
+    from archivebox.index.json import parse_json_main_index, parse_json_links_details
+    from archivebox.index.sql import apply_migrations
     
     # if os.access(out_dir / CONSTANTS.JSON_INDEX_FILENAME, os.F_OK):
     #     print("[red]:warning: This folder contains a JSON index. It is deprecated, and will no longer be kept up to date automatically.[/red]", file=sys.stderr)
     #     print("[red]    You can run `archivebox list --json --with-headers > static_index.json` to manually generate it.[/red]", file=sys.stderr)
 
-    is_empty = not len(set(os.listdir(out_dir)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
+    is_empty = not len(set(os.listdir(DATA_DIR)) - CONSTANTS.ALLOWED_IN_DATA_DIR)
     existing_index = os.path.isfile(CONSTANTS.DATABASE_FILE)
     if is_empty and not existing_index:
         print(f'[turquoise4][+] Initializing a new ArchiveBox v{VERSION} collection...[/turquoise4]')
@@ -62,7 +68,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     
     # create the .archivebox_id file with a unique ID for this collection
     from archivebox.config.paths import _get_collection_id
-    _get_collection_id(CONSTANTS.DATA_DIR, force_create=True)
+    _get_collection_id(DATA_DIR, force_create=True)
     
     # create the ArchiveBox.conf file
     write_config_file({'SECRET_KEY': SERVER_CONFIG.SECRET_KEY})
@@ -73,7 +79,10 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     else:
         print('\n[green][+] Building main SQL index and running initial migrations...[/green]')
     
-    for migration_line in apply_migrations(out_dir):
+    from archivebox.config.django import setup_django
+    setup_django()
+    
+    for migration_line in apply_migrations(DATA_DIR):
         sys.stdout.write(f'    {migration_line}\n')
 
     assert os.path.isfile(CONSTANTS.DATABASE_FILE) and os.access(CONSTANTS.DATABASE_FILE, os.R_OK)
@@ -88,11 +97,13 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     print()
     print('[dodger_blue3][*] Checking links from indexes and archive folders (safe to Ctrl+C)...[/dodger_blue3]')
 
+    from core.models import Snapshot
+
     all_links = Snapshot.objects.none()
-    pending_links: Dict[str, Link] = {}
+    pending_links: dict[str, Link] = {}
 
     if existing_index:
-        all_links = load_main_index(out_dir=out_dir, warn=False)
+        all_links = load_main_index(DATA_DIR, warn=False)
         print(f'    √ Loaded {all_links.count()} links from existing main index.')
 
     if quick:
@@ -100,7 +111,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     else:
         try:
             # Links in data folders that dont match their timestamp
-            fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
+            fixed, cant_fix = fix_invalid_folder_locations(DATA_DIR)
             if fixed:
                 print(f'    [yellow]√ Fixed {len(fixed)} data directory locations that didn\'t match their link timestamps.[/yellow]')
             if cant_fix:
@@ -109,7 +120,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
             # Links in JSON index but not in main index
             orphaned_json_links = {
                 link.url: link
-                for link in parse_json_main_index(out_dir)
+                for link in parse_json_main_index(DATA_DIR)
                 if not all_links.filter(url=link.url).exists()
             }
             if orphaned_json_links:
@@ -119,7 +130,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
             # Links in data dir indexes but not in main index
             orphaned_data_dir_links = {
                 link.url: link
-                for link in parse_json_links_details(out_dir)
+                for link in parse_json_links_details(DATA_DIR)
                 if not all_links.filter(url=link.url).exists()
             }
             if orphaned_data_dir_links:
@@ -129,7 +140,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
             # Links in invalid/duplicate data dirs
             invalid_folders = {
                 folder: link
-                for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
+                for folder, link in get_invalid_folders(all_links, DATA_DIR).items()
             }
             if invalid_folders:
                 print(f'    [red]! Skipped adding {len(invalid_folders)} invalid link data directories.[/red]')
@@ -148,7 +159,7 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
             print('        archivebox init --quick', file=sys.stderr)
             raise SystemExit(1)
         
-        write_main_index(list(pending_links.values()), out_dir=out_dir)
+        write_main_index(list(pending_links.values()), DATA_DIR)
 
     print('\n[green]----------------------------------------------------------------------[/green]')
 
@@ -163,13 +174,6 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     else:
         print(f'[green][√] Done. A new ArchiveBox collection was initialized ({len(all_links) + len(pending_links)} links).[/green]')
 
-    json_index = out_dir / CONSTANTS.JSON_INDEX_FILENAME
-    html_index = out_dir / CONSTANTS.HTML_INDEX_FILENAME
-    index_name = f"{date.today()}_index_old"
-    if os.access(json_index, os.F_OK):
-        json_index.rename(f"{index_name}.json")
-    if os.access(html_index, os.F_OK):
-        html_index.rename(f"{index_name}.html")
     
     CONSTANTS.PERSONAS_DIR.mkdir(parents=True, exist_ok=True)
     CONSTANTS.DEFAULT_TMP_DIR.mkdir(parents=True, exist_ok=True)
@@ -180,7 +184,8 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
     STORAGE_CONFIG.LIB_DIR.mkdir(parents=True, exist_ok=True)
     
     if install:
-        run_subcommand('install', pwd=out_dir)
+        from archivebox.cli.archivebox_install import install as install_method
+        install_method()
 
     if Snapshot.objects.count() < 25:     # hide the hints for experienced users
         print()
@@ -194,44 +199,16 @@ def init(force: bool=False, quick: bool=False, install: bool=False, out_dir: Pat
         print('        archivebox help')
 
 
+
+@click.command()
+@click.option('--force', '-f', is_flag=True, help='Ignore unrecognized files in current directory and initialize anyway')
+@click.option('--quick', '-q', is_flag=True, help='Run any updates or migrations without rechecking all snapshot dirs')
+@click.option('--install', '-s', is_flag=True, help='Automatically install dependencies and extras used for archiving')
+@click.option('--setup', '-s', is_flag=True, help='DEPRECATED: equivalent to --install')
 @docstring(init.__doc__)
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
-    parser = argparse.ArgumentParser(
-        prog=__command__,
-        description=init.__doc__,
-        add_help=True,
-        formatter_class=SmartFormatter,
-    )
-    parser.add_argument(
-        '--force', # '-f',
-        action='store_true',
-        help='Ignore unrecognized files in current directory and initialize anyway',
-    )
-    parser.add_argument(
-        '--quick', '-q',
-        action='store_true',
-        help='Run any updates or migrations without rechecking all snapshot dirs',
-    )
-    parser.add_argument(
-        '--install', #'-s',
-        action='store_true',
-        help='Automatically install dependencies and extras used for archiving',
-    )
-    parser.add_argument(
-        '--setup', #'-s',
-        action='store_true',
-        help='DEPRECATED: equivalent to --install',
-    )
-    command = parser.parse_args(args or ())
-    reject_stdin(__command__, stdin)
-
-    init(
-        force=command.force,
-        quick=command.quick,
-        install=command.install or command.setup,
-        out_dir=pwd or DATA_DIR,
-    )
-    
+def main(**kwargs) -> None:
+    init(**kwargs)
+
 
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()

+ 24 - 51
archivebox/cli/archivebox_install.py

@@ -5,16 +5,16 @@ __command__ = 'archivebox install'
 
 import os
 import sys
-import argparse
-from pathlib import Path
-from typing import Optional, List, IO
+from typing import Optional, List
 
-from archivebox.misc.util import docstring
-from archivebox.config import DATA_DIR
-from archivebox.misc.logging_util import SmartFormatter, reject_stdin
+import rich_click as click
+from rich import print
 
+from archivebox.misc.util import docstring, enforce_types
 
-def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
+
+@enforce_types
+def install(binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
     """Automatically install all ArchiveBox dependencies and extras"""
     
     # if running as root:
@@ -27,13 +27,17 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     #    - install all binaries as current user
     #    - recommend user re-run with sudo if any deps need to be installed as root
 
-    from rich import print
-    
-    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
-    from archivebox.config.paths import get_or_create_working_lib_dir
+    import abx
+    import archivebox
+    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
+    from archivebox.config.paths import DATA_DIR, ARCHIVE_DIR, get_or_create_working_lib_dir
+    from archivebox.misc.logging import stderr
+    from archivebox.cli.archivebox_init import init
+    from archivebox.misc.system import run as run_shell
+
 
     if not (os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()):
-        run_subcommand('init', stdin=None, pwd=out_dir)  # must init full index because we need a db to store InstalledBinary entries in
+        init()  # must init full index because we need a db to store InstalledBinary entries in
 
     print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
     
@@ -143,49 +147,18 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
     if binaries:
         extra_args.append(f'--binaries={",".join(binaries)}')
     
-    proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=out_dir)
+    proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=DATA_DIR)
     raise SystemExit(proc.returncode)
 
 
-
+@click.command()
+@click.option('--binproviders', '-p', type=str, help='Select binproviders to use DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)', default=None)
+@click.option('--binaries', '-b', type=str, help='Select binaries to install DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)', default=None)
+@click.option('--dry-run', '-d', is_flag=True, help='Show what would be installed without actually installing anything', default=False)
 @docstring(install.__doc__)
-def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
-    
-    parser = argparse.ArgumentParser(
-        prog=__command__,
-        description=install.__doc__,
-        add_help=True,
-        formatter_class=SmartFormatter,
-    )
-    parser.add_argument(
-        '--binproviders', '-p',
-        type=str,
-        help='Select binproviders to use DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)',
-        default=None,
-    )
-    parser.add_argument(
-        '--binaries', '-b',
-        type=str,
-        help='Select binaries to install DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)',
-        default=None,
-    )
-    parser.add_argument(
-        '--dry-run', '-d',
-        action='store_true',
-        help='Show what would be installed without actually installing anything',
-        default=False,
-    )
-    command = parser.parse_args(args or ())   # noqa
-    reject_stdin(__command__, stdin)
-
-    install(
-        # force=command.force,
-        out_dir=Path(pwd) if pwd else DATA_DIR,
-        binaries=command.binaries.split(',') if command.binaries else None,
-        binproviders=command.binproviders.split(',') if command.binproviders else None,
-        dry_run=command.dry_run,
-    )
+def main(**kwargs) -> None:
+    install(**kwargs)
     
 
 if __name__ == '__main__':
-    main(args=sys.argv[1:], stdin=sys.stdin)
+    main()