
add quick-init option to skip reimporting all snapshot dirs on init

Nick Sweeting · 4 years ago · commit 3c3bae02d2
4 changed files with 59 additions and 45 deletions:

  1. archivebox/cli/archivebox_init.py      +6  −0
  2. archivebox/cli/archivebox_server.py    +7  −1
  3. archivebox/main.py                     +45 −43
  4. docker-compose.yml                     +1  −1
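
In practice, the new flags added below are used like this (a usage sketch; flag names are taken from the diffs that follow):

    archivebox init --quick                        # or -q: run updates/migrations, skip rechecking snapshot dirs
    archivebox server --quick-init 0.0.0.0:8000    # or -i: quick init, then start the server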

archivebox/cli/archivebox_init.py (+6 −0)

@@ -27,11 +27,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         action='store_true',
         help='Ignore unrecognized files in current directory and initialize anyway',
     )
+    parser.add_argument(
+        '--quick', '-q',
+        action='store_true',
+        help='Run any updates or migrations without rechecking all snapshot dirs',
+    )
     command = parser.parse_args(args or ())
     reject_stdin(__command__, stdin)
 
     init(
         force=command.force,
+        quick=command.quick,
         out_dir=pwd or OUTPUT_DIR,
     )
     

archivebox/cli/archivebox_server.py (+7 −1)

@@ -41,7 +41,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--init',
         action='store_true',
-        help='Run archivebox init before starting the server',
+        help='Run a full archivebox init/upgrade before starting the server',
+    )
+    parser.add_argument(
+        '--quick-init', '-i',
+        action='store_true',
+        help='Run quick archivebox init/upgrade before starting the server',
     )
     parser.add_argument(
         '--createsuperuser',
@@ -56,6 +61,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         reload=command.reload,
         debug=command.debug,
         init=command.init,
+        quick_init=command.quick_init,
         createsuperuser=command.createsuperuser,
         out_dir=pwd or OUTPUT_DIR,
     )
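
The two server flags differ only in how thorough the startup init is (a sketch based on the help text above):

    archivebox server --init 0.0.0.0:8000          # full init/upgrade: rechecks every snapshot dir first
    archivebox server --quick-init 0.0.0.0:8000    # quick init/upgrade: migrations only, no dir recheck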

archivebox/main.py (+45 −43)

@@ -263,7 +263,7 @@ def run(subcommand: str,
 
 
 @enforce_types
-def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
+def init(force: bool=False, quick: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
     """Initialize a new ArchiveBox collection in the current directory"""
     
     from core.models import Snapshot
@@ -345,48 +345,49 @@ def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
         all_links = load_main_index(out_dir=out_dir, warn=False)
         print('    √ Loaded {} links from existing main index.'.format(all_links.count()))
 
-    # Links in data folders that dont match their timestamp
-    fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
-    if fixed:
-        print('    {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI))
-    if cant_fix:
-        print('    {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
-
-    # Links in JSON index but not in main index
-    orphaned_json_links = {
-        link.url: link
-        for link in parse_json_main_index(out_dir)
-        if not all_links.filter(url=link.url).exists()
-    }
-    if orphaned_json_links:
-        pending_links.update(orphaned_json_links)
-        print('    {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
-
-    # Links in data dir indexes but not in main index
-    orphaned_data_dir_links = {
-        link.url: link
-        for link in parse_json_links_details(out_dir)
-        if not all_links.filter(url=link.url).exists()
-    }
-    if orphaned_data_dir_links:
-        pending_links.update(orphaned_data_dir_links)
-        print('    {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI))
-
-    # Links in invalid/duplicate data dirs
-    invalid_folders = {
-        folder: link
-        for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
-    }
-    if invalid_folders:
-        print('    {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI))
-        print('        X ' + '\n        X '.join(f'{folder} {link}' for folder, link in invalid_folders.items()))
-        print()
-        print('    {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
-        print('        archivebox status')
-        print('        archivebox list --status=invalid')
+    if not quick:
+        # Links in data folders that dont match their timestamp
+        fixed, cant_fix = fix_invalid_folder_locations(out_dir=out_dir)
+        if fixed:
+            print('    {lightyellow}√ Fixed {} data directory locations that didn\'t match their link timestamps.{reset}'.format(len(fixed), **ANSI))
+        if cant_fix:
+            print('    {lightyellow}! Could not fix {} data directory locations due to conflicts with existing folders.{reset}'.format(len(cant_fix), **ANSI))
+
+        # Links in JSON index but not in main index
+        orphaned_json_links = {
+            link.url: link
+            for link in parse_json_main_index(out_dir)
+            if not all_links.filter(url=link.url).exists()
+        }
+        if orphaned_json_links:
+            pending_links.update(orphaned_json_links)
+            print('    {lightyellow}√ Added {} orphaned links from existing JSON index...{reset}'.format(len(orphaned_json_links), **ANSI))
+
+        # Links in data dir indexes but not in main index
+        orphaned_data_dir_links = {
+            link.url: link
+            for link in parse_json_links_details(out_dir)
+            if not all_links.filter(url=link.url).exists()
+        }
+        if orphaned_data_dir_links:
+            pending_links.update(orphaned_data_dir_links)
+            print('    {lightyellow}√ Added {} orphaned links from existing archive directories.{reset}'.format(len(orphaned_data_dir_links), **ANSI))
+
+        # Links in invalid/duplicate data dirs
+        invalid_folders = {
+            folder: link
+            for folder, link in get_invalid_folders(all_links, out_dir=out_dir).items()
+        }
+        if invalid_folders:
+            print('    {lightyellow}! Skipped adding {} invalid link data directories.{reset}'.format(len(invalid_folders), **ANSI))
+            print('        X ' + '\n        X '.join(f'{folder} {link}' for folder, link in invalid_folders.items()))
+            print()
+            print('    {lightred}Hint:{reset} For more information about the link data directories that were skipped, run:'.format(**ANSI))
+            print('        archivebox status')
+            print('        archivebox list --status=invalid')
 
 
-    write_main_index(list(pending_links.values()), out_dir=out_dir)
+        write_main_index(list(pending_links.values()), out_dir=out_dir)
 
     print('\n{green}------------------------------------------------------------------{reset}'.format(**ANSI))
     if existing_index:
@@ -1063,14 +1064,15 @@ def server(runserver_args: Optional[List[str]]=None,
            reload: bool=False,
            debug: bool=False,
            init: bool=False,
+           quick_init: bool=False,
            createsuperuser: bool=False,
            out_dir: Path=OUTPUT_DIR) -> None:
     """Run the ArchiveBox HTTP server"""
 
     runserver_args = runserver_args or []
     
-    if init:
-        run_subcommand('init', stdin=None, pwd=out_dir)
+    if init or quick_init:
+        run_subcommand('init', quick=quick_init, stdin=None, pwd=out_dir)
 
     if createsuperuser:
         run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
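
Per the `if not quick:` gating above, a quick init skips fixing mislocated data directories, re-importing orphaned links from the JSON index and archive dirs, and flagging invalid folders. If snapshot dirs may have been added or moved outside ArchiveBox, a full init is still the safe path (commands per the hint printed in the diff):

    archivebox init                        # full pass: fix, re-import, and flag snapshot dirs
    archivebox status                      # then inspect the collection
    archivebox list --status=invalid       # list any data dirs that were skipped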

docker-compose.yml (+1 −1)

@@ -13,7 +13,7 @@ services:
     archivebox:
         # build: .
         image: ${DOCKER_IMAGE:-archivebox/archivebox:latest} 
-        command: server 0.0.0.0:8000
+        command: server --quick-init 0.0.0.0:8000
         stdin_open: true
         tty: true
         ports:
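
With this default, the container starts faster since it no longer rescans every snapshot dir on boot. A full init can still be run as a one-off when needed (a sketch; assumes the compose service name above and that the image entrypoint accepts archivebox subcommands):

    docker compose run --rm archivebox init        # full init/upgrade inside the container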