Browse Source

refactor: Remove `skip_index` from archive-related functions

Cristian 5 years ago
parent
commit
275ad22db7
3 changed files with 16 additions and 16 deletions
  1. archivebox/cli/__init__.py (+5 -3)
  2. archivebox/extractors/__init__.py (+10 -12)
  3. archivebox/main.py (+1 -1)

+ 5 - 3
archivebox/cli/__init__.py

@@ -19,7 +19,7 @@ meta_cmds = ('help', 'version')
 main_cmds = ('init', 'info', 'config')
 main_cmds = ('init', 'info', 'config')
 archive_cmds = ('add', 'remove', 'update', 'list', 'status')
 archive_cmds = ('add', 'remove', 'update', 'list', 'status')
 
 
-fake_db = ("oneshot",) + meta_cmds
+fake_db = ("oneshot",)
 
 
 display_first = (*meta_cmds, *main_cmds, *archive_cmds)
 display_first = (*meta_cmds, *main_cmds, *archive_cmds)
 
 
@@ -60,8 +60,10 @@ def run_subcommand(subcommand: str,
                    stdin: Optional[IO]=None,
                    stdin: Optional[IO]=None,
                    pwd: Union[Path, str, None]=None) -> None:
                    pwd: Union[Path, str, None]=None) -> None:
     """Run a given ArchiveBox subcommand with the given list of args"""
     """Run a given ArchiveBox subcommand with the given list of args"""
-    from ..config import setup_django
-    setup_django(in_memory_db=subcommand in fake_db)
+
+    if subcommand not in meta_cmds:
+        from ..config import setup_django
+        setup_django(in_memory_db=subcommand in fake_db)
 
 
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
     module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore
     module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore

+ 10 - 12
archivebox/extractors/__init__.py

@@ -67,16 +67,15 @@ def ignore_methods(to_ignore: List[str]):
     return list(methods)
     return list(methods)
 
 
 @enforce_types
 @enforce_types
-def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None, skip_index: bool=False) -> Link:
+def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[str]]=None, out_dir: Optional[Path]=None) -> Link:
     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
 
 
     # TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.
     # TODO: Remove when the input is changed to be a snapshot. Suboptimal approach.
-    if not skip_index:
-        from core.models import Snapshot, ArchiveResult
-        try:
-            snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
-        except Snapshot.DoesNotExist:
-            snapshot = write_link_to_sql_index(link)
+    from core.models import Snapshot, ArchiveResult
+    try:
+        snapshot = Snapshot.objects.get(url=link.url) # TODO: This will be unnecessary once everything is a snapshot
+    except Snapshot.DoesNotExist:
+        snapshot = write_link_to_sql_index(link)
 
 
     ARCHIVE_METHODS = get_default_archive_methods()
     ARCHIVE_METHODS = get_default_archive_methods()
     
     
@@ -93,7 +92,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
             os.makedirs(out_dir)
             os.makedirs(out_dir)
 
 
         link = load_link_details(link, out_dir=out_dir)
         link = load_link_details(link, out_dir=out_dir)
-        write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
+        write_link_details(link, out_dir=out_dir, skip_sql_index=False)
         log_link_archiving_started(link, out_dir, is_new)
         log_link_archiving_started(link, out_dir, is_new)
         link = link.overwrite(updated=datetime.now())
         link = link.overwrite(updated=datetime.now())
         stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
         stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
@@ -112,9 +111,8 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
 
 
                     stats[result.status] += 1
                     stats[result.status] += 1
                     log_archive_method_finished(result)
                     log_archive_method_finished(result)
-                    if not skip_index:
-                        write_search_index(link=link, texts=result.index_texts)
-                        ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
+                    write_search_index(link=link, texts=result.index_texts)
+                    ArchiveResult.objects.create(snapshot=snapshot, extractor=method_name, cmd=result.cmd, cmd_version=result.cmd_version,
                                                  output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
                                                  output=result.output, pwd=result.pwd, start_ts=result.start_ts, end_ts=result.end_ts, status=result.status)
 
 
                 else:
                 else:
@@ -135,7 +133,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
         except Exception:
         except Exception:
             pass
             pass
 
 
-        write_link_details(link, out_dir=out_dir, skip_sql_index=skip_index)
+        write_link_details(link, out_dir=out_dir, skip_sql_index=False)
 
 
         log_link_archiving_finished(link, link.link_dir, is_new, stats)
         log_link_archiving_finished(link, link.link_dir, is_new, stats)
 
 

+ 1 - 1
archivebox/main.py

@@ -524,7 +524,7 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
             )
             )
         raise SystemExit(2)
         raise SystemExit(2)
     methods = ignore_methods(['title'])
     methods = ignore_methods(['title'])
-    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, skip_index=False)
+    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
     return oneshot_link
     return oneshot_link
 
 
 @enforce_types
 @enforce_types