Sfoglia il codice sorgente

feat: Add --extract flag to update command

Cristian 5 anni fa
parent
commit
35389608d1
2 file modificati con 18 aggiunte e 1 eliminazione
  1. 8 0
      archivebox/cli/archivebox_update.py
  2. 10 1
      archivebox/main.py

+ 8 - 0
archivebox/cli/archivebox_update.py

@@ -102,6 +102,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         default=None,
         help='Update only URLs matching these filter patterns.'
     )
+    parser.add_argument(
+        "--extract",
+        type=str,
+        help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
+              This does not take precedence over the configuration",
+        default=""
+    )
     command = parser.parse_args(args or ())
     filter_patterns_str = accept_stdin(stdin)
 
@@ -117,6 +124,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         after=command.after,
         before=command.before,
         out_dir=pwd or OUTPUT_DIR,
+        extractors=command.extract,
     )
     
 

+ 10 - 1
archivebox/main.py

@@ -680,6 +680,7 @@ def update(resume: Optional[float]=None,
            status: Optional[str]=None,
            after: Optional[str]=None,
            before: Optional[str]=None,
+           extractors: str="",
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
 
@@ -687,6 +688,8 @@ def update(resume: Optional[float]=None,
     check_dependencies()
     new_links: List[Link] = [] # TODO: Remove input argument: only_new
 
+    extractors = extractors.split(",") if extractors else []
+
     # Step 1: Filter for selected_links
     matching_snapshots = list_links(
         filter_patterns=filter_patterns,
@@ -717,7 +720,13 @@ def update(resume: Optional[float]=None,
             stderr(f'[√] Nothing found to resume after {resume}', color='green')
             return all_links
 
-    archive_links(to_archive, overwrite=overwrite, out_dir=out_dir)
+    archive_kwargs = {
+        "out_dir": out_dir,
+    }
+    if extractors:
+        archive_kwargs["methods"] = extractors
+
+    archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
 
     # Step 4: Re-write links index with updated titles, icons, and resources
     all_links = load_main_index(out_dir=out_dir)