
Merge pull request #535 from cdvv7788/extractors-flag

Nick Sweeting, 5 years ago
parent commit fdd4effc92
3 changed files with 31 additions and 6 deletions
  1. archivebox/cli/archivebox_add.py (+9 -1)
  2. archivebox/main.py (+12 -4)
  3. tests/test_add.py (+10 -1)

archivebox/cli/archivebox_add.py (+9 -1)

@@ -62,10 +62,17 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         help="Re-archive URLs from scratch, overwriting any existing files"
     )
     parser.add_argument(
-        '--init', #'-i',
+        "--init", #'-i',
         action='store_true',
         help="Init/upgrade the curent data directory before adding",
     )
+    parser.add_argument(
+        "--extract",
+        type=str,
+        help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
+              This does not take precedence over the configuration",
+        default=""
+    )
     command = parser.parse_args(args or ())
     urls = command.urls
     stdin_urls = accept_stdin(stdin)
@@ -83,6 +90,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         overwrite=command.overwrite,
         init=command.init,
         out_dir=pwd or OUTPUT_DIR,
+        extractors=command.extract,
     )
 
 

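The new --extract flag takes a comma-separated list of extractor names. A minimal sketch of exercising it from Python, mirroring how the test suite below shells out to the CLI (the URL and extractor choices here are illustrative, not taken from the diff):

    import subprocess

    # Run only the wget and media extractors for this URL; per the help text
    # above, unrecognized names in the list are ignored rather than erroring.
    subprocess.run(
        ["archivebox", "add", "https://example.com", "--extract", "wget,media"],
        capture_output=True,
    )
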
archivebox/main.py (+12 -4)

@@ -525,11 +525,14 @@ def add(urls: Union[str, List[str]],
         index_only: bool=False,
         overwrite: bool=False,
         init: bool=False,
-        out_dir: Path=OUTPUT_DIR) -> List[Link]:
+        out_dir: Path=OUTPUT_DIR,
+        extractors: str="") -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
 
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
 
+    extractors = extractors.split(",") if extractors else []
+
     if init:
         run_subcommand('init', stdin=None, pwd=out_dir)
 
@@ -567,12 +570,17 @@ def add(urls: Union[str, List[str]],
         return all_links
 
     # Run the archive methods for each link
+    archive_kwargs = {
+        "out_dir": out_dir,
+    }
+    if extractors:
+        archive_kwargs["methods"] = extractors
     if update_all:
-        archive_links(all_links, overwrite=overwrite, out_dir=out_dir)
+        archive_links(all_links, overwrite=overwrite, **archive_kwargs)
     elif overwrite:
-        archive_links(imported_links, overwrite=True, out_dir=out_dir)
+        archive_links(imported_links, overwrite=True, **archive_kwargs)
     elif new_links:
-        archive_links(new_links, overwrite=False, out_dir=out_dir)
+        archive_links(new_links, overwrite=False, **archive_kwargs)
     
     return all_links
 

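A standalone sketch of the kwargs-building logic introduced above (build_archive_kwargs is a hypothetical helper, not part of this change): "methods" is only set when at least one extractor was passed, so archive_links otherwise falls back to its configured defaults.

    # Hypothetical helper isolating the logic added to add() above.
    def build_archive_kwargs(extractors: str, out_dir: str) -> dict:
        kwargs = {"out_dir": out_dir}
        methods = extractors.split(",") if extractors else []
        if methods:
            # Only override the extractor list when one was actually passed,
            # so an empty --extract leaves the configured defaults in place.
            kwargs["methods"] = methods
        return kwargs

    assert build_archive_kwargs("", "data") == {"out_dir": "data"}
    assert build_archive_kwargs("wget,media", "data") == {
        "out_dir": "data",
        "methods": ["wget", "media"],
    }
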
tests/test_add.py (+10 -1)

@@ -81,4 +81,13 @@ def test_add_updates_history_json_index(tmp_path, process, disable_extractors_di
 
     with open(archived_item_path / "index.json", "r") as f:
         output_json = json.load(f)
-    assert output_json["history"] != {}
+    assert output_json["history"] != {}
+
+def test_extract_input_uses_only_passed_extractors(tmp_path, process):
+    subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--extract", "wget"],
+                    capture_output=True)
+
+    archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
+
+    assert (archived_item_path / "warc").exists()
+    assert not (archived_item_path / "singlefile.html").exists()
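
A complementary test one could add (a sketch, not part of this change, and assuming the add exits cleanly in this case) to pin down the help text's claim that unrecognized extractor names are ignored rather than failing the run:

    def test_extract_ignores_unknown_extractor_names(tmp_path, process):
        # "notarealextractor" is a deliberately invalid name; the add should
        # still succeed because unknown methods are skipped, not fatal.
        result = subprocess.run(
            ["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html",
             "--extract", "notarealextractor"],
            capture_output=True,
        )
        assert result.returncode == 0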