Răsfoiți Sursa

add extractors arg to oneshot command and bump version to v0.5.1

Nick Sweeting 5 ani în urmă
părinte
comite
9fa70b3452

+ 1 - 1
archivebox/cli/archivebox_add.py

@@ -89,8 +89,8 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         index_only=command.index_only,
         overwrite=command.overwrite,
         init=command.init,
-        out_dir=pwd or OUTPUT_DIR,
         extractors=command.extract,
+        out_dir=pwd or OUTPUT_DIR,
     )
 
 

+ 8 - 0
archivebox/cli/archivebox_oneshot.py

@@ -36,6 +36,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
             '    ~/Desktop/sites_list.csv\n'
         )
     )
+    parser.add_argument(
+        "--extract",
+        type=str,
+        help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
+              This does not take precedence over the configuration",
+        default=""
+    )
     parser.add_argument(
         '--out-dir',
         type=str,
@@ -55,6 +62,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     oneshot(
         url=stdin_url or url,
         out_dir=Path(command.out_dir).resolve(),
+        extractors=command.extract,
     )
 
 

+ 5 - 4
archivebox/main.py

@@ -511,7 +511,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
 
 
 @enforce_types
-def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
+def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
     """
     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
     You can run this to archive single pages without needing to create a whole collection with archivebox init.
@@ -523,7 +523,8 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
                 color='red'
             )
         raise SystemExit(2)
-    methods = ignore_methods(['title'])
+
+    methods = extractors.split(",") if extractors else ignore_methods(['title'])
     archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
     return oneshot_link
 
@@ -534,8 +535,8 @@ def add(urls: Union[str, List[str]],
         index_only: bool=False,
         overwrite: bool=False,
         init: bool=False,
-        out_dir: Path=OUTPUT_DIR,
-        extractors: str="") -> List[Link]:
+        extractors: str="",
+        out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
 
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'

+ 1 - 1
package.json

@@ -1,6 +1,6 @@
 {
   "name": "archivebox",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "description": "ArchiveBox: The self-hosted internet archive",
   "author": "Nick Sweeting <[email protected]>",
   "license": "MIT",