
feat: depth=0 crawls the current page only

Cristian · 5 years ago
parent commit 2db0324539
2 changed files with 21 additions and 5 deletions:
  1. archivebox/cli/archivebox_add.py (+11, -3)
  2. tests/test_args.py (+10, -2)

archivebox/cli/archivebox_add.py (+11, -3)

@@ -53,14 +53,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         help="Recursively archive all linked pages up to this many hops away"
     )
     command = parser.parse_args(args or ())
-    import_str = accept_stdin(stdin)
+    #import_str = accept_stdin(stdin)
     add(
-        import_str=import_str,
-        import_path=command.import_path,
+        import_str=command.import_path,
+        import_path=None,
         update_all=command.update_all,
         index_only=command.index_only,
         out_dir=pwd or OUTPUT_DIR,
     )
+    #if command.depth == 1:
+    #    add(
+    #        import_str=None,
+    #        import_path=command.import_path,
+    #        update_all=command.update_all,
+    #        index_only=command.index_only,
+    #        out_dir=pwd or OUTPUT_DIR,
+    #    )
 
 
 if __name__ == '__main__':
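
For context, here is a minimal, self-contained sketch of the dispatch this change seems to be working toward (an assumption inferred from the commented-out block above, not the committed code): with depth=0 the positional URL is passed straight to add() as import_str, so only that page is archived; with depth=1 it would be passed as import_path, so the page is fetched and its outlinks archived as well. The add() below is a stand-in for archivebox.main.add(), and the choices=[0, 1] range is assumed.

    import argparse
    from typing import List, Optional

    def add(import_str: Optional[str] = None, import_path: Optional[str] = None) -> None:
        # Stand-in for archivebox.main.add(): just report what would be archived.
        if import_str is not None:
            print(f"depth=0: archiving only {import_str}")
        elif import_path is not None:
            print(f"depth=1: fetching {import_path} and archiving its outlinks")

    def main(args: Optional[List[str]] = None) -> None:
        parser = argparse.ArgumentParser(prog="archivebox add")
        parser.add_argument("import_path", type=str)
        parser.add_argument(
            "--depth",
            type=int,
            choices=[0, 1],      # assumed range; only depth=0 is wired up in this commit
            default=0,
            help="Recursively archive all linked pages up to this many hops away",
        )
        command = parser.parse_args(args or ())

        if command.depth == 0:
            # depth=0: archive the given URL only
            add(import_str=command.import_path)
        else:
            # depth=1: crawl the URL and archive every page it links to
            add(import_path=command.import_path)

    if __name__ == '__main__':
        main(["https://example.com", "--depth=0"])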

tests/test_args.py (+10, -2)

@@ -1,7 +1,15 @@
 import subprocess
+import json
 
 from .fixtures import *
 
-def test_depth_flag_is_accepted(tmp_path, process):
+def test_depth_flag_is_accepted(process):
     arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True)
-    assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8')
+    assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8')
+
+def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process):
+    arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True)
+    archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
+    with open(archived_item_path / "index.json", "r") as f:
+        output_json = json.load(f)
+    assert output_json["base_url"] == "example.com"
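
The new test can be reproduced by hand roughly as follows (a sketch, not the test suite itself: it assumes the working directory is an initialized ArchiveBox data folder, which the process fixture presumably sets up, and that each snapshot lands in its own folder under archive/ containing an index.json):

    import json
    import pathlib
    import subprocess

    # Archive a single URL without following any of its links.
    subprocess.run(
        ["archivebox", "add", "https://example.com", "--depth=0"],
        capture_output=True,
    )

    # Each snapshot gets its own folder under archive/ with an index.json
    # describing the captured URL.
    snapshot_dir = next(pathlib.Path(".").glob("archive/*"))
    with open(snapshot_dir / "index.json") as f:
        output_json = json.load(f)

    # With --depth=0 only https://example.com itself should have been archived.
    assert output_json["base_url"] == "example.com"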