Browse Source

feat: Add flag to list command to support index-like output

Cristian 5 years ago
parent
commit
aab8f96520
5 changed files with 43 additions and 4 deletions
  1. 6 0
      archivebox/cli/archivebox_list.py
  2. 16 2
      archivebox/logging_util.py
  3. 3 2
      archivebox/main.py
  4. 0 0
      tests/test_add.py
  5. 18 0
      tests/test_list.py

+ 6 - 0
archivebox/cli/archivebox_list.py

@@ -46,6 +46,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         action='store_true',
         action='store_true',
         help="Print the output in JSON format with all columns included.",
         help="Print the output in JSON format with all columns included.",
     )
     )
+    parser.add_argument(
+        '--index',
+        action='store_true',
+        help='Include the index additional structures'
+    )
     parser.add_argument(
     parser.add_argument(
         '--sort', #'-s',
         '--sort', #'-s',
         type=str,
         type=str,
@@ -112,6 +117,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         sort=command.sort,
         sort=command.sort,
         csv=command.csv,
         csv=command.csv,
         json=command.json,
         json=command.json,
+        index=command.index,
         out_dir=pwd or OUTPUT_DIR,
         out_dir=pwd or OUTPUT_DIR,
     )
     )
     raise SystemExit(not matching_folders)
     raise SystemExit(not matching_folders)

+ 16 - 2
archivebox/logging_util.py

@@ -14,6 +14,8 @@ from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING
 if TYPE_CHECKING:
 if TYPE_CHECKING:
     from .index.schema import Link, ArchiveResult
     from .index.schema import Link, ArchiveResult
 
 
+from .index.json import MAIN_INDEX_HEADER
+
 from .util import enforce_types
 from .util import enforce_types
 from .config import (
 from .config import (
     ConfigDict,
     ConfigDict,
@@ -460,10 +462,22 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
 @enforce_types
 @enforce_types
 def printable_folders(folders: Dict[str, Optional["Link"]],
 def printable_folders(folders: Dict[str, Optional["Link"]],
                       json: bool=False,
                       json: bool=False,
-                      csv: Optional[str]=None) -> str:
+                      csv: Optional[str]=None,
+                      index: bool=False) -> str:
+    links = folders.values()
     if json: 
     if json: 
         from .index.json import to_json
         from .index.json import to_json
-        return to_json(folders.values(), indent=4, sort_keys=True)
+        if index:
+            output = {
+                **MAIN_INDEX_HEADER,
+                'num_links': len(links),
+                'updated': datetime.now(),
+                'last_run_cmd': sys.argv,
+                'links': links,
+            }
+        else:
+            output = links
+        return to_json(output, indent=4, sort_keys=True)
 
 
     elif csv:
     elif csv:
         from .index.csv import links_to_csv
         from .index.csv import links_to_csv

+ 3 - 2
archivebox/main.py

@@ -151,7 +151,7 @@ def help(out_dir: str=OUTPUT_DIR) -> None:
     )
     )
 
 
 
 
-    if os.path.exists(os.path.join(out_dir, JSON_INDEX_FILENAME)):
+    if os.path.exists(os.path.join(out_dir, SQL_INDEX_FILENAME)):
         print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}
         print('''{green}ArchiveBox v{}: The self-hosted internet archive.{reset}
 
 
 {lightred}Active data directory:{reset}
 {lightred}Active data directory:{reset}
@@ -730,6 +730,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
              sort: Optional[str]=None,
              sort: Optional[str]=None,
              csv: Optional[str]=None,
              csv: Optional[str]=None,
              json: bool=False,
              json: bool=False,
+             index: bool=False,
              out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
              out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
     """List, filter, and export information about archive entries"""
     """List, filter, and export information about archive entries"""
     
     
@@ -762,7 +763,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
         out_dir=out_dir,
         out_dir=out_dir,
     )
     )
     
     
-    print(printable_folders(folders, json=json, csv=csv))
+    print(printable_folders(folders, json=json, csv=csv, index=index))
     return folders
     return folders
 
 
 
 

+ 0 - 0
tests/test_args.py → tests/test_add.py


+ 18 - 0
tests/test_list.py

@@ -0,0 +1,18 @@
+import json
+
+from .fixtures import *
+
+def test_list_json(process, disable_extractors_dict):
+    subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"],
+                                  capture_output=True, env=disable_extractors_dict)
+    list_process = subprocess.run(["archivebox", "list", "--json"], capture_output=True)
+    output_json = json.loads(list_process.stdout.decode("utf-8"))
+    assert output_json[0]["url"] == "http://127.0.0.1:8080/static/example.com.html"
+
+
+def test_list_json_index(process, disable_extractors_dict):
+    subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"],
+                                  capture_output=True, env=disable_extractors_dict)
+    list_process = subprocess.run(["archivebox", "list", "--json", "--index"], capture_output=True)
+    output_json = json.loads(list_process.stdout.decode("utf-8"))
+    assert output_json["links"][0]["url"] == "http://127.0.0.1:8080/static/example.com.html"