Merge pull request #555 from cdvv7788/cleanup

Nick Sweeting committed 5 years ago · commit 04291c4d47
5 changed files with 42 additions and 65 deletions
  1. archivebox/index/__init__.py (+4 -4)
  2. archivebox/index/html.py (+8 -0)
  3. archivebox/index/json.py (+15 -26)
  4. archivebox/logging_util.py (+0 -32)
  5. archivebox/main.py (+15 -3)

+ 4 - 4
archivebox/index/__init__.py

@@ -383,7 +383,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
 
 def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links without checking archive status or data directory validity"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in links
@@ -391,7 +391,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
 
 def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are archived with a valid data directory"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in filter(is_archived, links)
@@ -399,7 +399,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
 
 def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are unarchived with no data directory or an empty data directory"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in filter(is_unarchived, links)
@@ -424,7 +424,7 @@ def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
 
 def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs with a valid index matched to the main index and archived content"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
    return {
         link.link_dir: link
         for link in filter(is_valid, links)

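All four folder-status helpers now convert snapshots with as_link_with_details() instead of as_link(). A plausible reading, sketched under the assumption that as_link_with_details() additionally loads each snapshot's archive history from its data directory (a bare as_link() conversion would leave link.history empty, giving filters like is_archived and is_valid nothing to inspect):

    # hypothetical illustration, not part of the commit
    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
    # an is_archived-style check only works if history was populated:
    archived = [link for link in links if link.history]
    print({link.link_dir: link.url for link in archived})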
+ 8 - 0
archivebox/index/html.py

@@ -46,6 +46,14 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
                     yield line.split('"')[1]
     return ()
 
+@enforce_types
+def generate_index_from_links(links: List[Link], with_headers: bool):
+    if with_headers:
+        output = main_index_template(links)
+    else:
+        output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
+    return output
+
 @enforce_types
 def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
     """render the template for the entire main index"""

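Usage of the new HTML helper is straightforward; a minimal sketch, assuming links is an already-loaded List[Link]:

    from archivebox.index.html import generate_index_from_links

    full_page = generate_index_from_links(links, with_headers=True)   # renders MAIN_INDEX_TEMPLATE
    fragment = generate_index_from_links(links, with_headers=False)   # renders MINIMAL_INDEX_TEMPLATE

Both branches return the string from main_index_template, so the function could carry a -> str annotation, though the diff leaves it un-annotated.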
+ 15 - 26
archivebox/index/json.py

@@ -8,7 +8,7 @@ from pathlib import Path
 from datetime import datetime
 from typing import List, Optional, Iterator, Any, Union
 
-from .schema import Link, ArchiveResult
+from .schema import Link
 from ..system import atomic_write
 from ..util import enforce_types
 from ..config import (
@@ -39,7 +39,20 @@ MAIN_INDEX_HEADER = {
     },
 }
 
-### Main Links Index
+@enforce_types
+def generate_json_index_from_links(links: List[Link], with_headers: bool):
+    if with_headers:
+        output = {
+            **MAIN_INDEX_HEADER,
+            'num_links': len(links),
+            'updated': datetime.now(),
+            'last_run_cmd': sys.argv,
+            'links': links,
+        }
+    else:
+        output = links
+    return to_json(output, indent=4, sort_keys=True)
+
 
 @enforce_types
 def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
@@ -65,30 +78,6 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
                             continue
     return ()
 
-@enforce_types
-def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
-    """write the json link index to a given path"""
-
-    assert isinstance(links, List), 'Links must be a list, not a generator.'
-    assert not links or isinstance(links[0].history, dict)
-    assert not links or isinstance(links[0].sources, list)
-
-    if links and links[0].history.get('title'):
-        assert isinstance(links[0].history['title'][0], ArchiveResult)
-
-    if links and links[0].sources:
-        assert isinstance(links[0].sources[0], str)
-
-    main_index_json = {
-        **MAIN_INDEX_HEADER,
-        'num_links': len(links),
-        'updated': datetime.now(),
-        'last_run_cmd': sys.argv,
-        'links': links,
-    }
-    atomic_write(str(Path(out_dir) / JSON_INDEX_FILENAME), main_index_json)
-
-
 ### Link Details Index
 
 @enforce_types

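The JSON counterpart produces one of two shapes: with headers it wraps the links in the MAIN_INDEX_HEADER metadata plus num_links, updated, and last_run_cmd; without, it serializes the bare list. A hedged sketch (links hypothetical, output abbreviated):

    from archivebox.index.json import generate_json_index_from_links

    print(generate_json_index_from_links(links, with_headers=True))
    # {"links": [...], "num_links": 2, "updated": "...", ...}
    print(generate_json_index_from_links(links, with_headers=False))
    # [{"url": "..."}, {"url": "..."}]

Note that deleting write_json_main_index also drops its runtime assertions (that links is a list and that history and sources have the expected shapes); the new helper performs no such validation.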
+ 0 - 32
archivebox/logging_util.py

@@ -477,39 +477,7 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
 
 @enforce_types
 def printable_folders(folders: Dict[str, Optional["Link"]],
-                      json: bool=False,
-                      html: bool=False,
-                      csv: Optional[str]=None,
                       with_headers: bool=False) -> str:
-
-    from .index.json import MAIN_INDEX_HEADER
-
-    links = folders.values()
-    if json:
-        from .index.json import to_json
-        if with_headers:
-            output = {
-                **MAIN_INDEX_HEADER,
-                'num_links': len(links),
-                'updated': datetime.now(),
-                'last_run_cmd': sys.argv,
-                'links': links,
-            }
-        else:
-            output = links
-        return to_json(output, indent=4, sort_keys=True)
-    elif html:
-        from .index.html import main_index_template
-        if with_headers:
-            output = main_index_template(links)
-        else:
-            from .index.html import MINIMAL_INDEX_TEMPLATE
-            output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
-        return output
-    elif csv:
-        from .index.csv import links_to_csv
-        return links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
-
     return '\n'.join(
         f'{folder} {link and link.url} "{link and link.title}"'
         for folder, link in folders.items()

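With the json/html/csv branches gone, printable_folders is reduced to the plain-text listing, one line per folder. Sample output (values hypothetical):

    1611234567.0 https://example.com "Example Domain"
    1611234568.0 None "None"

Folders whose index entry could not be matched print None for the url and title, since each dict value is an Optional[Link].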
+ 15 - 3
archivebox/main.py

@@ -49,12 +49,17 @@ from .index import (
 from .index.json import (
     parse_json_main_index,
     parse_json_links_details,
+    generate_json_index_from_links,
 )
 from .index.sql import (
     get_admins,
     apply_migrations,
     remove_from_sql_main_index,
 )
+from .index.html import (
+    generate_index_from_links,
+)
+from .index.csv import links_to_csv
 from .extractors import archive_links, archive_link, ignore_methods
 from .config import (
     stderr,
@@ -745,7 +750,6 @@ def list_all(filter_patterns_str: Optional[str]=None,
     elif filter_patterns_str:
         filter_patterns = filter_patterns_str.split('\n')
 
-
     snapshots = list_links(
         filter_patterns=filter_patterns,
         filter_type=filter_type,
@@ -761,8 +765,16 @@ def list_all(filter_patterns_str: Optional[str]=None,
         status=status,
         out_dir=out_dir,
     )
-
-    print(printable_folders(folders, json=json, csv=csv, html=html, with_headers=with_headers))
+
+    if json:
+        output = generate_json_index_from_links(folders.values(), with_headers)
+    elif html:
+        output = generate_index_from_links(folders.values(), with_headers)
+    elif csv:
+        output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
+    else:
+        output = printable_folders(folders, with_headers=with_headers)
+    print(output)
     return folders
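
Format dispatch now lives in list_all itself, so printable_folders no longer needs its inline imports of .index.json, .index.html, and .index.csv. A hedged example of driving each branch from Python (parameter names taken from the hunks above; filter values illustrative, CLI wiring not shown in this diff):

    from archivebox.main import list_all

    list_all(filter_patterns_str='example.com', filter_type='substring', json=True, with_headers=True)
    list_all(csv='timestamp,url,title', with_headers=True)   # header row included
    list_all(html=True)   # minimal HTML table, no headers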