Browse Source

feat: Add html export to list command

Cristian 5 years ago
parent
commit
885ff50449

+ 6 - 0
archivebox/cli/archivebox_list.py

@@ -46,6 +46,11 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         action='store_true',
         action='store_true',
         help="Print the output in JSON format with all columns included.",
         help="Print the output in JSON format with all columns included.",
     )
     )
+    group.add_argument(
+        '--html',
+        action='store_true',
+        help="Print the output in HTML format"
+    )
     parser.add_argument(
     parser.add_argument(
         '--index',
         '--index',
         action='store_true',
         action='store_true',
@@ -117,6 +122,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         sort=command.sort,
         sort=command.sort,
         csv=command.csv,
         csv=command.csv,
         json=command.json,
         json=command.json,
+        html=command.html,
         index=command.index,
         index=command.index,
         out_dir=pwd or OUTPUT_DIR,
         out_dir=pwd or OUTPUT_DIR,
     )
     )

+ 3 - 2
archivebox/index/html.py

@@ -31,6 +31,7 @@ from ..config import (
 
 
 join = lambda *paths: os.path.join(*paths)
 join = lambda *paths: os.path.join(*paths)
 MAIN_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index.html')
 MAIN_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index.html')
+MINIMAL_INDEX_TEMPLATE = join(TEMPLATES_DIR, 'main_index_minimal.html')
 MAIN_INDEX_ROW_TEMPLATE = join(TEMPLATES_DIR, 'main_index_row.html')
 MAIN_INDEX_ROW_TEMPLATE = join(TEMPLATES_DIR, 'main_index_row.html')
 LINK_DETAILS_TEMPLATE = join(TEMPLATES_DIR, 'link_details.html')
 LINK_DETAILS_TEMPLATE = join(TEMPLATES_DIR, 'link_details.html')
 TITLE_LOADING_MSG = 'Not yet archived...'
 TITLE_LOADING_MSG = 'Not yet archived...'
@@ -63,10 +64,10 @@ def write_html_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished:
 
 
 
 
 @enforce_types
 @enforce_types
-def main_index_template(links: List[Link], finished: bool=True) -> str:
+def main_index_template(links: List[Link], finished: bool=True, template: str=MAIN_INDEX_TEMPLATE) -> str:
     """render the template for the entire main index"""
     """render the template for the entire main index"""
 
 
-    return render_legacy_template(MAIN_INDEX_TEMPLATE, {
+    return render_legacy_template(template, {
         'version': VERSION,
         'version': VERSION,
         'git_sha': GIT_SHA,
         'git_sha': GIT_SHA,
         'num_links': str(len(links)),
         'num_links': str(len(links)),

+ 9 - 1
archivebox/logging_util.py

@@ -462,6 +462,7 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
 @enforce_types
 @enforce_types
 def printable_folders(folders: Dict[str, Optional["Link"]],
 def printable_folders(folders: Dict[str, Optional["Link"]],
                       json: bool=False,
                       json: bool=False,
+                      html: bool=False,
                       csv: Optional[str]=None,
                       csv: Optional[str]=None,
                       index: bool=False) -> str:
                       index: bool=False) -> str:
     links = folders.values()
     links = folders.values()
@@ -478,7 +479,14 @@ def printable_folders(folders: Dict[str, Optional["Link"]],
         else:
         else:
             output = links
             output = links
         return to_json(output, indent=4, sort_keys=True)
         return to_json(output, indent=4, sort_keys=True)
-
+    elif html:
+        from .index.html import main_index_template
+        if index:
+            output = main_index_template(links, True)
+        else:
+            from .index.html import MINIMAL_INDEX_TEMPLATE
+            output = main_index_template(links, True, MINIMAL_INDEX_TEMPLATE)
+        return output
     elif csv:
     elif csv:
         from .index.csv import links_to_csv
         from .index.csv import links_to_csv
         return links_to_csv(folders.values(), cols=csv.split(','), header=True)
         return links_to_csv(folders.values(), cols=csv.split(','), header=True)

+ 2 - 1
archivebox/main.py

@@ -730,6 +730,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
              sort: Optional[str]=None,
              sort: Optional[str]=None,
              csv: Optional[str]=None,
              csv: Optional[str]=None,
              json: bool=False,
              json: bool=False,
+             html: bool=False,
              index: bool=False,
              index: bool=False,
              out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
              out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
     """List, filter, and export information about archive entries"""
     """List, filter, and export information about archive entries"""
@@ -763,7 +764,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
         out_dir=out_dir,
         out_dir=out_dir,
     )
     )
     
     
-    print(printable_folders(folders, json=json, csv=csv, index=index))
+    print(printable_folders(folders, json=json, csv=csv, html=html, index=index))
     return folders
     return folders
 
 
 
 

+ 20 - 0
archivebox/themes/legacy/main_index_minimal.html

@@ -0,0 +1,20 @@
+<!DOCTYPE html>
+<!--
+    Minimal main index page: a single table of archived links, with no
+    header, footer or styling beyond the column widths.
+    The status, num_links and rows placeholders below are filled in by the
+    template renderer. (They are named here without the dollar prefix on
+    purpose: presumably the renderer would also substitute inside comments.)
+-->
+<html lang="en">
+    <head>
+        <!-- charset must be its own meta element; it cannot share one with name/content -->
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <title>Archived Sites</title>
+    </head>
+    <body data-status="$status">
+        <table id="table-bookmarks">
+            <thead>
+                <tr class="thead-tr">
+                    <th style="width: 100px;">Bookmarked</th>
+                    <th style="width: 26vw;">Saved Link ($num_links)</th>
+                    <th style="width: 50px">Files</th>
+                    <th style="width: 16vw;white-space:nowrap;overflow-x:hidden;">Original URL</th>
+                </tr>
+            </thead>
+            <tbody>$rows</tbody>
+        </table>
+    </body>
+</html>

+ 16 - 0
tests/test_list.py

@@ -16,3 +16,19 @@ def test_list_json_index(process, disable_extractors_dict):
     list_process = subprocess.run(["archivebox", "list", "--json", "--index"], capture_output=True)
     list_process = subprocess.run(["archivebox", "list", "--json", "--index"], capture_output=True)
     output_json = json.loads(list_process.stdout.decode("utf-8"))
     output_json = json.loads(list_process.stdout.decode("utf-8"))
     assert output_json["links"][0]["url"] == "http://127.0.0.1:8080/static/example.com.html"
     assert output_json["links"][0]["url"] == "http://127.0.0.1:8080/static/example.com.html"
+
+def test_list_html(process, disable_extractors_dict):
+    subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"],
+                                  capture_output=True, env=disable_extractors_dict)
+    list_process = subprocess.run(["archivebox", "list", "--html"], capture_output=True)
+    output_html = list_process.stdout.decode("utf-8")
+    assert "<footer>" not in output_html
+    assert "http://127.0.0.1:8080/static/example.com.html" in output_html
+
+def test_list_html_index(process, disable_extractors_dict):
+    subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"],
+                                  capture_output=True, env=disable_extractors_dict)
+    list_process = subprocess.run(["archivebox", "list", "--html", "--index"], capture_output=True)
+    output_html = list_process.stdout.decode("utf-8")
+    assert "<footer>" in output_html
+    assert "http://127.0.0.1:8080/static/example.com.html" in output_html