Browse Source

feat: WARC link points to the first warc result in target path

Cristian 5 years ago
parent
commit
e594e6a75a
2 changed files with 10 additions and 4 deletions
  1. 5 3
      archivebox/core/utils.py
  2. 5 1
      archivebox/themes/default/base.html

+ 5 - 3
archivebox/core/utils.py

@@ -1,6 +1,7 @@
 from django.utils.html import format_html
 
 from core.models import Snapshot, EXTRACTORS
+from pathlib import Path
 
 
 def get_icons(snapshot: Snapshot) -> str:
@@ -34,9 +35,10 @@ def get_icons(snapshot: Snapshot) -> str:
                                                  exists, extractor, icons.get(extractor, "?"))
             if extractor == "wget":
                 # warc isn't technically its own extractor, so we have to add it after wget
-
-                output += output_template.format(path, canon["warc_path"],
-                                                 exists, "warc", icons.get("warc", "?"))
+                exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
+                if exists:
+                    output += output_template.format(exists[0], "",
+                                                     True, "warc", icons.get("warc", "?"))
 
         except Exception as e:
             print(e)

+ 5 - 1
archivebox/themes/default/base.html

@@ -223,6 +223,10 @@
         .title-col a {
             color: black;
         }
+
+        .exists-False {
+          display: none;
+        }
     </style>
     <link rel="stylesheet" href="{% static 'bootstrap.min.css' %}">
     <link rel="stylesheet" href="{% static 'jquery.dataTables.min.css' %}" />
@@ -283,4 +287,4 @@
         </footer>
     </body>
     
-    </html>
+    </html>