Browse source

better UX before titles have been fetched while archiving is in progress

Nick Sweeting 6 years ago
parent
commit
eb5cc8078a

+ 7 - 4
archivebox/archive_methods.py

@@ -7,7 +7,7 @@ from datetime import datetime
 from index import (
     parse_json_link_index,
     write_link_index,
-    patch_index_title_hack,
+    update_main_index,
 )
 from config import (
     CURL_BINARY,
@@ -103,7 +103,9 @@ def archive_link(link_dir, link, overwrite=True):
         for archive_method in active_methods:
             archive_method(link_dir, link, overwrite=overwrite)
 
+
         write_link_index(link_dir, link)
+        update_main_index(link)
 
     except Exception as err:
         print('    ! Failed to archive link: {}: {}'.format(err.__class__.__name__, err))
@@ -218,7 +220,7 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC
     try:
         result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=link_dir, timeout=timeout)
         end()
-        output = wget_output_path(link, look_in=domain_dir)
+        output = wget_output_path(link)
 
         output_tail = ['          ' + line for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:] if line.strip()]
 
@@ -391,11 +393,13 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT):
     output = 'archive.org.txt'
     archive_org_url = None
 
+
     path = os.path.join(link_dir, output)
     if os.path.exists(path):
         archive_org_url = open(path, 'r').read().strip()
         return {'output': archive_org_url, 'status': 'skipped'}
 
+
     submit_url = 'https://web.archive.org/save/{}'.format(link['url'])
     CMD = [
         CURL_BINARY,
@@ -412,7 +416,6 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT):
         end()
 
         content_location, errors = parse_archive_dot_org_response(result.stdout)
-
         if content_location:
             archive_org_url = 'https://web.archive.org{}'.format(content_location[0])
         elif len(errors) == 1 and 'RobotAccessControlException' in errors[0]:
@@ -427,6 +430,7 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT):
         output = e
         print_error_hints(cmd=CMD, pwd=link_dir, err=e)
 
+
     if not isinstance(output, Exception):
         # instead of writing None when archive.org rejects the url write the
         # url to resubmit it to archive.org. This is so when the user visits
@@ -499,7 +503,6 @@ def fetch_title(link_dir, link, timeout=TIMEOUT):
     # TODO: figure out how to do this without gnarly string replacement
     if title:
         link['title'] = title
-        patch_index_title_hack(link['url'], title)
 
     return {
         'cmd': 'fetch_page_title("{}")'.format(link['url']),
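
With this change, the per-link title patch moves out of `fetch_title` entirely: `archive_link` now calls the new `update_main_index(link)` right after writing the per-link index, so the main index picks up each link's title and output count as soon as that link finishes, instead of waiting for the whole run. A condensed, self-contained sketch of the resulting flow (the stubs below stand in for the project's real helpers; overwrite/skip handling lives inside each archive method, as the diff shows):

```python
# Condensed sketch of the post-commit flow in archive_methods.py.
# The three helpers below are stand-ins for the project's real functions.
def fetch_wget(link_dir, link, overwrite=True):   # one of several archive methods
    pass

def write_link_index(link_dir, link):             # stand-in: writes <link_dir>/index.json + .html
    pass

def update_main_index(link):                      # stand-in: patches the global index (see index.py below)
    pass

def archive_link(link_dir, link, overwrite=True):
    active_methods = [fetch_wget]                 # the real list holds every enabled method
    try:
        for archive_method in active_methods:
            archive_method(link_dir, link, overwrite=overwrite)

        write_link_index(link_dir, link)          # per-link index in its own folder
        update_main_index(link)                   # refresh the global index as each link finishes
    except Exception as err:
        print('    ! Failed to archive link: {}: {}'.format(err.__class__.__name__, err))

archive_link('output/archive/1234567890', {'url': 'https://example.com'})
```
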

+ 49 - 9
archivebox/index.py

@@ -22,8 +22,11 @@ from util import (
     pretty_path,
     check_link_structure,
     check_links_structure,
+    wget_output_path,
 )
 
+TITLE_LOADING_MSG = 'Not yet archived...'
+
 
 ### Homepage index for all the links
 
@@ -96,9 +99,20 @@ def write_html_links_index(out_dir, links, finished=False):
     with open(os.path.join(TEMPLATES_DIR, 'index_row.html'), 'r', encoding='utf-8') as f:
         link_row_html = f.read()
 
+    full_links_info = (derived_link_info(link) for link in links)
+
     link_rows = '\n'.join(
-        Template(link_row_html).substitute(**derived_link_info(link))
-        for link in links
+        Template(link_row_html).substitute(**{
+            **link,
+            'title': (
+                link['title']
+                or (link['base_url'] if link['is_archived'] else TITLE_LOADING_MSG)
+            ),
+            'archive_url': (
+                wget_output_path(link) or 'index.html'
+            ),
+        })
+        for link in full_links_info
     )
 
     template_vars = {
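
The row template is now fed a `title` that degrades gracefully: the fetched title if one exists, otherwise the link's `base_url` once it has been archived, otherwise the `TITLE_LOADING_MSG` placeholder. A minimal standalone sketch of that fallback, assuming (as the code above does) that `derived_link_info` supplies `base_url` and `is_archived`:

```python
# Hypothetical standalone version of the title fallback used above.
TITLE_LOADING_MSG = 'Not yet archived...'

def display_title(link):
    # `link` stands in for the dict produced by derived_link_info();
    # 'base_url' and 'is_archived' are assumed present, as in the template code.
    return (
        link.get('title')
        or (link['base_url'] if link['is_archived'] else TITLE_LOADING_MSG)
    )

print(display_title({'title': None, 'base_url': 'example.com', 'is_archived': False}))
# -> 'Not yet archived...'
```
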
@@ -118,24 +132,41 @@ def write_html_links_index(out_dir, links, finished=False):
     chmod_file(path)
 
 
-def patch_index_title_hack(link_url, new_title):
-    """hack to update just one link's title in the link index json"""
+def update_main_index(link):
+    """hack to in-place update one row's info in the generated index html"""
+
+    title = link['latest']['title']
+    successful = len([entry for entry in link['latest'].values() if entry])
 
+    # Patch JSON index
     json_path = os.path.join(OUTPUT_DIR, 'index.json')
 
     links = parse_json_links_index(OUTPUT_DIR)
 
     changed = False
-    for link in links:
-        if link['url'] == link_url:
-            link['title'] = new_title
+    for json_link in links:
+        if json_link['url'] == link['url']:
+            json_link['title'] = title
+            json_link['latest'] = link['latest']
             changed = True
             break
 
     if changed:
         write_json_links_index(OUTPUT_DIR, links)
 
+    # Patch HTML index
+    html_path = os.path.join(OUTPUT_DIR, 'index.html')
 
+    html = open(html_path, 'r').read().split('\n')
+    for idx, line in enumerate(html):
+        if title and ('<span data-title-for="{}"'.format(link['url']) in line):
+            html[idx] = '<span>{}</span>'.format(title)
+        elif successful and ('<span data-number-for="{}"'.format(link['url']) in line):
+            html[idx] = '<span>{}</span>'.format(successful)
+            break
+
+    with open(html_path, 'w') as f:
+        f.write('\n'.join(html))
 
 
 ### Individual link index
 
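The HTML half of `update_main_index` works by convention with the `index_row.html` template (changed below): each row carries `<span data-title-for="$url">` and `<span data-number-for="$url">` markers, so a single row can be patched with plain string matching instead of re-rendering the whole index. A self-contained sketch of that line-matching idea, run against a made-up in-memory page rather than the real index.html:

```python
# Demonstration of the data-attribute patching convention on a made-up page.
url = 'https://example.com'
page = [
    '<tr data-url="{}">'.format(url),
    '<span data-title-for="{}" data-archived="False">Not yet archived...</span>'.format(url),
    '<span data-number-for="{}">0</span>'.format(url),
]

title, successful = 'Example Domain', 3
for idx, line in enumerate(page):
    if title and '<span data-title-for="{}"'.format(url) in line:
        page[idx] = '<span>{}</span>'.format(title)     # marker span replaced outright
    elif successful and '<span data-number-for="{}"'.format(url) in line:
        page[idx] = '<span>{}</span>'.format(successful)
        break

print('\n'.join(page))
```

Note that replacing the whole line also drops the `data-*` marker, so each span can only be patched this way once between full index rewrites by `write_html_links_index`.
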
@@ -176,10 +207,19 @@ def write_html_link_index(out_dir, link):
 
     print('      √ index.html')
 
+    link = derived_link_info(link)
+
     with open(path, 'w', encoding='utf-8') as f:
         f.write(Template(link_html).substitute({
-            **derived_link_info(link),
-            # **link['latest'],
+            **link,
+            'title': (
+                link['title']
+                or (link['base_url'] if link['is_archived'] else TITLE_LOADING_MSG)
+            ),
+            'archive_url': (
+                wget_output_path(link)
+                or (link['domain'] if link['is_archived'] else 'about:blank')
+            ),
         }))
 
     chmod_file(path)
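
The per-link page gets the same title fallback, but a slightly different `archive_url` fallback than the main index: `wget_output_path(link)`, else the bare `domain` if the link is already archived, else `about:blank`. A tiny illustration of that precedence, with a stubbed `wget_output_path` (the real one lives in util.py and inspects what wget actually wrote to disk):

```python
# Stub for illustration only; the real wget_output_path inspects files on disk.
def wget_output_path(link):
    return None  # pretend wget produced no usable output for this link

link = {'domain': 'example.com', 'is_archived': True}
archive_url = (
    wget_output_path(link)
    or (link['domain'] if link['is_archived'] else 'about:blank')
)
print(archive_url)  # -> 'example.com'; would be 'about:blank' if not yet archived
```
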

+ 23 - 5
archivebox/templates/index.html

@@ -98,6 +98,28 @@
                 overflow-y: scroll;
                 table-layout: fixed;
             }
+            table tr a span[data-archived~=False] {
+                opacity: 0.2;
+            }
+            .files-spinner {
+                height: 15px;
+                width: auto;
+                opacity: 0.5;
+                vertical-align: -2px;
+            }
+            .link-favicon {
+                padding-right: 8px;
+                vertical-align: -4px;
+            }
+            .in-progress {
+                display: none;
+            }
+            body[data-status~=finished] .files-spinner {
+                display: none;
+            }
+            body[data-status~=running] .in-progress {
+                display: inline-block;
+            }
         </style>
     </head>
     <body data-status="$status">
@@ -121,12 +143,8 @@
             <thead>
                 <tr>
                     <th style="width: 80px;">Bookmarked</th>
-                    <th style="width: 26px;">Files</th>
                     <th style="width: 26vw;">Saved Link ($num_links)</th>
-                    <th style="width: 30px;">PNG</th>
-                    <th style="width: 30px">PDF</th>
-                    <th style="width: 30px">HTML</th>
-                    <th style="width: 30px">A.org</th>
+                    <th style="width: 50px">Saved Files</th>
                     <th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
                 </tr>
             </thead>
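
Everything in the new CSS keys off data attributes filled in from Python: title spans with `data-archived=False` render at 20% opacity, and the spinners and `.in-progress` elements toggle based on the `data-status` attribute on `<body>`. The `$status` value is presumably derived from the `finished` flag passed to `write_html_links_index`; the exact assignment is outside this diff, but it would be something like:

```python
# Assumption: write_html_links_index maps its `finished` argument onto the
# $status placeholder consumed by <body data-status="..."> in this template.
def status_for(finished):
    # 'running' shows .in-progress elements and .files-spinner images;
    # 'finished' hides the spinners via body[data-status~=finished].
    return 'finished' if finished else 'running'

print(status_for(False))  # -> 'running'
```
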

+ 10 - 8
archivebox/templates/index_row.html

@@ -1,16 +1,18 @@
-<tr>
+<tr data-url="$url">
     <td title="Bookmarked timestamp: $timestamp">$bookmarked_date</td>
-    <td>
+    <td style="text-align: left">
         <a href="$link_dir/$index_url" title="Link Index">
             <img src="$link_dir/$favicon_url" onerror="this.src='static/spinner.gif'" class="link-favicon">
         </a>
+        <a href="$link_dir/$archive_url" style="font-size:1.4em;text-decoration:none;color:black;" title="$title">
+            <span data-title-for="$url" data-archived="$is_archived">$title</span>
+            <small style="background-color: #eee;border-radius:4px; float:right">$tags</small>
+        </a>
     </td>
-    <td style="text-align: left"><a href="$link_dir/$archive_url" style="font-size:1.4em;text-decoration:none;color:black;" title="$title">
-        $title <small style="background-color: #eee;border-radius:4px; float:right">$tags</small>
+    <td>
+        <a href="$link_dir/$index_url">📄 
+            <span data-number-for="$url" title="Fetching any missing files...">$num_outputs <img src="static/spinner.gif" class="files-spinner"/></span>
+        </a>
     </td>
-    <td><a href="$link_dir/$screenshot_url" title="Screenshot">🖼</a></td>
-    <td><a href="$link_dir/$pdf_url" title="PDF">📜</a></td>
-    <td><a href="$link_dir/$dom_url" title="DOM">📄</a></td>
-    <td><a href="$archive_org_url" title="Archive.org">🏛</a></td>
     <td style="text-align: left"><!--🔗 <img src="$google_favicon_url" height="16px">--> <a href="$url">$url</a></td>
 </tr>

+ 1 - 1
archivebox/util.py

@@ -244,7 +244,7 @@ def fetch_page_title(url, timeout=10, progress=SHOW_PROGRESS):
         # ))
         return None
 
-def wget_output_path(link, look_in=None):
+def wget_output_path(link):
     """calculate the path to the wgetted .html file, since wget may
     adjust some paths to be different than the base_url path.
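
Dropping the unused `look_in` parameter matches the call-site change in `fetch_wget` above: the saved-page path is now derived from the link alone, and callers treat a falsy result as "no wget output yet". A hedged sketch of that contract (the stub below is hypothetical; the real function resolves the actual file wget created):

```python
# Hypothetical stub showing only the contract the callers in this diff rely on.
def wget_output_path(link):
    # real version: returns a path like 'example.com/index.html' based on
    # what wget wrote for link['url'], or None if nothing usable exists yet
    return None

link = {'url': 'https://example.com'}
print(wget_output_path(link) or 'index.html')  # main-index fallback -> 'index.html'
```
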