Browse Source

catch json parse errors in link archiving

Nick Sweeting 6 years ago
parent
commit
ad7038e031
1 changed files with 30 additions and 27 deletions
  1. 30 27
      archivebox/archive_methods.py

+ 30 - 27
archivebox/archive_methods.py

@@ -80,43 +80,46 @@ def archive_links(archive_path, links, source=None, resume=None):
 def archive_link(link_dir, link, overwrite=True):
     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
 
-    update_existing = os.path.exists(link_dir)
-    if update_existing:
-        link = {
-            **parse_json_link_index(link_dir),
-            **link,
-        }
-    else:
-        os.makedirs(link_dir)
-    
-    log_link_archive(link_dir, link, update_existing)
+    try:
+        update_existing = os.path.exists(link_dir)
+        if update_existing:
+            link = {
+                **parse_json_link_index(link_dir),
+                **link,
+            }
+        else:
+            os.makedirs(link_dir)
+        
+        log_link_archive(link_dir, link, update_existing)
 
-    if FETCH_FAVICON:
-        link = fetch_favicon(link_dir, link, overwrite=overwrite)
+        if FETCH_FAVICON:
+            link = fetch_favicon(link_dir, link, overwrite=overwrite)
 
-    if FETCH_WGET:
-        link = fetch_wget(link_dir, link, overwrite=overwrite)
+        if FETCH_WGET:
+            link = fetch_wget(link_dir, link, overwrite=overwrite)
 
-    if FETCH_PDF:
-        link = fetch_pdf(link_dir, link, overwrite=overwrite)
+        if FETCH_PDF:
+            link = fetch_pdf(link_dir, link, overwrite=overwrite)
 
-    if FETCH_SCREENSHOT:
-        link = fetch_screenshot(link_dir, link, overwrite=overwrite)
+        if FETCH_SCREENSHOT:
+            link = fetch_screenshot(link_dir, link, overwrite=overwrite)
 
-    if FETCH_DOM:
-        link = fetch_dom(link_dir, link, overwrite=overwrite)
+        if FETCH_DOM:
+            link = fetch_dom(link_dir, link, overwrite=overwrite)
 
-    if SUBMIT_ARCHIVE_DOT_ORG:
-        link = archive_dot_org(link_dir, link, overwrite=overwrite)
+        if SUBMIT_ARCHIVE_DOT_ORG:
+            link = archive_dot_org(link_dir, link, overwrite=overwrite)
 
-    if FETCH_GIT:
-        link = fetch_git(link_dir, link, overwrite=overwrite)
+        if FETCH_GIT:
+            link = fetch_git(link_dir, link, overwrite=overwrite)
 
-    if FETCH_MEDIA:
-        link = fetch_media(link_dir, link, overwrite=overwrite)
+        if FETCH_MEDIA:
+            link = fetch_media(link_dir, link, overwrite=overwrite)
 
+        write_link_index(link_dir, link)
 
-    write_link_index(link_dir, link)
+    except Exception as err:  # report the failure; link is still returned below
+        print(f'    ! Failed to archive link: {err.__class__.__name__}: {err}')
     
     return link