
allow parsing to continue even when fetching URL contents fails

Nick Sweeting, 3 years ago
commit 38e54b93fe
2 changed files with 6 additions and 3 deletions
  1. archivebox/main.py (+5 -2)
  2. archivebox/parsers/__init__.py (+1 -1)

archivebox/main.py (+5 -2)

@@ -594,8 +594,11 @@ def add(urls: Union[str, List[str]],
     if new_links and depth == 1:
         log_crawl_started(new_links)
         for new_link in new_links:
-            downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
-            new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
+            try:
+                downloaded_file = save_file_as_source(new_link.url, filename=f'{new_link.timestamp}-crawl-{new_link.domain}.txt', out_dir=out_dir)
+                new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
+            except Exception as err:
+                stderr(f'[!] Failed to get contents of URL {new_link.url}', err, color='red')
 
     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
     
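The pattern this hunk introduces: wrap each per-link fetch in a try/except so one unreachable URL no longer aborts the whole depth-1 crawl. Below is a minimal standalone sketch of the same idea, with a hypothetical fetch_source() standing in for ArchiveBox's save_file_as_source() and parse_links_from_source():

    from typing import List
    from urllib.request import urlopen

    def fetch_source(url: str) -> str:
        # Hypothetical stand-in for save_file_as_source(): fetch the page body.
        with urlopen(url, timeout=10) as resp:
            return resp.read().decode('utf-8', errors='replace')

    def crawl(urls: List[str]) -> List[str]:
        collected: List[str] = []
        for url in urls:
            try:
                collected.append(fetch_source(url))
            except Exception as err:
                # Report the failure and continue with the remaining links,
                # mirroring the new behavior in add().
                print(f'[!] Failed to get contents of URL {url}', err)
        return collected

    print(len(crawl(['https://example.com', 'https://url.invalid'])))

Only the failing link is skipped; every link that does resolve still contributes its parsed results.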

archivebox/parsers/__init__.py (+1 -1)

@@ -176,7 +176,7 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
                 ANSI['reset'],
             ))
             print('    ', e)
-            raise SystemExit(1)
+            raise e
 
     else:
         # Source is a path to a local file on the filesystem
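The parsers change is what makes the new try/except in main.py effective: SystemExit derives from BaseException, not Exception, so the old "raise SystemExit(1)" would sail straight past "except Exception" and still terminate the whole "archivebox add" run. Re-raising the original error makes it catchable by the caller. A small demonstration of that distinction:

    def fail_with_original_error():
        raise ValueError('download failed')

    def fail_with_system_exit():
        raise SystemExit(1)

    for fn in (fail_with_original_error, fail_with_system_exit):
        try:
            fn()
        except Exception as err:
            # Catches ValueError and other ordinary errors ...
            print(f'except Exception caught: {err!r}')
        except BaseException as err:
            # ... but SystemExit only lands here, one level up the hierarchy.
            print(f'only except BaseException catches: {err!r}')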