2
0
Эх сурвалжийг харах

fix parsing errors for older archive index formats

Nick Sweeting 6 жил өмнө
parent
commit
204de37eb9

+ 2 - 2
archivebox/index/schema.py

@@ -126,7 +126,7 @@ class Link:
             assert isinstance(self.url, str) and '://' in self.url
             assert self.updated is None or isinstance(self.updated, datetime)
             assert self.title is None or (isinstance(self.title, str) and self.title)
-            assert self.tags is None or (isinstance(self.tags, str) and self.tags)
+            assert self.tags is None or isinstance(self.tags, str)
             assert isinstance(self.sources, list)
             assert all(isinstance(source, str) and source for source in self.sources)
             assert isinstance(self.history, dict)
@@ -186,7 +186,7 @@ class Link:
             for key, val in json_info.items()
             if key in cls.field_names()
         }
-        info['updated'] = parse_date(info['updated'])
+        info['updated'] = parse_date(info.get('updated'))
         info['sources'] = info.get('sources') or []
 
         json_history = info.get('history') or {}

+ 2 - 1
archivebox/parsers/__init__.py

@@ -71,11 +71,12 @@ def parse_links(source_file: str) -> Tuple[List[Link], str]:
                     timer.end()
                     return links, parser_name
             except Exception as err:   # noqa
+                pass
                 # Parsers are tried one by one down the list, and the first one
                 # that succeeds is used. To see why a certain parser was not used
                 # due to error or format incompatibility, uncomment this line:
                 # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err))
-                pass
+                # raise
 
     timer.end()
     return [], 'Failed to parse'