ソースを参照

fix parsing errors for older archive index formats

Nick Sweeting 6 年 前
コミット
204de37eb9
2 ファイル変更、4 行追加、3 行削除
  1. +2 -2
      archivebox/index/schema.py
  2. +2 -1
      archivebox/parsers/__init__.py

+ 2 - 2
archivebox/index/schema.py

@@ -126,7 +126,7 @@ class Link:
             assert isinstance(self.url, str) and '://' in self.url
             assert self.updated is None or isinstance(self.updated, datetime)
             assert self.title is None or (isinstance(self.title, str) and self.title)
-            assert self.tags is None or (isinstance(self.tags, str) and self.tags)
+            assert self.tags is None or isinstance(self.tags, str)
             assert isinstance(self.sources, list)
             assert all(isinstance(source, str) and source for source in self.sources)
             assert isinstance(self.history, dict)
@@ -186,7 +186,7 @@ class Link:
             for key, val in json_info.items()
             if key in cls.field_names()
         }
-        info['updated'] = parse_date(info['updated'])
+        info['updated'] = parse_date(info.get('updated'))
         info['sources'] = info.get('sources') or []
 
         json_history = info.get('history') or {}

+ 2 - 1
archivebox/parsers/__init__.py

@@ -71,11 +71,12 @@ def parse_links(source_file: str) -> Tuple[List[Link], str]:
                     timer.end()
                     return links, parser_name
             except Exception as err:   # noqa
+                pass
                 # Parsers are tried one by one down the list, and the first one
                 # that succeeds is used. To see why a certain parser was not used
                 # due to error or format incompatibility, uncomment this line:
                 # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err))
-                pass
+                # raise
 
     timer.end()
     return [], 'Failed to parse'