浏览代码

ensure the DOM HTML dump is a non-zero-length file when retrying

Nick Sweeting 2 年之前
父节点
当前提交
9599845b56
共有 1 个文件被更改,包括 2 次插入和 1 次删除
  1. 2 1
      archivebox/extractors/dom.py

+ 2 - 1
archivebox/extractors/dom.py

@@ -26,7 +26,8 @@ def should_save_dom(link: Link, out_dir: Optional[Path]=None, overwrite: Optiona
 
     out_dir = out_dir or Path(link.link_dir)
     if not overwrite and (out_dir / 'output.html').exists():
-        return False
+        if (out_dir / 'output.html').stat().st_size > 1:
+            return False
 
     return SAVE_DOM