Ver Fonte

change mercury atomic_write output order

Nick Sweeting há 4 anos
pai
commit
d0f8a5e710

+ 4 - 2
archivebox/extractors/mercury.py

@@ -59,6 +59,8 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
     status = 'succeeded'
     status = 'succeeded'
     timer = TimedProgress(timeout, prefix='      ')
     timer = TimedProgress(timeout, prefix='      ')
     try:
     try:
+        output_folder.mkdir(exist_ok=True)
+
         # Get plain text version of article
         # Get plain text version of article
         cmd = [
         cmd = [
             DEPENDENCIES['MERCURY_BINARY']['path'],
             DEPENDENCIES['MERCURY_BINARY']['path'],
@@ -71,6 +73,8 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
         except json.JSONDecodeError:
         except json.JSONDecodeError:
             raise ShellError(cmd, result)
             raise ShellError(cmd, result)
         
         
+        atomic_write(str(output_folder / "content.txt"), article_text["content"])
+
         # Get HTML version of article
         # Get HTML version of article
         cmd = [
         cmd = [
             DEPENDENCIES['MERCURY_BINARY']['path'],
             DEPENDENCIES['MERCURY_BINARY']['path'],
@@ -82,9 +86,7 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
         except json.JSONDecodeError:
         except json.JSONDecodeError:
             raise ShellError(cmd, result)
             raise ShellError(cmd, result)
 
 
-        output_folder.mkdir(exist_ok=True)
         atomic_write(str(output_folder / "content.html"), article_json.pop("content"))
         atomic_write(str(output_folder / "content.html"), article_json.pop("content"))
-        atomic_write(str(output_folder / "content.txt"), article_text["content"])
         atomic_write(str(output_folder / "article.json"), article_json)
         atomic_write(str(output_folder / "article.json"), article_json)
 
 
         # Check for common failure cases
         # Check for common failure cases

+ 1 - 0
archivebox/extractors/readability.py

@@ -112,6 +112,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
     except (Exception, OSError) as err:
     except (Exception, OSError) as err:
         status = 'failed'
         status = 'failed'
         output = err
         output = err
+        cmd = [cmd[0], './{singlefile,dom}.html']
     finally:
     finally:
         timer.end()
         timer.end()