Переглянути джерело

show full version info using the --version flag

Nick Sweeting 6 роки тому
батько
коміт
a26c2fe467
4 змінених файлів з 115 додано та 95 видалено
  1. 61 5
      archivebox/archive.py
  2. 7 5
      archivebox/archive_methods.py
  3. 44 58
      archivebox/config.py
  4. 3 27
      archivebox/links.py

+ 61 - 5
archivebox/archive.py

@@ -12,7 +12,7 @@ __package__ = 'archivebox'
 
 import os
 import sys
-
+import shutil
 
 from typing import List, Optional
 
@@ -23,8 +23,23 @@ from .archive_methods import archive_link
 from .config import (
     ONLY_NEW,
     OUTPUT_DIR,
-    PYTHON_DIR,
     VERSION,
+    ANSI,
+    CURL_VERSION,
+    GIT_VERSION,
+    WGET_VERSION,
+    YOUTUBEDL_VERSION,
+    CHROME_VERSION,
+    USE_CURL,
+    USE_WGET,
+    USE_CHROME,
+    CURL_BINARY,
+    GIT_BINARY,
+    WGET_BINARY,
+    YOUTUBEDL_BINARY,
+    CHROME_BINARY,
+    FETCH_GIT,
+    FETCH_MEDIA,
 )
 from .util import (
     enforce_types,
@@ -59,8 +74,37 @@ def print_help():
     print("    archivebox add --depth=1 https://example.com/feed.rss")
     print("    archivebox update --resume=15109948213.123")
 
-
-def main(args=None) -> List[Link]:
+def print_version():
+    print('ArchiveBox v{}'.format(__VERSION__))
+    print()
+    print(
+        '[{}] CURL:'.format('√' if USE_CURL else 'X').ljust(14),
+        '{} --version\n'.format(shutil.which(CURL_BINARY)),
+        ' '*13, CURL_VERSION, '\n',
+    )
+    print(
+        '[{}] GIT:'.format('√' if FETCH_GIT else 'X').ljust(14),
+        '{} --version\n'.format(shutil.which(GIT_BINARY)),
+        ' '*13, GIT_VERSION, '\n',
+    )
+    print(
+        '[{}] WGET:'.format('√' if USE_WGET else 'X').ljust(14),
+        '{} --version\n'.format(shutil.which(WGET_BINARY)),
+        ' '*13, WGET_VERSION, '\n',
+    )
+    print(
+        '[{}] YOUTUBEDL:'.format('√' if FETCH_MEDIA else 'X').ljust(14),
+        '{} --version\n'.format(shutil.which(YOUTUBEDL_BINARY)),
+        ' '*13, YOUTUBEDL_VERSION, '\n',
+    )
+    print(
+        '[{}] CHROME:'.format('√' if USE_CHROME else 'X').ljust(14),
+        '{} --version\n'.format(shutil.which(CHROME_BINARY)),
+        ' '*13, CHROME_VERSION, '\n',
+    )
+
+
+def main(args=None) -> None:
     if args is None:
         args = sys.argv
 
@@ -69,7 +113,7 @@ def main(args=None) -> List[Link]:
         raise SystemExit(0)
 
     if set(args).intersection(('--version', 'version')):
-        print('ArchiveBox version {}'.format(__VERSION__))
+        print_version()
         raise SystemExit(0)
 
     ### Handle CLI arguments
@@ -86,7 +130,19 @@ def main(args=None) -> List[Link]:
 
     ### Set up output folder
     if not os.path.exists(OUTPUT_DIR):
+        print('{green}[+] Created a new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI))
         os.makedirs(OUTPUT_DIR)
+    else:
+        not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store'})
+        index_exists = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json'))
+        if not_empty and not index_exists:
+            print(
+                ('{red}[X] Could not find index.json in the OUTPUT_DIR: {reset}{}\n'
+                '    You must run ArchiveBox in an existing archive directory, \n'
+                '     or an empty/new directory to start a new archive collection.'
+                ).format(OUTPUT_DIR, **ANSI)
+            )
+            raise SystemExit(1)
 
     ### Handle ingesting urls piped in through stdin
     # (e.g. if user does cat example_urls.txt | ./archive)

+ 7 - 5
archivebox/archive_methods.py

@@ -4,7 +4,7 @@ from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime
 
-from .schema import Link, ArchiveResult, ArchiveError
+from .schema import Link, ArchiveResult
 from .index import (
     write_link_index,
     patch_links_index,
@@ -28,8 +28,6 @@ from .config import (
     SUBMIT_ARCHIVE_DOT_ORG,
     TIMEOUT,
     MEDIA_TIMEOUT,
-    ANSI,
-    OUTPUT_DIR,
     GIT_DOMAINS,
     VERSION,
     WGET_USER_AGENT,
@@ -40,7 +38,6 @@ from .config import (
     CHROME_VERSION,
     GIT_VERSION,
     YOUTUBEDL_VERSION,
-    ONLY_NEW,
     WGET_AUTO_COMPRESSION,
 )
 from .util import (
@@ -56,7 +53,6 @@ from .util import (
     wget_output_path,
     chrome_args,
     run, PIPE, DEVNULL,
-    Link,
 )
 from .logs import (
     log_link_archiving_started,
@@ -66,6 +62,12 @@ from .logs import (
 )
 
 
+class ArchiveError(Exception):
+    def __init__(self, message, hints=None):
+        super().__init__(message)
+        self.hints = hints
+
+
 @enforce_types
 def archive_link(link: Link, page=None) -> Link:
     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""

+ 44 - 58
archivebox/config.py

@@ -59,8 +59,24 @@ CHROME_BINARY =          os.getenv('CHROME_BINARY',          None)
 CHROME_SANDBOX =         os.getenv('CHROME_SANDBOX', 'True').lower() == 'true'
 
 # ******************************************************************************
-# *************************** Directory Settings *******************************
-# ******************************************************************************
+
+### Terminal Configuration
+TERM_WIDTH = lambda: shutil.get_terminal_size((100, 10)).columns
+ANSI = {
+    'reset': '\033[00;00m',
+    'lightblue': '\033[01;30m',
+    'lightyellow': '\033[01;33m',
+    'lightred': '\033[01;35m',
+    'red': '\033[01;31m',
+    'green': '\033[01;32m',
+    'blue': '\033[01;34m',
+    'white': '\033[01;37m',
+    'black': '\033[01;30m',
+}
+if not USE_COLOR:
+    # dont show colors if USE_COLOR is False
+    ANSI = {k: '' for k in ANSI.keys()}
+
 
 REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
 if OUTPUT_DIR:
@@ -68,21 +84,6 @@ if OUTPUT_DIR:
 else:
     OUTPUT_DIR = os.path.abspath(os.curdir)
 
-if not os.path.exists(OUTPUT_DIR):
-    print('{green}[+] Created a new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI))
-    os.makedirs(OUTPUT_DIR)
-else:
-    not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store'})
-    index_exists = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json'))
-    if not_empty and not index_exists:
-        print(
-            ('{red}[X] Could not find index.json in the OUTPUT_DIR: {reset}{}\n'
-            '    You must run ArchiveBox in an existing archive directory, \n'
-            '     or an empty/new directory to start a new archive collection.'
-            ).format(OUTPUT_DIR, **ANSI)
-        )
-        raise SystemExit(1)
-    
 ARCHIVE_DIR_NAME = 'archive'
 SOURCES_DIR_NAME = 'sources'
 ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
@@ -94,13 +95,34 @@ TEMPLATES_DIR = os.path.join(PYTHON_DIR, 'templates')
 if COOKIES_FILE:
     COOKIES_FILE = os.path.abspath(COOKIES_FILE)
 
+
+VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip()
+GIT_SHA = VERSION.split('+')[1]
+
+### Check Python environment
+python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
+if python_vers < 3.5:
+    print('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
+    print('    See https://github.com/pirate/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
+    raise SystemExit(1)
+
+if sys.stdout.encoding.upper() not in ('UTF-8', 'UTF8'):
+    print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
+    print('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
+    print('')
+    print('    Confirm that it\'s fixed by opening a new shell and running:')
+    print('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8')
+    print('')
+    print('    Alternatively, run this script with:')
+    print('        env PYTHONIOENCODING=UTF-8 ./archive.py export.html')
+
 # ******************************************************************************
 # ***************************** Helper Functions *******************************
 # ******************************************************************************
 
 def check_version(binary: str) -> str:
     """check the presence and return valid version line of a specified binary"""
-    if run(['which', binary], stdout=DEVNULL, stderr=DEVNULL).returncode:
+    if not shutil.which(binary):
         print('{red}[X] Missing dependency: wget{reset}'.format(**ANSI))
         print('    Install it, then confirm it works with: {} --version'.format(binary))
         print('    See https://github.com/pirate/ArchiveBox/wiki/Install for help.')
@@ -168,43 +190,6 @@ def find_chrome_data_dir() -> Optional[str]:
 # ******************************************************************************
 
 try:
-    VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip()
-    GIT_SHA = VERSION.split('+')[1]
-
-    ### Terminal Configuration
-    TERM_WIDTH = lambda: shutil.get_terminal_size((100, 10)).columns
-    ANSI = {
-        'reset': '\033[00;00m',
-        'lightblue': '\033[01;30m',
-        'lightyellow': '\033[01;33m',
-        'lightred': '\033[01;35m',
-        'red': '\033[01;31m',
-        'green': '\033[01;32m',
-        'blue': '\033[01;34m',
-        'white': '\033[01;37m',
-        'black': '\033[01;30m',
-    }
-    if not USE_COLOR:
-        # dont show colors if USE_COLOR is False
-        ANSI = {k: '' for k in ANSI.keys()}
-
-    ### Check Python environment
-    python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
-    if python_vers < 3.5:
-        print('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
-        print('    See https://github.com/pirate/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
-        raise SystemExit(1)
-
-    if sys.stdout.encoding.upper() not in ('UTF-8', 'UTF8'):
-        print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
-        print('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
-        print('')
-        print('    Confirm that it\'s fixed by opening a new shell and running:')
-        print('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8')
-        print('')
-        print('    Alternatively, run this script with:')
-        print('        env PYTHONIOENCODING=UTF-8 ./archive.py export.html')
-
     ### Make sure curl is installed
     if USE_CURL:
         USE_CURL = FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG
@@ -238,17 +223,18 @@ try:
     ### Make sure youtube-dl is installed
     YOUTUBEDL_VERSION = None
     if FETCH_MEDIA:
-        check_version(YOUTUBEDL_BINARY)
+        YOUTUBEDL_VERSION = check_version(YOUTUBEDL_BINARY)
 
     ### Make sure chrome is installed and calculate version
     if USE_CHROME:
         USE_CHROME = FETCH_PDF or FETCH_SCREENSHOT or FETCH_DOM
     else:
         FETCH_PDF = FETCH_SCREENSHOT = FETCH_DOM = False
+    
+    if CHROME_BINARY is None:
+        CHROME_BINARY = find_chrome_binary() or 'chromium-browser'
     CHROME_VERSION = None
     if USE_CHROME:
-        if CHROME_BINARY is None:
-            CHROME_BINARY = find_chrome_binary()
         if CHROME_BINARY:
             CHROME_VERSION = check_version(CHROME_BINARY)
             # print('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))

+ 3 - 27
archivebox/links.py

@@ -1,24 +1,3 @@
-"""
-In ArchiveBox, a Link represents a single entry that we track in the 
-json index.  All links pass through all archiver functions and the latest,
-most up-to-date canonical output for each is stored in "latest".
-
-Link {
-    timestamp: str,     (how we uniquely id links)    
-    url: str,                                         
-    title: str,                                       
-    tags: str,                                        
-    sources: [str],                                   
-    history: {
-        pdf: [
-            {start_ts, end_ts, cmd, pwd, cmd_version, status, output},
-            ...
-        ],
-        ...
-    },
-}
-"""
-
 from typing import Iterable
 from collections import OrderedDict
 
@@ -27,8 +6,6 @@ from .util import (
     scheme,
     fuzzy_url,
     merge_links,
-    htmldecode,
-    hashurl,
 )
 
 
@@ -68,10 +45,9 @@ def uniquefied_links(sorted_links: Iterable[Link]) -> Iterable[Link]:
 
     unique_timestamps: OrderedDict[str, Link] = OrderedDict()
     for link in unique_urls.values():
-        new_link = Link(**{
-            **link._asdict(),
-            'timestamp': lowest_uniq_timestamp(unique_timestamps, link.timestamp),
-        })
+        new_link = link.overwrite(
+            timestamp=lowest_uniq_timestamp(unique_timestamps, link.timestamp),
+        )
         unique_timestamps[new_link.timestamp] = new_link
 
     return unique_timestamps.values()