2
0
Эх сурвалжийг харах

better dependency checking system and guards

Nick Sweeting 6 жил өмнө
parent
commit
718e25c973

+ 16 - 11
archivebox/cli/archivebox_init.py

@@ -9,12 +9,14 @@ import sys
 import argparse
 
 from ..legacy.util import reject_stdin
+from ..legacy.index import write_links_index
 from ..legacy.config import (
     OUTPUT_DIR,
     SOURCES_DIR,
     ARCHIVE_DIR,
     DATABASE_DIR,
     ANSI,
+    stderr,
 )
 
 
@@ -28,16 +30,16 @@ def init(output_dir: str=OUTPUT_DIR):
 
     if not is_empty:
         if existing_index:
-            print('[√] You already have an archive setup up in this folder. To add new links, you can run:')
-            print('    archivebox add https://example.com')
-            print()
-            print('[i] Fore more usage and examples, run "archivebox help" or visit:')
-            print('    https://github.com/pirate/ArchiveBox/wiki/Usage')
+            stderr('[√] You already have an archive setup up in this folder. To add new links, you can run:')
+            stderr('    archivebox add https://example.com')
+            stderr()
+            stderr('[i] Fore more usage and examples, run "archivebox help" or visit:')
+            stderr('    https://github.com/pirate/ArchiveBox/wiki/Usage')
             # TODO: import old archivebox version's archive data folder
 
             raise SystemExit(1)
         else:
-            print(
+            stderr(
                 ("{red}[X] This folder already has files in it. You must run init inside a completely empty directory.{reset}"
                 "\n\n"
                 "    {lightred}Hint:{reset} To import a data folder created by an older version of ArchiveBox, \n"
@@ -48,14 +50,17 @@ def init(output_dir: str=OUTPUT_DIR):
             raise SystemExit(1)
 
 
-    print('{green}[+] Initializing new archive directory: {}{reset}'.format(output_dir, **ANSI))
+    stderr('{green}[+] Initializing new archive directory: {}{reset}'.format(output_dir, **ANSI))
     os.makedirs(SOURCES_DIR)
-    print(f'    > {SOURCES_DIR}')
+    stderr(f'    > {SOURCES_DIR}')
     os.makedirs(ARCHIVE_DIR)
-    print(f'    > {ARCHIVE_DIR}')
+    stderr(f'    > {ARCHIVE_DIR}')
     os.makedirs(DATABASE_DIR)
-    print(f'    > {DATABASE_DIR}')
-    print('{green}[√] Done.{reset}'.format(**ANSI))
+    stderr(f'    > {DATABASE_DIR}')
+
+    write_links_index([], out_dir=OUTPUT_DIR, finished=True)
+
+    stderr('{green}[√] Done.{reset}'.format(**ANSI))
 
 
 def main(args=None):

+ 79 - 70
archivebox/cli/archivebox_version.py

@@ -4,42 +4,18 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox version'
 __description__ = 'Print the ArchiveBox version and dependency information'
 
+import os
+import re
 import sys
-import shutil
 import argparse
 
 from ..legacy.util import reject_stdin
 from ..legacy.config import (
+    ANSI,
     VERSION,
-
-    REPO_DIR,
-    PYTHON_DIR,
-    LEGACY_DIR,
-    TEMPLATES_DIR,
-    OUTPUT_DIR,
-    SOURCES_DIR,
-    ARCHIVE_DIR,
-    DATABASE_DIR,
-
-    USE_CURL,
-    USE_WGET,
-    USE_CHROME,
-    FETCH_GIT,
-    FETCH_MEDIA,
-
-    DJANGO_BINARY,
-    CURL_BINARY,
-    GIT_BINARY,
-    WGET_BINARY,
-    YOUTUBEDL_BINARY,
-    CHROME_BINARY,
-
-    DJANGO_VERSION,
-    CURL_VERSION,
-    GIT_VERSION,
-    WGET_VERSION,
-    YOUTUBEDL_VERSION,
-    CHROME_VERSION,
+    FOLDERS,
+    DEPENDENCIES,
+    check_dependencies,
 )
 
 
@@ -51,51 +27,84 @@ def main(args=None):
         description=__description__,
         add_help=True,
     )
-    parser.parse_args(args)
+    parser.add_argument(
+        '--quiet', '-q',
+        action='store_true',
+        help='Only print ArchiveBox version number and nothing else.',
+    )
+    command = parser.parse_args(args)
     reject_stdin(__command__)
     
-    print('ArchiveBox v{}'.format(VERSION))
-    print()
-    print('[i] Folder locations:')
-    print('    REPO_DIR:      ', REPO_DIR)
-    print('    PYTHON_DIR:    ', PYTHON_DIR)
-    print('    LEGACY_DIR:    ', LEGACY_DIR)
-    print('    TEMPLATES_DIR: ', TEMPLATES_DIR)
-    print()
-    print('    OUTPUT_DIR:    ', OUTPUT_DIR)
-    print('    SOURCES_DIR:   ', SOURCES_DIR)
-    print('    ARCHIVE_DIR:   ', ARCHIVE_DIR)
-    print('    DATABASE_DIR:  ', DATABASE_DIR)
-    print()
-    print(
-        '[√] Django:'.ljust(14),
-        'python3 {} --version\n'.format(DJANGO_BINARY),
-        ' '*13, DJANGO_VERSION, '\n',
-    )
-    print(
-        '[{}] CURL:'.format('√' if USE_CURL else 'X').ljust(14),
-        '{} --version\n'.format(shutil.which(CURL_BINARY)),
-        ' '*13, CURL_VERSION, '\n',
-    )
-    print(
-        '[{}] GIT:'.format('√' if FETCH_GIT else 'X').ljust(14),
-        '{} --version\n'.format(shutil.which(GIT_BINARY)),
-        ' '*13, GIT_VERSION, '\n',
-    )
-    print(
-        '[{}] WGET:'.format('√' if USE_WGET else 'X').ljust(14),
-        '{} --version\n'.format(shutil.which(WGET_BINARY)),
-        ' '*13, WGET_VERSION, '\n',
-    )
+    if command.quiet:
+        print(VERSION)
+    else:
+        print('ArchiveBox v{}'.format(VERSION))
+        print()
+
+        print('{white}[i] Dependency versions:{reset}'.format(**ANSI))
+        for name, dependency in DEPENDENCIES.items():
+            print_dependency_version(name, dependency)
+        print()
+        print('{white}[i] Folder locations:{reset}'.format(**ANSI))
+        for name, folder in FOLDERS.items():
+            print_folder_status(name, folder)
+
+        print()
+        check_dependencies()
+
+
+def print_folder_status(name, folder):
+    """Print a one-line, colorized status row for one folder entry.
+
+    `name` is the label shown in the first column.  `folder` is a mapping
+    read via its 'path', 'enabled' and 'is_valid' keys (the shape of the
+    FOLDERS entries this is called with from main()).
+    """
+    # Bind the count column up front so it always has a value, even for an
+    # enabled+valid entry whose path is empty (previously an
+    # UnboundLocalError at the print below).
+    num_files = '?'
+    if folder['enabled']:
+        if folder['is_valid']:
+            color, symbol, note = 'green', '√', 'valid'
+        else:
+            color, symbol, note = 'red', 'X', 'invalid'
+    else:
+        color, symbol, note, num_files = 'lightyellow', '-', 'disabled', '-'
+
+    path = folder['path']
+    if path:
+        # A path that exists overrides the placeholder chosen above.
+        if os.path.isdir(path):
+            try:
+                num_files = f'{len(os.listdir(path))} files'
+            except OSError:
+                # e.g. unreadable directory: report unknown instead of
+                # aborting the whole status printout
+                num_files = '?'
+        elif os.path.exists(path):
+            num_files = 'exists'
+        else:
+            num_files = '?'
+
     print(
-        '[{}] YOUTUBEDL:'.format('√' if FETCH_MEDIA else 'X').ljust(14),
-        '{} --version\n'.format(shutil.which(YOUTUBEDL_BINARY)),
-        ' '*13, YOUTUBEDL_VERSION, '\n',
+        ANSI[color],
+        symbol,
+        ANSI['reset'],
+        name.ljust(24),
+        (path or '').ljust(70),
+        num_files.ljust(14),
+        ANSI[color],
+        note,
+        ANSI['reset'],
     )
+
+
+def print_dependency_version(name, dependency):
+    """Print a one-line, colorized status row for one dependency entry.
+
+    `name` is the label shown in the first column.  `dependency` is a
+    mapping read via its 'path', 'version', 'enabled' and 'is_valid' keys
+    (the shape of the DEPENDENCIES entries this is called with from main()).
+    """
+    if dependency['enabled']:
+        if dependency['is_valid']:
+            color, symbol, note = 'green', '√', 'valid'
+            # The version text may contain no digits at all (or be missing);
+            # show '?' instead of crashing on a None match.
+            found = re.search(r'[\d\.]+', dependency['version'] or '')
+            version = ('v' + found[0]) if found else '?'
+        else:
+            color, symbol, note, version = 'red', 'X', 'invalid', '?'
+    else:
+        color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
+
     print(
-        '[{}] CHROME:'.format('√' if USE_CHROME else 'X').ljust(14),
-        '{} --version\n'.format(shutil.which(CHROME_BINARY)),
-        ' '*13, CHROME_VERSION, '\n',
+        ANSI[color],
+        symbol,
+        ANSI['reset'],
+        name.ljust(24),
+        (dependency["path"] or '').ljust(70),
+        version.ljust(14),
+        ANSI[color],
+        note,
+        ANSI['reset'],
     )
 
 

+ 175 - 29
archivebox/legacy/config.py

@@ -109,45 +109,57 @@ URL_BLACKLIST_PTN = re.compile(URL_BLACKLIST, re.IGNORECASE) if URL_BLACKLIST el
 
 VERSION = open(os.path.join(REPO_DIR, 'VERSION'), 'r').read().strip()
 GIT_SHA = VERSION.split('+')[-1] or 'unknown'
+HAS_INVALID_DEPENDENCIES = False
+HAS_INVALID_DB = not os.path.exists(os.path.join(OUTPUT_DIR, 'index.json'))
+
+def stderr(*args):
+    """Write the given values to sys.stderr as one space-separated line."""
+    line = ' '.join(map(str, args))
+    sys.stderr.write(line + '\n')
 
 ### Check Python environment
 python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
-if python_vers < 3.5:
+# Gate on the version tuple rather than the float above: float('3.10') is
+# 3.1, so the float comparison would wrongly reject Python 3.10 and newer.
+# python_vers is still used for the message, where it is exact for every
+# version that can reach this branch.
+if sys.version_info[:2] < (3, 5):
-    print('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
-    print('    See https://github.com/pirate/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
+    stderr('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
+    stderr('    See https://github.com/pirate/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
     raise SystemExit(1)
 
 if sys.stdout.encoding.upper() not in ('UTF-8', 'UTF8'):
-    print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
-    print('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
-    print('')
-    print('    Confirm that it\'s fixed by opening a new shell and running:')
-    print('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8')
-    print('')
-    print('    Alternatively, run this script with:')
-    print('        env PYTHONIOENCODING=UTF-8 ./archive.py export.html')
+    stderr('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
+    stderr('    To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
+    stderr('')
+    stderr('    Confirm that it\'s fixed by opening a new shell and running:')
+    stderr('        python3 -c "import sys; print(sys.stdout.encoding)"   # should output UTF-8')
+    stderr('')
+    stderr('    Alternatively, run this script with:')
+    stderr('        env PYTHONIOENCODING=UTF-8 ./archive.py export.html')
 
 # ******************************************************************************
 # ***************************** Helper Functions *******************************
 # ******************************************************************************
 
-def bin_version(binary: str) -> str:
+def bin_version(binary: str) -> Optional[str]:
     """check the presence and return valid version line of a specified binary"""
-    if not shutil.which(binary):
-        print('{red}[X] Missing dependency: wget{reset}'.format(**ANSI))
-        print('    Install it, then confirm it works with: {} --version'.format(binary))
-        print('    See https://github.com/pirate/ArchiveBox/wiki/Install for help.')
-        raise SystemExit(1)
-    
+    # On any failure this no longer exits: it sets the module-level
+    # HAS_INVALID_DEPENDENCIES flag, prints a hint to stderr and returns None.
+    global HAS_INVALID_DEPENDENCIES
+    # expand a leading ~ so user-relative binary paths can be looked up
+    binary = os.path.expanduser(binary)
     try:
+        # a binary that cannot be located is routed to the same failure
+        # handler as one that raises while being run
+        if not shutil.which(binary):
+            raise Exception
+
         version_str = run([binary, "--version"], stdout=PIPE, cwd=REPO_DIR).stdout.strip().decode()
-        return version_str.split('\n')[0].strip()
+        # take first 3 columns of first line of version info
+        return ' '.join(version_str.split('\n')[0].strip().split()[:3])
     except Exception:
-        print('{red}[X] Unable to find a working version of {cmd}, is it installed and in your $PATH?'.format(cmd=binary, **ANSI))
-        raise SystemExit(1)
-
-
-def find_chrome_binary() -> str:
+        # record the failure; check_dependencies() reads this flag later
+        HAS_INVALID_DEPENDENCIES = True
+        stderr('{red}[X] Unable to find working version of dependency: {}{reset}'.format(binary, **ANSI))
+        stderr('    Make sure it\'s installed, then confirm it\'s working by running:')
+        stderr('        {} --version'.format(binary))
+        stderr()
+        stderr('    If you don\'t want to install it, you can disable it via config. See here for more info:')
+        stderr('        https://github.com/pirate/ArchiveBox/wiki/Install')
+        stderr()
+        return None
+
+
+def find_chrome_binary() -> Optional[str]:
     """find any installed chrome binaries in the default locations"""
     # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
     # make sure data dir finding precedence order always matches binary finding order
@@ -169,8 +181,9 @@ def find_chrome_binary() -> str:
         if full_path_exists:
             return name
     
-    print('{red}[X] Unable to find a working version of Chrome/Chromium, is it installed and in your $PATH?'.format(**ANSI))
-    raise SystemExit(1)
+    stderr('{red}[X] Unable to find a working version of Chrome/Chromium, is it installed and in your $PATH?'.format(**ANSI))
+    stderr()
+    return None
 
 
 def find_chrome_data_dir() -> Optional[str]:
@@ -251,14 +264,122 @@ try:
     if not CHROME_BINARY:
         CHROME_BINARY = find_chrome_binary() or 'chromium-browser'
     CHROME_VERSION = None
+
     if USE_CHROME:
         if CHROME_BINARY:
             CHROME_VERSION = bin_version(CHROME_BINARY)
-            # print('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
+            # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
 
             if CHROME_USER_DATA_DIR is None:
                 CHROME_USER_DATA_DIR = find_chrome_data_dir()
-            # print('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
+            elif CHROME_USER_DATA_DIR == '':
+                CHROME_USER_DATA_DIR = None
+            else:
+                if not os.path.exists(os.path.join(CHROME_USER_DATA_DIR, 'Default')):
+                    stderr('{red}[X] Could not find profile "Default" in CHROME_USER_DATA_DIR:{reset} {}'.format(CHROME_USER_DATA_DIR, **ANSI))
+                    stderr('    Make sure you set it to a Chrome user data directory containing a Default profile folder.')
+                    stderr('    For more info see:')
+                    stderr('        https://github.com/pirate/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
+                    if 'Default' in CHROME_USER_DATA_DIR:
+                        stderr()
+                        stderr('    Try removing /Default from the end e.g.:')
+                        stderr('        CHROME_USER_DATA_DIR="{}"'.format(CHROME_USER_DATA_DIR.split('/Default')[0]))
+                    raise SystemExit(1)
+            # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
+
+
+    ### Summary Lookup Dicts
+    FOLDERS = {
+        'REPO_DIR': {
+            'path': os.path.abspath(REPO_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(REPO_DIR, '.github')),
+        },
+        'PYTHON_DIR': {
+            'path': os.path.abspath(PYTHON_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(PYTHON_DIR, '__main__.py')),
+        },
+        'LEGACY_DIR': {
+            'path': os.path.abspath(LEGACY_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(LEGACY_DIR, 'util.py')),
+        },
+        'TEMPLATES_DIR': {
+            'path': os.path.abspath(TEMPLATES_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(TEMPLATES_DIR, 'static')),
+        },
+        'OUTPUT_DIR': {
+            'path': os.path.abspath(OUTPUT_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(OUTPUT_DIR, 'index.json')),
+        },
+        'SOURCES_DIR': {
+            'path': os.path.abspath(SOURCES_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(SOURCES_DIR),
+        },
+        'ARCHIVE_DIR': {
+            'path': os.path.abspath(ARCHIVE_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(ARCHIVE_DIR),
+        },
+        'DATABASE_DIR': {
+            'path': os.path.abspath(DATABASE_DIR),
+            'enabled': True,
+            'is_valid': os.path.exists(os.path.join(DATABASE_DIR, DATABASE_FILE)),
+        },
+        'CHROME_USER_DATA_DIR': {
+            'path': CHROME_USER_DATA_DIR and os.path.abspath(CHROME_USER_DATA_DIR),
+            'enabled': USE_CHROME and CHROME_USER_DATA_DIR,
+            'is_valid': os.path.exists(os.path.join(CHROME_USER_DATA_DIR, 'Default')) if CHROME_USER_DATA_DIR else False,
+        },
+        'COOKIES_FILE': {
+            'path': COOKIES_FILE and os.path.abspath(COOKIES_FILE),
+            'enabled': USE_WGET and COOKIES_FILE,
+            'is_valid': COOKIES_FILE and os.path.exists(COOKIES_FILE),
+        },
+    }
+
+    DEPENDENCIES = {
+        'DJANGO_BINARY': {
+            'path': DJANGO_BINARY,
+            'version': DJANGO_VERSION,
+            'enabled': True,
+            'is_valid': bool(DJANGO_VERSION),
+        },
+        'CURL_BINARY': {
+            'path': CURL_BINARY and shutil.which(CURL_BINARY),
+            'version': CURL_VERSION,
+            'enabled': USE_CURL,
+            'is_valid': bool(CURL_VERSION),
+        },
+        'WGET_BINARY': {
+            'path': WGET_BINARY and shutil.which(WGET_BINARY),
+            'version': WGET_VERSION,
+            'enabled': USE_WGET,
+            'is_valid': bool(WGET_VERSION),
+        },
+        'GIT_BINARY': {
+            'path': GIT_BINARY and shutil.which(GIT_BINARY),
+            'version': GIT_VERSION,
+            'enabled': FETCH_GIT,
+            'is_valid': bool(GIT_VERSION),
+        },
+        'YOUTUBEDL_BINARY': {
+            'path': YOUTUBEDL_BINARY and shutil.which(YOUTUBEDL_BINARY),
+            'version': YOUTUBEDL_VERSION,
+            'enabled': FETCH_MEDIA,
+            'is_valid': bool(YOUTUBEDL_VERSION),
+        },
+        'CHROME_BINARY': {
+            'path': CHROME_BINARY and shutil.which(CHROME_BINARY),
+            'version': CHROME_VERSION,
+            'enabled': USE_CHROME,
+            'is_valid': bool(CHROME_VERSION),
+        },
+    }
 
     CHROME_OPTIONS = {
         'TIMEOUT': TIMEOUT,
@@ -270,14 +391,39 @@ try:
         'CHROME_USER_AGENT': CHROME_USER_AGENT,
         'CHROME_USER_DATA_DIR': CHROME_USER_DATA_DIR,
     }
+
     # PYPPETEER_ARGS = {
     #     'headless': CHROME_HEADLESS,
     #     'ignoreHTTPSErrors': not CHECK_SSL_VALIDITY,
     #     # 'executablePath': CHROME_BINARY,
     # }
+    
 except KeyboardInterrupt:
     raise SystemExit(1)
 
-except:
-    print('[X] There was an error while reading configuration. Your archive data is unaffected.')
+except Exception as e:
+    stderr()
+    stderr('{red}[X] Error during configuration: {} {}{reset}'.format(e.__class__.__name__, e, **ANSI))
+    stderr('    Your archive data is unaffected.')
+    stderr('    Check your config or environemnt variables for mistakes and try again.')
+    stderr('    For more info see:')
+    stderr('        https://github.com/pirate/ArchiveBox/wiki/Configuration')
+    stderr()
     raise
+
+
+def check_dependencies() -> None:
+    """Exit with status 1 if either module-level health flag is set.
+
+    HAS_INVALID_DEPENDENCIES is checked first, then HAS_INVALID_DB; each
+    failure prints its explanation to stderr before raising SystemExit(1).
+    Returns normally when both flags are false.
+    """
+    if HAS_INVALID_DEPENDENCIES:
+        stderr('{red}[X] Missing some required dependencies.{reset}'.format(**ANSI))
+        raise SystemExit(1)
+
+    if not HAS_INVALID_DB:
+        return
+
+    help_lines = (
+        '{red}[X] No archive data found in:{reset} {}'.format(OUTPUT_DIR, **ANSI),
+        '    Are you running archivebox in the right folder?',
+        '        cd path/to/your/archive',
+        '        archivebox [command]',
+        '',
+        '    To create a new archive folder, run:',
+        '        mkdir new_archive_dir && cd new_archive_dir',
+        '        archivebox init',
+    )
+    for text in help_lines:
+        stderr(text)
+    raise SystemExit(1)

+ 3 - 0
archivebox/legacy/main.py

@@ -14,6 +14,7 @@ from .archive_methods import archive_link
 from .config import (
     ONLY_NEW,
     OUTPUT_DIR,
+    check_dependencies,
 )
 from .logs import (
     log_archiving_started,
@@ -26,6 +27,8 @@ from .logs import (
 def update_archive_data(import_path: Optional[str]=None, resume: Optional[float]=None, only_new: bool=False) -> List[Link]:
     """The main ArchiveBox entrancepoint. Everything starts here."""
 
+    check_dependencies()
+
     # Step 1: Load list of links from the existing index
     #         merge in and dedupe new links from import_path
     all_links, new_links = load_links_index(out_dir=OUTPUT_DIR, import_path=import_path)