Przeglądaj źródła

clean up config loading in settings and config file layout

Nick Sweeting 5 lat temu
rodzic
commit
18355dc2c6

+ 17 - 30
archivebox/config/__init__.py → archivebox/config.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.config'
+__package__ = 'archivebox'
 
 import os
 import io
@@ -17,7 +17,7 @@ from subprocess import run, PIPE, DEVNULL
 from configparser import ConfigParser
 from collections import defaultdict
 
-from .stubs import (
+from .config_stubs import (
     SimpleConfigValueDict,
     ConfigValue,
     ConfigDict,
@@ -162,6 +162,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
     },
 }
 
+# for backwards compatibility with old config files, check old/deprecated names for each key
 CONFIG_ALIASES = {
     alias: key
     for section in CONFIG_DEFAULTS.values()
@@ -169,6 +170,7 @@ CONFIG_ALIASES = {
             for alias in default.get('aliases', ())
 }
 USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
+
 def get_real_name(key: str) -> str:
     return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
 
@@ -223,7 +225,7 @@ STATICFILE_EXTENSIONS = {
     # html, htm, shtml, xhtml, xml, aspx, php, cgi
 }
 
-PYTHON_DIR_NAME = 'archivebox'
+PACKAGE_DIR_NAME = 'archivebox'
 TEMPLATES_DIR_NAME = 'themes'
 
 ARCHIVE_DIR_NAME = 'archive'
@@ -257,9 +259,8 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'USER':                     {'default': lambda c: getpass.getuser() or os.getlogin()},
     'ANSI':                     {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
 
-    'REPO_DIR':                 {'default': lambda c: Path(__file__).resolve().parent.parent.parent},
-    'PYTHON_DIR':               {'default': lambda c: c['REPO_DIR'] / PYTHON_DIR_NAME},
-    'TEMPLATES_DIR':            {'default': lambda c: c['PYTHON_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
+    'PACKAGE_DIR':              {'default': lambda c: Path(__file__).resolve().parent},
+    'TEMPLATES_DIR':            {'default': lambda c: c['PACKAGE_DIR'] / TEMPLATES_DIR_NAME / 'legacy'},
 
     'OUTPUT_DIR':               {'default': lambda c: Path(c['OUTPUT_DIR']).resolve() if c['OUTPUT_DIR'] else Path(os.curdir).resolve()},
     'ARCHIVE_DIR':              {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
@@ -271,7 +272,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
     'URL_BLACKLIST_PTN':        {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
 
     'ARCHIVEBOX_BINARY':        {'default': lambda c: sys.argv[0]},
-    'VERSION':                  {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']},
+    'VERSION':                  {'default': lambda c: json.loads((Path(c['PACKAGE_DIR']) / 'package.json').read_text().strip())['version']},
     'GIT_SHA':                  {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
 
     'PYTHON_BINARY':            {'default': lambda c: sys.executable},
@@ -412,7 +413,7 @@ def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
 def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
     """write the ini-formatted config file to OUTPUT_DIR/Archivebox.conf"""
 
-    from ..system import atomic_write
+    from .system import atomic_write
 
     out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
     config_path = Path(out_dir) /  CONFIG_FILENAME
@@ -652,15 +653,10 @@ def wget_supports_compression(config):
 
 def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
     return {
-        'REPO_DIR': {
-            'path': config['REPO_DIR'].resolve(),
-            'enabled': True,
-            'is_valid': (config['REPO_DIR'] / 'archivebox').exists(),
-        },
-        'PYTHON_DIR': {
-            'path': (config['PYTHON_DIR']).resolve(),
+        'PACKAGE_DIR': {
+            'path': (config['PACKAGE_DIR']).resolve(),
             'enabled': True,
-            'is_valid': (config['PYTHON_DIR'] / '__main__.py').exists(),
+            'is_valid': (config['PACKAGE_DIR'] / '__main__.py').exists(),
         },
         'TEMPLATES_DIR': {
             'path': (config['TEMPLATES_DIR']).resolve(),
@@ -689,7 +685,7 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
         'OUTPUT_DIR': {
             'path': config['OUTPUT_DIR'].resolve(),
             'enabled': True,
-            'is_valid': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).exists(),
+            'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
         },
         'SOURCES_DIR': {
             'path': config['SOURCES_DIR'].resolve(),
@@ -716,16 +712,6 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
             'enabled': True,
             'is_valid': (config['OUTPUT_DIR'] / SQL_INDEX_FILENAME).exists(),
         },
-        'JSON_INDEX': {
-            'path': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).resolve(),
-            'enabled': True,
-            'is_valid': (config['OUTPUT_DIR'] / JSON_INDEX_FILENAME).exists(),
-        },
-        'HTML_INDEX': {
-            'path': (config['OUTPUT_DIR'] / HTML_INDEX_FILENAME).resolve(),
-            'enabled': True,
-            'is_valid': (config['OUTPUT_DIR'] / HTML_INDEX_FILENAME).exists(),
-        },
     }
 
 def get_dependency_info(config: ConfigDict) -> ConfigValue:
@@ -943,7 +929,7 @@ def check_data_folder(out_dir: Optional[str]=None, config: ConfigDict=CONFIG) ->
         stderr('        archivebox init')
         raise SystemExit(2)
 
-    from ..index.sql import list_migrations
+    from .index.sql import list_migrations
 
     pending_migrations = [name for status, name in list_migrations() if not status]
 
@@ -971,12 +957,13 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG)
     
     output_dir = out_dir or Path(config['OUTPUT_DIR'])
 
-    assert isinstance(output_dir, Path) and isinstance(config['PYTHON_DIR'], Path)
+    assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
 
     try:
         import django
-        sys.path.append(str(config['PYTHON_DIR']))
+        sys.path.append(str(config['PACKAGE_DIR']))
         os.environ.setdefault('OUTPUT_DIR', str(output_dir))
+        assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
         os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
         django.setup()
 

+ 3 - 2
archivebox/config/stubs.py → archivebox/config_stubs.py

@@ -33,8 +33,9 @@ class ConfigDict(BaseConfig, total=False):
     SHOW_PROGRESS: bool
     IN_DOCKER: bool
 
-    OUTPUT_DIR: Union[str, Path, None]
-    CONFIG_FILE: Union[str, Path, None]
+    PACKAGE_DIR: Path
+    OUTPUT_DIR: Path
+    CONFIG_FILE: Path
     ONLY_NEW: bool
     TIMEOUT: int
     MEDIA_TIMEOUT: int

+ 69 - 36
archivebox/core/settings.py

@@ -2,24 +2,36 @@ __package__ = 'archivebox.core'
 
 import os
 import sys
+
 from pathlib import Path
 from django.utils.crypto import get_random_string
 
-
 from ..config import (                                                          # noqa: F401
     DEBUG,
     SECRET_KEY,
     ALLOWED_HOSTS,
-    PYTHON_DIR,
+    PACKAGE_DIR,
     ACTIVE_THEME,
     SQL_INDEX_FILENAME,
     OUTPUT_DIR,
 )
 
-ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
+
+IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
+IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
 IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
 
-SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
+################################################################################
+### Django Core Settings
+################################################################################
+
+WSGI_APPLICATION = 'core.wsgi.application'
+ROOT_URLCONF = 'core.urls'
+
+LOGIN_URL = '/accounts/login/'
+LOGOUT_REDIRECT_URL = '/'
+PASSWORD_RESET_URL = '/accounts/password_reset/'
+APPEND_SLASH = True
 
 INSTALLED_APPS = [
     'django.contrib.auth',
@@ -44,16 +56,32 @@ MIDDLEWARE = [
     'django.contrib.messages.middleware.MessageMiddleware',
 ]
 
-ROOT_URLCONF = 'core.urls'
-APPEND_SLASH = True
+AUTHENTICATION_BACKENDS = [
+    'django.contrib.auth.backends.ModelBackend',
+]
+
+
+################################################################################
+### Staticfile and Template Settings
+################################################################################
+
+STATIC_URL = '/static/'
+
+STATICFILES_DIRS = [
+    str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME / 'static'),
+    str(Path(PACKAGE_DIR) / 'themes' / 'default' / 'static'),
+]
+
+TEMPLATE_DIRS = [
+    str(Path(PACKAGE_DIR) / 'themes' / ACTIVE_THEME),
+    str(Path(PACKAGE_DIR) / 'themes' / 'default'),
+    str(Path(PACKAGE_DIR) / 'themes'),
+]
+
 TEMPLATES = [
     {
         'BACKEND': 'django.template.backends.django.DjangoTemplates',
-        'DIRS': [
-            str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME),
-            str(Path(PYTHON_DIR) / 'themes' / 'default'),
-            str(Path(PYTHON_DIR) / 'themes'),
-        ],
+        'DIRS': TEMPLATE_DIRS,
         'APP_DIRS': True,
         'OPTIONS': {
             'context_processors': [
@@ -66,7 +94,10 @@ TEMPLATES = [
     },
 ]
 
-WSGI_APPLICATION = 'core.wsgi.application'
+
+################################################################################
+### External Service Settings
+################################################################################
 
 DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME
 DATABASES = {
@@ -76,40 +107,51 @@ DATABASES = {
     }
 }
 
-AUTHENTICATION_BACKENDS = [
-    'django.contrib.auth.backends.ModelBackend',
-]
-AUTH_PASSWORD_VALIDATORS = [
-    {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
-    {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
-    {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
-    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
-]
+EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
+
 
 ################################################################################
 ### Security Settings
 ################################################################################
+
+SECRET_KEY = SECRET_KEY or get_random_string(50, 'abcdefghijklmnopqrstuvwxyz0123456789-_+!.')
+
+ALLOWED_HOSTS = ALLOWED_HOSTS.split(',')
+
 SECURE_BROWSER_XSS_FILTER = True
 SECURE_CONTENT_TYPE_NOSNIFF = True
-SESSION_COOKIE_SECURE = False
+
 CSRF_COOKIE_SECURE = False
+SESSION_COOKIE_SECURE = False
 SESSION_COOKIE_DOMAIN = None
+SESSION_COOKIE_AGE = 1209600  # 2 weeks
 SESSION_EXPIRE_AT_BROWSER_CLOSE = False
 SESSION_SAVE_EVERY_REQUEST = True
-SESSION_COOKIE_AGE = 1209600  # 2 weeks
-LOGIN_URL = '/accounts/login/'
-LOGOUT_REDIRECT_URL = '/'
-PASSWORD_RESET_URL = '/accounts/password_reset/'
 
+AUTH_PASSWORD_VALIDATORS = [
+    {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
+    {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
+    {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
+    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
+]
+
+
+################################################################################
+### Shell Settings
+################################################################################
 
 SHELL_PLUS = 'ipython'
 SHELL_PLUS_PRINT_SQL = False
 IPYTHON_ARGUMENTS = ['--no-confirm-exit', '--no-banner']
 IPYTHON_KERNEL_DISPLAY_NAME = 'ArchiveBox Django Shell'
 if IS_SHELL:
-    os.environ['PYTHONSTARTUP'] = str(Path(PYTHON_DIR) / 'core' / 'welcome_message.py')
+    os.environ['PYTHONSTARTUP'] = str(Path(PACKAGE_DIR) / 'core' / 'welcome_message.py')
 
 
+################################################################################
+### Internationalization & Localization Settings
+################################################################################
+
 LANGUAGE_CODE = 'en-us'
 TIME_ZONE = 'UTC'
 USE_I18N = False
@@ -118,12 +160,3 @@ USE_TZ = False
 
 DATETIME_FORMAT = 'Y-m-d g:iA'
 SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
-
-
-EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
-
-STATIC_URL = '/static/'
-STATICFILES_DIRS = [
-    str(Path(PYTHON_DIR) / 'themes' / ACTIVE_THEME / 'static'),
-    str(Path(PYTHON_DIR) / 'themes' / 'default' / 'static'),
-]

+ 1 - 2
archivebox/extractors/mercury.py

@@ -61,8 +61,7 @@ def save_mercury(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
         atomic_write(str(output_folder / "content.txt"), txtresult_json["content"])
         atomic_write(str(output_folder / "article.json"), result_json)
 
-        # parse out number of files downloaded from last line of stderr:
-        #  "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
+        # keep up to the last 20 lines of the combined stdout + stderr output
         output_tail = [
             line.strip()
             for line in (result.stdout + result.stderr).decode().rsplit('\n', 20)[-20:]

+ 0 - 2
archivebox/logging_util.py

@@ -15,8 +15,6 @@ from typing import Optional, List, Dict, Union, IO, TYPE_CHECKING
 if TYPE_CHECKING:
     from .index.schema import Link, ArchiveResult
 
-from .index.json import MAIN_INDEX_HEADER
-
 from .util import enforce_types
 from .config import (
     ConfigDict,

+ 1 - 1
archivebox/main.py

@@ -216,7 +216,7 @@ def version(quiet: bool=False,
             print(printable_dependency_version(name, dependency))
         
         print()
-        print('{white}[i] Code locations:{reset}'.format(**ANSI))
+        print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
         for name, folder in CODE_LOCATIONS.items():
             print(printable_folder_status(name, folder))