Browse Source

better config comments and docstrings

Nick Sweeting 5 years ago
parent
commit
9784dcb816
3 changed files with 112 additions and 60 deletions
  1. 1 1
      .github/workflows/debian.yml
  2. 8 2
      Dockerfile
  3. 103 57
      archivebox/config.py

+ 1 - 1
.github/workflows/debian.yml

@@ -47,7 +47,7 @@ jobs:
           archivebox config --set SAVE_READABILITY=False
           archivebox config --set SAVE_MERCURY=False
           archivebox config --set SAVE_SINGLEFILE=False
-          archivebox version
+          archivebox --version
 
       - name: Add some links to test
         run: |

+ 8 - 2
Dockerfile

@@ -50,6 +50,13 @@ RUN apt-get update -qq \
         fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
     && rm -rf /var/lib/apt/lists/*
 
+# Install apt development dependencies
+# RUN apt-get update -qq \
+#     && apt-get install -qq -y --no-install-recommends \
+#         python3 python3-dev python3-pip python3-venv python3-all \
+#         dh-python debhelper devscripts dput software-properties-common \
+#         python3-distutils python3-setuptools python3-wheel python3-stdeb
+
 # Install Node environment
 RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
     && echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
@@ -62,7 +69,6 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
 WORKDIR "$NODE_DIR"
 ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
     npm_config_loglevel=error
-# RUN npm install -g npm
 ADD ./package.json ./package.json
 ADD ./package-lock.json ./package-lock.json
 RUN npm ci
@@ -82,7 +88,7 @@ RUN apt-get update -qq \
     && apt-get autoremove -y \
     && rm -rf /var/lib/apt/lists/*
 
-# Install ArchiveBox Python package
+# Install ArchiveBox Python package and its dependencies
 WORKDIR "$CODE_DIR"
 ADD . "$CODE_DIR"
 RUN pip install -e .

+ 103 - 57
archivebox/config.py

@@ -1,3 +1,24 @@
+"""
+ArchiveBox config definitions (including defaults and dynamic config options).
+
+Config Usage Example:
+
+    archivebox config --set MEDIA_TIMEOUT=600
+    env MEDIA_TIMEOUT=600 USE_COLOR=False ... archivebox [subcommand] ...
+
+Config Precedence Order:
+
+  1. cli args                 (--update-all / --index-only / etc.)
+  2. shell environment vars   (env USE_COLOR=False archivebox add '...')
+  3. config file              (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
+  4. defaults                 (defined below in Python)
+
+Documentation:
+
+  https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
+
+"""
+
 __package__ = 'archivebox'
 
 import os
@@ -24,26 +45,9 @@ from .config_stubs import (
     ConfigDefaultDict,
 )
 
-# precedence order for config:
-# 1. cli args                 (e.g. )
-# 2. shell environment vars   (env USE_COLOR=False archivebox add '...')
-# 3. config file              (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
-# 4. defaults                 (defined below in Python)
-
-#
-# env SHOW_PROGRESS=1 archivebox add '...'
-# archivebox config --set TIMEOUT=600
-# 
+############################### Config Schema ##################################
 
-# ******************************************************************************
-# Documentation: https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
-# Use the 'env' command to pass config options to ArchiveBox.  e.g.:
-#     env USE_COLOR=True CHROME_BINARY=chromium archivebox add < example.html
-# ******************************************************************************
-
-################################# User Config ##################################
-
-CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
+CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     'SHELL_CONFIG': {
         'IS_TTY':                   {'type': bool,  'default': lambda _: sys.stdout.isatty()},
         'USE_COLOR':                {'type': bool,  'default': lambda c: c['IS_TTY']},
@@ -179,21 +183,40 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
     },
 }
 
+
+########################## Backwards-Compatibility #############################
+
+
 # for backwards compatibility with old config files, check old/deprecated names for each key
 CONFIG_ALIASES = {
     alias: key
-    for section in CONFIG_DEFAULTS.values()
+    for section in CONFIG_SCHEMA.values()
         for key, default in section.items()
             for alias in default.get('aliases', ())
 }
-USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
+USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}
 
 def get_real_name(key: str) -> str:
+    """get the current canonical name for a given deprecated config key"""
     return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
 
-############################## Derived Config ##############################
 
-# Constants
+
+################################ Constants #####################################
+
+PACKAGE_DIR_NAME = 'archivebox'
+TEMPLATES_DIR_NAME = 'themes'
+
+ARCHIVE_DIR_NAME = 'archive'
+SOURCES_DIR_NAME = 'sources'
+LOGS_DIR_NAME = 'logs'
+STATIC_DIR_NAME = 'static'
+SQL_INDEX_FILENAME = 'index.sqlite3'
+JSON_INDEX_FILENAME = 'index.json'
+HTML_INDEX_FILENAME = 'index.html'
+ROBOTS_TXT_FILENAME = 'robots.txt'
+FAVICON_FILENAME = 'favicon.ico'
+CONFIG_FILENAME = 'ArchiveBox.conf'
 
 DEFAULT_CLI_COLORS = {
     'reset': '\033[00;00m',
@@ -242,36 +265,12 @@ STATICFILE_EXTENSIONS = {
     # html, htm, shtml, xhtml, xml, aspx, php, cgi
 }
 
-PACKAGE_DIR_NAME = 'archivebox'
-TEMPLATES_DIR_NAME = 'themes'
-
-ARCHIVE_DIR_NAME = 'archive'
-SOURCES_DIR_NAME = 'sources'
-LOGS_DIR_NAME = 'logs'
-STATIC_DIR_NAME = 'static'
-SQL_INDEX_FILENAME = 'index.sqlite3'
-JSON_INDEX_FILENAME = 'index.json'
-HTML_INDEX_FILENAME = 'index.html'
-ROBOTS_TXT_FILENAME = 'robots.txt'
-FAVICON_FILENAME = 'favicon.ico'
-CONFIG_FILENAME = 'ArchiveBox.conf'
 
-CONFIG_HEADER = (
-"""# This is the config file for your ArchiveBox collection.
-#
-# You can add options here manually in INI format, or automatically by running:
-#    archivebox config --set KEY=VALUE
-# 
-# If you modify this file manually, make sure to update your archive after by running:
-#    archivebox init
-#
-# A list of all possible config with documentation and examples can be found here:
-#    https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
 
-""")
+############################## Derived Config ##################################
 
 
-DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
+DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'TERM_WIDTH':               {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
     'USER':                     {'default': lambda c: getpass.getuser() or os.getlogin()},
     'ANSI':                     {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
@@ -359,6 +358,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
 
 ################################### Helpers ####################################
 
+
 def load_config_val(key: str,
                     default: ConfigDefaultValue=None,
                     type: Optional[Type]=None,
@@ -437,6 +437,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
 
     from .system import atomic_write
 
+    CONFIG_HEADER = (
+    """# This is the config file for your ArchiveBox collection.
+    #
+    # You can add options here manually in INI format, or automatically by running:
+    #    archivebox config --set KEY=VALUE
+    # 
+    # If you modify this file manually, make sure to update your archive after by running:
+    #    archivebox init
+    #
+    # A list of all possible config with documentation and examples can be found here:
+    #    https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
+
+    """)
+
     out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
     config_path = Path(out_dir) /  CONFIG_FILENAME
     
@@ -450,7 +464,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
     with open(config_path, 'r') as old:
         atomic_write(f'{config_path}.bak', old.read())
 
-    find_section = lambda key: [name for name, opts in CONFIG_DEFAULTS.items() if key in opts][0]
+    find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]
 
     # Set up sections in empty config file
     for key, val in config.items():
@@ -539,6 +553,8 @@ def load_config(defaults: ConfigDefaultDict,
 
 #     with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
 
+
+# Logging Helpers
 def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
     ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
 
@@ -570,6 +586,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix='    ', config: Op
             stderr('{}      {}'.format(prefix, line))
 
 
+# Dependency Metadata Helpers
 def bin_version(binary: Optional[str]) -> Optional[str]:
     """check the presence and return valid version line of a specified binary"""
 
@@ -837,6 +854,14 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
             'enabled': config['USE_RIPGREP'],
             'is_valid': bool(config['RIPGREP_VERSION']),
         },
+        # TODO: add an entry for the sonic search backend?
+        # 'SONIC_BINARY': {
+        #     'path': bin_path(config['SONIC_BINARY']),
+        #     'version': config['SONIC_VERSION'],
+        #     'hash': bin_hash(config['SONIC_BINARY']),
+        #     'enabled': config['USE_SONIC'],
+        #     'is_valid': bool(config['SONIC_VERSION']),
+        # },
     }
 
 def get_chrome_info(config: ConfigDict) -> ConfigValue:
@@ -852,28 +877,51 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
     }
 
 
-################################## Load Config #################################
+# ******************************************************************************
+# ******************************************************************************
+# ******************************** Load Config *********************************
+# ******* (compile the defaults, configs, and metadata all into CONFIG) ********
+# ******************************************************************************
+# ******************************************************************************
 
 
 def load_all_config():
     CONFIG: ConfigDict = {}
-    for section_name, section_config in CONFIG_DEFAULTS.items():
+    for section_name, section_config in CONFIG_SCHEMA.items():
         CONFIG = load_config(section_config, CONFIG)
 
-    return load_config(DERIVED_CONFIG_DEFAULTS, CONFIG)
+    return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
 
+# add all final config values in CONFIG to globals in this file
 CONFIG = load_all_config()
 globals().update(CONFIG)
+# this lets us do:  from .config import DEBUG, MEDIA_TIMEOUT, ...
 
-# Timezone set as UTC
+
+# ******************************************************************************
+# ******************************************************************************
+# ******************************************************************************
+# ******************************************************************************
+# ******************************************************************************
+
+
+
+########################### System Environment Setup ###########################
+
+
+# Set timezone to UTC and umask to the complement of OUTPUT_PERMISSIONS (0o777 - OUTPUT_PERMISSIONS)
 os.environ["TZ"] = 'UTC'
+os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8))  # noqa: F821
 
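 # add OUTPUT_DIR/node_modules/.bin so we can use node scripts in extractors (NOTE(review): this appends to sys.path, Python's import path, not the shell $PATH — confirm intent)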
 NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
 sys.path.append(NODE_BIN_PATH)
 
 
-############################## Importable Checkers #############################
+
+
+########################### Config Validity Checkers ###########################
+
 
 def check_system_config(config: ConfigDict=CONFIG) -> None:
     ### Check system environment
@@ -1031,5 +1079,3 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
                 f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
     except KeyboardInterrupt:
         raise SystemExit(2)
-
-os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8))  # noqa: F821