|
@@ -28,21 +28,19 @@ import sys
|
|
|
import json
|
|
import json
|
|
|
import inspect
|
|
import inspect
|
|
|
import getpass
|
|
import getpass
|
|
|
-import platform
|
|
|
|
|
import shutil
|
|
import shutil
|
|
|
import requests
|
|
import requests
|
|
|
|
|
|
|
|
from hashlib import md5
|
|
from hashlib import md5
|
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
-from benedict import benedict
|
|
|
|
|
from datetime import datetime, timezone
|
|
from datetime import datetime, timezone
|
|
|
-from typing import Optional, Type, Tuple, Dict, Union, List
|
|
|
|
|
|
|
+from typing import Optional, Type, Tuple, Dict
|
|
|
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
|
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
|
|
from configparser import ConfigParser
|
|
from configparser import ConfigParser
|
|
|
-from collections import defaultdict
|
|
|
|
|
import importlib.metadata
|
|
import importlib.metadata
|
|
|
|
|
|
|
|
from pydantic_pkgr import SemVer
|
|
from pydantic_pkgr import SemVer
|
|
|
|
|
+from rich.progress import Progress
|
|
|
|
|
|
|
|
import django
|
|
import django
|
|
|
from django.db.backends.sqlite3.base import Database as sqlite3
|
|
from django.db.backends.sqlite3.base import Database as sqlite3
|
|
@@ -56,6 +54,17 @@ from .config_stubs import (
|
|
|
ConfigDefaultDict,
|
|
ConfigDefaultDict,
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+from .misc.logging import (
|
|
|
|
|
+ CONSOLE,
|
|
|
|
|
+ SHOW_PROGRESS,
|
|
|
|
|
+ DEFAULT_CLI_COLORS,
|
|
|
|
|
+ ANSI,
|
|
|
|
|
+ COLOR_DICT,
|
|
|
|
|
+ stderr,
|
|
|
|
|
+ hint,
|
|
|
|
|
+)
|
|
|
|
|
+from .misc.checks import check_system_config
|
|
|
|
|
+
|
|
|
# print('STARTING CONFIG LOADING')
|
|
# print('STARTING CONFIG LOADING')
|
|
|
|
|
|
|
|
# load fallback libraries from vendor dir
|
|
# load fallback libraries from vendor dir
|
|
@@ -70,7 +79,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
|
|
'SHELL_CONFIG': {
|
|
'SHELL_CONFIG': {
|
|
|
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
|
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
|
|
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
|
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
|
|
- 'SHOW_PROGRESS': {'type': bool, 'default': lambda c: (c['IS_TTY'] and platform.system() != 'Darwin')}, # progress bars are buggy on mac, disable for now
|
|
|
|
|
|
|
+ 'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']}, # progress bars are buggy on mac, disable for now
|
|
|
'IN_DOCKER': {'type': bool, 'default': False},
|
|
'IN_DOCKER': {'type': bool, 'default': False},
|
|
|
'IN_QEMU': {'type': bool, 'default': False},
|
|
'IN_QEMU': {'type': bool, 'default': False},
|
|
|
'PUID': {'type': int, 'default': os.getuid()},
|
|
'PUID': {'type': int, 'default': os.getuid()},
|
|
@@ -306,32 +315,7 @@ ROBOTS_TXT_FILENAME = 'robots.txt'
|
|
|
FAVICON_FILENAME = 'favicon.ico'
|
|
FAVICON_FILENAME = 'favicon.ico'
|
|
|
CONFIG_FILENAME = 'ArchiveBox.conf'
|
|
CONFIG_FILENAME = 'ArchiveBox.conf'
|
|
|
|
|
|
|
|
-DEFAULT_CLI_COLORS = benedict(
|
|
|
|
|
- {
|
|
|
|
|
- "reset": "\033[00;00m",
|
|
|
|
|
- "lightblue": "\033[01;30m",
|
|
|
|
|
- "lightyellow": "\033[01;33m",
|
|
|
|
|
- "lightred": "\033[01;35m",
|
|
|
|
|
- "red": "\033[01;31m",
|
|
|
|
|
- "green": "\033[01;32m",
|
|
|
|
|
- "blue": "\033[01;34m",
|
|
|
|
|
- "white": "\033[01;37m",
|
|
|
|
|
- "black": "\033[01;30m",
|
|
|
|
|
- }
|
|
|
|
|
-)
|
|
|
|
|
-ANSI = AttrDict({k: '' for k in DEFAULT_CLI_COLORS.keys()})
|
|
|
|
|
-
|
|
|
|
|
-COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
|
|
|
|
|
- '00': [(0, 0, 0), (0, 0, 0)],
|
|
|
|
|
- '30': [(0, 0, 0), (0, 0, 0)],
|
|
|
|
|
- '31': [(255, 0, 0), (128, 0, 0)],
|
|
|
|
|
- '32': [(0, 200, 0), (0, 128, 0)],
|
|
|
|
|
- '33': [(255, 255, 0), (128, 128, 0)],
|
|
|
|
|
- '34': [(0, 0, 255), (0, 0, 128)],
|
|
|
|
|
- '35': [(255, 0, 255), (128, 0, 128)],
|
|
|
|
|
- '36': [(0, 255, 255), (0, 128, 128)],
|
|
|
|
|
- '37': [(255, 255, 255), (255, 255, 255)],
|
|
|
|
|
-})
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
STATICFILE_EXTENSIONS = {
|
|
STATICFILE_EXTENSIONS = {
|
|
|
# 99.999% of the time, URLs ending in these extensions are static files
|
|
# 99.999% of the time, URLs ending in these extensions are static files
|
|
@@ -880,37 +864,6 @@ def parse_version_string(version: str) -> Tuple[int, int, int]:
|
|
|
return tuple(int(part) for part in base.split('.'))[:3]
|
|
return tuple(int(part) for part in base.split('.'))[:3]
|
|
|
|
|
|
|
|
|
|
|
|
|
-# Logging Helpers
|
|
|
|
|
-def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
|
|
|
|
- ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
|
|
|
|
-
|
|
|
|
|
- if color:
|
|
|
|
|
- strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
|
|
|
|
|
- else:
|
|
|
|
|
- strs = [' '.join(str(a) for a in args), '\n']
|
|
|
|
|
-
|
|
|
|
|
- sys.stdout.write(prefix + ''.join(strs))
|
|
|
|
|
-
|
|
|
|
|
-def stderr(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
|
|
|
|
- ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
|
|
|
|
-
|
|
|
|
|
- if color:
|
|
|
|
|
- strs = [ansi[color], ' '.join(str(a) for a in args), ansi['reset'], '\n']
|
|
|
|
|
- else:
|
|
|
|
|
- strs = [' '.join(str(a) for a in args), '\n']
|
|
|
|
|
-
|
|
|
|
|
- sys.stderr.write(prefix + ''.join(strs))
|
|
|
|
|
-
|
|
|
|
|
-def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Optional[ConfigDict]=None) -> None:
|
|
|
|
|
- ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
|
|
|
|
-
|
|
|
|
|
- if isinstance(text, str):
|
|
|
|
|
- stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text, **ansi))
|
|
|
|
|
- else:
|
|
|
|
|
- stderr('{}{lightred}Hint:{reset} {}'.format(prefix, text[0], **ansi))
|
|
|
|
|
- for line in text[1:]:
|
|
|
|
|
- stderr('{} {}'.format(prefix, line))
|
|
|
|
|
-
|
|
|
|
|
|
|
|
|
|
# Dependency Metadata Helpers
|
|
# Dependency Metadata Helpers
|
|
|
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
|
def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3) -> Optional[str]:
|
|
@@ -919,6 +872,10 @@ def bin_version(binary: Optional[str], cmd: Optional[str]=None, timeout: int=3)
|
|
|
abspath = bin_path(binary)
|
|
abspath = bin_path(binary)
|
|
|
if not binary or not abspath:
|
|
if not binary or not abspath:
|
|
|
return None
|
|
return None
|
|
|
|
|
+
|
|
|
|
|
+ return '999.999.999'
|
|
|
|
|
+
|
|
|
|
|
+ # Now handled by new BinProvider plugin system, no longer needed:
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
bin_env = os.environ | {'LANG': 'C'}
|
|
bin_env = os.environ | {'LANG': 'C'}
|
|
@@ -960,6 +917,9 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
|
|
|
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
|
return shutil.which(str(Path(binary).expanduser())) or shutil.which(str(binary)) or binary
|
|
|
|
|
|
|
|
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
|
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
|
|
|
|
+ return 'UNUSED'
|
|
|
|
|
+ # DEPRECATED: now handled by new BinProvider plugin system, no longer needed:
|
|
|
|
|
+
|
|
|
if binary is None:
|
|
if binary is None:
|
|
|
return None
|
|
return None
|
|
|
abs_path = bin_path(binary)
|
|
abs_path = bin_path(binary)
|
|
@@ -1329,246 +1289,123 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
|
|
|
|
|
|
|
|
########################### Config Validity Checkers ###########################
|
|
########################### Config Validity Checkers ###########################
|
|
|
|
|
|
|
|
|
|
+INITIAL_STARTUP_PROGRESS = None
|
|
|
|
|
+INITIAL_STARTUP_PROGRESS_TASK = 0
|
|
|
|
|
|
|
|
-def check_system_config(config: ConfigDict=CONFIG) -> None:
|
|
|
|
|
- ### Check system environment
|
|
|
|
|
- if config['USER'] == 'root' or str(config['PUID']) == "0":
|
|
|
|
|
- stderr('[!] ArchiveBox should never be run as root!', color='red')
|
|
|
|
|
- stderr(' For more information, see the security overview documentation:')
|
|
|
|
|
- stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
|
|
|
|
-
|
|
|
|
|
- if config['IN_DOCKER']:
|
|
|
|
|
- attempted_command = ' '.join(sys.argv[:3])
|
|
|
|
|
- stderr('')
|
|
|
|
|
- stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
|
|
|
|
|
- stderr(f' docker compose run archivebox {attempted_command}')
|
|
|
|
|
- stderr(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}')
|
|
|
|
|
- stderr(' or:')
|
|
|
|
|
- stderr(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"')
|
|
|
|
|
- stderr(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"')
|
|
|
|
|
-
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
- ### Check Python environment
|
|
|
|
|
- if sys.version_info[:3] < (3, 7, 0):
|
|
|
|
|
- stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
|
|
|
|
|
- stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
- if int(CONFIG['DJANGO_VERSION'].split('.')[0]) < 3:
|
|
|
|
|
- stderr(f'[X] Django version is not new enough: {config["DJANGO_VERSION"]} (>3.0 is required)', color='red')
|
|
|
|
|
- stderr(' Upgrade django using pip or your system package manager: pip3 install --upgrade django')
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
- if config['PYTHON_ENCODING'] not in ('UTF-8', 'UTF8'):
|
|
|
|
|
- stderr(f'[X] Your system is running python3 scripts with a bad locale setting: {config["PYTHON_ENCODING"]} (it should be UTF-8).', color='red')
|
|
|
|
|
- stderr(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)')
|
|
|
|
|
- stderr(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"')
|
|
|
|
|
- stderr('')
|
|
|
|
|
- stderr(' Confirm that it\'s fixed by opening a new shell and running:')
|
|
|
|
|
- stderr(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
- # stderr('[i] Using Chrome binary: {}'.format(shutil.which(CHROME_BINARY) or CHROME_BINARY))
|
|
|
|
|
- # stderr('[i] Using Chrome data dir: {}'.format(os.path.abspath(CHROME_USER_DATA_DIR)))
|
|
|
|
|
- if config['CHROME_USER_DATA_DIR'] is not None and Path(config['CHROME_USER_DATA_DIR']).exists():
|
|
|
|
|
- if not (Path(config['CHROME_USER_DATA_DIR']) / 'Default').exists():
|
|
|
|
|
- stderr('[X] Could not find profile "Default" in CHROME_USER_DATA_DIR.', color='red')
|
|
|
|
|
- stderr(f' {config["CHROME_USER_DATA_DIR"]}')
|
|
|
|
|
- stderr(' Make sure you set it to a Chrome user data directory containing a Default profile folder.')
|
|
|
|
|
- stderr(' For more info see:')
|
|
|
|
|
- stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#CHROME_USER_DATA_DIR')
|
|
|
|
|
- if '/Default' in str(config['CHROME_USER_DATA_DIR']):
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(' Try removing /Default from the end e.g.:')
|
|
|
|
|
- stderr(' CHROME_USER_DATA_DIR="{}"'.format(str(config['CHROME_USER_DATA_DIR']).split('/Default')[0]))
|
|
|
|
|
-
|
|
|
|
|
- # hard error is too annoying here, instead just set it to nothing
|
|
|
|
|
- # raise SystemExit(2)
|
|
|
|
|
- config['CHROME_USER_DATA_DIR'] = None
|
|
|
|
|
- else:
|
|
|
|
|
- config['CHROME_USER_DATA_DIR'] = None
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
|
|
|
|
- invalid_dependencies = [
|
|
|
|
|
- (name, info) for name, info in config['DEPENDENCIES'].items()
|
|
|
|
|
- if info['enabled'] and not info['is_valid']
|
|
|
|
|
- ]
|
|
|
|
|
- if invalid_dependencies and show_help:
|
|
|
|
|
- stderr(f'[!] Warning: Missing {len(invalid_dependencies)} recommended dependencies', color='lightyellow')
|
|
|
|
|
- for dependency, info in invalid_dependencies:
|
|
|
|
|
- stderr(
|
|
|
|
|
- ' ! {}: {} ({})'.format(
|
|
|
|
|
- dependency,
|
|
|
|
|
- info['path'] or 'unable to find binary',
|
|
|
|
|
- info['version'] or 'unable to detect version',
|
|
|
|
|
- )
|
|
|
|
|
- )
|
|
|
|
|
- if dependency in ('YOUTUBEDL_BINARY', 'CHROME_BINARY', 'SINGLEFILE_BINARY', 'READABILITY_BINARY', 'MERCURY_BINARY'):
|
|
|
|
|
- hint(('To install all packages automatically run: archivebox setup',
|
|
|
|
|
- f'or to disable it and silence this warning: archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False',
|
|
|
|
|
- ''), prefix=' ')
|
|
|
|
|
- stderr('')
|
|
|
|
|
-
|
|
|
|
|
- if config['TIMEOUT'] < 5:
|
|
|
|
|
- stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
|
|
|
|
|
- stderr(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.')
|
|
|
|
|
- stderr(' (Setting it to somewhere between 30 and 3000 seconds is recommended)')
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
|
|
|
|
|
- stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
|
|
|
|
|
- stderr()
|
|
|
|
|
-
|
|
|
|
|
- elif config['USE_CHROME'] and config['TIMEOUT'] < 15:
|
|
|
|
|
- stderr(f'[!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={config["TIMEOUT"]} seconds)', color='red')
|
|
|
|
|
- stderr(' Chrome will fail to archive all sites if set to less than ~15 seconds.')
|
|
|
|
|
- stderr(' (Setting it to somewhere between 30 and 300 seconds is recommended)')
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(' If you want to make ArchiveBox run faster, disable specific archive methods instead:')
|
|
|
|
|
- stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles')
|
|
|
|
|
- stderr()
|
|
|
|
|
-
|
|
|
|
|
- if config['USE_YOUTUBEDL'] and config['MEDIA_TIMEOUT'] < 20:
|
|
|
|
|
- stderr(f'[!] Warning: MEDIA_TIMEOUT is set too low! (currently set to MEDIA_TIMEOUT={config["MEDIA_TIMEOUT"]} seconds)', color='red')
|
|
|
|
|
- stderr(' youtube-dl/yt-dlp will fail to archive any media if set to less than ~20 seconds.')
|
|
|
|
|
- stderr(' (Setting it somewhere over 60 seconds is recommended)')
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(' If you want to disable media archiving entirely, set SAVE_MEDIA=False instead:')
|
|
|
|
|
- stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#save_media')
|
|
|
|
|
- stderr()
|
|
|
|
|
|
|
+def bump_startup_progress_bar():
|
|
|
|
|
+ global INITIAL_STARTUP_PROGRESS
|
|
|
|
|
+ global INITIAL_STARTUP_PROGRESS_TASK
|
|
|
|
|
+ if INITIAL_STARTUP_PROGRESS:
|
|
|
|
|
+ INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1) # type: ignore
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG) -> None:
|
|
|
|
|
- output_dir = out_dir or config['OUTPUT_DIR']
|
|
|
|
|
- assert isinstance(output_dir, (str, Path))
|
|
|
|
|
-
|
|
|
|
|
- archive_dir_exists = (Path(output_dir) / ARCHIVE_DIR_NAME).exists()
|
|
|
|
|
- if not archive_dir_exists:
|
|
|
|
|
- stderr('[X] No archivebox index found in the current directory.', color='red')
|
|
|
|
|
- stderr(f' {output_dir}', color='lightyellow')
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**config['ANSI']))
|
|
|
|
|
- stderr(' cd path/to/your/archive/folder')
|
|
|
|
|
- stderr(' archivebox [command]')
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**config['ANSI']))
|
|
|
|
|
- stderr(' archivebox init')
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
|
|
|
|
|
- output_dir = out_dir or config['OUTPUT_DIR']
|
|
|
|
|
- from .index.sql import list_migrations
|
|
|
|
|
-
|
|
|
|
|
- pending_migrations = [name for status, name in list_migrations() if not status]
|
|
|
|
|
-
|
|
|
|
|
- if pending_migrations:
|
|
|
|
|
- stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
|
|
|
|
|
- stderr(f' {output_dir}')
|
|
|
|
|
- stderr()
|
|
|
|
|
- stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
|
|
|
|
|
- stderr(' archivebox init')
|
|
|
|
|
- raise SystemExit(3)
|
|
|
|
|
-
|
|
|
|
|
- (Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
|
|
|
|
|
- (Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
|
|
|
|
|
- (Path(output_dir) / CACHE_DIR_NAME).mkdir(exist_ok=True)
|
|
|
|
|
- (Path(output_dir) / LIB_DIR_NAME / 'bin').mkdir(exist_ok=True, parents=True)
|
|
|
|
|
- (Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
|
+def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
|
|
|
|
+ global INITIAL_STARTUP_PROGRESS
|
|
|
|
|
+ global INITIAL_STARTUP_PROGRESS_TASK
|
|
|
|
|
+
|
|
|
|
|
+ with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
|
|
|
|
|
+ INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
|
|
|
|
|
+ check_system_config(config)
|
|
|
|
|
|
|
|
|
|
+ output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
|
|
|
|
|
|
|
|
|
+ assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
|
|
|
|
|
|
|
-def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None:
|
|
|
|
|
- check_system_config()
|
|
|
|
|
|
|
+ bump_startup_progress_bar()
|
|
|
|
|
+ try:
|
|
|
|
|
+ from django.core.management import call_command
|
|
|
|
|
|
|
|
- output_dir = out_dir or Path(config['OUTPUT_DIR'])
|
|
|
|
|
|
|
+ sys.path.append(str(config['PACKAGE_DIR']))
|
|
|
|
|
+ os.environ.setdefault('OUTPUT_DIR', str(output_dir))
|
|
|
|
|
+ assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
|
|
|
|
|
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
|
|
|
|
|
|
|
- assert isinstance(output_dir, Path) and isinstance(config['PACKAGE_DIR'], Path)
|
|
|
|
|
|
|
+ # Check to make sure JSON extension is available in our Sqlite3 instance
|
|
|
|
|
+ try:
|
|
|
|
|
+ cursor = sqlite3.connect(':memory:').cursor()
|
|
|
|
|
+ cursor.execute('SELECT JSON(\'{"a": "b"}\')')
|
|
|
|
|
+ except sqlite3.OperationalError as exc:
|
|
|
|
|
+ stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
|
|
|
|
|
+ hint([
|
|
|
|
|
+ 'Upgrade your Python version or install the extension manually:',
|
|
|
|
|
+ 'https://code.djangoproject.com/wiki/JSON1Extension'
|
|
|
|
|
+ ])
|
|
|
|
|
+
|
|
|
|
|
+ bump_startup_progress_bar()
|
|
|
|
|
+
|
|
|
|
|
+ if in_memory_db:
|
|
|
|
|
+ # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
|
|
|
|
|
+ # in those cases we create a temporary in-memory db and run the migrations
|
|
|
|
|
+ # immediately to get a usable in-memory-database at startup
|
|
|
|
|
+ os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
|
|
|
|
+ django.setup()
|
|
|
|
|
+
|
|
|
|
|
+ bump_startup_progress_bar()
|
|
|
|
|
+ call_command("migrate", interactive=False, verbosity=0)
|
|
|
|
|
+ else:
|
|
|
|
|
+ # Otherwise use default sqlite3 file-based database and initialize django
|
|
|
|
|
+ # without running migrations automatically (user runs them manually by calling init)
|
|
|
|
|
+ django.setup()
|
|
|
|
|
+
|
|
|
|
|
+ bump_startup_progress_bar()
|
|
|
|
|
|
|
|
- try:
|
|
|
|
|
- from django.core.management import call_command
|
|
|
|
|
|
|
+ from django.conf import settings
|
|
|
|
|
|
|
|
- sys.path.append(str(config['PACKAGE_DIR']))
|
|
|
|
|
- os.environ.setdefault('OUTPUT_DIR', str(output_dir))
|
|
|
|
|
- assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py'
|
|
|
|
|
- os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
|
|
|
|
|
|
+ # log startup message to the error log
|
|
|
|
|
+ with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
|
|
|
|
+ command = ' '.join(sys.argv)
|
|
|
|
|
+ ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
|
|
|
|
+ f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
|
|
|
|
|
|
|
|
- # Check to make sure JSON extension is available in our Sqlite3 instance
|
|
|
|
|
- try:
|
|
|
|
|
- cursor = sqlite3.connect(':memory:').cursor()
|
|
|
|
|
- cursor.execute('SELECT JSON(\'{"a": "b"}\')')
|
|
|
|
|
- except sqlite3.OperationalError as exc:
|
|
|
|
|
- stderr(f'[X] Your SQLite3 version is missing the required JSON1 extension: {exc}', color='red')
|
|
|
|
|
- hint([
|
|
|
|
|
- 'Upgrade your Python version or install the extension manually:',
|
|
|
|
|
- 'https://code.djangoproject.com/wiki/JSON1Extension'
|
|
|
|
|
- ])
|
|
|
|
|
-
|
|
|
|
|
- if in_memory_db:
|
|
|
|
|
- # some commands (e.g. oneshot) dont store a long-lived sqlite3 db file on disk.
|
|
|
|
|
- # in those cases we create a temporary in-memory db and run the migrations
|
|
|
|
|
- # immediately to get a usable in-memory-database at startup
|
|
|
|
|
- os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:")
|
|
|
|
|
- django.setup()
|
|
|
|
|
- call_command("migrate", interactive=False, verbosity=0)
|
|
|
|
|
- else:
|
|
|
|
|
- # Otherwise use default sqlite3 file-based database and initialize django
|
|
|
|
|
- # without running migrations automatically (user runs them manually by calling init)
|
|
|
|
|
- django.setup()
|
|
|
|
|
-
|
|
|
|
|
- from django.conf import settings
|
|
|
|
|
-
|
|
|
|
|
- # log startup message to the error log
|
|
|
|
|
- with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
|
|
|
|
- command = ' '.join(sys.argv)
|
|
|
|
|
- ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
|
|
|
|
- f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
|
|
|
|
|
-
|
|
|
|
|
- if check_db:
|
|
|
|
|
- # Enable WAL mode in sqlite3
|
|
|
|
|
- from django.db import connection
|
|
|
|
|
- with connection.cursor() as cursor:
|
|
|
|
|
-
|
|
|
|
|
- # Set Journal mode to WAL to allow for multiple writers
|
|
|
|
|
- current_mode = cursor.execute("PRAGMA journal_mode")
|
|
|
|
|
- if current_mode != 'wal':
|
|
|
|
|
- cursor.execute("PRAGMA journal_mode=wal;")
|
|
|
|
|
-
|
|
|
|
|
- # Set max blocking delay for concurrent writes and write sync mode
|
|
|
|
|
- # https://litestream.io/tips/#busy-timeout
|
|
|
|
|
- cursor.execute("PRAGMA busy_timeout = 5000;")
|
|
|
|
|
- cursor.execute("PRAGMA synchronous = NORMAL;")
|
|
|
|
|
-
|
|
|
|
|
- # Create cache table in DB if needed
|
|
|
|
|
- try:
|
|
|
|
|
- from django.core.cache import cache
|
|
|
|
|
- cache.get('test', None)
|
|
|
|
|
- except django.db.utils.OperationalError:
|
|
|
|
|
- call_command("createcachetable", verbosity=0)
|
|
|
|
|
|
|
+ if check_db:
|
|
|
|
|
+ # Enable WAL mode in sqlite3
|
|
|
|
|
+ from django.db import connection
|
|
|
|
|
+ with connection.cursor() as cursor:
|
|
|
|
|
+
|
|
|
|
|
+ # Set Journal mode to WAL to allow for multiple writers
|
|
|
|
|
+ current_mode = cursor.execute("PRAGMA journal_mode")
|
|
|
|
|
+ if current_mode != 'wal':
|
|
|
|
|
+ cursor.execute("PRAGMA journal_mode=wal;")
|
|
|
|
|
+
|
|
|
|
|
+ # Set max blocking delay for concurrent writes and write sync mode
|
|
|
|
|
+ # https://litestream.io/tips/#busy-timeout
|
|
|
|
|
+ cursor.execute("PRAGMA busy_timeout = 5000;")
|
|
|
|
|
+ cursor.execute("PRAGMA synchronous = NORMAL;")
|
|
|
|
|
+
|
|
|
|
|
+ # Create cache table in DB if needed
|
|
|
|
|
+ try:
|
|
|
|
|
+ from django.core.cache import cache
|
|
|
|
|
+ cache.get('test', None)
|
|
|
|
|
+ except django.db.utils.OperationalError:
|
|
|
|
|
+ call_command("createcachetable", verbosity=0)
|
|
|
|
|
|
|
|
- # if archivebox gets imported multiple times, we have to close
|
|
|
|
|
- # the sqlite3 whenever we init from scratch to avoid multiple threads
|
|
|
|
|
- # sharing the same connection by accident
|
|
|
|
|
- from django.db import connections
|
|
|
|
|
- for conn in connections.all():
|
|
|
|
|
- conn.close_if_unusable_or_obsolete()
|
|
|
|
|
|
|
+ bump_startup_progress_bar()
|
|
|
|
|
|
|
|
- sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
|
|
|
|
|
- assert sql_index_path.exists(), (
|
|
|
|
|
- f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
|
|
|
|
|
|
+ # if archivebox gets imported multiple times, we have to close
|
|
|
|
|
+ # the sqlite3 whenever we init from scratch to avoid multiple threads
|
|
|
|
|
+ # sharing the same connection by accident
|
|
|
|
|
+ from django.db import connections
|
|
|
|
|
+ for conn in connections.all():
|
|
|
|
|
+ conn.close_if_unusable_or_obsolete()
|
|
|
|
|
|
|
|
|
|
+ sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
|
|
|
|
|
+ assert sql_index_path.exists(), (
|
|
|
|
|
+ f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
|
|
|
|
|
|
|
- # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
|
|
|
|
|
- if settings.DEBUG_LOGFIRE:
|
|
|
|
|
- from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
|
|
|
|
|
- SQLite3Instrumentor().instrument()
|
|
|
|
|
|
|
+ bump_startup_progress_bar()
|
|
|
|
|
|
|
|
- import logfire
|
|
|
|
|
|
|
+ # https://docs.pydantic.dev/logfire/integrations/django/ Logfire Debugging
|
|
|
|
|
+ if settings.DEBUG_LOGFIRE:
|
|
|
|
|
+ from opentelemetry.instrumentation.sqlite3 import SQLite3Instrumentor
|
|
|
|
|
+ SQLite3Instrumentor().instrument()
|
|
|
|
|
|
|
|
- logfire.configure()
|
|
|
|
|
- logfire.instrument_django(is_sql_commentor_enabled=True)
|
|
|
|
|
- logfire.info(f'Started ArchiveBox v{CONFIG.VERSION}', argv=sys.argv)
|
|
|
|
|
|
|
+ import logfire
|
|
|
|
|
+
|
|
|
|
|
+ logfire.configure()
|
|
|
|
|
+ logfire.instrument_django(is_sql_commentor_enabled=True)
|
|
|
|
|
+ logfire.info(f'Started ArchiveBox v{CONFIG.VERSION}', argv=sys.argv)
|
|
|
|
|
+
|
|
|
|
|
+ except KeyboardInterrupt:
|
|
|
|
|
+ raise SystemExit(2)
|
|
|
|
|
|
|
|
- except KeyboardInterrupt:
|
|
|
|
|
- raise SystemExit(2)
|
|
|
|
|
|
|
+ INITIAL_STARTUP_PROGRESS = None
|
|
|
|
|
+ INITIAL_STARTUP_PROGRESS_TASK = None
|