Browse Source

move to new vendoring fallback logic

Nick Sweeting 1 year ago
parent
commit
2c2d034d6d

+ 5 - 0
archivebox/config.py

@@ -52,6 +52,11 @@ from .config_stubs import (
     ConfigDefaultDict,
 )
 
+# load fallback libraries from vendor dir
+from .vendor import load_vendored_libs
+load_vendored_libs()
+
+
 
 ############################### Config Schema ##################################
 

+ 1 - 1
archivebox/core/forms.py

@@ -4,7 +4,7 @@ from django import forms
 
 from ..util import URL_REGEX
 from ..parsers import PARSERS
-from ..vendor.taggit_utils import edit_string_for_tags, parse_tags
+from taggit.utils import edit_string_for_tags, parse_tags
 
 PARSER_CHOICES = [
     (parser_key, parser[0])

+ 1 - 1
archivebox/parsers/pocket_api.py

@@ -7,7 +7,7 @@ from typing import IO, Iterable, Optional
 from configparser import ConfigParser
 
 from pathlib import Path
-from ..vendor.pocket import Pocket
+from pocket import Pocket
 
 from ..index.schema import Link
 from ..util import enforce_types

+ 1 - 2
archivebox/system.py

@@ -11,13 +11,12 @@ from typing import Optional, Union, Set, Tuple
 from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedProcess, TimeoutExpired
 
 from crontab import CronTab
-from .vendor.atomicwrites import atomic_write as lib_atomic_write
+from atomicwrites import atomic_write as lib_atomic_write
 
 from .util import enforce_types, ExtendedEncoder
 from .config import PYTHON_BINARY, OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES
 
 
-
 def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs):
     """Patched of subprocess.run to kill forked child subprocesses and fix blocking io making timeout=innefective
         Mostly copied from https://github.com/python/cpython/blob/master/Lib/subprocess.py

+ 1 - 1
archivebox/util.py

@@ -16,7 +16,7 @@ from datetime import datetime, timezone
 from dateparser import parse as dateparser
 from requests.exceptions import RequestException, ReadTimeout
 
-from .vendor.base32_crockford import encode as base32_encode                            # type: ignore
+from base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 from os.path import lexists
 from os import remove as remove_file

+ 34 - 0
archivebox/vendor/__init__.py

@@ -0,0 +1,34 @@
+import sys
+import inspect
+import importlib
+from pathlib import Path
+
+VENDOR_DIR = Path(__file__).parent
+
+VENDORED_LIBS = {
+    # subdir of VENDOR_DIR: module name to import
+    'python-atomicwrites':  'atomicwrites',
+    'django-taggit':        'taggit',
+    'pydantic-pkgr':        'pydantic_pkgr',
+    'pocket':               'pocket',
+    'base32-crockford':     'base32_crockford',
+}
+
+def load_vendored_libs():
+    for lib_subdir, lib_name in VENDORED_LIBS.items():
+        lib_dir = VENDOR_DIR / lib_subdir
+        assert lib_dir.is_dir(), 'Expected vendor libary {lib_name} could not be found in {lib_dir}'
+
+        try:
+            lib = importlib.import_module(lib_name)
+            # print(f"Successfully imported lib from environment {lib_name}: {inspect.getfile(lib)}")
+        except ImportError:
+            sys.path.append(str(lib_dir))
+            try:
+                lib = importlib.import_module(lib_name)
+                # print(f"Successfully imported lib from vendored fallback {lib_name}: {inspect.getfile(lib)}")
+            except ImportError as e:
+                print(f"Failed to import lib from environment or vendored fallback {lib_name}: {e}", file=sys.stderr)
+                sys.exit(1)
+        
+

+ 0 - 1
archivebox/vendor/atomicwrites.py

@@ -1 +0,0 @@
-python-atomicwrites/atomicwrites/__init__.py

+ 0 - 1
archivebox/vendor/base32_crockford.py

@@ -1 +0,0 @@
-base32-crockford/base32_crockford.py

+ 0 - 1
archivebox/vendor/package-lock.json

@@ -1 +0,0 @@
-../../package-lock.json

+ 0 - 1
archivebox/vendor/package.json

@@ -1 +0,0 @@
-../../package.json

+ 0 - 1
archivebox/vendor/pocket.py

@@ -1 +0,0 @@
-pocket/pocket.py

+ 1 - 0
archivebox/vendor/pydantic-pkgr

@@ -0,0 +1 @@
+Subproject commit 61cf4bf5db18c9ab374d5f947c41921c728dc731

+ 0 - 1
archivebox/vendor/taggit_utils.py

@@ -1 +0,0 @@
-django-taggit/taggit/utils.py

+ 10 - 5
pyproject.toml

@@ -29,12 +29,9 @@ dependencies = [
     "croniter>=2.0.5",                # for: archivebox schedule
     "ipython>=8.23.0",                # for: archivebox shell
     # Extractor Dependencies
-    "yt-dlp>=2024.4.9",               # for: media
+    "yt-dlp>=2024.8.6",               # for: media
     # "playwright>=1.43.0; platform_machine != 'armv7l'",  # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
-    # TODO: add more extractors
-    #  - gallery-dl
-    #  - scihubdl
-    #  - See Github issues for more...
+
     "django-signal-webhooks>=0.3.0",
     "django-admin-data-views>=0.3.1",
     "ulid-py>=1.1.0",
@@ -43,6 +40,14 @@ dependencies = [
     "django-pydantic-field>=0.3.9",
     "django-jsonform>=2.22.0",
     "django-stubs>=5.0.2",
+
+    # these can be safely omitted when the installation system does not provide them as packages (e.g. apt/debian);
+    # archivebox will automatically load the fallback vendored copies bundled in archivebox/vendor/ (see archivebox/vendor/__init__.py)
+    "pydantic-pkgr>=0.1.4",
+    "atomicwrites==1.4.0",
+    "pocket==0.3.7",
+    "django-taggit==1.3.0",
+    "base32-crockford==0.3.0",
 ]
 
 homepage = "https://github.com/ArchiveBox/ArchiveBox"