2
0
Эх сурвалжийг харах

fix plugin loading order, admin, abx-pkg

Nick Sweeting 1 жил өмнө
parent
commit
c8e186f21b
78 өөрчлөгдсөн 633 нэмэгдсэн, 735 устгасан
  1. 27 19
      archivebox/cli/archivebox_update.py
  2. 1 1
      archivebox/config/__init__.py
  3. 2 0
      archivebox/config/common.py
  4. 9 6
      archivebox/config/views.py
  5. 1 1
      archivebox/core/__init__.py
  6. 2 2
      archivebox/core/actors.py
  7. 5 5
      archivebox/core/admin_archiveresults.py
  8. 3 3
      archivebox/core/admin_snapshots.py
  9. 63 21
      archivebox/core/models.py
  10. 48 13
      archivebox/core/statemachines.py
  11. 10 9
      archivebox/core/views.py
  12. 1 0
      archivebox/crawls/__init__.py
  13. 1 1
      archivebox/crawls/actors.py
  14. 12 11
      archivebox/crawls/models.py
  15. 8 4
      archivebox/crawls/statemachines.py
  16. 2 2
      archivebox/machine/models.py
  17. 1 1
      archivebox/main.py
  18. 1 1
      archivebox/pkgs/__init__.py
  19. 9 7
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py
  20. 1 1
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py
  21. 1 1
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py
  22. 1 1
      archivebox/pkgs/abx-plugin-chrome/pyproject.toml
  23. 1 1
      archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py
  24. 1 1
      archivebox/pkgs/abx-plugin-curl/pyproject.toml
  25. 1 1
      archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
  26. 2 2
      archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml
  27. 6 6
      archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py
  28. 1 1
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py
  29. 13 8
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py
  30. 1 1
      archivebox/pkgs/abx-plugin-git/pyproject.toml
  31. 1 1
      archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
  32. 1 1
      archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py
  33. 11 8
      archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py
  34. 2 2
      archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py
  35. 1 1
      archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py
  36. 2 2
      archivebox/pkgs/abx-plugin-npm/pyproject.toml
  37. 1 1
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order
  38. 1 0
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py
  39. 1 1
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py
  40. 1 1
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py
  41. 2 2
      archivebox/pkgs/abx-plugin-pip/pyproject.toml
  42. 1 1
      archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py
  43. 1 1
      archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
  44. 2 2
      archivebox/pkgs/abx-plugin-playwright/pyproject.toml
  45. 1 1
      archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
  46. 1 1
      archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
  47. 2 2
      archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml
  48. 1 1
      archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py
  49. 10 10
      archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py
  50. 1 1
      archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py
  51. 1 1
      archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
  52. 1 1
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
  53. 12 9
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py
  54. 0 0
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py
  55. 2 2
      archivebox/pkgs/abx-plugin-singlefile/pyproject.toml
  56. 1 1
      archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
  57. 1 1
      archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
  58. 2 2
      archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml
  59. 8 0
      archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py
  60. 1 1
      archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py
  61. 24 22
      archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py
  62. 1 1
      archivebox/pkgs/abx-plugin-wget/pyproject.toml
  63. 1 1
      archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
  64. 1 1
      archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
  65. 2 2
      archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml
  66. 3 3
      archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py
  67. 35 18
      archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py
  68. 2 1
      archivebox/pkgs/abx-spec-django/abx_spec_django.py
  69. 4 2
      archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py
  70. 0 0
      archivebox/pkgs/abx-spec-pydantic-pkgr/README.md
  71. 0 114
      archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
  72. 0 17
      archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml
  73. 3 0
      archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py
  74. 17 7
      archivebox/pkgs/abx/abx.py
  75. 1 0
      archivebox/seeds/__init__.py
  76. 5 5
      pyproject.toml
  77. 61 69
      requirements.txt
  78. 165 283
      uv.lock

+ 27 - 19
archivebox/cli/archivebox_update.py

@@ -5,12 +5,10 @@ __command__ = 'archivebox update'
 
 import sys
 import argparse
-from pathlib import Path
 from typing import List, Optional, IO
 
 from archivebox.misc.util import docstring
-from archivebox.config import DATA_DIR
-from ..index import (
+from archivebox.index import (
     LINK_FILTERS,
     get_indexed_folders,
     get_archived_folders,
@@ -23,8 +21,16 @@ from ..index import (
     get_corrupted_folders,
     get_unrecognized_folders,
 )
-from ..logging_util import SmartFormatter, accept_stdin
-from ..main import update
+from archivebox.logging_util import SmartFormatter, accept_stdin
+# from ..main import update
+
+def update():
+    from archivebox.config.django import setup_django
+    setup_django()
+    
+    from actors.orchestrator import Orchestrator
+    orchestrator = Orchestrator()
+    orchestrator.start()
 
 
 @docstring(update.__doc__)
@@ -116,20 +122,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     if not command.filter_patterns:
         filter_patterns_str = accept_stdin(stdin)
 
-    update(
-        resume=command.resume,
-        only_new=command.only_new,
-        index_only=command.index_only,
-        overwrite=command.overwrite,
-        filter_patterns_str=filter_patterns_str,
-        filter_patterns=command.filter_patterns,
-        filter_type=command.filter_type,
-        status=command.status,
-        after=command.after,
-        before=command.before,
-        out_dir=Path(pwd) if pwd else DATA_DIR,
-        extractors=command.extract,
-    )
+    update()
+    
+    # update(
+    #     resume=command.resume,
+    #     only_new=command.only_new,
+    #     index_only=command.index_only,
+    #     overwrite=command.overwrite,
+    #     filter_patterns_str=filter_patterns_str,
+    #     filter_patterns=command.filter_patterns,
+    #     filter_type=command.filter_type,
+    #     status=command.status,
+    #     after=command.after,
+    #     before=command.before,
+    #     out_dir=Path(pwd) if pwd else DATA_DIR,
+    #     extractors=command.extract,
+    # )
     
 
 if __name__ == '__main__':

+ 1 - 1
archivebox/config/__init__.py

@@ -1,4 +1,4 @@
-__package__ = 'config'
+__package__ = 'archivebox.config'
 __order__ = 200
 
 from .paths import (

+ 2 - 0
archivebox/config/common.py

@@ -120,6 +120,8 @@ class ArchivingConfig(BaseConfigSet):
     SAVE_ALLOWLIST: Dict[str, List[str]]  = Field(default={})  # mapping of regex patterns to list of archive methods
     SAVE_DENYLIST: Dict[str, List[str]]   = Field(default={})
     
+    DEFAULT_PERSONA: str                  = Field(default='Default')
+    
     # GIT_DOMAINS: str                    = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
     # WGET_USER_AGENT: str                = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
     # CURL_USER_AGENT: str                = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')

+ 9 - 6
archivebox/config/views.py

@@ -86,10 +86,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
     }
 
     for plugin_id, plugin in abx.get_all_plugins().items():
-        if not plugin.hooks.get('get_BINARIES'):
+        plugin = benedict(plugin)
+        if not hasattr(plugin.plugin, 'get_BINARIES'):
             continue
         
-        for binary in plugin.hooks.get_BINARIES().values():
+        for binary in plugin.plugin.get_BINARIES().values():
             try:
                 installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
                 binary = installed_binary.load_from_db()
@@ -214,9 +215,9 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
         return 'black'
 
     for plugin_id, plugin in abx.get_all_plugins().items():
-        plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
-        plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
-        plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
+        plugin.hooks.get_BINPROVIDERS = getattr(plugin.plugin, 'get_BINPROVIDERS', lambda: {})
+        plugin.hooks.get_BINARIES = getattr(plugin.plugin, 'get_BINARIES', lambda: {})
+        plugin.hooks.get_CONFIG = getattr(plugin.plugin, 'get_CONFIG', lambda: {})
         
         rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
         rows['Version'].append(str(plugin.version))
@@ -251,8 +252,10 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
+    plugins = abx.get_all_plugins()
+
     plugin_id = None
-    for check_plugin_id, loaded_plugin in settings.PLUGINS.items():
+    for check_plugin_id, loaded_plugin in plugins.items():
         if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
             plugin_id = check_plugin_id
             break

+ 1 - 1
archivebox/core/__init__.py

@@ -1,5 +1,5 @@
 __package__ = 'archivebox.core'
-
+__order__ = 100
 import abx
 
 @abx.hookimpl

+ 2 - 2
archivebox/core/actors.py

@@ -21,7 +21,7 @@ class SnapshotActor(ActorType[Snapshot]):
     FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states         # ['sealed']
     STATE_FIELD_NAME: ClassVar[str] = Snapshot.state_field_name                # status
     
-    MAX_CONCURRENT_ACTORS: ClassVar[int] = 3
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 3
     MAX_TICK_TIME: ClassVar[int] = 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
 
@@ -39,7 +39,7 @@ class ArchiveResultActor(ActorType[ArchiveResult]):
     FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states     # ['succeeded', 'failed', 'skipped']
     STATE_FIELD_NAME: ClassVar[str] = ArchiveResult.state_field_name            # status
     
-    MAX_CONCURRENT_ACTORS: ClassVar[int] = 6
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 6
     MAX_TICK_TIME: ClassVar[int] = 60
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
 

+ 5 - 5
archivebox/core/admin_archiveresults.py

@@ -39,7 +39,7 @@ class ArchiveResultInline(admin.TabularInline):
     extra = 0
     sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
     readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
-    fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
+    fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'retry_at', 'output')
     # exclude = ('id',)
     ordering = ('end_ts',)
     show_change_link = True
@@ -105,11 +105,11 @@ class ArchiveResultInline(admin.TabularInline):
 
 
 class ArchiveResultAdmin(ABIDModelAdmin):
-    list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
-    sort_fields = ('start_ts', 'extractor', 'status')
+    list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
+    sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
     readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
     search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
-    fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
+    fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
     autocomplete_fields = ['snapshot']
 
     list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@@ -169,7 +169,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
             result.output,
         )
         output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
-        path_from_output_str = (snapshot_dir / result.output)
+        path_from_output_str = (snapshot_dir / (result.output or ''))
         output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
         if os.access(path_from_output_str, os.R_OK):
             root_dir = str(path_from_output_str)

+ 3 - 3
archivebox/core/admin_snapshots.py

@@ -56,12 +56,12 @@ class SnapshotActionForm(ActionForm):
 
 
 class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
-    list_display = ('created_at', 'title_str', 'files', 'size', 'url_str', 'crawl')
-    sort_fields = ('title_str', 'url_str', 'created_at', 'crawl')
+    list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
+    sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
     readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
     search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
     list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
-    fields = ('url', 'title', 'created_by', 'bookmarked_at', 'crawl', *readonly_fields)
+    fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
     ordering = ['-created_at']
     actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
     inlines = [TagInline, ArchiveResultInline]

+ 63 - 21
archivebox/core/models.py

@@ -1,7 +1,7 @@
 __package__ = 'archivebox.core'
 
 
-from typing import Optional, Dict, Iterable
+from typing import Optional, Dict, Iterable, Any
 from django_stubs_ext.db.models import TypedModelMeta
 
 import os
@@ -20,20 +20,22 @@ from django.db.models import Case, When, Value, IntegerField
 from django.contrib import admin
 from django.conf import settings
 
-from actors.models import ModelWithStateMachine
+
+import abx
 
 from archivebox.config import CONSTANTS
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
+from actors.models import ModelWithStateMachine
 from queues.tasks import bg_archive_snapshot
 from crawls.models import Crawl
 # from machine.models import Machine, NetworkInterface
 
 from archivebox.misc.system import get_dir_size
 from archivebox.misc.util import parse_date, base_url
-from ..index.schema import Link
-from ..index.html import snapshot_icons
-from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
+from archivebox.index.schema import Link
+from archivebox.index.html import snapshot_icons
+from archivebox.extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE
 
 
 # class BaseModel(models.Model):
@@ -195,13 +197,21 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
     tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
     title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
 
-    keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at')
+    # config = models.JSONField(default=dict, null=False, blank=False, editable=True)
+
+    keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at', 'created_at', 'status', 'retry_at', 'abid', 'id')
 
     archiveresult_set: models.Manager['ArchiveResult']
 
     objects = SnapshotManager()
 
     def save(self, *args, **kwargs):
+        if self.pk:
+            existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
+            if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
+                if self.as_json() != existing_snapshot.as_json():
+                    raise Exception(f'Snapshot {self.pk} is already sealed, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_snapshot.as_json()}')
+        
         if not self.bookmarked_at:
             self.bookmarked_at = self.created_at or self._init_timestamp
             
@@ -427,7 +437,7 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
         ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget']
         
         # config = get_scope_config(snapshot=self)
-        config = {'EXTRACTORS': ''}
+        config = {'EXTRACTORS': ','.join(ALL_EXTRACTORS)}
         
         if config.get('EXTRACTORS', 'auto') == 'auto':
             EXTRACTORS = ALL_EXTRACTORS
@@ -438,10 +448,13 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
         for extractor in EXTRACTORS:
             if not extractor:
                 continue
-            archiveresult, _created = ArchiveResult.objects.get_or_create(
+            archiveresult = ArchiveResult.objects.update_or_create(
                 snapshot=self,
                 extractor=extractor,
                 status=ArchiveResult.INITIAL_STATE,
+                defaults={
+                    'retry_at': timezone.now(),
+                },
             )
             archiveresults.append(archiveresult)
         return archiveresults
@@ -560,6 +573,8 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
     # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
 
     objects = ArchiveResultManager()
+    
+    keys = ('snapshot_id', 'extractor', 'cmd', 'pwd', 'cmd_version', 'output', 'start_ts', 'end_ts', 'created_at', 'status', 'retry_at', 'abid', 'id')
 
     class Meta(TypedModelMeta):
         verbose_name = 'Archive Result'
@@ -576,6 +591,16 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
 
     def __str__(self):
         return repr(self)
+    
+    def save(self, *args, **kwargs):
+        # if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
+        #     raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
+        if self.pk:
+            existing_archiveresult = self.__class__.objects.filter(pk=self.pk).first()
+            if existing_archiveresult and existing_archiveresult.status in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]:
+                if self.as_json() != existing_archiveresult.as_json():
+                    raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_archiveresult.as_json()}')
+        super().save(*args, **kwargs)
 
     # TODO: finish connecting machine.models
     # @cached_property
@@ -603,36 +628,53 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
         return f'/{self.snapshot.archive_path}/{self.output_path()}'
 
     @property
-    def extractor_module(self):
-        return EXTRACTORS[self.extractor]
+    def extractor_module(self) -> Any | None:
+        return abx.as_dict(abx.pm.hook.get_EXTRACTORS()).get(self.extractor, None)
 
-    def output_path(self) -> str:
+    def output_path(self) -> str | None:
         """return the canonical output filename or directory name within the snapshot dir"""
-        return self.extractor_module.get_output_path()
+        try:
+            return self.extractor_module.get_output_path(self.snapshot)
+        except Exception as e:
+            print(f'Error getting output path for {self.extractor} extractor: {e}')
+            return None
 
-    def embed_path(self) -> str:
+    def embed_path(self) -> str | None:
         """
         return the actual runtime-calculated path to the file on-disk that
         should be used for user-facing iframe embeds of this result
         """
 
-        if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None):
-            return get_embed_path_func(self)
-
-        return self.extractor_module.get_output_path()
+        try:
+            return self.extractor_module.get_embed_path(self)
+        except Exception as e:
+            print(f'Error getting embed path for {self.extractor} extractor: {e}')
+            return None
 
     def legacy_output_path(self):
         link = self.snapshot.as_link()
         return link.canonical_outputs().get(f'{self.extractor}_path')
 
     def output_exists(self) -> bool:
-        return os.path.exists(self.output_path())
-        
+        output_path = self.output_path()
+        return bool(output_path and os.path.exists(output_path))
+            
     def create_output_dir(self):
-        snap_dir = self.snapshot_dir
+        snap_dir = Path(self.snapshot_dir)
         snap_dir.mkdir(parents=True, exist_ok=True)
-        return snap_dir / self.output_path()
+        output_path = self.output_path()
+        if output_path:
+            (snap_dir / output_path).mkdir(parents=True, exist_ok=True)
+        else:
+            raise ValueError(f'Not able to calculate output path for {self.extractor} extractor in {snap_dir}')
+        return snap_dir / output_path
 
+    def as_json(self, *args) -> dict:
+        args = args or self.keys
+        return {
+            key: getattr(self, key)
+            for key in args
+        }
 
     # def get_storage_dir(self, create=True, symlink=True):
     #     date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')

+ 48 - 13
archivebox/core/statemachines.py

@@ -37,25 +37,44 @@ class SnapshotMachine(StateMachine, strict_states=True):
         super().__init__(snapshot, *args, **kwargs)
         
     def can_start(self) -> bool:
-        return self.snapshot.url
+        can_start = bool(self.snapshot.url and (self.snapshot.retry_at < timezone.now()))
+        if not can_start:
+            print(f'SnapshotMachine[{self.snapshot.ABID}].can_start() False: {self.snapshot.url} {self.snapshot.retry_at} {timezone.now()}')
+        return can_start
         
     def is_finished(self) -> bool:
+        # if no archiveresults exist yet, it's not finished
         if not self.snapshot.archiveresult_set.exists():
             return False
+        # if archiveresults exist but are still pending, it's not finished
         if self.snapshot.pending_archiveresults().exists():
             return False
+        
+        # otherwise archiveresults exist and are all finished, so it's finished
         return True
         
+    def on_transition(self, event, state):
+        print(f'SnapshotMachine[{self.snapshot.ABID}].on_transition() {event} -> {state}')
+        
+    @queued.enter
+    def enter_queued(self):
+        print(f'SnapshotMachine[{self.snapshot.ABID}].on_queued(): snapshot.retry_at = now()')
+        self.snapshot.status = Snapshot.StatusChoices.QUEUED
+        self.snapshot.retry_at = timezone.now()
+        self.snapshot.save()
+        
     @started.enter
-    def on_started(self):
+    def enter_started(self):
         print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
-        self.snapshot.create_pending_archiveresults()
+        self.snapshot.status = Snapshot.StatusChoices.STARTED
         self.snapshot.bump_retry_at(seconds=60)
         self.snapshot.save()
+        self.snapshot.create_pending_archiveresults()
         
     @sealed.enter
-    def on_sealed(self):
+    def enter_sealed(self):
         print(f'SnapshotMachine[{self.snapshot.ABID}].on_sealed(): snapshot.retry_at=None')
+        self.snapshot.status = Snapshot.StatusChoices.SEALED
         self.snapshot.retry_at = None
         self.snapshot.save()
 
@@ -95,7 +114,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
         super().__init__(archiveresult, *args, **kwargs)
         
     def can_start(self) -> bool:
-        return self.archiveresult.snapshot and self.archiveresult.snapshot.STATE == Snapshot.active_state
+        return self.archiveresult.snapshot and (self.archiveresult.retry_at < timezone.now())
     
     def is_succeeded(self) -> bool:
         return self.archiveresult.output_exists()
@@ -109,29 +128,45 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
     def is_finished(self) -> bool:
         return self.is_failed() or self.is_succeeded()
 
+
+    @queued.enter
+    def enter_queued(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_queued(): archiveresult.retry_at = now()')
+        self.archiveresult.status = ArchiveResult.StatusChoices.QUEUED
+        self.archiveresult.retry_at = timezone.now()
+        self.archiveresult.save()
+        
     @started.enter
-    def on_started(self):
+    def enter_started(self):
         print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_started(): archiveresult.start_ts + create_output_dir() + bump_retry_at(+60s)')
+        self.archiveresult.status = ArchiveResult.StatusChoices.STARTED
         self.archiveresult.start_ts = timezone.now()
-        self.archiveresult.create_output_dir()
         self.archiveresult.bump_retry_at(seconds=60)
         self.archiveresult.save()
+        self.archiveresult.create_output_dir()
 
     @backoff.enter
-    def on_backoff(self):
-        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.bump_retry_at(+60s)')
+    def enter_backoff(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.retries += 1, archiveresult.bump_retry_at(+60s), archiveresult.end_ts = None')
+        self.archiveresult.status = ArchiveResult.StatusChoices.BACKOFF
+        self.archiveresult.retries = getattr(self.archiveresult, 'retries', 0) + 1
         self.archiveresult.bump_retry_at(seconds=60)
+        self.archiveresult.end_ts = None
         self.archiveresult.save()
 
     @succeeded.enter
-    def on_succeeded(self):
-        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.end_ts')
+    def enter_succeeded(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.retry_at = None, archiveresult.end_ts = now()')
+        self.archiveresult.status = ArchiveResult.StatusChoices.SUCCEEDED
+        self.archiveresult.retry_at = None
         self.archiveresult.end_ts = timezone.now()
         self.archiveresult.save()
 
     @failed.enter
-    def on_failed(self):
-        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archiveresult.end_ts')
+    def enter_failed(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archivebox.retry_at = None, archiveresult.end_ts = now()')
+        self.archiveresult.status = ArchiveResult.StatusChoices.FAILED
+        self.archiveresult.retry_at = None
         self.archiveresult.end_ts = timezone.now()
         self.archiveresult.save()
         

+ 10 - 9
archivebox/core/views.py

@@ -102,7 +102,8 @@ class SnapshotView(View):
 
         # iterate through all the files in the snapshot dir and add the biggest ones to the result list
         snap_dir = Path(snapshot.link_dir)
-        assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK)
+        if not os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK):
+            return {}
         
         for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
             extension = result_file.suffix.lstrip('.').lower()
@@ -504,7 +505,7 @@ def find_config_section(key: str) -> str:
     if key in CONSTANTS_CONFIG:
         return 'CONSTANT'
     matching_sections = [
-        section_id for section_id, section in CONFIGS.items() if key in section.model_fields
+        section_id for section_id, section in CONFIGS.items() if key in dict(section)
     ]
     section = matching_sections[0] if matching_sections else 'DYNAMIC'
     return section
@@ -518,8 +519,9 @@ def find_config_default(key: str) -> str:
     default_val = None
 
     for config in CONFIGS.values():
-        if key in config.model_fields:
-            default_val = config.model_fields[key].default
+        if key in dict(config):
+            default_field = getattr(config, 'model_fields', dict(config))[key]
+            default_val = default_field.default if hasattr(default_field, 'default') else default_field
             break
         
     if isinstance(default_val, Callable):
@@ -529,7 +531,6 @@ def find_config_default(key: str) -> str:
     else:
         default_val = str(default_val)
         
-        
     return default_val
 
 def find_config_type(key: str) -> str:
@@ -567,7 +568,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
     }
 
     for section_id, section in reversed(list(CONFIGS.items())):
-        for key, field in section.model_fields.items():
+        for key in dict(section).keys():
             rows['Section'].append(section_id)   # section.replace('_', ' ').title().replace(' Config', '')
             rows['Key'].append(ItemLink(key, key=key))
             rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
@@ -580,7 +581,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
     for key in CONSTANTS_CONFIG.keys():
         rows['Section'].append(section)   # section.replace('_', ' ').title().replace(' Config', '')
         rows['Key'].append(ItemLink(key, key=key))
-        rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', repr(CONSTANTS_CONFIG[key]))))
+        rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', str(CONSTANTS_CONFIG[key]))))
         rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)')
         rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
         # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
@@ -642,13 +643,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
                             <code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
                         </a>
                         <br/><br/>
-                        <p style="display: {"block" if key in FLAT_CONFIG else "none"}">
+                        <p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
                             <i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
                             <br/><br/>
                             <code>archivebox config --set {key}="{
                                 val.strip("'")
                                 if (val := find_config_default(key)) else
-                                (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
+                                (str(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
                             }"</code>
                         </p>
                     '''),

+ 1 - 0
archivebox/crawls/__init__.py

@@ -1,4 +1,5 @@
 __package__ = 'archivebox.crawls'
+__order__ = 100
 
 import abx
 

+ 1 - 1
archivebox/crawls/actors.py

@@ -18,6 +18,6 @@ class CrawlActor(ActorType[Crawl]):
     FINAL_STATES: ClassVar[list[State]] = CrawlMachine.final_states
     STATE_FIELD_NAME: ClassVar[str] = Crawl.state_field_name
     
-    MAX_CONCURRENT_ACTORS: ClassVar[int] = 3
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 1
     MAX_TICK_TIME: ClassVar[int] = 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10

+ 12 - 11
archivebox/crawls/models.py

@@ -150,8 +150,8 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
         parser = (self.seed and self.seed.extractor) or 'auto'
         created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
         if self.id and self.seed:
-            return f'[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
-        return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
+            return f'\\[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
+        return f'\\[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
         
     @classmethod
     def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
@@ -184,26 +184,27 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
         return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
     
     def pending_snapshots(self) -> QuerySet['Snapshot']:
-        from core.models import Snapshot
-        return self.snapshot_set.exclude(status__in=Snapshot.FINAL_OR_ACTIVE_STATES)
+        return self.snapshot_set.filter(retry_at__isnull=False)
     
     def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
         from core.models import ArchiveResult
         
         snapshot_ids = self.snapshot_set.values_list('id', flat=True)
-        pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids).exclude(status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES)
+        pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=True)
         return pending_archiveresults
     
     def create_root_snapshot(self) -> 'Snapshot':
         from core.models import Snapshot
         
-        root_snapshot, _ = Snapshot.objects.get_or_create(
-            crawl=self,
+        root_snapshot, _ = Snapshot.objects.update_or_create(
             url=self.seed.uri,
-            status=Snapshot.INITIAL_STATE,
-            retry_at=timezone.now(),
-            timestamp=str(timezone.now().timestamp()),
-            # config=self.seed.config,
+            defaults={
+                'crawl': self,
+                'status': Snapshot.INITIAL_STATE,
+                'retry_at': timezone.now(),
+                'timestamp': str(timezone.now().timestamp()),
+                # 'config': self.seed.config,
+            },
         )
         return root_snapshot
 

+ 8 - 4
archivebox/crawls/statemachines.py

@@ -1,5 +1,7 @@
 __package__ = 'archivebox.crawls'
 
+from django.utils import timezone
+
 from statemachine import State, StateMachine
 
 from crawls.models import Crawl
@@ -31,7 +33,7 @@ class CrawlMachine(StateMachine, strict_states=True):
         super().__init__(crawl, *args, **kwargs)
         
     def can_start(self) -> bool:
-        return self.crawl.seed and self.crawl.seed.uri
+        return bool(self.crawl.seed and self.crawl.seed.uri and (self.retry_at < timezone.now()))
         
     def is_finished(self) -> bool:
         if not self.crawl.snapshot_set.exists():
@@ -47,15 +49,17 @@ class CrawlMachine(StateMachine, strict_states=True):
     #     return "before_transition_return"
 
     @started.enter
-    def on_started(self):
+    def enter_started(self):
         print(f'CrawlMachine[{self.crawl.ABID}].on_started(): crawl.create_root_snapshot() + crawl.bump_retry_at(+10s)')
-        self.crawl.create_root_snapshot()
+        self.crawl.status = Crawl.StatusChoices.STARTED
         self.crawl.bump_retry_at(seconds=10)
         self.crawl.save()
+        self.crawl.create_root_snapshot()
 
     @sealed.enter        
-    def on_sealed(self):
+    def enter_sealed(self):
         print(f'CrawlMachine[{self.crawl.ABID}].on_sealed(): crawl.retry_at=None')
+        self.crawl.status = Crawl.StatusChoices.SEALED
         self.crawl.retry_at = None
         self.crawl.save()
 

+ 2 - 2
archivebox/machine/models.py

@@ -11,7 +11,7 @@ from django.utils.functional import cached_property
 import abx
 import archivebox
 
-from pydantic_pkgr import Binary, BinProvider
+from abx_pkg import Binary, BinProvider
 from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
 
 from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@@ -323,7 +323,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
     # whereas a loaded binary is a not-yet saved instance that may not have the same config
     # why would we want to load a binary record from the db when it could be freshly loaded?
     def load_from_db(self) -> Binary:
-        # TODO: implement defaults arg in pydantic_pkgr
+        # TODO: implement defaults arg in abx_pkg
         # return self.BINARY.load(defaults={
         #     'binprovider': self.BINPROVIDER,
         #     'abspath': Path(self.abspath),

+ 1 - 1
archivebox/main.py

@@ -14,7 +14,7 @@ from crontab import CronTab, CronSlices
 from django.db.models import QuerySet
 from django.utils import timezone
 
-from pydantic_pkgr import Binary
+from abx_pkg import Binary
 
 import abx
 import archivebox

+ 1 - 1
archivebox/pkgs/__init__.py

@@ -6,7 +6,7 @@ PKGS_DIR = Path(__file__).parent
 
 VENDORED_PKGS = [
     'abx',
-    # 'pydantic-pkgr',
+    # 'abx-pkg',
     # ... everything else in archivebox/pkgs/* comes after ...
 ]
 

+ 9 - 7
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py

@@ -1,3 +1,4 @@
+__package__ = 'abx_plugin_chrome'
 __label__ = 'Chrome'
 __author__ = 'ArchiveBox'
 
@@ -25,10 +26,11 @@ def ready():
     CHROME_CONFIG.validate()
 
 
-# @abx.hookimpl
-# def get_EXTRACTORS():
-#     return {
-#         'pdf': PDF_EXTRACTOR,
-#         'screenshot': SCREENSHOT_EXTRACTOR,
-#         'dom': DOM_EXTRACTOR,
-#     }
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import PDF_EXTRACTOR, SCREENSHOT_EXTRACTOR, DOM_EXTRACTOR
+    return {
+        'pdf': PDF_EXTRACTOR,
+        'screenshot': SCREENSHOT_EXTRACTOR,
+        'dom': DOM_EXTRACTOR,
+    }

+ 1 - 1
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py

@@ -4,7 +4,7 @@ from pathlib import Path
 from typing import List, Optional
 
 from pydantic import InstanceOf
-from pydantic_pkgr import (
+from abx_pkg import (
     Binary,
     BinProvider,
     BinName,

+ 1 - 1
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py

@@ -3,7 +3,7 @@ from pathlib import Path
 from typing import List, Optional
 
 from pydantic import Field
-from pydantic_pkgr import bin_abspath
+from abx_pkg import bin_abspath
 
 from abx_spec_config.base_configset import BaseConfigSet
 from abx_plugin_default_binproviders import env

+ 1 - 1
archivebox/pkgs/abx-plugin-chrome/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_curl'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 from abx_plugin_default_binproviders import apt, brew, env
 

+ 1 - 1
archivebox/pkgs/abx-plugin-curl/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py

@@ -3,7 +3,7 @@ import abx
 
 from typing import Dict
 
-from pydantic_pkgr import (
+from abx_pkg import (
     AptProvider,
     BrewProvider,
     EnvProvider,

+ 2 - 2
archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml

@@ -6,8 +6,8 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-pkg>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 
 [build-system]

+ 6 - 6
archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py

@@ -20,10 +20,10 @@ def get_CONFIG():
     }
 
 
-# @abx.hookimpl
-# def get_EXTRACTORS():
-#     from .extractors import FAVICON_EXTRACTOR
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import FAVICON_EXTRACTOR
     
-#     return {
-#         'favicon': FAVICON_EXTRACTOR,
-#     }
+    return {
+        'favicon': FAVICON_EXTRACTOR,
+    }

+ 1 - 1
archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_git'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 from abx_plugin_default_binproviders import apt, brew, env
 

+ 13 - 8
archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py

@@ -1,15 +1,20 @@
 __package__ = 'abx_plugin_git'
 
-# from pathlib import Path
+from pathlib import Path
 
-# from .binaries import GIT_BINARY
 
+from abx_pkg import BinName
 
-# class GitExtractor(BaseExtractor):
-#     name: ExtractorName = 'git'
-#     binary: str = GIT_BINARY.name
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         return snapshot.as_link() / 'git'
+from .binaries import GIT_BINARY
 
-# GIT_EXTRACTOR = GitExtractor()
+
+class GitExtractor(BaseExtractor):
+    name: ExtractorName = 'git'
+    binary: BinName = GIT_BINARY.name
+
+    def get_output_path(self, snapshot) -> Path | None:
+        return snapshot.as_link() / 'git'
+
+GIT_EXTRACTOR = GitExtractor()

+ 1 - 1
archivebox/pkgs/abx-plugin-git/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-plugin-default-binproviders>=2024.10.24",
 ]
 

+ 1 - 1
archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py

@@ -6,7 +6,7 @@ from typing import List
 from pathlib import Path
 from pydantic import InstanceOf
 
-from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider
+from abx_pkg import BinaryOverrides, SemVer, Binary, BinProvider
 
 from abx_plugin_default_binproviders import apt
 from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES

+ 1 - 1
archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_mercury'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
+from abx_pkg import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
 
 from abx_plugin_default_binproviders import env
 

+ 11 - 8
archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py

@@ -1,17 +1,20 @@
 __package__ = 'abx_plugin_mercury'
 
-# from pathlib import Path
+from pathlib import Path
 
-# from .binaries import MERCURY_BINARY
+from abx_pkg import BinName
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
+from .binaries import MERCURY_BINARY
 
 
-# class MercuryExtractor(BaseExtractor):
-#     name: ExtractorName = 'mercury'
-#     binary: str = MERCURY_BINARY.name
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         return snapshot.link_dir / 'mercury' / 'content.html'
+class MercuryExtractor(BaseExtractor):
+    name: ExtractorName = 'mercury'
+    binary: BinName = MERCURY_BINARY.name
 
+    def get_output_path(self, snapshot) -> Path | None:
+        return snapshot.link_dir / 'mercury' / 'content.html'
 
-# MERCURY_EXTRACTOR = MercuryExtractor()
+
+MERCURY_EXTRACTOR = MercuryExtractor()

+ 2 - 2
archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py

@@ -1,4 +1,4 @@
-__package__ = 'plugins_pkg.npm'
+__package__ = 'abx_plugin_npm'
 
 
 from typing import List
@@ -6,7 +6,7 @@ from typing import List
 from pydantic import InstanceOf
 from benedict import benedict
 
-from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
+from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides
 
 from abx_plugin_default_binproviders import get_BINPROVIDERS
 

+ 1 - 1
archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py

@@ -2,7 +2,7 @@ import os
 from pathlib import Path
 from typing import Optional
 
-from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
+from abx_pkg import NpmProvider, PATHStr, BinProviderName
 
 import abx
 

+ 2 - 2
archivebox/pkgs/abx-plugin-npm/pyproject.toml

@@ -6,8 +6,8 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-pkg>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-plugin-default-binproviders>=2024.10.24",
 ]

+ 1 - 1
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order

@@ -1 +1 @@
-0
+400

+ 1 - 0
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py

@@ -1,5 +1,6 @@
 __package__ = 'abx_plugin_pip'
 __label__ = 'PIP'
+__order__ = 200
 
 import abx
 

+ 1 - 1
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py

@@ -9,7 +9,7 @@ from pydantic import InstanceOf, Field, model_validator
 import django
 import django.db.backends.sqlite3.base
 from django.db.backends.sqlite3.base import Database as django_sqlite3     # type: ignore[import-type]
-from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
+from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides, SemVer
 
 
 from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew

+ 1 - 1
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py

@@ -6,7 +6,7 @@ from typing import Optional
 
 from benedict import benedict
 
-from pydantic_pkgr import PipProvider, BinName, BinProviderName
+from abx_pkg import PipProvider, BinName, BinProviderName
 
 import abx
 

+ 2 - 2
archivebox/pkgs/abx-plugin-pip/pyproject.toml

@@ -6,9 +6,9 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-pkg>=0.5.4",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-plugin-default-binproviders>=2024.10.24",
     "django>=5.0.0",
 ]

+ 1 - 1
archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_playwright'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinName, BinProvider, Binary
+from abx_pkg import BinName, BinProvider, Binary
 
 
 from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

+ 1 - 1
archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py

@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import List, Optional, Dict, ClassVar
 
 from pydantic import Field
-from pydantic_pkgr import (
+from abx_pkg import (
     BinName,
     BinProvider,
     BinProviderName,

+ 2 - 2
archivebox/pkgs/abx-plugin-playwright/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "pydantic>=2.4.2",
-    "pydantic-pkgr>=0.5.4",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-pkg>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-spec-config>=0.1.0",
 ]
 

+ 1 - 1
archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_puppeteer'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 
 from abx_plugin_default_binproviders import env

+ 1 - 1
archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py

@@ -4,7 +4,7 @@ from pathlib import Path
 from typing import List, Optional, Dict, ClassVar
 
 from pydantic import Field
-from pydantic_pkgr import (
+from abx_pkg import (
     BinProvider,
     BinName,
     BinProviderName,

+ 2 - 2
archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
+    "abx-pkg>=0.5.4",
 ]
 
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_readability'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName
+from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName
 
 from abx_plugin_default_binproviders import env
 from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

+ 10 - 10
archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py

@@ -1,19 +1,19 @@
 # __package__ = 'abx_plugin_readability'
 
-# from pathlib import Path
+from pathlib import Path
 
-# from pydantic_pkgr import BinName
+from abx_pkg import BinName
 
+from abx_spec_extractor import BaseExtractor, ExtractorName
+from .binaries import READABILITY_BINARY
 
-# from .binaries import READABILITY_BINARY
 
+class ReadabilityExtractor(BaseExtractor):
+    name: ExtractorName = 'readability'
+    binary: BinName = READABILITY_BINARY.name
 
-# class ReadabilityExtractor(BaseExtractor):
-#     name: str = 'readability'
-#     binary: BinName = READABILITY_BINARY.name
+    def get_output_path(self, snapshot) -> Path:
+        return Path(snapshot.link_dir) / 'readability' / 'content.html'
 
-#     def get_output_path(self, snapshot) -> Path:
-#         return Path(snapshot.link_dir) / 'readability' / 'content.html'
 
-
-# READABILITY_EXTRACTOR = ReadabilityExtractor()
+READABILITY_EXTRACTOR = ReadabilityExtractor()

+ 1 - 1
archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py

@@ -3,7 +3,7 @@ __id__ = 'abx_plugin_readwise_extractor'
 __label__ = 'Readwise API'
 __version__ = '2024.10.27'
 __author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/pkgs/abx-plugin-readwise-extractor'
 __dependencies__ = []
 
 import abx

+ 1 - 1
archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_ripgrep_search'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
+from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
 
 from abx_plugin_default_binproviders import apt, brew, env
 

+ 1 - 1
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py

@@ -1,7 +1,7 @@
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
+from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
 
 from abx_plugin_default_binproviders import env
 from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

+ 12 - 9
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py

@@ -1,18 +1,21 @@
 __package__ = 'abx_plugin_singlefile'
 
-# from pathlib import Path
 
-# from pydantic_pkgr import BinName
+from pathlib import Path
 
-# from .binaries import SINGLEFILE_BINARY
+from abx_pkg import BinName
 
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
-# class SinglefileExtractor(BaseExtractor):
-#     name: str = 'singlefile'
-#     binary: BinName = SINGLEFILE_BINARY.name
+from .binaries import SINGLEFILE_BINARY
 
-#     def get_output_path(self, snapshot) -> Path:
-#         return Path(snapshot.link_dir) / 'singlefile.html'
 
+class SinglefileExtractor(BaseExtractor):
+    name: ExtractorName = 'singlefile'
+    binary: BinName = SINGLEFILE_BINARY.name
 
-# SINGLEFILE_EXTRACTOR = SinglefileExtractor()
+    def get_output_path(self, snapshot) -> Path:
+        return Path(snapshot.link_dir) / 'singlefile.html'
+
+
+SINGLEFILE_EXTRACTOR = SinglefileExtractor()

+ 0 - 0
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py


+ 2 - 2
archivebox/pkgs/abx-plugin-singlefile/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
+    "abx-pkg>=0.5.4",
 ]
 
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_sonic_search'
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
+from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
 
 from abx_plugin_default_binproviders import brew, env
 

+ 1 - 1
archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py

@@ -1,4 +1,4 @@
-__package__ = 'plugins_search.sonic'
+__package__ = 'abx_plugin_sonic_search'
 
 from typing import List, Generator, cast
 

+ 2 - 2
archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml

@@ -7,9 +7,9 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-spec-searchbackend>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-pkg>=0.5.4",
 ]
 
 [build-system]

+ 8 - 0
archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py

@@ -7,3 +7,11 @@ import abx
 #     return {
 #         'title_extractor': TITLE_EXTRACTOR_CONFIG
 #     }
+
+
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import TITLE_EXTRACTOR
+    return {
+        'title': TITLE_EXTRACTOR,
+    }

+ 1 - 1
archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py

@@ -4,7 +4,7 @@ from typing import List
 
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 from abx_plugin_default_binproviders import apt, brew, env
 

+ 24 - 22
archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py

@@ -1,35 +1,37 @@
 __package__ = 'abx_plugin_wget'
 
-# from pathlib import Path
+from pathlib import Path
 
-# from pydantic_pkgr import BinName
+from abx_pkg import BinName
 
-# from .binaries import WGET_BINARY
-# from .wget_util import wget_output_path
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
-# class WgetExtractor(BaseExtractor):
-#     name: ExtractorName = 'wget'
-#     binary: BinName = WGET_BINARY.name
+from .binaries import WGET_BINARY
+from .wget_util import wget_output_path
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         wget_index_path = wget_output_path(snapshot.as_link())
-#         if wget_index_path:
-#             return Path(wget_index_path)
-#         return None
+class WgetExtractor(BaseExtractor):
+    name: ExtractorName = 'wget'
+    binary: BinName = WGET_BINARY.name
 
-# WGET_EXTRACTOR = WgetExtractor()
+    def get_output_path(self, snapshot) -> Path | None:
+        wget_index_path = wget_output_path(snapshot.as_link())
+        if wget_index_path:
+            return Path(wget_index_path)
+        return None
 
+WGET_EXTRACTOR = WgetExtractor()
 
-# class WarcExtractor(BaseExtractor):
-#     name: ExtractorName = 'warc'
-#     binary: BinName = WGET_BINARY.name
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
-#         if warc_files:
-#             return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
-#         return None
+class WarcExtractor(BaseExtractor):
+    name: ExtractorName = 'warc'
+    binary: BinName = WGET_BINARY.name
 
+    def get_output_path(self, snapshot) -> Path | None:
+        warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
+        if warc_files:
+            return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
+        return None
 
-# WARC_EXTRACTOR = WarcExtractor()
+
+WARC_EXTRACTOR = WarcExtractor()
 

+ 1 - 1
archivebox/pkgs/abx-plugin-wget/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py

@@ -4,7 +4,7 @@ import subprocess
 from typing import List
 
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary
+from abx_pkg import BinProvider, BinName, BinaryOverrides, Binary
 
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

+ 1 - 1
archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py

@@ -1,4 +1,4 @@
-__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp'
 
 from typing import List
 

+ 2 - 2
archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
+    "abx-pkg>=0.5.4",
 ]
 
 [build-system]

+ 3 - 3
archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py

@@ -11,13 +11,13 @@ from typing import cast
 
 import abx
 from abx_spec_config import ConfigPluginSpec
-from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec
+from abx_spec_abx_pkg import AbxPkgPluginSpec
 from abx_spec_django import DjangoPluginSpec
 from abx_spec_searchbackend import SearchBackendPluginSpec
 
-class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
+class ArchiveBoxPluginSpec(ConfigPluginSpec, AbxPkgPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
     """
-    ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, and Django plugin specs.
+    ArchiveBox plugins can use any of the hooks from the Config, AbxPkg, and Django plugin specs.
     """
     pass
 

+ 35 - 18
archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py

@@ -2,10 +2,12 @@ __order__ = 100
 
 import os
 from pathlib import Path
-from typing import Dict, Any, cast
+from typing import Any, cast, TYPE_CHECKING
 
 from benedict import benedict
 
+if TYPE_CHECKING:
+    from archivebox.config.constants import ConstantsDict
 
 import abx
 
@@ -13,38 +15,43 @@ from .base_configset import BaseConfigSet, ConfigKeyStr
 
 
 class ConfigPluginSpec:
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_collection_config_path(self) -> Path:
+    def get_collection_config_path() -> Path:
         return Path(os.getcwd()) / "ArchiveBox.conf"
 
 
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_system_config_path(self) -> Path:
+    def get_system_config_path() -> Path:
         return Path('~/.config/abx/abx.conf').expanduser()
 
 
+    @staticmethod
     @abx.hookspec
     @abx.hookimpl
-    def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]:
+    def get_CONFIG() -> dict[abx.PluginId, 'BaseConfigSet | ConstantsDict']:
+        from archivebox import CONSTANTS
         """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
         return {
-            # override this in your plugin to return your plugin's config, e.g.
-            # 'ytdlp': YtdlpConfig(...),
+            'CONSTANTS': CONSTANTS,
         }
 
 
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]:
+    def get_CONFIGS() -> dict[abx.PluginId, BaseConfigSet]:
         """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
         return abx.as_dict(pm.hook.get_CONFIG())
 
 
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]:
+    def get_FLAT_CONFIG() -> dict[ConfigKeyStr, Any]:
         """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
         return benedict({
             key: value
@@ -52,9 +59,10 @@ class ConfigPluginSpec:
                 for key, value in benedict(configset).items()
         })
         
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> Dict[ConfigKeyStr, Any]:
+    def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
         """Get the config as it applies to you right now, based on the current context"""
         return benedict({
             **pm.hook.get_default_config(default=default),
@@ -69,35 +77,41 @@ class ConfigPluginSpec:
             **(extra or {}),
         })
         
+    @staticmethod
     # @abx.hookspec(firstresult=True)
     # @abx.hookimpl
-    # def get_request_config(self, request) -> dict:
+    # def get_request_config(request) -> dict:
     #     session = getattr(request, 'session', None)
     #     return getattr(session, 'config', None) or {}
         
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]:
+    def get_archiveresult_config(archiveresult) -> dict[ConfigKeyStr, Any]:
         return getattr(archiveresult, 'config', None) or {}
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]:
+    def get_snapshot_config(snapshot) -> dict[ConfigKeyStr, Any]:
         return getattr(snapshot, 'config', None) or {}
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]:
+    def get_crawl_config(crawl) -> dict[ConfigKeyStr, Any]:
         return getattr(crawl, 'config', None) or {}
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]:
+    def get_user_config(user=None) -> dict[ConfigKeyStr, Any]:
         return getattr(user, 'config', None) or {}
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]:
+    def get_collection_config(collection=...) -> dict[ConfigKeyStr, Any]:
         # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file
         # {} = empty dict, override to ignore the collection config
         return benedict({
@@ -106,9 +120,10 @@ class ConfigPluginSpec:
                 for key, value in configset.from_collection().items()
         }) if collection == ... else collection
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]:
+    def get_environment_config(environment=...) -> dict[ConfigKeyStr, Any]:
         # ... = ellipsis, means automatically get the environment config from the active environment variables
         # {} = empty dict, override to ignore the environment config
         return benedict({
@@ -117,18 +132,20 @@ class ConfigPluginSpec:
                 for key, value in configset.from_environment().items()
         }) if environment == ... else environment
     
+    @staticmethod
     # @abx.hookspec(firstresult=True)
     # @abx.hookimpl
-    # def get_machine_config(self, machine=...) -> dict:
+    # def get_machine_config(machine=...) -> dict:
     #     # ... = ellipsis, means automatically get the machine config from the currently executing machine
     #     # {} = empty dict, override to ignore the machine config
     #     if machine == ...:
     #         machine = Machine.objects.get_current()
     #     return getattr(machine, 'config', None) or {}
         
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
-    def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]:
+    def get_default_config(default=...) -> dict[ConfigKeyStr, Any]:
         # ... = ellipsis, means automatically get the default config
         # {} = empty dict, override to ignore the default config
         return benedict({

+ 2 - 1
archivebox/pkgs/abx-spec-django/abx_spec_django.py

@@ -1,4 +1,3 @@
-__order__ = 300
 
 import abx
 from typing import List, Dict, Any, cast
@@ -6,6 +5,8 @@ from typing import List, Dict, Any, cast
 ###########################################################################################
 
 class DjangoPluginSpec:
+    __order__ = 10
+    
     @abx.hookspec
     def get_INSTALLED_APPS() -> List[str]:
         return ['abx_spec_django']

+ 4 - 2
archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py

@@ -1,10 +1,12 @@
+__order__ = 10
+
 import os
 
 from typing import Optional, List, Annotated, Tuple
 from pathlib import Path
 
 from pydantic import AfterValidator
-from pydantic_pkgr import BinName
+from abx_pkg import BinName
 
 
 import abx
@@ -23,7 +25,7 @@ CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_em
 @abx.hookspec
 @abx.hookimpl
 def get_EXTRACTORS():
-    return []
+    return {}
 
 @abx.hookspec
 @abx.hookimpl

+ 0 - 0
archivebox/pkgs/abx-spec-pydantic-pkgr/README.md


+ 0 - 114
archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py

@@ -1,114 +0,0 @@
-__order__ = 200
-
-import os
-
-from typing import Dict, cast
-from pathlib import Path
-
-from pydantic_pkgr import Binary, BinProvider
-
-import abx
-
-from abx_spec_config import ConfigPluginSpec
-
-###########################################################################################
-
-class PydanticPkgrPluginSpec:
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_LIB_DIR(self) -> Path:
-        """Get the directory where shared runtime libraries/dependencies should be installed"""
-        FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
-        LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx'))
-        return LIB_DIR
-        
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_BIN_DIR(self) -> Path:
-        """Get the directory where binaries should be symlinked to"""
-        FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
-        LIB_DIR = pm.hook.get_LIB_DIR()
-        BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin')
-        return BIN_DIR
-    
-    @abx.hookspec
-    @abx.hookimpl
-    def get_BINPROVIDERS(self) -> Dict[str, BinProvider]:
-        return {
-            # to be implemented by plugins, e.g.:
-            # 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')),
-        }
-
-    @abx.hookspec
-    @abx.hookimpl
-    def get_BINARIES(self) -> Dict[str, Binary]:
-        return {
-            # to be implemented by plugins, e.g.:
-            # 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]),
-        }
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider:
-        """Get a specific BinProvider by name"""
-        return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name]
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_BINARY(self, bin_name: str) -> Binary:
-        """Get a specific Binary by name"""
-        return abx.as_dict(pm.hook.get_BINARIES())[bin_name]
-
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_load(self, binary: Binary, **kwargs) -> Binary:
-        """Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)"""
-        loaded_binary = binary.load(**kwargs)
-        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
-        return loaded_binary
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_install(self, binary: Binary, **kwargs) -> Binary:
-        """Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)"""
-        loaded_binary = binary.install(**kwargs)
-        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
-        return loaded_binary
-        
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary:
-        """Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)"""
-        loaded_binary = binary.load_or_install(**kwargs)
-        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
-        return loaded_binary
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None):
-        if not (binary.abspath and os.path.isfile(binary.abspath)):
-            return
-                
-        BIN_DIR = pm.hook.get_BIN_DIR()
-        try:
-            BIN_DIR.mkdir(parents=True, exist_ok=True)
-            symlink = BIN_DIR / binary.name
-            symlink.unlink(missing_ok=True)
-            symlink.symlink_to(binary.abspath)
-            symlink.chmod(0o777)   # make sure its executable by everyone
-        except Exception:
-            # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
-            # not actually needed, we can just run without it
-            pass
-
-
-PLUGIN_SPEC = PydanticPkgrPluginSpec
-
-
-class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec):
-    pass
-
-TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable]
-pm = cast(TypedPluginManager, abx.pm)

+ 0 - 17
archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml

@@ -1,17 +0,0 @@
-[project]
-name = "abx-spec-pydantic-pkgr"
-version = "0.1.0"
-description = "The ABX plugin specification for Binaries and BinProviders"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = [
-    "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
-]
-
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
-
-[project.entry-points.abx]
-abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr"

+ 3 - 0
archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py

@@ -25,6 +25,9 @@ class BaseSearchBackend(abc.ABC):
 
 
 class SearchBackendPluginSpec:
+    __order__ = 10
+    
+    @staticmethod
     @abx.hookspec
     @abx.hookimpl
     def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]:

+ 17 - 7
archivebox/pkgs/abx/abx.py

@@ -244,10 +244,12 @@ def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int,
         except FileNotFoundError:
             pass
     
+    default_order = 10 if '_spec_' in str(plugin_dir).lower() else 999
+    
     if plugin_module:
-        order = getattr(plugin_module, '__order__', 999)
+        order = getattr(plugin_module, '__order__', default_order)
     else:
-        order = 999
+        order = default_order
     
     assert order is not None
     assert plugin_dir
@@ -270,7 +272,10 @@ def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
     elif inspect.isclass(plugin):
         module = inspect.getmodule(plugin)
     else:
-        raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
+        plugin = type(plugin)
+        module = inspect.getmodule(plugin)
+        
+        # raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
     
     assert module
     
@@ -416,9 +421,14 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
     PLUGINS_TO_LOAD = []
     LOADED_PLUGINS = {}
     
-    for plugin in plugins:
-        plugin_info = get_plugin(plugin)
-        assert plugin_info, f'No plugin metadata found for {plugin}'
+    plugin_infos = sorted([
+        get_plugin(plugin)
+        for plugin in plugins
+    ], key=lambda plugin: plugin.get('order', 999))
+    
+    
+    for plugin_info in plugin_infos:
+        assert plugin_info, 'No plugin metadata found for plugin'
         assert 'id' in plugin_info and 'module' in plugin_info
         if plugin_info['module'] in pm.get_plugins():
             LOADED_PLUGINS[plugin_info['id']] = plugin_info
@@ -431,7 +441,7 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
     for plugin_info in PLUGINS_TO_LOAD:
         pm.register(plugin_info['module'])
         LOADED_PLUGINS[plugin_info['id']] = plugin_info
-        # print(f'    √ Loaded plugin: {plugin_id}')
+        print(f'    √ Loaded plugin: {plugin_info["id"]}')
     return benedict(LOADED_PLUGINS)
 
 @cache

+ 1 - 0
archivebox/seeds/__init__.py

@@ -1,5 +1,6 @@
 
 __package__ = 'archivebox.seeds'
+__order__ = 100
 
 import abx
 

+ 5 - 5
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "archivebox"
-version = "0.8.6rc1"
+version = "0.8.6rc2"
 requires-python = ">=3.10"
 description = "Self-hosted internet archiving solution."
 authors = [{name = "Nick Sweeting", email = "[email protected]"}]
@@ -80,13 +80,13 @@ dependencies = [
     "django-taggit==6.1.0",
     "base32-crockford==0.3.0",
     "platformdirs>=4.3.6",
-    "pydantic-pkgr>=0.5.4",
+    "abx-pkg>=0.6.0",
     "pocket>=0.3.6",
     "sonic-client>=1.0.0",
     "yt-dlp>=2024.8.6", # for: media"
     ############# Plugin Dependencies ################
     "abx>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.1",
     "abx-spec-config>=0.1.0",
     "abx-spec-archivebox>=0.1.0",
     "abx-spec-django>=0.1.0",
@@ -178,10 +178,10 @@ dev-dependencies = [
 ]
 
 [tool.uv.sources]
-# pydantic-pkgr = { workspace = true }
+# abx-pkg = { workspace = true }
 
 abx = { workspace = true }
-abx-spec-pydantic-pkgr = { workspace = true }
+abx-spec-abx-pkg = { workspace = true }
 abx-spec-config = { workspace = true }
 abx-spec-archivebox = { workspace = true }
 abx-spec-django = { workspace = true }

+ 61 - 69
requirements.txt

@@ -1,6 +1,6 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml --all-extras -o requirements.txt
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-archivedotorg
@@ -24,65 +24,65 @@
     #   abx-plugin-title
     #   abx-plugin-wget
     #   abx-plugin-ytdlp
+    #   abx-spec-abx-pkg
     #   abx-spec-archivebox
     #   abx-spec-config
     #   abx-spec-django
     #   abx-spec-extractor
-    #   abx-spec-pydantic-pkgr
     #   abx-spec-searchbackend
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-archivedotorg
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-archivedotorg
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-chrome
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-chrome
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-curl
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-curl
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-archivedotorg
     #   abx-plugin-favicon
     #   abx-plugin-title
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-default-binproviders
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-default-binproviders
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-git
     #   abx-plugin-npm
     #   abx-plugin-pip
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-favicon
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-favicon
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-git
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-git
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-htmltotext
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-htmltotext
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ldap-auth
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ldap-auth
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-mercury
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-mercury
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-npm
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-npm
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-pip
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-pip
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-playwright
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-playwright
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-puppeteer
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-puppeteer
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-readability
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-readability
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ripgrep-search
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ripgrep-search
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-singlefile
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-singlefile
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sonic-search
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sonic-search
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sqlitefts-search
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sqlitefts-search
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-title
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-title
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-wget
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-wget
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ytdlp
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ytdlp
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-archivebox
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-archivebox
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-config
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-config
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-archivedotorg
@@ -105,13 +105,13 @@
     #   abx-plugin-title
     #   abx-plugin-wget
     #   abx-plugin-ytdlp
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-django
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-django
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-ldap-auth
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-extractor
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-extractor
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-pydantic-pkgr
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-abx-pkg
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-chrome
@@ -126,12 +126,24 @@
     #   abx-plugin-sonic-search
     #   abx-plugin-wget
     #   abx-plugin-ytdlp
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-searchbackend
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-searchbackend
     # via
     #   archivebox (pyproject.toml)
     #   abx-plugin-ripgrep-search
     #   abx-plugin-sonic-search
     #   abx-plugin-sqlitefts-search
+abx-pkg==0.6.0
+    # via
+    #   archivebox (pyproject.toml)
+    #   abx-plugin-default-binproviders
+    #   abx-plugin-npm
+    #   abx-plugin-pip
+    #   abx-plugin-playwright
+    #   abx-plugin-puppeteer
+    #   abx-plugin-singlefile
+    #   abx-plugin-sonic-search
+    #   abx-plugin-ytdlp
+    #   abx-spec-abx-pkg
 annotated-types==0.7.0
     # via pydantic
 anyio==4.6.2.post1
@@ -159,11 +171,9 @@ base32-crockford==0.3.0
     # via archivebox (pyproject.toml)
 beautifulsoup4==4.12.3
     # via python-benedict
-brotli==1.1.0
-    # via yt-dlp
-bx-django-utils==81
+bx-django-utils==82
     # via django-huey-monitor
-bx-py-utils==105
+bx-py-utils==106
     # via
     #   bx-django-utils
     #   django-huey-monitor
@@ -172,10 +182,9 @@ certifi==2024.8.30
     #   httpcore
     #   httpx
     #   requests
-    #   yt-dlp
 cffi==1.17.1
     # via cryptography
-channels==4.1.0
+channels==4.2.0
     # via archivebox (pyproject.toml)
 charset-normalizer==3.4.0
     # via requests
@@ -197,7 +206,7 @@ decorator==5.1.1
     # via
     #   ipdb
     #   ipython
-django==5.1.2
+django==5.1.3
     # via
     #   archivebox (pyproject.toml)
     #   abx
@@ -270,7 +279,7 @@ ftfy==6.3.1
     # via python-benedict
 h11==0.14.0
     # via httpcore
-httpcore==1.0.6
+httpcore==1.0.7
     # via httpx
 httpx==0.27.2
     # via django-signal-webhooks
@@ -297,7 +306,7 @@ ipython==8.29.0
     # via
     #   archivebox (pyproject.toml)
     #   ipdb
-jedi==0.19.1
+jedi==0.19.2
     # via ipython
 libcst==1.5.0
     # via django-autotyping
@@ -309,8 +318,6 @@ matplotlib-inline==0.1.7
     # via ipython
 mdurl==0.1.2
     # via markdown-it-py
-mutagen==1.47.0
-    # via yt-dlp
 mypy-extensions==1.0.0
     # via archivebox (pyproject.toml)
 openpyxl==3.1.5
@@ -319,12 +326,14 @@ parso==0.8.4
     # via jedi
 pexpect==4.9.0
     # via ipython
-phonenumbers==8.13.48
+phonenumbers==8.13.50
     # via python-benedict
+pip==24.3.1
+    # via abx-pkg
 platformdirs==4.3.6
     # via
     #   archivebox (pyproject.toml)
-    #   pydantic-pkgr
+    #   abx-pkg
 pluggy==1.5.0
     # via
     #   archivebox (pyproject.toml)
@@ -352,34 +361,20 @@ pyasn1-modules==0.4.1
     #   service-identity
 pycparser==2.22
     # via cffi
-pycryptodomex==3.21.0
-    # via yt-dlp
 pydantic==2.9.2
     # via
+    #   abx-pkg
     #   abx-plugin-playwright
     #   abx-spec-config
     #   abx-spec-extractor
     #   abx-spec-searchbackend
     #   django-ninja
     #   django-pydantic-field
-    #   pydantic-pkgr
     #   pydantic-settings
 pydantic-core==2.23.4
     # via
+    #   abx-pkg
     #   pydantic
-    #   pydantic-pkgr
-pydantic-pkgr==0.5.4
-    # via
-    #   archivebox (pyproject.toml)
-    #   abx-plugin-default-binproviders
-    #   abx-plugin-npm
-    #   abx-plugin-pip
-    #   abx-plugin-playwright
-    #   abx-plugin-puppeteer
-    #   abx-plugin-singlefile
-    #   abx-plugin-sonic-search
-    #   abx-plugin-ytdlp
-    #   abx-spec-pydantic-pkgr
 pydantic-settings==2.6.1
     # via
     #   archivebox (pyproject.toml)
@@ -414,6 +409,8 @@ python-ldap==3.4.4
     #   django-auth-ldap
 python-slugify==8.0.4
     # via python-benedict
+python-statemachine==2.4.0
+    # via archivebox (pyproject.toml)
 python-stdnum==1.20
     # via bx-django-utils
 pytz==2024.2
@@ -424,14 +421,13 @@ pyyaml==6.0.2
     # via
     #   libcst
     #   python-benedict
-regex==2024.9.11
+regex==2024.11.6
     # via dateparser
 requests==2.32.3
     # via
     #   archivebox (pyproject.toml)
     #   pocket
     #   python-benedict
-    #   yt-dlp
 requests-tracker==0.3.3
     # via archivebox (pyproject.toml)
 rich==13.9.4
@@ -443,7 +439,7 @@ rich-argparse==1.6.0
     # via archivebox (pyproject.toml)
 service-identity==24.2.0
     # via twisted
-setuptools==75.3.0
+setuptools==75.5.0
     # via
     #   archivebox (pyproject.toml)
     #   autobahn
@@ -464,7 +460,7 @@ sonic-client==1.0.0
     # via archivebox (pyproject.toml)
 soupsieve==2.6
     # via beautifulsoup4
-sqlparse==0.5.1
+sqlparse==0.5.2
     # via
     #   django
     #   django-debug-toolbar
@@ -492,12 +488,12 @@ types-pyyaml==6.0.12.20240917
 typing-extensions==4.12.2
     # via
     #   archivebox (pyproject.toml)
+    #   abx-pkg
     #   django-pydantic-field
     #   django-stubs
     #   django-stubs-ext
     #   pydantic
     #   pydantic-core
-    #   pydantic-pkgr
     #   twisted
 tzdata==2024.2
     # via archivebox (pyproject.toml)
@@ -506,9 +502,7 @@ tzlocal==5.2
 ulid-py==1.1.0
     # via archivebox (pyproject.toml)
 urllib3==2.2.3
-    # via
-    #   requests
-    #   yt-dlp
+    # via requests
 uuid6==2024.7.10
     # via typeid-python
 w3lib==2.2.1
@@ -517,13 +511,11 @@ wcwidth==0.2.13
     # via
     #   ftfy
     #   prompt-toolkit
-websockets==13.1
-    # via yt-dlp
 xlrd==2.0.1
     # via python-benedict
 xmltodict==0.14.2
     # via python-benedict
-yt-dlp==2024.10.22
+yt-dlp==2024.11.4
     # via archivebox (pyproject.toml)
 zope-interface==7.1.1
     # via twisted

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 165 - 283
uv.lock


Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно