Browse Source

fix plugin loading order, admin, abx-pkg

Nick Sweeting 1 year ago
parent
commit
c8e186f21b
78 changed files with 633 additions and 735 deletions
  1. 27 19
      archivebox/cli/archivebox_update.py
  2. 1 1
      archivebox/config/__init__.py
  3. 2 0
      archivebox/config/common.py
  4. 9 6
      archivebox/config/views.py
  5. 1 1
      archivebox/core/__init__.py
  6. 2 2
      archivebox/core/actors.py
  7. 5 5
      archivebox/core/admin_archiveresults.py
  8. 3 3
      archivebox/core/admin_snapshots.py
  9. 63 21
      archivebox/core/models.py
  10. 48 13
      archivebox/core/statemachines.py
  11. 10 9
      archivebox/core/views.py
  12. 1 0
      archivebox/crawls/__init__.py
  13. 1 1
      archivebox/crawls/actors.py
  14. 12 11
      archivebox/crawls/models.py
  15. 8 4
      archivebox/crawls/statemachines.py
  16. 2 2
      archivebox/machine/models.py
  17. 1 1
      archivebox/main.py
  18. 1 1
      archivebox/pkgs/__init__.py
  19. 9 7
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py
  20. 1 1
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py
  21. 1 1
      archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py
  22. 1 1
      archivebox/pkgs/abx-plugin-chrome/pyproject.toml
  23. 1 1
      archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py
  24. 1 1
      archivebox/pkgs/abx-plugin-curl/pyproject.toml
  25. 1 1
      archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
  26. 2 2
      archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml
  27. 6 6
      archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py
  28. 1 1
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py
  29. 13 8
      archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py
  30. 1 1
      archivebox/pkgs/abx-plugin-git/pyproject.toml
  31. 1 1
      archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
  32. 1 1
      archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py
  33. 11 8
      archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py
  34. 2 2
      archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py
  35. 1 1
      archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py
  36. 2 2
      archivebox/pkgs/abx-plugin-npm/pyproject.toml
  37. 1 1
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order
  38. 1 0
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py
  39. 1 1
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py
  40. 1 1
      archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py
  41. 2 2
      archivebox/pkgs/abx-plugin-pip/pyproject.toml
  42. 1 1
      archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py
  43. 1 1
      archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
  44. 2 2
      archivebox/pkgs/abx-plugin-playwright/pyproject.toml
  45. 1 1
      archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
  46. 1 1
      archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
  47. 2 2
      archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml
  48. 1 1
      archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py
  49. 10 10
      archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py
  50. 1 1
      archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py
  51. 1 1
      archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
  52. 1 1
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
  53. 12 9
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py
  54. 0 0
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py
  55. 2 2
      archivebox/pkgs/abx-plugin-singlefile/pyproject.toml
  56. 1 1
      archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
  57. 1 1
      archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
  58. 2 2
      archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml
  59. 8 0
      archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py
  60. 1 1
      archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py
  61. 24 22
      archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py
  62. 1 1
      archivebox/pkgs/abx-plugin-wget/pyproject.toml
  63. 1 1
      archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
  64. 1 1
      archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
  65. 2 2
      archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml
  66. 3 3
      archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py
  67. 35 18
      archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py
  68. 2 1
      archivebox/pkgs/abx-spec-django/abx_spec_django.py
  69. 4 2
      archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py
  70. 0 0
      archivebox/pkgs/abx-spec-pydantic-pkgr/README.md
  71. 0 114
      archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
  72. 0 17
      archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml
  73. 3 0
      archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py
  74. 17 7
      archivebox/pkgs/abx/abx.py
  75. 1 0
      archivebox/seeds/__init__.py
  76. 5 5
      pyproject.toml
  77. 61 69
      requirements.txt
  78. 165 283
      uv.lock

+ 27 - 19
archivebox/cli/archivebox_update.py

@@ -5,12 +5,10 @@ __command__ = 'archivebox update'
 
 
 import sys
 import sys
 import argparse
 import argparse
-from pathlib import Path
 from typing import List, Optional, IO
 from typing import List, Optional, IO
 
 
 from archivebox.misc.util import docstring
 from archivebox.misc.util import docstring
-from archivebox.config import DATA_DIR
-from ..index import (
+from archivebox.index import (
     LINK_FILTERS,
     LINK_FILTERS,
     get_indexed_folders,
     get_indexed_folders,
     get_archived_folders,
     get_archived_folders,
@@ -23,8 +21,16 @@ from ..index import (
     get_corrupted_folders,
     get_corrupted_folders,
     get_unrecognized_folders,
     get_unrecognized_folders,
 )
 )
-from ..logging_util import SmartFormatter, accept_stdin
-from ..main import update
+from archivebox.logging_util import SmartFormatter, accept_stdin
+# from ..main import update
+
+def update():
+    from archivebox.config.django import setup_django
+    setup_django()
+    
+    from actors.orchestrator import Orchestrator
+    orchestrator = Orchestrator()
+    orchestrator.start()
 
 
 
 
 @docstring(update.__doc__)
 @docstring(update.__doc__)
@@ -116,20 +122,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     if not command.filter_patterns:
     if not command.filter_patterns:
         filter_patterns_str = accept_stdin(stdin)
         filter_patterns_str = accept_stdin(stdin)
 
 
-    update(
-        resume=command.resume,
-        only_new=command.only_new,
-        index_only=command.index_only,
-        overwrite=command.overwrite,
-        filter_patterns_str=filter_patterns_str,
-        filter_patterns=command.filter_patterns,
-        filter_type=command.filter_type,
-        status=command.status,
-        after=command.after,
-        before=command.before,
-        out_dir=Path(pwd) if pwd else DATA_DIR,
-        extractors=command.extract,
-    )
+    update()
+    
+    # update(
+    #     resume=command.resume,
+    #     only_new=command.only_new,
+    #     index_only=command.index_only,
+    #     overwrite=command.overwrite,
+    #     filter_patterns_str=filter_patterns_str,
+    #     filter_patterns=command.filter_patterns,
+    #     filter_type=command.filter_type,
+    #     status=command.status,
+    #     after=command.after,
+    #     before=command.before,
+    #     out_dir=Path(pwd) if pwd else DATA_DIR,
+    #     extractors=command.extract,
+    # )
     
     
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 1 - 1
archivebox/config/__init__.py

@@ -1,4 +1,4 @@
-__package__ = 'config'
+__package__ = 'archivebox.config'
 __order__ = 200
 __order__ = 200
 
 
 from .paths import (
 from .paths import (

+ 2 - 0
archivebox/config/common.py

@@ -120,6 +120,8 @@ class ArchivingConfig(BaseConfigSet):
     SAVE_ALLOWLIST: Dict[str, List[str]]  = Field(default={})  # mapping of regex patterns to list of archive methods
     SAVE_ALLOWLIST: Dict[str, List[str]]  = Field(default={})  # mapping of regex patterns to list of archive methods
     SAVE_DENYLIST: Dict[str, List[str]]   = Field(default={})
     SAVE_DENYLIST: Dict[str, List[str]]   = Field(default={})
     
     
+    DEFAULT_PERSONA: str                  = Field(default='Default')
+    
     # GIT_DOMAINS: str                    = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
     # GIT_DOMAINS: str                    = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
     # WGET_USER_AGENT: str                = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
     # WGET_USER_AGENT: str                = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
     # CURL_USER_AGENT: str                = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')
     # CURL_USER_AGENT: str                = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')

+ 9 - 6
archivebox/config/views.py

@@ -86,10 +86,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
     }
     }
 
 
     for plugin_id, plugin in abx.get_all_plugins().items():
     for plugin_id, plugin in abx.get_all_plugins().items():
-        if not plugin.hooks.get('get_BINARIES'):
+        plugin = benedict(plugin)
+        if not hasattr(plugin.plugin, 'get_BINARIES'):
             continue
             continue
         
         
-        for binary in plugin.hooks.get_BINARIES().values():
+        for binary in plugin.plugin.get_BINARIES().values():
             try:
             try:
                 installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
                 installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
                 binary = installed_binary.load_from_db()
                 binary = installed_binary.load_from_db()
@@ -214,9 +215,9 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
         return 'black'
         return 'black'
 
 
     for plugin_id, plugin in abx.get_all_plugins().items():
     for plugin_id, plugin in abx.get_all_plugins().items():
-        plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
-        plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
-        plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
+        plugin.hooks.get_BINPROVIDERS = getattr(plugin.plugin, 'get_BINPROVIDERS', lambda: {})
+        plugin.hooks.get_BINARIES = getattr(plugin.plugin, 'get_BINARIES', lambda: {})
+        plugin.hooks.get_CONFIG = getattr(plugin.plugin, 'get_CONFIG', lambda: {})
         
         
         rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
         rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
         rows['Version'].append(str(plugin.version))
         rows['Version'].append(str(plugin.version))
@@ -251,8 +252,10 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 
 
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
     assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
 
 
+    plugins = abx.get_all_plugins()
+
     plugin_id = None
     plugin_id = None
-    for check_plugin_id, loaded_plugin in settings.PLUGINS.items():
+    for check_plugin_id, loaded_plugin in plugins.items():
         if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
         if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
             plugin_id = check_plugin_id
             plugin_id = check_plugin_id
             break
             break

+ 1 - 1
archivebox/core/__init__.py

@@ -1,5 +1,5 @@
 __package__ = 'archivebox.core'
 __package__ = 'archivebox.core'
-
+__order__ = 100
 import abx
 import abx
 
 
 @abx.hookimpl
 @abx.hookimpl

+ 2 - 2
archivebox/core/actors.py

@@ -21,7 +21,7 @@ class SnapshotActor(ActorType[Snapshot]):
     FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states         # ['sealed']
     FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states         # ['sealed']
     STATE_FIELD_NAME: ClassVar[str] = Snapshot.state_field_name                # status
     STATE_FIELD_NAME: ClassVar[str] = Snapshot.state_field_name                # status
     
     
-    MAX_CONCURRENT_ACTORS: ClassVar[int] = 3
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 3
     MAX_TICK_TIME: ClassVar[int] = 10
     MAX_TICK_TIME: ClassVar[int] = 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
 
 
@@ -39,7 +39,7 @@ class ArchiveResultActor(ActorType[ArchiveResult]):
     FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states     # ['succeeded', 'failed', 'skipped']
     FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states     # ['succeeded', 'failed', 'skipped']
     STATE_FIELD_NAME: ClassVar[str] = ArchiveResult.state_field_name            # status
     STATE_FIELD_NAME: ClassVar[str] = ArchiveResult.state_field_name            # status
     
     
-    MAX_CONCURRENT_ACTORS: ClassVar[int] = 6
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 6
     MAX_TICK_TIME: ClassVar[int] = 60
     MAX_TICK_TIME: ClassVar[int] = 60
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
 
 

+ 5 - 5
archivebox/core/admin_archiveresults.py

@@ -39,7 +39,7 @@ class ArchiveResultInline(admin.TabularInline):
     extra = 0
     extra = 0
     sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
     sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
     readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
     readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
-    fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
+    fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'retry_at', 'output')
     # exclude = ('id',)
     # exclude = ('id',)
     ordering = ('end_ts',)
     ordering = ('end_ts',)
     show_change_link = True
     show_change_link = True
@@ -105,11 +105,11 @@ class ArchiveResultInline(admin.TabularInline):
 
 
 
 
 class ArchiveResultAdmin(ABIDModelAdmin):
 class ArchiveResultAdmin(ABIDModelAdmin):
-    list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
-    sort_fields = ('start_ts', 'extractor', 'status')
+    list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
+    sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
     readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
     readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
     search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
     search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
-    fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
+    fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
     autocomplete_fields = ['snapshot']
     autocomplete_fields = ['snapshot']
 
 
     list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
     list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@@ -169,7 +169,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
             result.output,
             result.output,
         )
         )
         output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
         output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
-        path_from_output_str = (snapshot_dir / result.output)
+        path_from_output_str = (snapshot_dir / (result.output or ''))
         output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
         output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
         if os.access(path_from_output_str, os.R_OK):
         if os.access(path_from_output_str, os.R_OK):
             root_dir = str(path_from_output_str)
             root_dir = str(path_from_output_str)

+ 3 - 3
archivebox/core/admin_snapshots.py

@@ -56,12 +56,12 @@ class SnapshotActionForm(ActionForm):
 
 
 
 
 class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
 class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
-    list_display = ('created_at', 'title_str', 'files', 'size', 'url_str', 'crawl')
-    sort_fields = ('title_str', 'url_str', 'created_at', 'crawl')
+    list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
+    sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
     readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
     readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
     search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
     search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
     list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
     list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
-    fields = ('url', 'title', 'created_by', 'bookmarked_at', 'crawl', *readonly_fields)
+    fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
     ordering = ['-created_at']
     ordering = ['-created_at']
     actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
     actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
     inlines = [TagInline, ArchiveResultInline]
     inlines = [TagInline, ArchiveResultInline]

+ 63 - 21
archivebox/core/models.py

@@ -1,7 +1,7 @@
 __package__ = 'archivebox.core'
 __package__ = 'archivebox.core'
 
 
 
 
-from typing import Optional, Dict, Iterable
+from typing import Optional, Dict, Iterable, Any
 from django_stubs_ext.db.models import TypedModelMeta
 from django_stubs_ext.db.models import TypedModelMeta
 
 
 import os
 import os
@@ -20,20 +20,22 @@ from django.db.models import Case, When, Value, IntegerField
 from django.contrib import admin
 from django.contrib import admin
 from django.conf import settings
 from django.conf import settings
 
 
-from actors.models import ModelWithStateMachine
+
+import abx
 
 
 from archivebox.config import CONSTANTS
 from archivebox.config import CONSTANTS
 
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
+from actors.models import ModelWithStateMachine
 from queues.tasks import bg_archive_snapshot
 from queues.tasks import bg_archive_snapshot
 from crawls.models import Crawl
 from crawls.models import Crawl
 # from machine.models import Machine, NetworkInterface
 # from machine.models import Machine, NetworkInterface
 
 
 from archivebox.misc.system import get_dir_size
 from archivebox.misc.system import get_dir_size
 from archivebox.misc.util import parse_date, base_url
 from archivebox.misc.util import parse_date, base_url
-from ..index.schema import Link
-from ..index.html import snapshot_icons
-from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
+from archivebox.index.schema import Link
+from archivebox.index.html import snapshot_icons
+from archivebox.extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE
 
 
 
 
 # class BaseModel(models.Model):
 # class BaseModel(models.Model):
@@ -195,13 +197,21 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
     tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
     tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
     title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
     title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
 
 
-    keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at')
+    # config = models.JSONField(default=dict, null=False, blank=False, editable=True)
+
+    keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at', 'created_at', 'status', 'retry_at', 'abid', 'id')
 
 
     archiveresult_set: models.Manager['ArchiveResult']
     archiveresult_set: models.Manager['ArchiveResult']
 
 
     objects = SnapshotManager()
     objects = SnapshotManager()
 
 
     def save(self, *args, **kwargs):
     def save(self, *args, **kwargs):
+        if self.pk:
+            existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
+            if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
+                if self.as_json() != existing_snapshot.as_json():
+                    raise Exception(f'Snapshot {self.pk} is already sealed, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_snapshot.as_json()}')
+        
         if not self.bookmarked_at:
         if not self.bookmarked_at:
             self.bookmarked_at = self.created_at or self._init_timestamp
             self.bookmarked_at = self.created_at or self._init_timestamp
             
             
@@ -427,7 +437,7 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
         ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget']
         ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget']
         
         
         # config = get_scope_config(snapshot=self)
         # config = get_scope_config(snapshot=self)
-        config = {'EXTRACTORS': ''}
+        config = {'EXTRACTORS': ','.join(ALL_EXTRACTORS)}
         
         
         if config.get('EXTRACTORS', 'auto') == 'auto':
         if config.get('EXTRACTORS', 'auto') == 'auto':
             EXTRACTORS = ALL_EXTRACTORS
             EXTRACTORS = ALL_EXTRACTORS
@@ -438,10 +448,13 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
         for extractor in EXTRACTORS:
         for extractor in EXTRACTORS:
             if not extractor:
             if not extractor:
                 continue
                 continue
-            archiveresult, _created = ArchiveResult.objects.get_or_create(
+            archiveresult = ArchiveResult.objects.update_or_create(
                 snapshot=self,
                 snapshot=self,
                 extractor=extractor,
                 extractor=extractor,
                 status=ArchiveResult.INITIAL_STATE,
                 status=ArchiveResult.INITIAL_STATE,
+                defaults={
+                    'retry_at': timezone.now(),
+                },
             )
             )
             archiveresults.append(archiveresult)
             archiveresults.append(archiveresult)
         return archiveresults
         return archiveresults
@@ -560,6 +573,8 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
     # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
     # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
 
 
     objects = ArchiveResultManager()
     objects = ArchiveResultManager()
+    
+    keys = ('snapshot_id', 'extractor', 'cmd', 'pwd', 'cmd_version', 'output', 'start_ts', 'end_ts', 'created_at', 'status', 'retry_at', 'abid', 'id')
 
 
     class Meta(TypedModelMeta):
     class Meta(TypedModelMeta):
         verbose_name = 'Archive Result'
         verbose_name = 'Archive Result'
@@ -576,6 +591,16 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
 
 
     def __str__(self):
     def __str__(self):
         return repr(self)
         return repr(self)
+    
+    def save(self, *args, **kwargs):
+        # if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
+        #     raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
+        if self.pk:
+            existing_archiveresult = self.__class__.objects.filter(pk=self.pk).first()
+            if existing_archiveresult and existing_archiveresult.status in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]:
+                if self.as_json() != existing_archiveresult.as_json():
+                    raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_archiveresult.as_json()}')
+        super().save(*args, **kwargs)
 
 
     # TODO: finish connecting machine.models
     # TODO: finish connecting machine.models
     # @cached_property
     # @cached_property
@@ -603,36 +628,53 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
         return f'/{self.snapshot.archive_path}/{self.output_path()}'
         return f'/{self.snapshot.archive_path}/{self.output_path()}'
 
 
     @property
     @property
-    def extractor_module(self):
-        return EXTRACTORS[self.extractor]
+    def extractor_module(self) -> Any | None:
+        return abx.as_dict(abx.pm.hook.get_EXTRACTORS()).get(self.extractor, None)
 
 
-    def output_path(self) -> str:
+    def output_path(self) -> str | None:
         """return the canonical output filename or directory name within the snapshot dir"""
         """return the canonical output filename or directory name within the snapshot dir"""
-        return self.extractor_module.get_output_path()
+        try:
+            return self.extractor_module.get_output_path(self.snapshot)
+        except Exception as e:
+            print(f'Error getting output path for {self.extractor} extractor: {e}')
+            return None
 
 
-    def embed_path(self) -> str:
+    def embed_path(self) -> str | None:
         """
         """
         return the actual runtime-calculated path to the file on-disk that
         return the actual runtime-calculated path to the file on-disk that
         should be used for user-facing iframe embeds of this result
         should be used for user-facing iframe embeds of this result
         """
         """
 
 
-        if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None):
-            return get_embed_path_func(self)
-
-        return self.extractor_module.get_output_path()
+        try:
+            return self.extractor_module.get_embed_path(self)
+        except Exception as e:
+            print(f'Error getting embed path for {self.extractor} extractor: {e}')
+            return None
 
 
     def legacy_output_path(self):
     def legacy_output_path(self):
         link = self.snapshot.as_link()
         link = self.snapshot.as_link()
         return link.canonical_outputs().get(f'{self.extractor}_path')
         return link.canonical_outputs().get(f'{self.extractor}_path')
 
 
     def output_exists(self) -> bool:
     def output_exists(self) -> bool:
-        return os.path.exists(self.output_path())
-        
+        output_path = self.output_path()
+        return bool(output_path and os.path.exists(output_path))
+            
     def create_output_dir(self):
     def create_output_dir(self):
-        snap_dir = self.snapshot_dir
+        snap_dir = Path(self.snapshot_dir)
         snap_dir.mkdir(parents=True, exist_ok=True)
         snap_dir.mkdir(parents=True, exist_ok=True)
-        return snap_dir / self.output_path()
+        output_path = self.output_path()
+        if output_path:
+            (snap_dir / output_path).mkdir(parents=True, exist_ok=True)
+        else:
+            raise ValueError(f'Not able to calculate output path for {self.extractor} extractor in {snap_dir}')
+        return snap_dir / output_path
 
 
+    def as_json(self, *args) -> dict:
+        args = args or self.keys
+        return {
+            key: getattr(self, key)
+            for key in args
+        }
 
 
     # def get_storage_dir(self, create=True, symlink=True):
     # def get_storage_dir(self, create=True, symlink=True):
     #     date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')
     #     date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')

+ 48 - 13
archivebox/core/statemachines.py

@@ -37,25 +37,44 @@ class SnapshotMachine(StateMachine, strict_states=True):
         super().__init__(snapshot, *args, **kwargs)
         super().__init__(snapshot, *args, **kwargs)
         
         
     def can_start(self) -> bool:
     def can_start(self) -> bool:
-        return self.snapshot.url
+        can_start = bool(self.snapshot.url and (self.snapshot.retry_at < timezone.now()))
+        if not can_start:
+            print(f'SnapshotMachine[{self.snapshot.ABID}].can_start() False: {self.snapshot.url} {self.snapshot.retry_at} {timezone.now()}')
+        return can_start
         
         
     def is_finished(self) -> bool:
     def is_finished(self) -> bool:
+        # if no archiveresults exist yet, it's not finished
         if not self.snapshot.archiveresult_set.exists():
         if not self.snapshot.archiveresult_set.exists():
             return False
             return False
+        # if archiveresults exist but are still pending, it's not finished
         if self.snapshot.pending_archiveresults().exists():
         if self.snapshot.pending_archiveresults().exists():
             return False
             return False
+        
+        # otherwise archiveresults exist and are all finished, so it's finished
         return True
         return True
         
         
+    def on_transition(self, event, state):
+        print(f'SnapshotMachine[{self.snapshot.ABID}].on_transition() {event} -> {state}')
+        
+    @queued.enter
+    def enter_queued(self):
+        print(f'SnapshotMachine[{self.snapshot.ABID}].on_queued(): snapshot.retry_at = now()')
+        self.snapshot.status = Snapshot.StatusChoices.QUEUED
+        self.snapshot.retry_at = timezone.now()
+        self.snapshot.save()
+        
     @started.enter
     @started.enter
-    def on_started(self):
+    def enter_started(self):
         print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
         print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
-        self.snapshot.create_pending_archiveresults()
+        self.snapshot.status = Snapshot.StatusChoices.STARTED
         self.snapshot.bump_retry_at(seconds=60)
         self.snapshot.bump_retry_at(seconds=60)
         self.snapshot.save()
         self.snapshot.save()
+        self.snapshot.create_pending_archiveresults()
         
         
     @sealed.enter
     @sealed.enter
-    def on_sealed(self):
+    def enter_sealed(self):
         print(f'SnapshotMachine[{self.snapshot.ABID}].on_sealed(): snapshot.retry_at=None')
         print(f'SnapshotMachine[{self.snapshot.ABID}].on_sealed(): snapshot.retry_at=None')
+        self.snapshot.status = Snapshot.StatusChoices.SEALED
         self.snapshot.retry_at = None
         self.snapshot.retry_at = None
         self.snapshot.save()
         self.snapshot.save()
 
 
@@ -95,7 +114,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
         super().__init__(archiveresult, *args, **kwargs)
         super().__init__(archiveresult, *args, **kwargs)
         
         
     def can_start(self) -> bool:
     def can_start(self) -> bool:
-        return self.archiveresult.snapshot and self.archiveresult.snapshot.STATE == Snapshot.active_state
+        return self.archiveresult.snapshot and (self.archiveresult.retry_at < timezone.now())
     
     
     def is_succeeded(self) -> bool:
     def is_succeeded(self) -> bool:
         return self.archiveresult.output_exists()
         return self.archiveresult.output_exists()
@@ -109,29 +128,45 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
     def is_finished(self) -> bool:
     def is_finished(self) -> bool:
         return self.is_failed() or self.is_succeeded()
         return self.is_failed() or self.is_succeeded()
 
 
+
+    @queued.enter
+    def enter_queued(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_queued(): archiveresult.retry_at = now()')
+        self.archiveresult.status = ArchiveResult.StatusChoices.QUEUED
+        self.archiveresult.retry_at = timezone.now()
+        self.archiveresult.save()
+        
     @started.enter
     @started.enter
-    def on_started(self):
+    def enter_started(self):
         print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_started(): archiveresult.start_ts + create_output_dir() + bump_retry_at(+60s)')
         print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_started(): archiveresult.start_ts + create_output_dir() + bump_retry_at(+60s)')
+        self.archiveresult.status = ArchiveResult.StatusChoices.STARTED
         self.archiveresult.start_ts = timezone.now()
         self.archiveresult.start_ts = timezone.now()
-        self.archiveresult.create_output_dir()
         self.archiveresult.bump_retry_at(seconds=60)
         self.archiveresult.bump_retry_at(seconds=60)
         self.archiveresult.save()
         self.archiveresult.save()
+        self.archiveresult.create_output_dir()
 
 
     @backoff.enter
     @backoff.enter
-    def on_backoff(self):
-        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.bump_retry_at(+60s)')
+    def enter_backoff(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.retries += 1, archiveresult.bump_retry_at(+60s), archiveresult.end_ts = None')
+        self.archiveresult.status = ArchiveResult.StatusChoices.BACKOFF
+        self.archiveresult.retries = getattr(self.archiveresult, 'retries', 0) + 1
         self.archiveresult.bump_retry_at(seconds=60)
         self.archiveresult.bump_retry_at(seconds=60)
+        self.archiveresult.end_ts = None
         self.archiveresult.save()
         self.archiveresult.save()
 
 
     @succeeded.enter
     @succeeded.enter
-    def on_succeeded(self):
-        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.end_ts')
+    def enter_succeeded(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.retry_at = None, archiveresult.end_ts = now()')
+        self.archiveresult.status = ArchiveResult.StatusChoices.SUCCEEDED
+        self.archiveresult.retry_at = None
         self.archiveresult.end_ts = timezone.now()
         self.archiveresult.end_ts = timezone.now()
         self.archiveresult.save()
         self.archiveresult.save()
 
 
     @failed.enter
     @failed.enter
-    def on_failed(self):
-        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archiveresult.end_ts')
+    def enter_failed(self):
+        print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archivebox.retry_at = None, archiveresult.end_ts = now()')
+        self.archiveresult.status = ArchiveResult.StatusChoices.FAILED
+        self.archiveresult.retry_at = None
         self.archiveresult.end_ts = timezone.now()
         self.archiveresult.end_ts = timezone.now()
         self.archiveresult.save()
         self.archiveresult.save()
         
         

+ 10 - 9
archivebox/core/views.py

@@ -102,7 +102,8 @@ class SnapshotView(View):
 
 
         # iterate through all the files in the snapshot dir and add the biggest ones to1 the result list
         # iterate through all the files in the snapshot dir and add the biggest ones to1 the result list
         snap_dir = Path(snapshot.link_dir)
         snap_dir = Path(snapshot.link_dir)
-        assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK)
+        if not os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK):
+            return {}
         
         
         for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
         for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
             extension = result_file.suffix.lstrip('.').lower()
             extension = result_file.suffix.lstrip('.').lower()
@@ -504,7 +505,7 @@ def find_config_section(key: str) -> str:
     if key in CONSTANTS_CONFIG:
     if key in CONSTANTS_CONFIG:
         return 'CONSTANT'
         return 'CONSTANT'
     matching_sections = [
     matching_sections = [
-        section_id for section_id, section in CONFIGS.items() if key in section.model_fields
+        section_id for section_id, section in CONFIGS.items() if key in dict(section)
     ]
     ]
     section = matching_sections[0] if matching_sections else 'DYNAMIC'
     section = matching_sections[0] if matching_sections else 'DYNAMIC'
     return section
     return section
@@ -518,8 +519,9 @@ def find_config_default(key: str) -> str:
     default_val = None
     default_val = None
 
 
     for config in CONFIGS.values():
     for config in CONFIGS.values():
-        if key in config.model_fields:
-            default_val = config.model_fields[key].default
+        if key in dict(config):
+            default_field = getattr(config, 'model_fields', dict(config))[key]
+            default_val = default_field.default if hasattr(default_field, 'default') else default_field
             break
             break
         
         
     if isinstance(default_val, Callable):
     if isinstance(default_val, Callable):
@@ -529,7 +531,6 @@ def find_config_default(key: str) -> str:
     else:
     else:
         default_val = str(default_val)
         default_val = str(default_val)
         
         
-        
     return default_val
     return default_val
 
 
 def find_config_type(key: str) -> str:
 def find_config_type(key: str) -> str:
@@ -567,7 +568,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
     }
     }
 
 
     for section_id, section in reversed(list(CONFIGS.items())):
     for section_id, section in reversed(list(CONFIGS.items())):
-        for key, field in section.model_fields.items():
+        for key in dict(section).keys():
             rows['Section'].append(section_id)   # section.replace('_', ' ').title().replace(' Config', '')
             rows['Section'].append(section_id)   # section.replace('_', ' ').title().replace(' Config', '')
             rows['Key'].append(ItemLink(key, key=key))
             rows['Key'].append(ItemLink(key, key=key))
             rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
             rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
@@ -580,7 +581,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
     for key in CONSTANTS_CONFIG.keys():
     for key in CONSTANTS_CONFIG.keys():
         rows['Section'].append(section)   # section.replace('_', ' ').title().replace(' Config', '')
         rows['Section'].append(section)   # section.replace('_', ' ').title().replace(' Config', '')
         rows['Key'].append(ItemLink(key, key=key))
         rows['Key'].append(ItemLink(key, key=key))
-        rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', repr(CONSTANTS_CONFIG[key]))))
+        rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', str(CONSTANTS_CONFIG[key]))))
         rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)')
         rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)')
         rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
         rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
         # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
         # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
@@ -642,13 +643,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
                             <code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
                             <code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
                         </a>
                         </a>
                         <br/><br/>
                         <br/><br/>
-                        <p style="display: {"block" if key in FLAT_CONFIG else "none"}">
+                        <p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
                             <i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
                             <i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
                             <br/><br/>
                             <br/><br/>
                             <code>archivebox config --set {key}="{
                             <code>archivebox config --set {key}="{
                                 val.strip("'")
                                 val.strip("'")
                                 if (val := find_config_default(key)) else
                                 if (val := find_config_default(key)) else
-                                (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
+                                (str(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
                             }"</code>
                             }"</code>
                         </p>
                         </p>
                     '''),
                     '''),

+ 1 - 0
archivebox/crawls/__init__.py

@@ -1,4 +1,5 @@
 __package__ = 'archivebox.crawls'
 __package__ = 'archivebox.crawls'
+__order__ = 100
 
 
 import abx
 import abx
 
 

+ 1 - 1
archivebox/crawls/actors.py

@@ -18,6 +18,6 @@ class CrawlActor(ActorType[Crawl]):
     FINAL_STATES: ClassVar[list[State]] = CrawlMachine.final_states
     FINAL_STATES: ClassVar[list[State]] = CrawlMachine.final_states
     STATE_FIELD_NAME: ClassVar[str] = Crawl.state_field_name
     STATE_FIELD_NAME: ClassVar[str] = Crawl.state_field_name
     
     
-    MAX_CONCURRENT_ACTORS: ClassVar[int] = 3
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 1
     MAX_TICK_TIME: ClassVar[int] = 10
     MAX_TICK_TIME: ClassVar[int] = 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
     CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10

+ 12 - 11
archivebox/crawls/models.py

@@ -150,8 +150,8 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
         parser = (self.seed and self.seed.extractor) or 'auto'
         parser = (self.seed and self.seed.extractor) or 'auto'
         created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
         created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
         if self.id and self.seed:
         if self.id and self.seed:
-            return f'[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
-        return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
+            return f'\\[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
+        return f'\\[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
         
         
     @classmethod
     @classmethod
     def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
     def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
@@ -184,26 +184,27 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
         return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
         return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
     
     
     def pending_snapshots(self) -> QuerySet['Snapshot']:
     def pending_snapshots(self) -> QuerySet['Snapshot']:
-        from core.models import Snapshot
-        return self.snapshot_set.exclude(status__in=Snapshot.FINAL_OR_ACTIVE_STATES)
+        return self.snapshot_set.filter(retry_at__isnull=False)
     
     
     def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
     def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
         from core.models import ArchiveResult
         from core.models import ArchiveResult
         
         
         snapshot_ids = self.snapshot_set.values_list('id', flat=True)
         snapshot_ids = self.snapshot_set.values_list('id', flat=True)
-        pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids).exclude(status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES)
+        pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=True)
         return pending_archiveresults
         return pending_archiveresults
     
     
     def create_root_snapshot(self) -> 'Snapshot':
     def create_root_snapshot(self) -> 'Snapshot':
         from core.models import Snapshot
         from core.models import Snapshot
         
         
-        root_snapshot, _ = Snapshot.objects.get_or_create(
-            crawl=self,
+        root_snapshot, _ = Snapshot.objects.update_or_create(
             url=self.seed.uri,
             url=self.seed.uri,
-            status=Snapshot.INITIAL_STATE,
-            retry_at=timezone.now(),
-            timestamp=str(timezone.now().timestamp()),
-            # config=self.seed.config,
+            defaults={
+                'crawl': self,
+                'status': Snapshot.INITIAL_STATE,
+                'retry_at': timezone.now(),
+                'timestamp': str(timezone.now().timestamp()),
+                # 'config': self.seed.config,
+            },
         )
         )
         return root_snapshot
         return root_snapshot
 
 

+ 8 - 4
archivebox/crawls/statemachines.py

@@ -1,5 +1,7 @@
 __package__ = 'archivebox.crawls'
 __package__ = 'archivebox.crawls'
 
 
+from django.utils import timezone
+
 from statemachine import State, StateMachine
 from statemachine import State, StateMachine
 
 
 from crawls.models import Crawl
 from crawls.models import Crawl
@@ -31,7 +33,7 @@ class CrawlMachine(StateMachine, strict_states=True):
         super().__init__(crawl, *args, **kwargs)
         super().__init__(crawl, *args, **kwargs)
         
         
     def can_start(self) -> bool:
     def can_start(self) -> bool:
-        return self.crawl.seed and self.crawl.seed.uri
+        return bool(self.crawl.seed and self.crawl.seed.uri and (self.retry_at < timezone.now()))
         
         
     def is_finished(self) -> bool:
     def is_finished(self) -> bool:
         if not self.crawl.snapshot_set.exists():
         if not self.crawl.snapshot_set.exists():
@@ -47,15 +49,17 @@ class CrawlMachine(StateMachine, strict_states=True):
     #     return "before_transition_return"
     #     return "before_transition_return"
 
 
     @started.enter
     @started.enter
-    def on_started(self):
+    def enter_started(self):
         print(f'CrawlMachine[{self.crawl.ABID}].on_started(): crawl.create_root_snapshot() + crawl.bump_retry_at(+10s)')
         print(f'CrawlMachine[{self.crawl.ABID}].on_started(): crawl.create_root_snapshot() + crawl.bump_retry_at(+10s)')
-        self.crawl.create_root_snapshot()
+        self.crawl.status = Crawl.StatusChoices.STARTED
         self.crawl.bump_retry_at(seconds=10)
         self.crawl.bump_retry_at(seconds=10)
         self.crawl.save()
         self.crawl.save()
+        self.crawl.create_root_snapshot()
 
 
     @sealed.enter        
     @sealed.enter        
-    def on_sealed(self):
+    def enter_sealed(self):
         print(f'CrawlMachine[{self.crawl.ABID}].on_sealed(): crawl.retry_at=None')
         print(f'CrawlMachine[{self.crawl.ABID}].on_sealed(): crawl.retry_at=None')
+        self.crawl.status = Crawl.StatusChoices.SEALED
         self.crawl.retry_at = None
         self.crawl.retry_at = None
         self.crawl.save()
         self.crawl.save()
 
 

+ 2 - 2
archivebox/machine/models.py

@@ -11,7 +11,7 @@ from django.utils.functional import cached_property
 import abx
 import abx
 import archivebox
 import archivebox
 
 
-from pydantic_pkgr import Binary, BinProvider
+from abx_pkg import Binary, BinProvider
 from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
 from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
 
 
 from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
 from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@@ -323,7 +323,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
     # whereas a loaded binary is a not-yet saved instance that may not have the same config
     # whereas a loaded binary is a not-yet saved instance that may not have the same config
     # why would we want to load a binary record from the db when it could be freshly loaded?
     # why would we want to load a binary record from the db when it could be freshly loaded?
     def load_from_db(self) -> Binary:
     def load_from_db(self) -> Binary:
-        # TODO: implement defaults arg in pydantic_pkgr
+        # TODO: implement defaults arg in abx_pkg
         # return self.BINARY.load(defaults={
         # return self.BINARY.load(defaults={
         #     'binprovider': self.BINPROVIDER,
         #     'binprovider': self.BINPROVIDER,
         #     'abspath': Path(self.abspath),
         #     'abspath': Path(self.abspath),

+ 1 - 1
archivebox/main.py

@@ -14,7 +14,7 @@ from crontab import CronTab, CronSlices
 from django.db.models import QuerySet
 from django.db.models import QuerySet
 from django.utils import timezone
 from django.utils import timezone
 
 
-from pydantic_pkgr import Binary
+from abx_pkg import Binary
 
 
 import abx
 import abx
 import archivebox
 import archivebox

+ 1 - 1
archivebox/pkgs/__init__.py

@@ -6,7 +6,7 @@ PKGS_DIR = Path(__file__).parent
 
 
 VENDORED_PKGS = [
 VENDORED_PKGS = [
     'abx',
     'abx',
-    # 'pydantic-pkgr',
+    # 'abx-pkg',
     # ... everything else in archivebox/pkgs/* comes after ...
     # ... everything else in archivebox/pkgs/* comes after ...
 ]
 ]
 
 

+ 9 - 7
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py

@@ -1,3 +1,4 @@
+__package__ = 'abx_plugin_chrome'
 __label__ = 'Chrome'
 __label__ = 'Chrome'
 __author__ = 'ArchiveBox'
 __author__ = 'ArchiveBox'
 
 
@@ -25,10 +26,11 @@ def ready():
     CHROME_CONFIG.validate()
     CHROME_CONFIG.validate()
 
 
 
 
-# @abx.hookimpl
-# def get_EXTRACTORS():
-#     return {
-#         'pdf': PDF_EXTRACTOR,
-#         'screenshot': SCREENSHOT_EXTRACTOR,
-#         'dom': DOM_EXTRACTOR,
-#     }
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import PDF_EXTRACTOR, SCREENSHOT_EXTRACTOR, DOM_EXTRACTOR
+    return {
+        'pdf': PDF_EXTRACTOR,
+        'screenshot': SCREENSHOT_EXTRACTOR,
+        'dom': DOM_EXTRACTOR,
+    }

+ 1 - 1
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py

@@ -4,7 +4,7 @@ from pathlib import Path
 from typing import List, Optional
 from typing import List, Optional
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import (
+from abx_pkg import (
     Binary,
     Binary,
     BinProvider,
     BinProvider,
     BinName,
     BinName,

+ 1 - 1
archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py

@@ -3,7 +3,7 @@ from pathlib import Path
 from typing import List, Optional
 from typing import List, Optional
 
 
 from pydantic import Field
 from pydantic import Field
-from pydantic_pkgr import bin_abspath
+from abx_pkg import bin_abspath
 
 
 from abx_spec_config.base_configset import BaseConfigSet
 from abx_spec_config.base_configset import BaseConfigSet
 from abx_plugin_default_binproviders import env
 from abx_plugin_default_binproviders import env

+ 1 - 1
archivebox/pkgs/abx-plugin-chrome/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_curl'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_default_binproviders import apt, brew, env
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-curl/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py

@@ -3,7 +3,7 @@ import abx
 
 
 from typing import Dict
 from typing import Dict
 
 
-from pydantic_pkgr import (
+from abx_pkg import (
     AptProvider,
     AptProvider,
     BrewProvider,
     BrewProvider,
     EnvProvider,
     EnvProvider,

+ 2 - 2
archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml

@@ -6,8 +6,8 @@ readme = "README.md"
 requires-python = ">=3.10"
 requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-pkg>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 6 - 6
archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py

@@ -20,10 +20,10 @@ def get_CONFIG():
     }
     }
 
 
 
 
-# @abx.hookimpl
-# def get_EXTRACTORS():
-#     from .extractors import FAVICON_EXTRACTOR
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import FAVICON_EXTRACTOR
     
     
-#     return {
-#         'favicon': FAVICON_EXTRACTOR,
-#     }
+    return {
+        'favicon': FAVICON_EXTRACTOR,
+    }

+ 1 - 1
archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_git'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_default_binproviders import apt, brew, env
 
 

+ 13 - 8
archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py

@@ -1,15 +1,20 @@
 __package__ = 'abx_plugin_git'
 __package__ = 'abx_plugin_git'
 
 
-# from pathlib import Path
+from pathlib import Path
 
 
-# from .binaries import GIT_BINARY
 
 
+from abx_pkg import BinName
 
 
-# class GitExtractor(BaseExtractor):
-#     name: ExtractorName = 'git'
-#     binary: str = GIT_BINARY.name
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         return snapshot.as_link() / 'git'
+from .binaries import GIT_BINARY
 
 
-# GIT_EXTRACTOR = GitExtractor()
+
+class GitExtractor(BaseExtractor):
+    name: ExtractorName = 'git'
+    binary: BinName = GIT_BINARY.name
+
+    def get_output_path(self, snapshot) -> Path | None:
+        return snapshot.as_link() / 'git'
+
+GIT_EXTRACTOR = GitExtractor()

+ 1 - 1
archivebox/pkgs/abx-plugin-git/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-plugin-default-binproviders>=2024.10.24",
     "abx-plugin-default-binproviders>=2024.10.24",
 ]
 ]
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py

@@ -6,7 +6,7 @@ from typing import List
 from pathlib import Path
 from pathlib import Path
 from pydantic import InstanceOf
 from pydantic import InstanceOf
 
 
-from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider
+from abx_pkg import BinaryOverrides, SemVer, Binary, BinProvider
 
 
 from abx_plugin_default_binproviders import apt
 from abx_plugin_default_binproviders import apt
 from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
 from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES

+ 1 - 1
archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_mercury'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
+from abx_pkg import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
 
 
 from abx_plugin_default_binproviders import env
 from abx_plugin_default_binproviders import env
 
 

+ 11 - 8
archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py

@@ -1,17 +1,20 @@
 __package__ = 'abx_plugin_mercury'
 __package__ = 'abx_plugin_mercury'
 
 
-# from pathlib import Path
+from pathlib import Path
 
 
-# from .binaries import MERCURY_BINARY
+from abx_pkg import BinName
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
 
+from .binaries import MERCURY_BINARY
 
 
 
 
-# class MercuryExtractor(BaseExtractor):
-#     name: ExtractorName = 'mercury'
-#     binary: str = MERCURY_BINARY.name
 
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         return snapshot.link_dir / 'mercury' / 'content.html'
+class MercuryExtractor(BaseExtractor):
+    name: ExtractorName = 'mercury'
+    binary: BinName = MERCURY_BINARY.name
 
 
+    def get_output_path(self, snapshot) -> Path | None:
+        return snapshot.link_dir / 'mercury' / 'content.html'
 
 
-# MERCURY_EXTRACTOR = MercuryExtractor()
+
+MERCURY_EXTRACTOR = MercuryExtractor()

+ 2 - 2
archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py

@@ -1,4 +1,4 @@
-__package__ = 'plugins_pkg.npm'
+__package__ = 'abx_plugin_npm'
 
 
 
 
 from typing import List
 from typing import List
@@ -6,7 +6,7 @@ from typing import List
 from pydantic import InstanceOf
 from pydantic import InstanceOf
 from benedict import benedict
 from benedict import benedict
 
 
-from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
+from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides
 
 
 from abx_plugin_default_binproviders import get_BINPROVIDERS
 from abx_plugin_default_binproviders import get_BINPROVIDERS
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py

@@ -2,7 +2,7 @@ import os
 from pathlib import Path
 from pathlib import Path
 from typing import Optional
 from typing import Optional
 
 
-from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
+from abx_pkg import NpmProvider, PATHStr, BinProviderName
 
 
 import abx
 import abx
 
 

+ 2 - 2
archivebox/pkgs/abx-plugin-npm/pyproject.toml

@@ -6,8 +6,8 @@ readme = "README.md"
 requires-python = ">=3.10"
 requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-pkg>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-plugin-default-binproviders>=2024.10.24",
     "abx-plugin-default-binproviders>=2024.10.24",
 ]
 ]

+ 1 - 1
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order

@@ -1 +1 @@
-0
+400

+ 1 - 0
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py

@@ -1,5 +1,6 @@
 __package__ = 'abx_plugin_pip'
 __package__ = 'abx_plugin_pip'
 __label__ = 'PIP'
 __label__ = 'PIP'
+__order__ = 200
 
 
 import abx
 import abx
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py

@@ -9,7 +9,7 @@ from pydantic import InstanceOf, Field, model_validator
 import django
 import django
 import django.db.backends.sqlite3.base
 import django.db.backends.sqlite3.base
 from django.db.backends.sqlite3.base import Database as django_sqlite3     # type: ignore[import-type]
 from django.db.backends.sqlite3.base import Database as django_sqlite3     # type: ignore[import-type]
-from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
+from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides, SemVer
 
 
 
 
 from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew
 from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew

+ 1 - 1
archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py

@@ -6,7 +6,7 @@ from typing import Optional
 
 
 from benedict import benedict
 from benedict import benedict
 
 
-from pydantic_pkgr import PipProvider, BinName, BinProviderName
+from abx_pkg import PipProvider, BinName, BinProviderName
 
 
 import abx
 import abx
 
 

+ 2 - 2
archivebox/pkgs/abx-plugin-pip/pyproject.toml

@@ -6,9 +6,9 @@ readme = "README.md"
 requires-python = ">=3.10"
 requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-pkg>=0.5.4",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-plugin-default-binproviders>=2024.10.24",
     "abx-plugin-default-binproviders>=2024.10.24",
     "django>=5.0.0",
     "django>=5.0.0",
 ]
 ]

+ 1 - 1
archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_playwright'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinName, BinProvider, Binary
+from abx_pkg import BinName, BinProvider, Binary
 
 
 
 
 from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
 from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

+ 1 - 1
archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py

@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import List, Optional, Dict, ClassVar
 from typing import List, Optional, Dict, ClassVar
 
 
 from pydantic import Field
 from pydantic import Field
-from pydantic_pkgr import (
+from abx_pkg import (
     BinName,
     BinName,
     BinProvider,
     BinProvider,
     BinProviderName,
     BinProviderName,

+ 2 - 2
archivebox/pkgs/abx-plugin-playwright/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "pydantic>=2.4.2",
     "pydantic>=2.4.2",
-    "pydantic-pkgr>=0.5.4",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-pkg>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
 ]
 ]
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_puppeteer'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 
 
 
 from abx_plugin_default_binproviders import env
 from abx_plugin_default_binproviders import env

+ 1 - 1
archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py

@@ -4,7 +4,7 @@ from pathlib import Path
 from typing import List, Optional, Dict, ClassVar
 from typing import List, Optional, Dict, ClassVar
 
 
 from pydantic import Field
 from pydantic import Field
-from pydantic_pkgr import (
+from abx_pkg import (
     BinProvider,
     BinProvider,
     BinName,
     BinName,
     BinProviderName,
     BinProviderName,

+ 2 - 2
archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
+    "abx-pkg>=0.5.4",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_readability'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName
+from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName
 
 
 from abx_plugin_default_binproviders import env
 from abx_plugin_default_binproviders import env
 from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
 from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

+ 10 - 10
archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py

@@ -1,19 +1,19 @@
 # __package__ = 'abx_plugin_readability'
 # __package__ = 'abx_plugin_readability'
 
 
-# from pathlib import Path
+from pathlib import Path
 
 
-# from pydantic_pkgr import BinName
+from abx_pkg import BinName
 
 
+from abx_spec_extractor import BaseExtractor, ExtractorName
+from .binaries import READABILITY_BINARY
 
 
-# from .binaries import READABILITY_BINARY
 
 
+class ReadabilityExtractor(BaseExtractor):
+    name: ExtractorName = 'readability'
+    binary: BinName = READABILITY_BINARY.name
 
 
-# class ReadabilityExtractor(BaseExtractor):
-#     name: str = 'readability'
-#     binary: BinName = READABILITY_BINARY.name
+    def get_output_path(self, snapshot) -> Path:
+        return Path(snapshot.link_dir) / 'readability' / 'content.html'
 
 
-#     def get_output_path(self, snapshot) -> Path:
-#         return Path(snapshot.link_dir) / 'readability' / 'content.html'
 
 
-
-# READABILITY_EXTRACTOR = ReadabilityExtractor()
+READABILITY_EXTRACTOR = ReadabilityExtractor()

+ 1 - 1
archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py

@@ -3,7 +3,7 @@ __id__ = 'abx_plugin_readwise_extractor'
 __label__ = 'Readwise API'
 __label__ = 'Readwise API'
 __version__ = '2024.10.27'
 __version__ = '2024.10.27'
 __author__ = 'ArchiveBox'
 __author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/pkgs/abx-plugin-readwise-extractor'
 __dependencies__ = []
 __dependencies__ = []
 
 
 import abx
 import abx

+ 1 - 1
archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_ripgrep_search'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
+from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
 
 
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_default_binproviders import apt, brew, env
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py

@@ -1,7 +1,7 @@
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
+from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
 
 
 from abx_plugin_default_binproviders import env
 from abx_plugin_default_binproviders import env
 from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
 from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

+ 12 - 9
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py

@@ -1,18 +1,21 @@
 __package__ = 'abx_plugin_singlefile'
 __package__ = 'abx_plugin_singlefile'
 
 
-# from pathlib import Path
 
 
-# from pydantic_pkgr import BinName
+from pathlib import Path
 
 
-# from .binaries import SINGLEFILE_BINARY
+from abx_pkg import BinName
 
 
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
 
-# class SinglefileExtractor(BaseExtractor):
-#     name: str = 'singlefile'
-#     binary: BinName = SINGLEFILE_BINARY.name
+from .binaries import SINGLEFILE_BINARY
 
 
-#     def get_output_path(self, snapshot) -> Path:
-#         return Path(snapshot.link_dir) / 'singlefile.html'
 
 
+class SinglefileExtractor(BaseExtractor):
+    name: ExtractorName = 'singlefile'
+    binary: BinName = SINGLEFILE_BINARY.name
 
 
-# SINGLEFILE_EXTRACTOR = SinglefileExtractor()
+    def get_output_path(self, snapshot) -> Path:
+        return Path(snapshot.link_dir) / 'singlefile.html'
+
+
+SINGLEFILE_EXTRACTOR = SinglefileExtractor()

+ 0 - 0
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py


+ 2 - 2
archivebox/pkgs/abx-plugin-singlefile/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
+    "abx-pkg>=0.5.4",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_sonic_search'
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
+from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
 
 
 from abx_plugin_default_binproviders import brew, env
 from abx_plugin_default_binproviders import brew, env
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py

@@ -1,4 +1,4 @@
-__package__ = 'plugins_search.sonic'
+__package__ = 'abx_plugin_sonic_search'
 
 
 from typing import List, Generator, cast
 from typing import List, Generator, cast
 
 

+ 2 - 2
archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml

@@ -7,9 +7,9 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
     "abx-spec-searchbackend>=0.1.0",
     "abx-spec-searchbackend>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-pkg>=0.5.4",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 8 - 0
archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py

@@ -7,3 +7,11 @@ import abx
 #     return {
 #     return {
 #         'title_extractor': TITLE_EXTRACTOR_CONFIG
 #         'title_extractor': TITLE_EXTRACTOR_CONFIG
 #     }
 #     }
+
+
+@abx.hookimpl
+def get_EXTRACTORS():
+    from .extractors import TITLE_EXTRACTOR
+    return {
+        'title': TITLE_EXTRACTOR,
+    }

+ 1 - 1
archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py

@@ -4,7 +4,7 @@ from typing import List
 
 
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, Binary
+from abx_pkg import BinProvider, BinName, Binary
 
 
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_default_binproviders import apt, brew, env
 
 

+ 24 - 22
archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py

@@ -1,35 +1,37 @@
 __package__ = 'abx_plugin_wget'
 __package__ = 'abx_plugin_wget'
 
 
-# from pathlib import Path
+from pathlib import Path
 
 
-# from pydantic_pkgr import BinName
+from abx_pkg import BinName
 
 
-# from .binaries import WGET_BINARY
-# from .wget_util import wget_output_path
+from abx_spec_extractor import BaseExtractor, ExtractorName
 
 
-# class WgetExtractor(BaseExtractor):
-#     name: ExtractorName = 'wget'
-#     binary: BinName = WGET_BINARY.name
+from .binaries import WGET_BINARY
+from .wget_util import wget_output_path
 
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         wget_index_path = wget_output_path(snapshot.as_link())
-#         if wget_index_path:
-#             return Path(wget_index_path)
-#         return None
+class WgetExtractor(BaseExtractor):
+    name: ExtractorName = 'wget'
+    binary: BinName = WGET_BINARY.name
 
 
-# WGET_EXTRACTOR = WgetExtractor()
+    def get_output_path(self, snapshot) -> Path | None:
+        wget_index_path = wget_output_path(snapshot.as_link())
+        if wget_index_path:
+            return Path(wget_index_path)
+        return None
 
 
+WGET_EXTRACTOR = WgetExtractor()
 
 
-# class WarcExtractor(BaseExtractor):
-#     name: ExtractorName = 'warc'
-#     binary: BinName = WGET_BINARY.name
 
 
-#     def get_output_path(self, snapshot) -> Path | None:
-#         warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
-#         if warc_files:
-#             return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
-#         return None
+class WarcExtractor(BaseExtractor):
+    name: ExtractorName = 'warc'
+    binary: BinName = WGET_BINARY.name
 
 
+    def get_output_path(self, snapshot) -> Path | None:
+        warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
+        if warc_files:
+            return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
+        return None
 
 
-# WARC_EXTRACTOR = WarcExtractor()
+
+WARC_EXTRACTOR = WarcExtractor()
 
 

+ 1 - 1
archivebox/pkgs/abx-plugin-wget/pyproject.toml

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.0",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 1 - 1
archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py

@@ -4,7 +4,7 @@ import subprocess
 from typing import List
 from typing import List
 
 
 from pydantic import InstanceOf
 from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary
+from abx_pkg import BinProvider, BinName, BinaryOverrides, Binary
 
 
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_default_binproviders import apt, brew, env
 from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
 from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

+ 1 - 1
archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py

@@ -1,4 +1,4 @@
-__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp'
 
 
 from typing import List
 from typing import List
 
 

+ 2 - 2
archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
 dependencies = [
 dependencies = [
     "abx>=0.1.0",
     "abx>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
+    "abx-spec-abx-pkg>=0.1.0",
+    "abx-pkg>=0.5.4",
 ]
 ]
 
 
 [build-system]
 [build-system]

+ 3 - 3
archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py

@@ -11,13 +11,13 @@ from typing import cast
 
 
 import abx
 import abx
 from abx_spec_config import ConfigPluginSpec
 from abx_spec_config import ConfigPluginSpec
-from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec
+from abx_spec_abx_pkg import AbxPkgPluginSpec
 from abx_spec_django import DjangoPluginSpec
 from abx_spec_django import DjangoPluginSpec
 from abx_spec_searchbackend import SearchBackendPluginSpec
 from abx_spec_searchbackend import SearchBackendPluginSpec
 
 
-class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
+class ArchiveBoxPluginSpec(ConfigPluginSpec, AbxPkgPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
     """
     """
-    ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, and Django plugin specs.
+    ArchiveBox plugins can use any of the hooks from the Config, AbxPkg, and Django plugin specs.
     """
     """
     pass
     pass
 
 

+ 35 - 18
archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py

@@ -2,10 +2,12 @@ __order__ = 100
 
 
 import os
 import os
 from pathlib import Path
 from pathlib import Path
-from typing import Dict, Any, cast
+from typing import Any, cast, TYPE_CHECKING
 
 
 from benedict import benedict
 from benedict import benedict
 
 
+if TYPE_CHECKING:
+    from archivebox.config.constants import ConstantsDict
 
 
 import abx
 import abx
 
 
@@ -13,38 +15,43 @@ from .base_configset import BaseConfigSet, ConfigKeyStr
 
 
 
 
 class ConfigPluginSpec:
 class ConfigPluginSpec:
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_collection_config_path(self) -> Path:
+    def get_collection_config_path() -> Path:
         return Path(os.getcwd()) / "ArchiveBox.conf"
         return Path(os.getcwd()) / "ArchiveBox.conf"
 
 
 
 
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_system_config_path(self) -> Path:
+    def get_system_config_path() -> Path:
         return Path('~/.config/abx/abx.conf').expanduser()
         return Path('~/.config/abx/abx.conf').expanduser()
 
 
 
 
+    @staticmethod
     @abx.hookspec
     @abx.hookspec
     @abx.hookimpl
     @abx.hookimpl
-    def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]:
+    def get_CONFIG() -> dict[abx.PluginId, 'BaseConfigSet | ConstantsDict']:
+        from archivebox import CONSTANTS
         """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
         """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
         return {
         return {
-            # override this in your plugin to return your plugin's config, e.g.
-            # 'ytdlp': YtdlpConfig(...),
+            'CONSTANTS': CONSTANTS,
         }
         }
 
 
 
 
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]:
+    def get_CONFIGS() -> dict[abx.PluginId, BaseConfigSet]:
         """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
         """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
         return abx.as_dict(pm.hook.get_CONFIG())
         return abx.as_dict(pm.hook.get_CONFIG())
 
 
 
 
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]:
+    def get_FLAT_CONFIG() -> dict[ConfigKeyStr, Any]:
         """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
         """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
         return benedict({
         return benedict({
             key: value
             key: value
@@ -52,9 +59,10 @@ class ConfigPluginSpec:
                 for key, value in benedict(configset).items()
                 for key, value in benedict(configset).items()
         })
         })
         
         
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> Dict[ConfigKeyStr, Any]:
+    def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
         """Get the config as it applies to you right now, based on the current context"""
         """Get the config as it applies to you right now, based on the current context"""
         return benedict({
         return benedict({
             **pm.hook.get_default_config(default=default),
             **pm.hook.get_default_config(default=default),
@@ -69,35 +77,41 @@ class ConfigPluginSpec:
             **(extra or {}),
             **(extra or {}),
         })
         })
         
         
+    @staticmethod
     # @abx.hookspec(firstresult=True)
     # @abx.hookspec(firstresult=True)
     # @abx.hookimpl
     # @abx.hookimpl
-    # def get_request_config(self, request) -> dict:
+    # def get_request_config(request) -> dict:
     #     session = getattr(request, 'session', None)
     #     session = getattr(request, 'session', None)
     #     return getattr(session, 'config', None) or {}
     #     return getattr(session, 'config', None) or {}
         
         
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]:
+    def get_archiveresult_config(archiveresult) -> dict[ConfigKeyStr, Any]:
         return getattr(archiveresult, 'config', None) or {}
         return getattr(archiveresult, 'config', None) or {}
     
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]:
+    def get_snapshot_config(snapshot) -> dict[ConfigKeyStr, Any]:
         return getattr(snapshot, 'config', None) or {}
         return getattr(snapshot, 'config', None) or {}
     
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]:
+    def get_crawl_config(crawl) -> dict[ConfigKeyStr, Any]:
         return getattr(crawl, 'config', None) or {}
         return getattr(crawl, 'config', None) or {}
     
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]:
+    def get_user_config(user=None) -> dict[ConfigKeyStr, Any]:
         return getattr(user, 'config', None) or {}
         return getattr(user, 'config', None) or {}
     
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]:
+    def get_collection_config(collection=...) -> dict[ConfigKeyStr, Any]:
         # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file
         # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file
         # {} = empty dict, override to ignore the collection config
         # {} = empty dict, override to ignore the collection config
         return benedict({
         return benedict({
@@ -106,9 +120,10 @@ class ConfigPluginSpec:
                 for key, value in configset.from_collection().items()
                 for key, value in configset.from_collection().items()
         }) if collection == ... else collection
         }) if collection == ... else collection
     
     
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]:
+    def get_environment_config(environment=...) -> dict[ConfigKeyStr, Any]:
         # ... = ellipsis, means automatically get the environment config from the active environment variables
         # ... = ellipsis, means automatically get the environment config from the active environment variables
         # {} = empty dict, override to ignore the environment config
         # {} = empty dict, override to ignore the environment config
         return benedict({
         return benedict({
@@ -117,18 +132,20 @@ class ConfigPluginSpec:
                 for key, value in configset.from_environment().items()
                 for key, value in configset.from_environment().items()
         }) if environment == ... else environment
         }) if environment == ... else environment
     
     
+    @staticmethod
     # @abx.hookspec(firstresult=True)
     # @abx.hookspec(firstresult=True)
     # @abx.hookimpl
     # @abx.hookimpl
-    # def get_machine_config(self, machine=...) -> dict:
+    # def get_machine_config(machine=...) -> dict:
     #     # ... = ellipsis, means automatically get the machine config from the currently executing machine
     #     # ... = ellipsis, means automatically get the machine config from the currently executing machine
     #     # {} = empty dict, override to ignore the machine config
     #     # {} = empty dict, override to ignore the machine config
     #     if machine == ...:
     #     if machine == ...:
     #         machine = Machine.objects.get_current()
     #         machine = Machine.objects.get_current()
     #     return getattr(machine, 'config', None) or {}
     #     return getattr(machine, 'config', None) or {}
         
         
+    @staticmethod
     @abx.hookspec(firstresult=True)
     @abx.hookspec(firstresult=True)
     @abx.hookimpl
     @abx.hookimpl
-    def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]:
+    def get_default_config(default=...) -> dict[ConfigKeyStr, Any]:
         # ... = ellipsis, means automatically get the machine config from the currently executing machine
         # ... = ellipsis, means automatically get the machine config from the currently executing machine
         # {} = empty dict, override to ignore the machine config
         # {} = empty dict, override to ignore the machine config
         return benedict({
         return benedict({

+ 2 - 1
archivebox/pkgs/abx-spec-django/abx_spec_django.py

@@ -1,4 +1,3 @@
-__order__ = 300
 
 
 import abx
 import abx
 from typing import List, Dict, Any, cast
 from typing import List, Dict, Any, cast
@@ -6,6 +5,8 @@ from typing import List, Dict, Any, cast
 ###########################################################################################
 ###########################################################################################
 
 
 class DjangoPluginSpec:
 class DjangoPluginSpec:
+    __order__ = 10
+    
     @abx.hookspec
     @abx.hookspec
     def get_INSTALLED_APPS() -> List[str]:
     def get_INSTALLED_APPS() -> List[str]:
         return ['abx_spec_django']
         return ['abx_spec_django']

+ 4 - 2
archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py

@@ -1,10 +1,12 @@
+__order__ = 10
+
 import os
 import os
 
 
 from typing import Optional, List, Annotated, Tuple
 from typing import Optional, List, Annotated, Tuple
 from pathlib import Path
 from pathlib import Path
 
 
 from pydantic import AfterValidator
 from pydantic import AfterValidator
-from pydantic_pkgr import BinName
+from abx_pkg import BinName
 
 
 
 
 import abx
 import abx
@@ -23,7 +25,7 @@ CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_em
 @abx.hookspec
 @abx.hookspec
 @abx.hookimpl
 @abx.hookimpl
 def get_EXTRACTORS():
 def get_EXTRACTORS():
-    return []
+    return {}
 
 
 @abx.hookspec
 @abx.hookspec
 @abx.hookimpl
 @abx.hookimpl

+ 0 - 0
archivebox/pkgs/abx-spec-pydantic-pkgr/README.md


+ 0 - 114
archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py

@@ -1,114 +0,0 @@
-__order__ = 200
-
-import os
-
-from typing import Dict, cast
-from pathlib import Path
-
-from pydantic_pkgr import Binary, BinProvider
-
-import abx
-
-from abx_spec_config import ConfigPluginSpec
-
-###########################################################################################
-
-class PydanticPkgrPluginSpec:
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_LIB_DIR(self) -> Path:
-        """Get the directory where shared runtime libraries/dependencies should be installed"""
-        FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
-        LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx'))
-        return LIB_DIR
-        
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_BIN_DIR(self) -> Path:
-        """Get the directory where binaries should be symlinked to"""
-        FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
-        LIB_DIR = pm.hook.get_LIB_DIR()
-        BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin')
-        return BIN_DIR
-    
-    @abx.hookspec
-    @abx.hookimpl
-    def get_BINPROVIDERS(self) -> Dict[str, BinProvider]:
-        return {
-            # to be implemented by plugins, e.g.:
-            # 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')),
-        }
-
-    @abx.hookspec
-    @abx.hookimpl
-    def get_BINARIES(self) -> Dict[str, Binary]:
-        return {
-            # to be implemented by plugins, e.g.:
-            # 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]),
-        }
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider:
-        """Get a specific BinProvider by name"""
-        return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name]
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def get_BINARY(self, bin_name: str) -> Binary:
-        """Get a specific Binary by name"""
-        return abx.as_dict(pm.hook.get_BINARIES())[bin_name]
-
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_load(self, binary: Binary, **kwargs) -> Binary:
-        """Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)"""
-        loaded_binary = binary.load(**kwargs)
-        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
-        return loaded_binary
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_install(self, binary: Binary, **kwargs) -> Binary:
-        """Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)"""
-        loaded_binary = binary.install(**kwargs)
-        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
-        return loaded_binary
-        
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary:
-        """Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)"""
-        loaded_binary = binary.load_or_install(**kwargs)
-        pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
-        return loaded_binary
-
-    @abx.hookspec(firstresult=True)
-    @abx.hookimpl
-    def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None):
-        if not (binary.abspath and os.path.isfile(binary.abspath)):
-            return
-                
-        BIN_DIR = pm.hook.get_BIN_DIR()
-        try:
-            BIN_DIR.mkdir(parents=True, exist_ok=True)
-            symlink = BIN_DIR / binary.name
-            symlink.unlink(missing_ok=True)
-            symlink.symlink_to(binary.abspath)
-            symlink.chmod(0o777)   # make sure its executable by everyone
-        except Exception:
-            # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
-            # not actually needed, we can just run without it
-            pass
-
-
-PLUGIN_SPEC = PydanticPkgrPluginSpec
-
-
-class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec):
-    pass
-
-TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable]
-pm = cast(TypedPluginManager, abx.pm)

+ 0 - 17
archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml

@@ -1,17 +0,0 @@
-[project]
-name = "abx-spec-pydantic-pkgr"
-version = "0.1.0"
-description = "The ABX plugin specification for Binaries and BinProviders"
-readme = "README.md"
-requires-python = ">=3.10"
-dependencies = [
-    "abx>=0.1.0",
-    "pydantic-pkgr>=0.5.4",
-]
-
-[build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
-
-[project.entry-points.abx]
-abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr"

+ 3 - 0
archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py

@@ -25,6 +25,9 @@ class BaseSearchBackend(abc.ABC):
 
 
 
 
 class SearchBackendPluginSpec:
 class SearchBackendPluginSpec:
+    __order__ = 10
+    
+    @staticmethod
     @abx.hookspec
     @abx.hookspec
     @abx.hookimpl
     @abx.hookimpl
     def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]:
     def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]:

+ 17 - 7
archivebox/pkgs/abx/abx.py

@@ -244,10 +244,12 @@ def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int,
         except FileNotFoundError:
         except FileNotFoundError:
             pass
             pass
     
     
+    default_order = 10 if '_spec_' in str(plugin_dir).lower() else 999
+    
     if plugin_module:
     if plugin_module:
-        order = getattr(plugin_module, '__order__', 999)
+        order = getattr(plugin_module, '__order__', default_order)
     else:
     else:
-        order = 999
+        order = default_order
     
     
     assert order is not None
     assert order is not None
     assert plugin_dir
     assert plugin_dir
@@ -270,7 +272,10 @@ def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
     elif inspect.isclass(plugin):
     elif inspect.isclass(plugin):
         module = inspect.getmodule(plugin)
         module = inspect.getmodule(plugin)
     else:
     else:
-        raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
+        plugin = type(plugin)
+        module = inspect.getmodule(plugin)
+        
+        # raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
     
     
     assert module
     assert module
     
     
@@ -416,9 +421,14 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
     PLUGINS_TO_LOAD = []
     PLUGINS_TO_LOAD = []
     LOADED_PLUGINS = {}
     LOADED_PLUGINS = {}
     
     
-    for plugin in plugins:
-        plugin_info = get_plugin(plugin)
-        assert plugin_info, f'No plugin metadata found for {plugin}'
+    plugin_infos = sorted([
+        get_plugin(plugin)
+        for plugin in plugins
+    ], key=lambda plugin: plugin.get('order', 999))
+    
+    
+    for plugin_info in plugin_infos:
+        assert plugin_info, 'No plugin metadata found for plugin'
         assert 'id' in plugin_info and 'module' in plugin_info
         assert 'id' in plugin_info and 'module' in plugin_info
         if plugin_info['module'] in pm.get_plugins():
         if plugin_info['module'] in pm.get_plugins():
             LOADED_PLUGINS[plugin_info['id']] = plugin_info
             LOADED_PLUGINS[plugin_info['id']] = plugin_info
@@ -431,7 +441,7 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
     for plugin_info in PLUGINS_TO_LOAD:
     for plugin_info in PLUGINS_TO_LOAD:
         pm.register(plugin_info['module'])
         pm.register(plugin_info['module'])
         LOADED_PLUGINS[plugin_info['id']] = plugin_info
         LOADED_PLUGINS[plugin_info['id']] = plugin_info
-        # print(f'    √ Loaded plugin: {plugin_id}')
+        print(f'    √ Loaded plugin: {plugin_info["id"]}')
     return benedict(LOADED_PLUGINS)
     return benedict(LOADED_PLUGINS)
 
 
 @cache
 @cache

+ 1 - 0
archivebox/seeds/__init__.py

@@ -1,5 +1,6 @@
 
 
 __package__ = 'archivebox.seeds'
 __package__ = 'archivebox.seeds'
+__order__ = 100
 
 
 import abx
 import abx
 
 

+ 5 - 5
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 [project]
 name = "archivebox"
 name = "archivebox"
-version = "0.8.6rc1"
+version = "0.8.6rc2"
 requires-python = ">=3.10"
 requires-python = ">=3.10"
 description = "Self-hosted internet archiving solution."
 description = "Self-hosted internet archiving solution."
 authors = [{name = "Nick Sweeting", email = "[email protected]"}]
 authors = [{name = "Nick Sweeting", email = "[email protected]"}]
@@ -80,13 +80,13 @@ dependencies = [
     "django-taggit==6.1.0",
     "django-taggit==6.1.0",
     "base32-crockford==0.3.0",
     "base32-crockford==0.3.0",
     "platformdirs>=4.3.6",
     "platformdirs>=4.3.6",
-    "pydantic-pkgr>=0.5.4",
+    "abx-pkg>=0.6.0",
     "pocket>=0.3.6",
     "pocket>=0.3.6",
     "sonic-client>=1.0.0",
     "sonic-client>=1.0.0",
     "yt-dlp>=2024.8.6", # for: media"
     "yt-dlp>=2024.8.6", # for: media"
     ############# Plugin Dependencies ################
     ############# Plugin Dependencies ################
     "abx>=0.1.0",
     "abx>=0.1.0",
-    "abx-spec-pydantic-pkgr>=0.1.0",
+    "abx-spec-abx-pkg>=0.1.1",
     "abx-spec-config>=0.1.0",
     "abx-spec-config>=0.1.0",
     "abx-spec-archivebox>=0.1.0",
     "abx-spec-archivebox>=0.1.0",
     "abx-spec-django>=0.1.0",
     "abx-spec-django>=0.1.0",
@@ -178,10 +178,10 @@ dev-dependencies = [
 ]
 ]
 
 
 [tool.uv.sources]
 [tool.uv.sources]
-# pydantic-pkgr = { workspace = true }
+# abx-pkg = { workspace = true }
 
 
 abx = { workspace = true }
 abx = { workspace = true }
-abx-spec-pydantic-pkgr = { workspace = true }
+abx-spec-abx-pkg = { workspace = true }
 abx-spec-config = { workspace = true }
 abx-spec-config = { workspace = true }
 abx-spec-archivebox = { workspace = true }
 abx-spec-archivebox = { workspace = true }
 abx-spec-django = { workspace = true }
 abx-spec-django = { workspace = true }

+ 61 - 69
requirements.txt

@@ -1,6 +1,6 @@
 # This file was autogenerated by uv via the following command:
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml --all-extras -o requirements.txt
 #    uv pip compile pyproject.toml --all-extras -o requirements.txt
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-archivedotorg
     #   abx-plugin-archivedotorg
@@ -24,65 +24,65 @@
     #   abx-plugin-title
     #   abx-plugin-title
     #   abx-plugin-wget
     #   abx-plugin-wget
     #   abx-plugin-ytdlp
     #   abx-plugin-ytdlp
+    #   abx-spec-abx-pkg
     #   abx-spec-archivebox
     #   abx-spec-archivebox
     #   abx-spec-config
     #   abx-spec-config
     #   abx-spec-django
     #   abx-spec-django
     #   abx-spec-extractor
     #   abx-spec-extractor
-    #   abx-spec-pydantic-pkgr
     #   abx-spec-searchbackend
     #   abx-spec-searchbackend
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-archivedotorg
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-archivedotorg
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-chrome
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-chrome
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-curl
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-curl
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-archivedotorg
     #   abx-plugin-archivedotorg
     #   abx-plugin-favicon
     #   abx-plugin-favicon
     #   abx-plugin-title
     #   abx-plugin-title
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-default-binproviders
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-default-binproviders
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-git
     #   abx-plugin-git
     #   abx-plugin-npm
     #   abx-plugin-npm
     #   abx-plugin-pip
     #   abx-plugin-pip
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-favicon
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-favicon
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-git
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-git
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-htmltotext
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-htmltotext
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ldap-auth
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ldap-auth
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-mercury
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-mercury
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-npm
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-npm
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-pip
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-pip
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-playwright
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-playwright
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-puppeteer
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-puppeteer
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-readability
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-readability
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ripgrep-search
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ripgrep-search
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-singlefile
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-singlefile
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sonic-search
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sonic-search
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sqlitefts-search
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sqlitefts-search
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-title
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-title
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-wget
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-wget
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ytdlp
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ytdlp
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-archivebox
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-archivebox
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-config
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-config
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-archivedotorg
     #   abx-plugin-archivedotorg
@@ -105,13 +105,13 @@
     #   abx-plugin-title
     #   abx-plugin-title
     #   abx-plugin-wget
     #   abx-plugin-wget
     #   abx-plugin-ytdlp
     #   abx-plugin-ytdlp
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-django
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-django
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-ldap-auth
     #   abx-plugin-ldap-auth
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-extractor
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-extractor
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-pydantic-pkgr
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-abx-pkg
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-chrome
     #   abx-plugin-chrome
@@ -126,12 +126,24 @@
     #   abx-plugin-sonic-search
     #   abx-plugin-sonic-search
     #   abx-plugin-wget
     #   abx-plugin-wget
     #   abx-plugin-ytdlp
     #   abx-plugin-ytdlp
--e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-searchbackend
+-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-searchbackend
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx-plugin-ripgrep-search
     #   abx-plugin-ripgrep-search
     #   abx-plugin-sonic-search
     #   abx-plugin-sonic-search
     #   abx-plugin-sqlitefts-search
     #   abx-plugin-sqlitefts-search
+abx-pkg==0.6.0
+    # via
+    #   archivebox (pyproject.toml)
+    #   abx-plugin-default-binproviders
+    #   abx-plugin-npm
+    #   abx-plugin-pip
+    #   abx-plugin-playwright
+    #   abx-plugin-puppeteer
+    #   abx-plugin-singlefile
+    #   abx-plugin-sonic-search
+    #   abx-plugin-ytdlp
+    #   abx-spec-abx-pkg
 annotated-types==0.7.0
 annotated-types==0.7.0
     # via pydantic
     # via pydantic
 anyio==4.6.2.post1
 anyio==4.6.2.post1
@@ -159,11 +171,9 @@ base32-crockford==0.3.0
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 beautifulsoup4==4.12.3
 beautifulsoup4==4.12.3
     # via python-benedict
     # via python-benedict
-brotli==1.1.0
-    # via yt-dlp
-bx-django-utils==81
+bx-django-utils==82
     # via django-huey-monitor
     # via django-huey-monitor
-bx-py-utils==105
+bx-py-utils==106
     # via
     # via
     #   bx-django-utils
     #   bx-django-utils
     #   django-huey-monitor
     #   django-huey-monitor
@@ -172,10 +182,9 @@ certifi==2024.8.30
     #   httpcore
     #   httpcore
     #   httpx
     #   httpx
     #   requests
     #   requests
-    #   yt-dlp
 cffi==1.17.1
 cffi==1.17.1
     # via cryptography
     # via cryptography
-channels==4.1.0
+channels==4.2.0
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 charset-normalizer==3.4.0
 charset-normalizer==3.4.0
     # via requests
     # via requests
@@ -197,7 +206,7 @@ decorator==5.1.1
     # via
     # via
     #   ipdb
     #   ipdb
     #   ipython
     #   ipython
-django==5.1.2
+django==5.1.3
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   abx
     #   abx
@@ -270,7 +279,7 @@ ftfy==6.3.1
     # via python-benedict
     # via python-benedict
 h11==0.14.0
 h11==0.14.0
     # via httpcore
     # via httpcore
-httpcore==1.0.6
+httpcore==1.0.7
     # via httpx
     # via httpx
 httpx==0.27.2
 httpx==0.27.2
     # via django-signal-webhooks
     # via django-signal-webhooks
@@ -297,7 +306,7 @@ ipython==8.29.0
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   ipdb
     #   ipdb
-jedi==0.19.1
+jedi==0.19.2
     # via ipython
     # via ipython
 libcst==1.5.0
 libcst==1.5.0
     # via django-autotyping
     # via django-autotyping
@@ -309,8 +318,6 @@ matplotlib-inline==0.1.7
     # via ipython
     # via ipython
 mdurl==0.1.2
 mdurl==0.1.2
     # via markdown-it-py
     # via markdown-it-py
-mutagen==1.47.0
-    # via yt-dlp
 mypy-extensions==1.0.0
 mypy-extensions==1.0.0
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 openpyxl==3.1.5
 openpyxl==3.1.5
@@ -319,12 +326,14 @@ parso==0.8.4
     # via jedi
     # via jedi
 pexpect==4.9.0
 pexpect==4.9.0
     # via ipython
     # via ipython
-phonenumbers==8.13.48
+phonenumbers==8.13.50
     # via python-benedict
     # via python-benedict
+pip==24.3.1
+    # via abx-pkg
 platformdirs==4.3.6
 platformdirs==4.3.6
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
-    #   pydantic-pkgr
+    #   abx-pkg
 pluggy==1.5.0
 pluggy==1.5.0
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
@@ -352,34 +361,20 @@ pyasn1-modules==0.4.1
     #   service-identity
     #   service-identity
 pycparser==2.22
 pycparser==2.22
     # via cffi
     # via cffi
-pycryptodomex==3.21.0
-    # via yt-dlp
 pydantic==2.9.2
 pydantic==2.9.2
     # via
     # via
+    #   abx-pkg
     #   abx-plugin-playwright
     #   abx-plugin-playwright
     #   abx-spec-config
     #   abx-spec-config
     #   abx-spec-extractor
     #   abx-spec-extractor
     #   abx-spec-searchbackend
     #   abx-spec-searchbackend
     #   django-ninja
     #   django-ninja
     #   django-pydantic-field
     #   django-pydantic-field
-    #   pydantic-pkgr
     #   pydantic-settings
     #   pydantic-settings
 pydantic-core==2.23.4
 pydantic-core==2.23.4
     # via
     # via
+    #   abx-pkg
     #   pydantic
     #   pydantic
-    #   pydantic-pkgr
-pydantic-pkgr==0.5.4
-    # via
-    #   archivebox (pyproject.toml)
-    #   abx-plugin-default-binproviders
-    #   abx-plugin-npm
-    #   abx-plugin-pip
-    #   abx-plugin-playwright
-    #   abx-plugin-puppeteer
-    #   abx-plugin-singlefile
-    #   abx-plugin-sonic-search
-    #   abx-plugin-ytdlp
-    #   abx-spec-pydantic-pkgr
 pydantic-settings==2.6.1
 pydantic-settings==2.6.1
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
@@ -414,6 +409,8 @@ python-ldap==3.4.4
     #   django-auth-ldap
     #   django-auth-ldap
 python-slugify==8.0.4
 python-slugify==8.0.4
     # via python-benedict
     # via python-benedict
+python-statemachine==2.4.0
+    # via archivebox (pyproject.toml)
 python-stdnum==1.20
 python-stdnum==1.20
     # via bx-django-utils
     # via bx-django-utils
 pytz==2024.2
 pytz==2024.2
@@ -424,14 +421,13 @@ pyyaml==6.0.2
     # via
     # via
     #   libcst
     #   libcst
     #   python-benedict
     #   python-benedict
-regex==2024.9.11
+regex==2024.11.6
     # via dateparser
     # via dateparser
 requests==2.32.3
 requests==2.32.3
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   pocket
     #   pocket
     #   python-benedict
     #   python-benedict
-    #   yt-dlp
 requests-tracker==0.3.3
 requests-tracker==0.3.3
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 rich==13.9.4
 rich==13.9.4
@@ -443,7 +439,7 @@ rich-argparse==1.6.0
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 service-identity==24.2.0
 service-identity==24.2.0
     # via twisted
     # via twisted
-setuptools==75.3.0
+setuptools==75.5.0
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
     #   autobahn
     #   autobahn
@@ -464,7 +460,7 @@ sonic-client==1.0.0
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 soupsieve==2.6
 soupsieve==2.6
     # via beautifulsoup4
     # via beautifulsoup4
-sqlparse==0.5.1
+sqlparse==0.5.2
     # via
     # via
     #   django
     #   django
     #   django-debug-toolbar
     #   django-debug-toolbar
@@ -492,12 +488,12 @@ types-pyyaml==6.0.12.20240917
 typing-extensions==4.12.2
 typing-extensions==4.12.2
     # via
     # via
     #   archivebox (pyproject.toml)
     #   archivebox (pyproject.toml)
+    #   abx-pkg
     #   django-pydantic-field
     #   django-pydantic-field
     #   django-stubs
     #   django-stubs
     #   django-stubs-ext
     #   django-stubs-ext
     #   pydantic
     #   pydantic
     #   pydantic-core
     #   pydantic-core
-    #   pydantic-pkgr
     #   twisted
     #   twisted
 tzdata==2024.2
 tzdata==2024.2
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
@@ -506,9 +502,7 @@ tzlocal==5.2
 ulid-py==1.1.0
 ulid-py==1.1.0
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 urllib3==2.2.3
 urllib3==2.2.3
-    # via
-    #   requests
-    #   yt-dlp
+    # via requests
 uuid6==2024.7.10
 uuid6==2024.7.10
     # via typeid-python
     # via typeid-python
 w3lib==2.2.1
 w3lib==2.2.1
@@ -517,13 +511,11 @@ wcwidth==0.2.13
     # via
     # via
     #   ftfy
     #   ftfy
     #   prompt-toolkit
     #   prompt-toolkit
-websockets==13.1
-    # via yt-dlp
 xlrd==2.0.1
 xlrd==2.0.1
     # via python-benedict
     # via python-benedict
 xmltodict==0.14.2
 xmltodict==0.14.2
     # via python-benedict
     # via python-benedict
-yt-dlp==2024.10.22
+yt-dlp==2024.11.4
     # via archivebox (pyproject.toml)
     # via archivebox (pyproject.toml)
 zope-interface==7.1.1
 zope-interface==7.1.1
     # via twisted
     # via twisted

File diff suppressed because it is too large
+ 165 - 283
uv.lock


Some files were not shown because too many files changed in this diff