Browse Source

merge queues and actors apps into new workers app

Nick Sweeting 1 year ago
parent
commit
e469c5a344
37 changed files with 89 additions and 304 deletions
  1. 1 2
      archivebox/__init__.py
  2. 0 2
      archivebox/actors/__init__.py
  3. 0 3
      archivebox/actors/admin.py
  4. 0 6
      archivebox/actors/apps.py
  5. 0 202
      archivebox/actors/templates/jobs_dashboard.html
  6. 1 1
      archivebox/api/v1_api.py
  7. 4 4
      archivebox/api/v1_workers.py
  8. 1 1
      archivebox/cli/archivebox_add.py
  9. 1 1
      archivebox/cli/archivebox_update.py
  10. 6 7
      archivebox/config/views.py
  11. 1 1
      archivebox/core/admin_snapshots.py
  12. 2 2
      archivebox/core/models.py
  13. 2 3
      archivebox/core/settings.py
  14. 1 1
      archivebox/core/statemachines.py
  15. 1 1
      archivebox/core/urls.py
  16. 1 1
      archivebox/core/views.py
  17. 1 1
      archivebox/crawls/models.py
  18. 1 1
      archivebox/crawls/statemachines.py
  19. 4 4
      archivebox/main.py
  20. 20 20
      archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py
  21. 5 5
      archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py
  22. 10 10
      archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py
  23. 0 8
      archivebox/queues/__init__.py
  24. 0 0
      archivebox/queues/migrations/__init__.py
  25. 9 0
      archivebox/workers/__init__.py
  26. 1 1
      archivebox/workers/actor.py
  27. 1 1
      archivebox/workers/admin.py
  28. 2 2
      archivebox/workers/apps.py
  29. 1 1
      archivebox/workers/management/commands/orchestrator.py
  30. 0 0
      archivebox/workers/migrations/__init__.py
  31. 9 9
      archivebox/workers/models.py
  32. 1 1
      archivebox/workers/orchestrator.py
  33. 0 0
      archivebox/workers/semaphores.py
  34. 1 1
      archivebox/workers/supervisor_util.py
  35. 1 1
      archivebox/workers/tasks.py
  36. 0 0
      archivebox/workers/tests.py
  37. 0 0
      archivebox/workers/views.py

+ 1 - 2
archivebox/__init__.py

@@ -82,11 +82,10 @@ ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
 # Load all built-in ArchiveBox plugins
 # Load all built-in ArchiveBox plugins
 ARCHIVEBOX_BUILTIN_PLUGINS = {
 ARCHIVEBOX_BUILTIN_PLUGINS = {
     'config': PACKAGE_DIR / 'config',
     'config': PACKAGE_DIR / 'config',
+    'workers': PACKAGE_DIR / 'workers',
     'core': PACKAGE_DIR / 'core',
     'core': PACKAGE_DIR / 'core',
     'crawls': PACKAGE_DIR / 'crawls',
     'crawls': PACKAGE_DIR / 'crawls',
-    'queues': PACKAGE_DIR / 'queues',
     'seeds': PACKAGE_DIR / 'seeds',
     'seeds': PACKAGE_DIR / 'seeds',
-    'actors': PACKAGE_DIR / 'actors',
     # 'search': PACKAGE_DIR / 'search',
     # 'search': PACKAGE_DIR / 'search',
     # 'core': PACKAGE_DIR / 'core',
     # 'core': PACKAGE_DIR / 'core',
 }
 }

+ 0 - 2
archivebox/actors/__init__.py

@@ -1,2 +0,0 @@
-__package__ = 'archivebox.actors'
-__order__ = 100

+ 0 - 3
archivebox/actors/admin.py

@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.

+ 0 - 6
archivebox/actors/apps.py

@@ -1,6 +0,0 @@
-from django.apps import AppConfig
-
-
-class ActorsConfig(AppConfig):
-    default_auto_field = "django.db.models.BigAutoField"
-    name = "actors"

+ 0 - 202
archivebox/actors/templates/jobs_dashboard.html

@@ -1,202 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Job Dashboard</title>
-    <style>
-        body {
-            font-family: Arial, sans-serif;
-            line-height: 1.6;
-            color: #333;
-            width: 100%;
-            margin: 0 auto;
-            padding: 20px;
-        }
-        @keyframes pulse {
-            0% { opacity: 1; }
-            48% { opacity: 0.2; }
-            52% { opacity: 1; }
-            100% { opacity: 1; }
-        }
-        h1 {
-            text-align: center;
-        }
-        h1 a {
-            animation: pulse 1s;
-        }
-        .dashboard {
-            display: grid;
-            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
-            gap: 20px;
-        }
-        .card {
-            border: 1px solid #ddd;
-            border-radius: 8px;
-            padding: 15px;
-            background-color: #f9f9f9;
-        }
-        .card h2 {
-            margin-top: 0;
-            border-bottom: 2px solid #ddd;
-            padding-bottom: 10px;
-            font-family: monospace;
-        }
-        .scroll-area {
-            /*height: 800px;
-            overflow-y: scroll; */
-            height: auto;
-            border: 1px solid #ddd;
-            padding: 10px;
-            background-color: #fff;
-        }
-        .job-item {
-            border: 1px solid #eee;
-            border-radius: 4px;
-            padding: 10px;
-            margin-bottom: 10px;
-        }
-        .job-item:last-child {
-            margin-bottom: 0;
-        }
-        .badge {
-            display: inline-block;
-            padding: 3px 7px;
-            border-radius: 3px;
-            font-size: 12px;
-            font-weight: bold;
-        }
-        .badge-started {
-            background-color: #4CAF50;
-            color: white;
-        }
-        .badge-queued {
-            background-color: #2196F3;
-            color: white;
-        }
-        .badge-failed {
-            background-color: #f44336;
-            color: white;
-        }
-        .badge-succeeded {
-            background-color: #666;
-            color: white;
-        }
-        .badge-sealed {
-            background-color: #666;
-            color: white;
-        }
-        .date {
-            font-size: 16px;
-            color: #666;
-            float: right;
-        }
-    </style>
-</head>
-<body>
-    <h1>Job Dashboard <small><a href="?refresh=true" id="current-time">♻️ {{now}}}</a></small></h1>
-    <div id="dashboard" class="dashboard"></div>
-
-    <script>
-        function formatDate(dateString) {
-            const now = Date.now()
-            const date = new Date(dateString)
-            // return new Date(dateString).toLocaleString();
-            // return date.toISOString().split('T').at(-1).replace('Z', '');
-            const seconds_diff = Math.round((date - now) / 1000, 0)
-            if (seconds_diff < 0) {
-                return `${seconds_diff}s ago`;
-            } else {
-                return `${seconds_diff}s in the future`;
-            }
-        }
-
-        function createJobElement(job) {
-            const jobElement = document.createElement('div');
-            jobElement.className = 'job-item';
-            jobElement.innerHTML = `
-                <p><a href="/api/v1/core/any/${job.abid}?api_key={{api_token|default:'NONE PROVIDED BY VIEW'}}"><code>${job.abid}</code></a></p>
-                <p>
-                    <span class="badge badge-${job.status}">${job.status}</span>
-                    <span class="date">♻️ ${formatDate(job.retry_at)}</span>
-                </p>
-                <p style="font-size: 12px; color: #666;">${job.description}</p>
-            `;
-            return jobElement;
-        }
-
-        function updateDashboard(data) {
-            const currentTime = document.getElementById('current-time');
-            window.now = new Date();
-            currentTime.innerHTML = `♻️ ${window.now.toISOString().split('T').at(-1).replace('Z', '')}`;
-
-            const dashboard = document.getElementById('dashboard');
-            dashboard.innerHTML = '';
-
-            data.forEach(actor => {
-                const card = document.createElement('div');
-                card.className = 'card';
-                card.innerHTML = `
-                    <h2>${actor.model}</h2>
-                    <hr/>
-                    Future
-                    <div class="scroll-area" style="background-color: white;" id="future-${actor.model}"></div>
-                    <hr/>
-                    Pending
-                    <div class="scroll-area" style="background-color: lightblue;" id="pending-${actor.model}"></div>
-                    <hr/>
-                    Stalled
-                    <div class="scroll-area" style="background-color: lightcoral;" id="stalled-${actor.model}"></div>
-                    <hr/>
-                    Active
-                    <div class="scroll-area" style="background-color: lightgreen;" id="active-${actor.model}"></div>
-                    <hr/>
-                    Past
-                    <div class="scroll-area" style="background-color: lightgrey;" id="past-${actor.model}"></div>
-                `;
-                dashboard.appendChild(card);
-
-                const futureContainer = document.getElementById(`future-${actor.model}`);
-                actor.future.forEach(job => {
-                    futureContainer.appendChild(createJobElement(job));
-                });
-
-                const pendingContainer = document.getElementById(`pending-${actor.model}`);
-                actor.pending.forEach(job => {
-                    pendingContainer.appendChild(createJobElement(job));
-                });
-
-                const stalledContainer = document.getElementById(`stalled-${actor.model}`);
-                actor.stalled.forEach(job => {
-                    stalledContainer.appendChild(createJobElement(job));
-                });
-
-                const activeContainer = document.getElementById(`active-${actor.model}`);
-                actor.active.forEach(job => {
-                    activeContainer.appendChild(createJobElement(job));
-                });
-
-                const pastContainer = document.getElementById(`past-${actor.model}`);
-                actor.past.forEach(job => {
-                    pastContainer.appendChild(createJobElement(job));
-                });
-            });
-        }
-
-        function fetchData() {
-            fetch('/api/v1/jobs/actors', {
-                headers: {
-                    'Authorization': `Bearer {{api_token|default:'NONE PROVIDED BY VIEW'}}`
-                }
-            })
-                .then(response => response.json())
-                .then(data => updateDashboard(data))
-                .catch(error => console.error('Error fetching data:', error));
-        }
-
-        fetchData();
-
-        setInterval(fetchData, 750);
-    </script>
-</body>
-</html>

+ 1 - 1
archivebox/api/v1_api.py

@@ -41,7 +41,7 @@ def register_urls(api: NinjaAPI) -> NinjaAPI:
     api.add_router('/core/',     'api.v1_core.router')
     api.add_router('/core/',     'api.v1_core.router')
     api.add_router('/crawls/',   'api.v1_crawls.router')
     api.add_router('/crawls/',   'api.v1_crawls.router')
     api.add_router('/cli/',      'api.v1_cli.router')
     api.add_router('/cli/',      'api.v1_cli.router')
-    api.add_router('/jobs/',     'api.v1_actors.router')
+    api.add_router('/workers/',  'api.v1_workers.router')
     return api
     return api
 
 
 
 

+ 4 - 4
archivebox/api/v1_actors.py → archivebox/api/v1_workers.py

@@ -31,7 +31,7 @@ class TaskSchema(Schema):
 
 
 
 
 class ActorSchema(Schema):
 class ActorSchema(Schema):
-    # TYPE: str = 'actors.actor.ActorType'
+    # TYPE: str = 'workers.actor.ActorType'
 
 
     # name: str
     # name: str
     #pid: int | None
     #pid: int | None
@@ -97,7 +97,7 @@ class ActorSchema(Schema):
 
 
 
 
 class OrchestratorSchema(Schema):
 class OrchestratorSchema(Schema):
-    # TYPE: str = 'actors.orchestrator.Orchestrator'
+    # TYPE: str = 'workers.orchestrator.Orchestrator'
 
 
     #pid: int | None
     #pid: int | None
     exit_on_idle: bool
     exit_on_idle: bool
@@ -114,7 +114,7 @@ class OrchestratorSchema(Schema):
 def get_orchestrators(request):
 def get_orchestrators(request):
     """List all the task orchestrators (aka Orchestrators) that are currently running"""
     """List all the task orchestrators (aka Orchestrators) that are currently running"""
 
 
-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     orchestrator = Orchestrator()
     orchestrator = Orchestrator()
 
 
     return [orchestrator]
     return [orchestrator]
@@ -124,6 +124,6 @@ def get_orchestrators(request):
 def get_actors(request):
 def get_actors(request):
     """List all the task consumer workers (aka Actors) that are currently running"""
     """List all the task consumer workers (aka Actors) that are currently running"""
 
 
-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     orchestrator = Orchestrator()
     orchestrator = Orchestrator()
     return orchestrator.actor_types.values()
     return orchestrator.actor_types.values()

+ 1 - 1
archivebox/cli/archivebox_add.py

@@ -53,7 +53,7 @@ def add(urls: str | list[str],
     
     
     from seeds.models import Seed
     from seeds.models import Seed
     from crawls.models import Crawl
     from crawls.models import Crawl
-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     from abid_utils.models import get_or_create_system_user_pk
     from abid_utils.models import get_or_create_system_user_pk
 
 
 
 

+ 1 - 1
archivebox/cli/archivebox_update.py

@@ -28,7 +28,7 @@ def update():
     from archivebox.config.django import setup_django
     from archivebox.config.django import setup_django
     setup_django()
     setup_django()
     
     
-    from actors.orchestrator import Orchestrator
+    from workers.orchestrator import Orchestrator
     orchestrator = Orchestrator(exit_on_idle=False)
     orchestrator = Orchestrator(exit_on_idle=False)
     orchestrator.start()
     orchestrator.start()
 
 

+ 6 - 7
archivebox/config/views.py

@@ -7,7 +7,6 @@ from typing import Any, List, Dict, cast
 from benedict import benedict
 from benedict import benedict
 
 
 from django.http import HttpRequest
 from django.http import HttpRequest
-from django.conf import settings
 from django.utils import timezone
 from django.utils import timezone
 from django.utils.html import format_html, mark_safe
 from django.utils.html import format_html, mark_safe
 
 
@@ -304,7 +303,7 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
         "Exit Status": [],
         "Exit Status": [],
     }
     }
     
     
-    from queues.supervisor_util import get_existing_supervisord_process
+    from workers.supervisor_util import get_existing_supervisord_process
     
     
     supervisor = get_existing_supervisord_process()
     supervisor = get_existing_supervisord_process()
     if supervisor is None:
     if supervisor is None:
@@ -374,8 +373,10 @@ def worker_list_view(request: HttpRequest, **kwargs) -> TableContext:
 def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."
 
 
-    from queues.supervisor_util import get_existing_supervisord_process, get_worker
-    from queues.settings import SUPERVISORD_CONFIG_FILE
+    from workers.supervisor_util import get_existing_supervisord_process, get_worker, get_sock_file, CONFIG_FILE_NAME
+
+    SOCK_FILE = get_sock_file()
+    CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
 
 
     supervisor = get_existing_supervisord_process()
     supervisor = get_existing_supervisord_process()
     if supervisor is None:
     if supervisor is None:
@@ -388,7 +389,7 @@ def worker_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
     all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
     all_config = cast(List[Dict[str, Any]], supervisor.getAllConfigInfo() or [])
 
 
     if key == 'supervisord':
     if key == 'supervisord':
-        relevant_config = SUPERVISORD_CONFIG_FILE.read_text()
+        relevant_config = CONFIG_FILE.read_text()
         relevant_logs = cast(str, supervisor.readLog(0, 10_000_000))
         relevant_logs = cast(str, supervisor.readLog(0, 10_000_000))
         start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
         start_ts = [line for line in relevant_logs.split("\n") if "RPC interface 'supervisor' initialized" in line][-1].split(",", 1)[0]
         uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]
         uptime = str(timezone.now() - parse_date(start_ts)).split(".")[0]
@@ -475,8 +476,6 @@ def log_list_view(request: HttpRequest, **kwargs) -> TableContext:
 @render_with_item_view
 @render_with_item_view
 def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
 def log_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."
     assert request.user.is_superuser, "Must be a superuser to view configuration settings."
-
-    from django.conf import settings
     
     
     log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
     log_file = [logfile for logfile in CONSTANTS.LOGS_DIR.glob('*.log') if key in logfile.name][0]
 
 

+ 1 - 1
archivebox/core/admin_snapshots.py

@@ -25,7 +25,7 @@ from archivebox.extractors import archive_links
 from archivebox.main import remove
 from archivebox.main import remove
 
 
 from archivebox.abid_utils.admin import ABIDModelAdmin
 from archivebox.abid_utils.admin import ABIDModelAdmin
-from archivebox.queues.tasks import bg_archive_links, bg_add
+from archivebox.workers.tasks import bg_archive_links, bg_add
 
 
 from core.models import Tag
 from core.models import Tag
 from core.admin_tags import TagInline
 from core.admin_tags import TagInline

+ 2 - 2
archivebox/core/models.py

@@ -26,8 +26,8 @@ import abx
 from archivebox.config import CONSTANTS
 from archivebox.config import CONSTANTS
 
 
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithOutputDir
 from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithOutputDir
-from actors.models import ModelWithStateMachine
-from queues.tasks import bg_archive_snapshot
+from workers.models import ModelWithStateMachine
+from workers.tasks import bg_archive_snapshot
 from crawls.models import Crawl
 from crawls.models import Crawl
 # from machine.models import Machine, NetworkInterface
 # from machine.models import Machine, NetworkInterface
 
 

+ 2 - 3
archivebox/core/settings.py

@@ -61,11 +61,10 @@ INSTALLED_APPS = [
     'django_object_actions',     # provides easy Django Admin action buttons on change views       https://github.com/crccheck/django-object-actions
     'django_object_actions',     # provides easy Django Admin action buttons on change views       https://github.com/crccheck/django-object-actions
 
 
     # Our ArchiveBox-provided apps
     # Our ArchiveBox-provided apps
-    # 'abid_utils',                # handles ABID ID creation, handling, and models
+    # 'abid_utils',              # handles ABID ID creation, handling, and models
     'config',                    # ArchiveBox config settings (loaded as a plugin, don't need to add it here) 
     'config',                    # ArchiveBox config settings (loaded as a plugin, don't need to add it here) 
     'machine',                   # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
     'machine',                   # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
-    'actors',                    # handles starting and managing background workers and processes (orchestrators and actors)
-    'queues',                    # handles starting and managing background workers and processes (supervisord)
+    'workers',                   # handles starting and managing background workers and processes (orchestrators and actors)
     'seeds',                     # handles Seed model and URL source management
     'seeds',                     # handles Seed model and URL source management
     'crawls',                    # handles Crawl and CrawlSchedule models and management
     'crawls',                    # handles Crawl and CrawlSchedule models and management
     'personas',                  # handles Persona and session management
     'personas',                  # handles Persona and session management

+ 1 - 1
archivebox/core/statemachines.py

@@ -8,7 +8,7 @@ from django.utils import timezone
 
 
 from statemachine import State, StateMachine
 from statemachine import State, StateMachine
 
 
-from actors.actor import ActorType
+from workers.actor import ActorType
 
 
 from core.models import Snapshot, ArchiveResult
 from core.models import Snapshot, ArchiveResult
 
 

+ 1 - 1
archivebox/core/urls.py

@@ -10,7 +10,7 @@ from archivebox.misc.serve_static import serve_static
 from core.admin_site import archivebox_admin
 from core.admin_site import archivebox_admin
 from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
 from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
 
 
-from actors.views import JobsDashboardView
+from workers.views import JobsDashboardView
 
 
 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
 # from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
 # from archivebox.config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE

+ 1 - 1
archivebox/core/views.py

@@ -25,7 +25,7 @@ import archivebox
 from core.models import Snapshot
 from core.models import Snapshot
 from core.forms import AddLinkForm
 from core.forms import AddLinkForm
 
 
-from queues.tasks import bg_add
+from workers.tasks import bg_add
 
 
 from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
 from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
 from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
 from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG

+ 1 - 1
archivebox/crawls/models.py

@@ -10,7 +10,7 @@ from django.conf import settings
 from django.urls import reverse_lazy
 from django.urls import reverse_lazy
 from django.utils import timezone
 from django.utils import timezone
 
 
-from actors.models import ModelWithStateMachine
+from workers.models import ModelWithStateMachine
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
     from core.models import Snapshot, ArchiveResult
     from core.models import Snapshot, ArchiveResult

+ 1 - 1
archivebox/crawls/statemachines.py

@@ -6,7 +6,7 @@ from django.utils import timezone
 
 
 from statemachine import State, StateMachine
 from statemachine import State, StateMachine
 
 
-from actors.actor import ActorType
+from workers.actor import ActorType
 from crawls.models import Crawl
 from crawls.models import Crawl
 
 
 
 

+ 4 - 4
archivebox/main.py

@@ -675,8 +675,8 @@ def add(urls: Union[str, List[str]],
     """Add a new URL or list of URLs to your archive"""
     """Add a new URL or list of URLs to your archive"""
 
 
     from core.models import Snapshot, Tag
     from core.models import Snapshot, Tag
-    # from queues.supervisor_util import start_cli_workers, tail_worker_logs
-    # from queues.tasks import bg_archive_link
+    # from workers.supervisor_util import start_cli_workers, tail_worker_logs
+    # from workers.tasks import bg_archive_link
     
     
 
 
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
@@ -873,7 +873,7 @@ def update(resume: Optional[float]=None,
 
 
     from core.models import ArchiveResult
     from core.models import ArchiveResult
     from .search import index_links
     from .search import index_links
-    # from .queues.supervisor_util import start_cli_workers
+    # from workers.supervisor_util import start_cli_workers
     
     
 
 
     check_data_folder()
     check_data_folder()
@@ -1494,7 +1494,7 @@ def server(runserver_args: Optional[List[str]]=None,
             runserver_args.append('--noreload')  # '--insecure'
             runserver_args.append('--noreload')  # '--insecure'
         call_command("runserver", *runserver_args)
         call_command("runserver", *runserver_args)
     else:
     else:
-        from queues.supervisor_util import start_server_workers
+        from workers.supervisor_util import start_server_workers
 
 
         print()
         print()
         start_server_workers(host=host, port=port, daemonize=False)
         start_server_workers(host=host, port=port, daemonize=False)

+ 20 - 20
archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py

@@ -1,27 +1,27 @@
-__package__ = 'abx_plugin_singlefile'
+# __package__ = 'abx_plugin_singlefile'
 
 
-from typing import ClassVar
-from django.db.models import QuerySet
-from django.utils.functional import classproperty
+# from typing import ClassVar
+# from django.db.models import QuerySet
+# from django.utils.functional import classproperty
 
 
-from actors.actor import ActorType
+# from workers.actor import ActorType
 
 
-from .models import SinglefileResult
+# from .models import SinglefileResult
 
 
 
 
-class SinglefileActor(ActorType[SinglefileResult]):
-    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
-    CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
-    CLAIM_SET: ClassVar[str] = 'status = "started"'
+# class SinglefileActor(ActorType[SinglefileResult]):
+#     CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+#     CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
+#     CLAIM_SET: ClassVar[str] = 'status = "started"'
     
     
-    @classproperty
-    def QUERYSET(cls) -> QuerySet:
-        return SinglefileResult.objects.filter(status='queued')
+#     @classproperty
+#     def QUERYSET(cls) -> QuerySet:
+#         return SinglefileResult.objects.filter(status='queued')
 
 
-    def tick(self, obj: SinglefileResult):
-        print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
-        updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
-        if not updated:
-            raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
-        obj.refresh_from_db()
-        obj.save()
+#     def tick(self, obj: SinglefileResult):
+#         print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
+#         updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
+#         if not updated:
+#             raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
+#         obj.refresh_from_db()
+#         obj.save()

+ 5 - 5
archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py

@@ -13,11 +13,11 @@ class WgetExtractor(BaseExtractor):
     name: ExtractorName = 'wget'
     name: ExtractorName = 'wget'
     binary: BinName = WGET_BINARY.name
     binary: BinName = WGET_BINARY.name
 
 
-    def get_output_path(self, snapshot) -> Path | None:
-        wget_index_path = wget_output_path(snapshot.as_link())
-        if wget_index_path:
-            return Path(wget_index_path)
-        return None
+    def get_output_path(self, snapshot) -> str:
+        # wget_index_path = wget_output_path(snapshot.as_link())
+        # if wget_index_path:
+        #     return Path(wget_index_path)
+        return 'wget'
 
 
 WGET_EXTRACTOR = WgetExtractor()
 WGET_EXTRACTOR = WgetExtractor()
 
 

+ 10 - 10
archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py

@@ -65,16 +65,16 @@ class ConfigPluginSpec:
     def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, request=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
     def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, request=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
         """Get the config as it applies to you right now, based on the current context"""
         """Get the config as it applies to you right now, based on the current context"""
         return benedict({
         return benedict({
-            **pm.hook.get_default_config(default=default),
-            **pm.hook.get_machine_config(machine=machine),
-            **pm.hook.get_environment_config(environment=environment),
-            **pm.hook.get_collection_config(collection=collection),
-            **pm.hook.get_user_config(user=user),
-            **pm.hook.get_crawl_config(crawl=crawl),
-            **pm.hook.get_snapshot_config(snapshot=snapshot),
-            **pm.hook.get_archiveresult_config(archiveresult=archiveresult),
-            **pm.hook.get_request_config(request=request),
-            **(extra or {}),
+            **pm.hook.get_default_config(default=default),                       # schema defaults defined in source code
+            **pm.hook.get_machine_config(machine=machine),                       # machine defaults set on the Machine model
+            **pm.hook.get_environment_config(environment=environment),           # env config set for just this run on this machine
+            **pm.hook.get_collection_config(collection=collection),              # collection defaults set in ArchiveBox.conf
+            **pm.hook.get_user_config(user=user),                                # user config set on User model
+            **pm.hook.get_request_config(request=request),                       # extra config derived from the current request
+            **pm.hook.get_crawl_config(crawl=crawl),                             # extra config set on the Crawl model
+            **pm.hook.get_snapshot_config(snapshot=snapshot),                    # extra config set on the Snapshot model
+            **pm.hook.get_archiveresult_config(archiveresult=archiveresult),     # extra config set on the ArchiveResult model
+            **(extra or {}),                                                     # extra config passed in by the caller
         })
         })
         
         
     @staticmethod
     @staticmethod

+ 0 - 8
archivebox/queues/__init__.py

@@ -1,8 +0,0 @@
-__package__ = 'archivebox.queues'
-
-import abx
-
-@abx.hookimpl
-def register_admin(admin_site):
-    from queues.admin import register_admin
-    register_admin(admin_site)

+ 0 - 0
archivebox/queues/migrations/__init__.py


+ 9 - 0
archivebox/workers/__init__.py

@@ -0,0 +1,9 @@
+__package__ = 'archivebox.workers'
+__order__ = 100
+
+import abx
+
+@abx.hookimpl
+def register_admin(admin_site):
+    from workers.admin import register_admin
+    register_admin(admin_site)

+ 1 - 1
archivebox/actors/actor.py → archivebox/workers/actor.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.actors'
+__package__ = 'archivebox.workers'
 
 
 import os
 import os
 import time
 import time

+ 1 - 1
archivebox/queues/admin.py → archivebox/workers/admin.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.queues'
+__package__ = 'archivebox.workers'
 
 
 import abx
 import abx
 
 

+ 2 - 2
archivebox/queues/apps.py → archivebox/workers/apps.py

@@ -1,7 +1,7 @@
 from django.apps import AppConfig
 from django.apps import AppConfig
 
 
 
 
-class QueuesConfig(AppConfig):
+class WorkersConfig(AppConfig):
     default_auto_field = 'django.db.models.BigAutoField'
     default_auto_field = 'django.db.models.BigAutoField'
-    name = 'queues'
+    name = 'workers'
 
 

+ 1 - 1
archivebox/actors/management/commands/orchestrator.py → archivebox/workers/management/commands/orchestrator.py

@@ -2,7 +2,7 @@
 
 
 from django.core.management.base import BaseCommand
 from django.core.management.base import BaseCommand
 
 
-from actors.orchestrator import ArchivingOrchestrator
+from workers.orchestrator import ArchivingOrchestrator
 
 
 
 
 class Command(BaseCommand):
 class Command(BaseCommand):

+ 0 - 0
archivebox/actors/migrations/__init__.py → archivebox/workers/migrations/__init__.py


+ 9 - 9
archivebox/actors/models.py → archivebox/workers/models.py

@@ -60,7 +60,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                             f'{cls.__name__}.{field.name} must have choices set to {cls.__name__}.StatusChoices.choices',
                             f'{cls.__name__}.{field.name} must have choices set to {cls.__name__}.StatusChoices.choices',
                             hint=f'{cls.__name__}.{field.name}.choices = {getattr(field, "choices", None)!r}',
                             hint=f'{cls.__name__}.{field.name}.choices = {getattr(field, "choices", None)!r}',
                             obj=cls,
                             obj=cls,
-                            id='actors.E011',
+                            id='workers.E011',
                         ))
                         ))
             if getattr(field, '_is_retry_at_field', False):
             if getattr(field, '_is_retry_at_field', False):
                 if cls.retry_at_field_name == field.name:
                 if cls.retry_at_field_name == field.name:
@@ -73,14 +73,14 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.state_field_name must be defined and point to a StatusField()',
                 f'{cls.__name__}.state_field_name must be defined and point to a StatusField()',
                 hint=f'{cls.__name__}.state_field_name = {cls.state_field_name!r} but {cls.__name__}.{cls.state_field_name!r} was not found or does not refer to StatusField',
                 hint=f'{cls.__name__}.state_field_name = {cls.state_field_name!r} but {cls.__name__}.{cls.state_field_name!r} was not found or does not refer to StatusField',
                 obj=cls,
                 obj=cls,
-                id='actors.E012',
+                id='workers.E012',
             ))
             ))
         if not found_retry_at_field:
         if not found_retry_at_field:
             errors.append(checks.Error(
             errors.append(checks.Error(
                 f'{cls.__name__}.retry_at_field_name must be defined and point to a RetryAtField()',
                 f'{cls.__name__}.retry_at_field_name must be defined and point to a RetryAtField()',
                 hint=f'{cls.__name__}.retry_at_field_name = {cls.retry_at_field_name!r} but {cls.__name__}.{cls.retry_at_field_name!r} was not found or does not refer to RetryAtField',
                 hint=f'{cls.__name__}.retry_at_field_name = {cls.retry_at_field_name!r} but {cls.__name__}.{cls.retry_at_field_name!r} was not found or does not refer to RetryAtField',
                 obj=cls,
                 obj=cls,
-                id='actors.E013',
+                id='workers.E013',
             ))
             ))
             
             
         if not found_id_field:
         if not found_id_field:
@@ -88,7 +88,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__} must have an id field that is a primary key',
                 f'{cls.__name__} must have an id field that is a primary key',
                 hint=f'{cls.__name__}.id = {cls.id!r}',
                 hint=f'{cls.__name__}.id = {cls.id!r}',
                 obj=cls,
                 obj=cls,
-                id='actors.E014',
+                id='workers.E014',
             ))
             ))
             
             
         if not isinstance(cls.state_machine_name, str):
         if not isinstance(cls.state_machine_name, str):
@@ -96,7 +96,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.state_machine_name must be a dotted-import path to a StateMachine class',
                 f'{cls.__name__}.state_machine_name must be a dotted-import path to a StateMachine class',
                 hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
                 hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
                 obj=cls,
                 obj=cls,
-                id='actors.E015',
+                id='workers.E015',
             ))
             ))
         
         
         try:
         try:
@@ -106,7 +106,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.state_machine_name must point to a valid StateMachine class, but got {type(err).__name__} {err} when trying to access {cls.__name__}.StateMachineClass',
                 f'{cls.__name__}.state_machine_name must point to a valid StateMachine class, but got {type(err).__name__} {err} when trying to access {cls.__name__}.StateMachineClass',
                 hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
                 hint=f'{cls.__name__}.state_machine_name = {cls.state_machine_name!r}',
                 obj=cls,
                 obj=cls,
-                id='actors.E016',
+                id='workers.E016',
             ))
             ))
         
         
         if cls.INITIAL_STATE not in cls.StatusChoices.values:
         if cls.INITIAL_STATE not in cls.StatusChoices.values:
@@ -114,7 +114,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.StateMachineClass.initial_state must be present within {cls.__name__}.StatusChoices',
                 f'{cls.__name__}.StateMachineClass.initial_state must be present within {cls.__name__}.StatusChoices',
                 hint=f'{cls.__name__}.StateMachineClass.initial_state = {cls.StateMachineClass.initial_state!r}',
                 hint=f'{cls.__name__}.StateMachineClass.initial_state = {cls.StateMachineClass.initial_state!r}',
                 obj=cls,
                 obj=cls,
-                id='actors.E017',
+                id='workers.E017',
             ))
             ))
             
             
         if cls.ACTIVE_STATE not in cls.StatusChoices.values:
         if cls.ACTIVE_STATE not in cls.StatusChoices.values:
@@ -122,7 +122,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                 f'{cls.__name__}.active_state must be set to a valid State present within {cls.__name__}.StatusChoices',
                 f'{cls.__name__}.active_state must be set to a valid State present within {cls.__name__}.StatusChoices',
                 hint=f'{cls.__name__}.active_state = {cls.active_state!r}',
                 hint=f'{cls.__name__}.active_state = {cls.active_state!r}',
                 obj=cls,
                 obj=cls,
-                id='actors.E018',
+                id='workers.E018',
             ))
             ))
             
             
         
         
@@ -132,7 +132,7 @@ class BaseModelWithStateMachine(models.Model, MachineMixin):
                     f'{cls.__name__}.StateMachineClass.final_states must all be present within {cls.__name__}.StatusChoices',
                     f'{cls.__name__}.StateMachineClass.final_states must all be present within {cls.__name__}.StatusChoices',
                     hint=f'{cls.__name__}.StateMachineClass.final_states = {cls.StateMachineClass.final_states!r}',
                     hint=f'{cls.__name__}.StateMachineClass.final_states = {cls.StateMachineClass.final_states!r}',
                     obj=cls,
                     obj=cls,
-                    id='actors.E019',
+                    id='workers.E019',
                 ))
                 ))
                 break
                 break
         return errors
         return errors

+ 1 - 1
archivebox/actors/orchestrator.py → archivebox/workers/orchestrator.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.actors'
+__package__ = 'archivebox.workers'
 
 
 import os
 import os
 import time
 import time

+ 0 - 0
archivebox/queues/semaphores.py → archivebox/workers/semaphores.py


+ 1 - 1
archivebox/queues/supervisor_util.py → archivebox/workers/supervisor_util.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.queues'
+__package__ = 'archivebox.workers'
 
 
 import sys
 import sys
 import time
 import time

+ 1 - 1
archivebox/queues/tasks.py → archivebox/workers/tasks.py

@@ -1,4 +1,4 @@
-__package__ = 'archivebox.queues'
+__package__ = 'archivebox.workers'
 
 
 from functools import wraps
 from functools import wraps
 # from django.utils import timezone
 # from django.utils import timezone

+ 0 - 0
archivebox/actors/tests.py → archivebox/workers/tests.py


+ 0 - 0
archivebox/actors/views.py → archivebox/workers/views.py