|
|
@@ -1,191 +0,0 @@
|
|
|
-"""
|
|
|
-PID file utilities for tracking worker and orchestrator processes.
|
|
|
-
|
|
|
-PID files are stored in data/tmp/workers/ and contain:
|
|
|
-- Line 1: PID
|
|
|
-- Line 2: Worker type (orchestrator, crawl, snapshot, archiveresult)
|
|
|
-- Line 3: Extractor filter (optional, for archiveresult workers)
|
|
|
-- Line 4: Started at ISO timestamp
|
|
|
-"""
|
|
|
-
|
|
|
-__package__ = 'archivebox.workers'
|
|
|
-
|
|
|
-import os
|
|
|
-import signal
|
|
|
-from pathlib import Path
|
|
|
-from datetime import datetime, timezone
|
|
|
-
|
|
|
-from django.conf import settings
|
|
|
-
|
|
|
-
|
|
|
def get_pid_dir() -> Path:
    """Return ``<DATA_DIR>/tmp/workers``, creating it (and any missing parents) first."""
    workers_dir = Path(settings.DATA_DIR).joinpath('tmp', 'workers')
    # exist_ok makes repeated calls harmless once the directory is in place.
    workers_dir.mkdir(parents=True, exist_ok=True)
    return workers_dir
|
|
|
-
|
|
|
-
|
|
|
def write_pid_file(worker_type: str, worker_id: int = 0, extractor: str | None = None) -> Path:
    """
    Write a PID file for the current process.

    The orchestrator always uses ``orchestrator.pid``; every other worker type
    uses ``{worker_type}_worker_{worker_id}.pid``. The file holds four lines:
    PID, worker type, extractor (empty when not given) and the UTC start time
    in ISO format.

    The content is written to a temporary sibling file and then renamed over
    the final name, so a concurrent reader never sees a partially written
    file (cleanup_stale_pid_files() deletes files it cannot parse).

    Returns the path to the PID file.
    Raises OSError if the file cannot be written.
    """
    pid_dir = get_pid_dir()

    if worker_type == 'orchestrator':
        pid_file = pid_dir / 'orchestrator.pid'
    else:
        pid_file = pid_dir / f'{worker_type}_worker_{worker_id}.pid'

    content = f"{os.getpid()}\n{worker_type}\n{extractor or ''}\n{datetime.now(timezone.utc).isoformat()}\n"

    # The temp name ends in ".tmp" so the '*.pid' glob used elsewhere ignores it,
    # and it embeds our PID so two processes never share a temp file.
    tmp_file = pid_file.with_name(f'{pid_file.name}.{os.getpid()}.tmp')
    try:
        tmp_file.write_text(content)
        os.replace(tmp_file, pid_file)
    except OSError:
        # Don't leave the temp file behind; the original error is what matters.
        try:
            tmp_file.unlink(missing_ok=True)
        except OSError:
            pass
        raise

    return pid_file
|
|
|
-
|
|
|
-
|
|
|
-def read_pid_file(path: Path) -> dict | None:
|
|
|
- """
|
|
|
- Read and parse a PID file.
|
|
|
- Returns dict with pid, worker_type, extractor, started_at or None if invalid.
|
|
|
- """
|
|
|
- try:
|
|
|
- if not path.exists():
|
|
|
- return None
|
|
|
-
|
|
|
- lines = path.read_text().strip().split('\n')
|
|
|
- if len(lines) < 4:
|
|
|
- return None
|
|
|
-
|
|
|
- return {
|
|
|
- 'pid': int(lines[0]),
|
|
|
- 'worker_type': lines[1],
|
|
|
- 'extractor': lines[2] or None,
|
|
|
- 'started_at': datetime.fromisoformat(lines[3]),
|
|
|
- 'pid_file': path,
|
|
|
- }
|
|
|
- except (ValueError, IndexError, OSError):
|
|
|
- return None
|
|
|
-
|
|
|
-
|
|
|
def remove_pid_file(path: Path) -> None:
    """Delete the PID file at *path* on a best-effort basis; never raises OSError."""
    try:
        path.unlink()
    except OSError:
        # Covers a missing file (FileNotFoundError) as well as any other
        # OS-level failure; removal is deliberately best-effort.
        return
|
|
|
-
|
|
|
-
|
|
|
def is_process_alive(pid: int) -> bool:
    """
    Check if a process with the given PID is still running.

    Returns False for PIDs <= 0: those address process groups (or every
    process) in os.kill(), not an individual process.
    """
    if pid <= 0:
        return False

    try:
        os.kill(pid, 0)  # Signal 0 doesn't kill, just checks
    except PermissionError:
        # The process exists but we lack permission to signal it.
        return True
    except (OSError, OverflowError):
        # ProcessLookupError (no such process), any other OS error, or a
        # PID too large to be a valid process ID.
        return False
    return True
|
|
|
-
|
|
|
-
|
|
|
def get_all_pid_files() -> list[Path]:
    """Return every ``*.pid`` path found directly inside the workers PID directory."""
    return [*get_pid_dir().glob('*.pid')]
|
|
|
-
|
|
|
-
|
|
|
def get_all_worker_pids(worker_type: str | None = None) -> list[dict]:
    """
    Collect the parsed PID-file info of every worker whose process is alive.

    Unparseable PID files and files whose process is gone are skipped.
    When worker_type is given (and non-empty), only matching workers are kept.
    """
    # Lazily parsed so each file is read right before its liveness check.
    parsed = (read_pid_file(candidate) for candidate in get_all_pid_files())
    return [
        info
        for info in parsed
        if info is not None
        and is_process_alive(info['pid'])
        and not (worker_type and info['worker_type'] != worker_type)
    ]
|
|
|
-
|
|
|
-
|
|
|
def cleanup_stale_pid_files() -> int:
    """
    Delete PID files that are invalid or whose process has exited.

    Returns how many files were treated as stale and passed to remove_pid_file().
    """
    stale_count = 0

    for candidate in get_all_pid_files():
        info = read_pid_file(candidate)
        # Stale means unparseable, or parseable but pointing at a dead process.
        is_stale = info is None or not is_process_alive(info['pid'])
        if is_stale:
            remove_pid_file(candidate)
            stale_count += 1

    return stale_count
|
|
|
-
|
|
|
-
|
|
|
def get_running_worker_count(worker_type: str) -> int:
    """Return how many live workers of *worker_type* have a PID file."""
    running = get_all_worker_pids(worker_type)
    return len(running)
|
|
|
-
|
|
|
-
|
|
|
def get_next_worker_id(worker_type: str) -> int:
    """Return the lowest non-negative worker ID with no PID file for *worker_type*."""
    prefix = f'{worker_type}_worker_'
    taken: set[int] = set()

    for pid_file in get_all_pid_files():
        # Filenames look like "snapshot_worker_3.pid"; the ID is the last "_" field.
        stem = pid_file.stem
        if not stem.startswith(prefix):
            continue
        try:
            taken.add(int(stem.rsplit('_', 1)[-1]))
        except ValueError:
            pass

    # Among 0..len(taken) at least one value is unused, so next() always succeeds.
    return next(candidate for candidate in range(len(taken) + 1) if candidate not in taken)
|
|
|
-
|
|
|
-
|
|
|
def stop_worker(pid: int, graceful: bool = True) -> bool:
    """
    Stop a worker process.

    If graceful=True, sends SIGTERM first, polls for up to ~1 second, then
    sends SIGKILL if the process is still running. If graceful=False, sends
    SIGKILL immediately.

    Returns True if the process was already gone or a stop signal was
    delivered. Returns False if pid is not a valid process ID (<= 0) or we
    lack permission to signal the process.
    """
    # Never signal pid <= 0: os.kill() would target our own process group (0),
    # every process we may signal (-1) or another process group (< -1).
    if pid <= 0:
        return False

    if not is_process_alive(pid):
        return True

    try:
        if graceful:
            os.kill(pid, signal.SIGTERM)
            # Give it a moment to shut down
            import time
            for _ in range(10):  # Wait up to 1 second
                time.sleep(0.1)
                if not is_process_alive(pid):
                    return True
        # Force kill (immediately when not graceful, otherwise after the wait).
        os.kill(pid, signal.SIGKILL)
        return True
    except PermissionError:
        # The process exists but we may not signal it, so it was not stopped.
        return False
    except (OSError, ProcessLookupError):
        return True  # Process already dead
|