Browse Source

Capture exit codes and stderr from background hooks

Extended graceful_terminate_background_hooks() to:
- Reap processes with os.waitpid() to get exit codes
- Write returncode to .returncode file for update_from_output()
- Return detailed result dict with status, returncode, and pid

Updated update_from_output() to:
- Read .returncode and .stderr.log files
- Determine status from returncode if no ArchiveResult JSONL record
- Include stderr in output_str for failed hooks
- Handle signal termination (negative returncodes like -9 for SIGKILL)
- Clean up .returncode files along with other hook output files
Claude 1 month ago
parent
commit
524e8e98c3
2 changed files with 122 additions and 22 deletions
  1. 40 5
      archivebox/core/models.py
  2. 82 17
      archivebox/hooks.py

+ 40 - 5
archivebox/core/models.py

@@ -2711,7 +2711,20 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
 
         # Read and parse JSONL output from hook-specific stdout log
         stdout_file = plugin_dir / f'{hook_basename}.stdout.log'
+        stderr_file = plugin_dir / f'{hook_basename}.stderr.log'
+        returncode_file = plugin_dir / f'{hook_basename}.returncode'
+
         stdout = stdout_file.read_text() if stdout_file.exists() else ''
+        stderr = stderr_file.read_text() if stderr_file.exists() else ''
+
+        # Read returncode from file (written by graceful_terminate_background_hooks)
+        returncode = None
+        if returncode_file.exists():
+            try:
+                rc_text = returncode_file.read_text().strip()
+                returncode = int(rc_text) if rc_text else None
+            except (ValueError, OSError):
+                pass
 
         records = []
         for line in stdout.splitlines():
@@ -2746,9 +2759,30 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
                 self._set_binary_from_cmd(hook_data['cmd'])
             # Note: cmd_version is derived from binary.version, not stored on Process
         else:
-            # No ArchiveResult record = failed
-            self.status = self.StatusChoices.FAILED
-            self.output_str = 'Hook did not output ArchiveResult record'
+            # No ArchiveResult JSONL record - determine status from returncode
+            if returncode is not None:
+                if returncode == 0:
+                    self.status = self.StatusChoices.SUCCEEDED
+                    self.output_str = 'Hook completed successfully (no JSONL output)'
+                elif returncode < 0:
+                    # Negative = killed by signal (e.g., -9 for SIGKILL, -15 for SIGTERM)
+                    sig_num = abs(returncode)
+                    sig_name = {9: 'SIGKILL', 15: 'SIGTERM'}.get(sig_num, f'signal {sig_num}')
+                    self.status = self.StatusChoices.FAILED
+                    self.output_str = f'Hook killed by {sig_name}'
+                    if stderr:
+                        self.output_str += f'\n\nstderr:\n{stderr[:2000]}'
+                else:
+                    self.status = self.StatusChoices.FAILED
+                    self.output_str = f'Hook failed with exit code {returncode}'
+                    if stderr:
+                        self.output_str += f'\n\nstderr:\n{stderr[:2000]}'
+            else:
+                # No returncode file and no JSONL = failed
+                self.status = self.StatusChoices.FAILED
+                self.output_str = 'Hook did not output ArchiveResult record'
+                if stderr:
+                    self.output_str += f'\n\nstderr:\n{stderr[:2000]}'
 
         # Walk filesystem and populate output_files, output_size, output_mimetypes
         # Exclude hook output files (hook-specific names like on_Snapshot__50_wget.stdout.log)
@@ -2758,6 +2792,7 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
                 name.endswith('.stdout.log') or
                 name.endswith('.stderr.log') or
                 name.endswith('.pid') or
+                name.endswith('.returncode') or
                 (name.endswith('.sh') and name.startswith('on_'))
             )
 
@@ -2826,10 +2861,10 @@ class ArchiveResult(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWi
         }
         process_hook_records(filtered_records, overrides=overrides)
 
-        # Cleanup PID files and empty logs (hook-specific names)
+        # Cleanup PID files, returncode files, and empty logs (hook-specific names)
         pid_file = plugin_dir / f'{hook_basename}.pid'
         pid_file.unlink(missing_ok=True)
-        stderr_file = plugin_dir / f'{hook_basename}.stderr.log'
+        returncode_file.unlink(missing_ok=True)
         if stdout_file.exists() and stdout_file.stat().st_size == 0:
             stdout_file.unlink()
         if stderr_file.exists() and stderr_file.stat().st_size == 0:

+ 82 - 17
archivebox/hooks.py

@@ -1270,7 +1270,7 @@ def graceful_terminate_background_hooks(
     output_dir: Path,
     config: Dict[str, Any],
     poll_interval: float = 0.5,
-) -> Dict[str, str]:
+) -> Dict[str, Dict[str, Any]]:
     """
     Gracefully terminate all background hooks in an output directory.
 
@@ -1278,6 +1278,8 @@ def graceful_terminate_background_hooks(
         1. Send SIGTERM to all background hook processes (polite shutdown request)
         2. For each hook, wait up to its plugin-specific timeout
         3. Send SIGKILL to any hooks still running after their timeout expires
+        4. Reap each process with waitpid() to get exit code
+        5. Write returncode to .returncode file for update_from_output()
 
     Args:
         output_dir: Snapshot output directory containing plugin subdirs with .pid files
@@ -1285,19 +1287,22 @@ def graceful_terminate_background_hooks(
         poll_interval: Seconds between process liveness checks (default: 0.5s)
 
     Returns:
-        Dict mapping hook names to termination status:
-            - 'sigterm': Exited cleanly after SIGTERM
-            - 'sigkill': Required SIGKILL after timeout
-            - 'already_dead': Process was already dead
-            - 'invalid': PID file was stale/invalid
+        Dict mapping hook names to result info:
+            {
+                'hook_name': {
+                    'status': 'sigterm' | 'sigkill' | 'already_dead' | 'invalid',
+                    'returncode': int or None,
+                    'pid': int or None,
+                }
+            }
 
     Example:
         from archivebox.config.configset import get_config
         config = get_config(crawl=my_crawl, snapshot=my_snapshot)
         results = graceful_terminate_background_hooks(snapshot.OUTPUT_DIR, config)
-        # {'on_Snapshot__20_chrome_tab.bg': 'sigterm', 'on_Snapshot__63_media.bg': 'sigkill'}
+        # {'on_Snapshot__20_chrome_tab.bg': {'status': 'sigterm', 'returncode': 0, 'pid': 12345}}
     """
-    from archivebox.misc.process_utils import validate_pid_file, safe_kill_process
+    from archivebox.misc.process_utils import validate_pid_file
 
     if not output_dir.exists():
         return {}
@@ -1317,20 +1322,23 @@ def graceful_terminate_background_hooks(
 
         # Validate and get PID
         if not validate_pid_file(pid_file, cmd_file):
-            results[hook_name] = 'invalid'
+            results[hook_name] = {'status': 'invalid', 'returncode': None, 'pid': None}
             pid_file.unlink(missing_ok=True)
             continue
 
         try:
             pid = int(pid_file.read_text().strip())
         except (ValueError, OSError):
-            results[hook_name] = 'invalid'
+            results[hook_name] = {'status': 'invalid', 'returncode': None, 'pid': None}
             pid_file.unlink(missing_ok=True)
             continue
 
         # Check if process is still alive
         if not process_is_alive(pid_file):
-            results[hook_name] = 'already_dead'
+            # Process already dead - try to reap it and get exit code
+            returncode = _reap_process(pid)
+            results[hook_name] = {'status': 'already_dead', 'returncode': returncode, 'pid': pid}
+            _write_returncode_file(pid_file, returncode)
             pid_file.unlink(missing_ok=True)
             continue
 
@@ -1345,7 +1353,9 @@ def graceful_terminate_background_hooks(
         try:
             os.kill(pid, signal.SIGTERM)
         except (OSError, ProcessLookupError):
-            results[hook_name] = 'already_dead'
+            returncode = _reap_process(pid)
+            results[hook_name] = {'status': 'already_dead', 'returncode': returncode, 'pid': pid}
+            _write_returncode_file(pid_file, returncode)
             pid_file.unlink(missing_ok=True)
             continue
 
@@ -1364,17 +1374,72 @@ def graceful_terminate_background_hooks(
             time.sleep(poll_interval)
 
         if exited_cleanly:
-            results[hook_name] = 'sigterm'
-            pid_file.unlink(missing_ok=True)
+            # Process exited from SIGTERM - reap it to get exit code
+            returncode = _reap_process(pid)
+            results[hook_name] = {'status': 'sigterm', 'returncode': returncode, 'pid': pid}
         else:
             # Timeout expired, send SIGKILL
             try:
                 os.kill(pid, signal.SIGKILL)
-                results[hook_name] = 'sigkill'
             except (OSError, ProcessLookupError):
-                results[hook_name] = 'sigterm'  # Died between check and kill
-            pid_file.unlink(missing_ok=True)
+                pass  # Process died between check and kill
+
+            # Wait briefly for SIGKILL to take effect, then reap
+            time.sleep(0.1)
+            returncode = _reap_process(pid)
+
+            # returncode from SIGKILL is typically -9 (negative signal number)
+            results[hook_name] = {'status': 'sigkill', 'returncode': returncode, 'pid': pid}
+
+        # Write returncode file for update_from_output() to read
+        _write_returncode_file(pid_file, results[hook_name]['returncode'])
+        pid_file.unlink(missing_ok=True)
 
     return results
 
 
+def _reap_process(pid: int) -> Optional[int]:
+    """
+    Reap a terminated process and return its exit code.
+
+    Uses os.waitpid() with WNOHANG to avoid blocking.
+    Returns None if process cannot be reaped (not a child, already reaped, etc).
+    """
+    try:
+        # WNOHANG: return immediately if process hasn't exited
+        # We call this after we know process is dead, so it should return immediately
+        wpid, status = os.waitpid(pid, os.WNOHANG)
+        if wpid == 0:
+            # Process still running (shouldn't happen since we checked)
+            return None
+        if os.WIFEXITED(status):
+            return os.WEXITSTATUS(status)
+        elif os.WIFSIGNALED(status):
+            # Killed by signal - return negative signal number (convention)
+            return -os.WTERMSIG(status)
+        return None
+    except ChildProcessError:
+        # Not our child process (was started by subprocess.Popen which already reaped it,
+        # or process was started by different parent). This is expected for hooks.
+        return None
+    except OSError:
+        return None
+
+
+def _write_returncode_file(pid_file: Path, returncode: Optional[int]) -> None:
+    """
+    Write returncode to a .returncode file next to the .pid file.
+
+    This allows update_from_output() to know the exit code even for background hooks.
+    """
+    returncode_file = pid_file.with_suffix('.returncode')
+    try:
+        if returncode is not None:
+            returncode_file.write_text(str(returncode))
+        else:
+            # Unknown exit code - write empty file to indicate process was terminated
+            returncode_file.write_text('')
+    except OSError:
+        pass  # Best effort
+
+