فهرست منبع

make Claude.md stricter and improve migration tests

Nick Sweeting 2 ماه پیش
والد
کامیت
cffbef84ed
3فایلهای تغییر یافته به همراه63 افزوده شده و 50 حذف شده
  1. 9 7
      CLAUDE.md
  2. 29 22
      archivebox/tests/test_migrations_08_to_09.py
  3. 25 21
      archivebox/tests/test_migrations_helpers.py

+ 9 - 7
CLAUDE.md

@@ -3,10 +3,10 @@
 ## Quick Start
 
 ```bash
-# Set up dev environment
-uv sync --dev
+# Set up dev environment (always use uv, never pip directly)
+uv sync --dev --all-extras
 
-# Run tests as non-root user (required - ArchiveBox refuses to run as root)
+# Run tests as non-root user (required - ArchiveBox always refuses to run as root)
 sudo -u testuser bash -c 'source .venv/bin/activate && python -m pytest archivebox/tests/ -v'
 ```
 
@@ -19,7 +19,7 @@ sudo -u testuser bash -c 'source .venv/bin/activate && python -m pytest archiveb
 
 ### Install Dependencies
 ```bash
-uv sync --dev
+uv sync --dev --all-extras  # Always use uv, never pip directly
 ```
 
 ### Activate Virtual Environment
@@ -30,7 +30,7 @@ source .venv/bin/activate
 ## Running Tests
 
 ### CRITICAL: Never Run as Root
-ArchiveBox has a root check that prevents running as root user. Always run tests as a non-root user:
+ArchiveBox has a root check that prevents running as root user. All ArchiveBox commands (including tests) must run as non-root user inside a data directory:
 
 ```bash
 # Run all migration tests
@@ -62,8 +62,10 @@ Tests must exercise real code paths:
 - Run actual `python -m archivebox` commands via subprocess
 - Query SQLite directly to verify results
 
+**If something is hard to test**: Modify the implementation to make it easier to test, or fix the underlying issue. Never mock, skip, simulate, or exit early from a test because you can't get something working inside the test.
+
 ### NO SKIPS
-Never use `@skip`, `skipTest`, or `pytest.mark.skip`. Every test must run.
+Never use `@skip`, `skipTest`, or `pytest.mark.skip`. Every test must run. If a test is difficult, fix the code or test environment - don't disable the test.
 
 ### Strict Assertions
 - `init` command must return exit code 0 (not `[0, 1]`)
@@ -115,7 +117,7 @@ chmod 644 archivebox/tests/test_*.py
 ```
 
 ### 2. DATA_DIR Environment Variable
-Tests use temp directories. The `run_archivebox()` helper sets `DATA_DIR` automatically.
+ArchiveBox commands must run inside a data directory. Tests use temp directories - the `run_archivebox()` helper sets `DATA_DIR` automatically.
 
 ### 3. Extractors Disabled for Speed
 Tests disable all extractors via environment variables for faster execution:

+ 29 - 22
archivebox/tests/test_migrations_08_to_09.py

@@ -12,6 +12,7 @@ Migration tests from 0.8.x to 0.9.x.
 
 import shutil
 import sqlite3
+import subprocess
 import tempfile
 import unittest
 from pathlib import Path
@@ -440,28 +441,34 @@ class TestFilesystemMigration08to09(unittest.TestCase):
         result = run_archivebox(self.work_dir, ['init'], timeout=45)
         self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
 
-        # Step 2: Archive example.com with some extractors enabled
-        # Enable a subset of fast extractors for testing
-        result = run_archivebox(
-            self.work_dir,
-            ['add', '--depth=0', 'https://example.com'],
-            timeout=120,
-            env={
-                'SAVE_TITLE': 'True',
-                'SAVE_FAVICON': 'True',
-                'SAVE_WGET': 'True',
-                'SAVE_SCREENSHOT': 'False',  # Disable slow extractors
-                'SAVE_DOM': 'False',
-                'SAVE_SINGLEFILE': 'False',
-                'SAVE_READABILITY': 'False',
-                'SAVE_MERCURY': 'False',
-                'SAVE_PDF': 'False',
-                'SAVE_MEDIA': 'False',
-                'SAVE_ARCHIVE_DOT_ORG': 'False',
-            }
-        )
-        # Note: Add may fail if network is down or extractors fail, but we still want to test
-        # the filesystem migration logic even with partial failures
+        # Step 2: Archive example.com with ALL extractors enabled
+        # This ensures we test migration with all file types
+        try:
+            result = run_archivebox(
+                self.work_dir,
+                ['add', '--depth=0', 'https://example.com'],
+                timeout=300,  # 5 minutes for all extractors
+                env={
+                    'SAVE_TITLE': 'True',
+                    'SAVE_FAVICON': 'True',
+                    'SAVE_WGET': 'True',
+                    'SAVE_SCREENSHOT': 'True',
+                    'SAVE_DOM': 'True',
+                    'SAVE_SINGLEFILE': 'True',
+                    'SAVE_READABILITY': 'True',
+                    'SAVE_MERCURY': 'True',
+                    'SAVE_PDF': 'True',
+                    'SAVE_MEDIA': 'True',
+                    'SAVE_ARCHIVE_DOT_ORG': 'True',
+                    'SAVE_HEADERS': 'True',
+                    'SAVE_HTMLTOTEXT': 'True',
+                    'SAVE_GIT': 'True',
+                }
+            )
+        except subprocess.TimeoutExpired as e:
+            # If timeout, still continue - we want to test with whatever files were created
+            print(f"\n[!] Add command timed out after {e.timeout}s, continuing with partial results...")
+            # Note: Snapshot may still have been created even if command timed out
 
         # Step 3: Get the snapshot and verify files were created
         conn = sqlite3.connect(str(self.db_path))

+ 25 - 21
archivebox/tests/test_migrations_helpers.py

@@ -986,27 +986,31 @@ def seed_0_8_data(db_path: Path) -> Dict[str, List[Dict]]:
 # Helper Functions
 # =============================================================================
 
-def run_archivebox(data_dir: Path, args: list, timeout: int = 60) -> subprocess.CompletedProcess:
+def run_archivebox(data_dir: Path, args: list, timeout: int = 60, env: dict = None) -> subprocess.CompletedProcess:
     """Run archivebox command in subprocess with given data directory."""
-    env = os.environ.copy()
-    env['DATA_DIR'] = str(data_dir)
-    env['USE_COLOR'] = 'False'
-    env['SHOW_PROGRESS'] = 'False'
-    # Disable ALL extractors for faster tests
-    env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
-    env['SAVE_TITLE'] = 'False'
-    env['SAVE_FAVICON'] = 'False'
-    env['SAVE_WGET'] = 'False'
-    env['SAVE_SINGLEFILE'] = 'False'
-    env['SAVE_SCREENSHOT'] = 'False'
-    env['SAVE_PDF'] = 'False'
-    env['SAVE_DOM'] = 'False'
-    env['SAVE_READABILITY'] = 'False'
-    env['SAVE_MERCURY'] = 'False'
-    env['SAVE_GIT'] = 'False'
-    env['SAVE_MEDIA'] = 'False'
-    env['SAVE_HEADERS'] = 'False'
-    env['SAVE_HTMLTOTEXT'] = 'False'
+    base_env = os.environ.copy()
+    base_env['DATA_DIR'] = str(data_dir)
+    base_env['USE_COLOR'] = 'False'
+    base_env['SHOW_PROGRESS'] = 'False'
+    # Disable ALL extractors for faster tests (can be overridden by env parameter)
+    base_env['SAVE_ARCHIVE_DOT_ORG'] = 'False'
+    base_env['SAVE_TITLE'] = 'False'
+    base_env['SAVE_FAVICON'] = 'False'
+    base_env['SAVE_WGET'] = 'False'
+    base_env['SAVE_SINGLEFILE'] = 'False'
+    base_env['SAVE_SCREENSHOT'] = 'False'
+    base_env['SAVE_PDF'] = 'False'
+    base_env['SAVE_DOM'] = 'False'
+    base_env['SAVE_READABILITY'] = 'False'
+    base_env['SAVE_MERCURY'] = 'False'
+    base_env['SAVE_GIT'] = 'False'
+    base_env['SAVE_MEDIA'] = 'False'
+    base_env['SAVE_HEADERS'] = 'False'
+    base_env['SAVE_HTMLTOTEXT'] = 'False'
+
+    # Override with any custom env vars
+    if env:
+        base_env.update(env)
 
     cmd = [sys.executable, '-m', 'archivebox'] + args
 
@@ -1014,7 +1018,7 @@ def run_archivebox(data_dir: Path, args: list, timeout: int = 60) -> subprocess.
         cmd,
         capture_output=True,
         text=True,
-        env=env,
+        env=base_env,
         cwd=str(data_dir),
         timeout=timeout,
     )