Browse Source

Refactor test_chrome.py to use shared helpers

- Add get_machine_type() to chrome_test_helpers.py
- Update get_test_env() to include MACHINE_TYPE
- Refactor test_chrome.py to import from shared helpers
- Remove ~50 lines of duplicate code
Claude 1 month ago
parent
commit
ef92a99c4a

+ 22 - 2
archivebox/plugins/chrome/tests/chrome_test_helpers.py

@@ -78,16 +78,36 @@ def get_node_modules_dir() -> Path:
     return lib_dir / 'npm' / 'node_modules'
 
 
+def get_machine_type() -> str:
+    """Get machine type string (e.g., 'x86_64-linux', 'arm64-darwin').
+
+    Returns the machine type, checking:
+    1. MACHINE_TYPE environment variable
+    2. Computed from platform.machine() and platform.system()
+    """
+    if os.environ.get('MACHINE_TYPE'):
+        return os.environ['MACHINE_TYPE']
+
+    machine = platform.machine().lower()
+    system = platform.system().lower()
+    if machine in ('arm64', 'aarch64'):
+        machine = 'arm64'
+    elif machine in ('x86_64', 'amd64'):
+        machine = 'x86_64'
+    return f"{machine}-{system}"
+
+
 def get_test_env() -> dict:
-    """Get environment dict with NODE_MODULES_DIR and LIB_DIR set correctly for tests.
+    """Get environment dict with NODE_MODULES_DIR, LIB_DIR, and MACHINE_TYPE set correctly for tests.
 
-    Returns a copy of os.environ with NODE_MODULES_DIR and LIB_DIR added/updated.
+    Returns a copy of os.environ with NODE_MODULES_DIR, LIB_DIR, and MACHINE_TYPE added/updated.
     Use this for all subprocess calls in simple plugin tests (screenshot, dom, pdf).
     """
     env = os.environ.copy()
     lib_dir = get_lib_dir()
     env['LIB_DIR'] = str(lib_dir)
     env['NODE_MODULES_DIR'] = str(get_node_modules_dir())
+    env['MACHINE_TYPE'] = get_machine_type()
     return env
 
 

+ 14 - 59
archivebox/plugins/chrome/tests/test_chrome.py

@@ -28,70 +28,25 @@ import tempfile
 import shutil
 import platform
 
-PLUGIN_DIR = Path(__file__).parent.parent
-CHROME_LAUNCH_HOOK = PLUGIN_DIR / 'on_Crawl__30_chrome_launch.bg.js'
-CHROME_TAB_HOOK = PLUGIN_DIR / 'on_Snapshot__20_chrome_tab.bg.js'
-CHROME_NAVIGATE_HOOK = next(PLUGIN_DIR.glob('on_Snapshot__*_chrome_navigate.*'), None)
-
-# Get LIB_DIR and MACHINE_TYPE from environment or compute them
-def get_lib_dir_and_machine_type():
-    """Get or compute LIB_DIR and MACHINE_TYPE for tests."""
-    from archivebox.config.paths import get_machine_type
-    from archivebox.config.common import STORAGE_CONFIG
-
-    lib_dir = os.environ.get('LIB_DIR') or str(STORAGE_CONFIG.LIB_DIR)
-    machine_type = os.environ.get('MACHINE_TYPE') or get_machine_type()
-
-    return Path(lib_dir), machine_type
-
-# Setup NODE_MODULES_DIR to find npm packages
-LIB_DIR, MACHINE_TYPE = get_lib_dir_and_machine_type()
-# Note: LIB_DIR already includes machine_type (e.g., data/lib/arm64-darwin)
-NODE_MODULES_DIR = LIB_DIR / 'npm' / 'node_modules'
+from archivebox.plugins.chrome.tests.chrome_test_helpers import (
+    get_test_env,
+    get_lib_dir,
+    get_node_modules_dir,
+    find_chromium_binary,
+    CHROME_PLUGIN_DIR as PLUGIN_DIR,
+    CHROME_LAUNCH_HOOK,
+    CHROME_TAB_HOOK,
+    CHROME_NAVIGATE_HOOK,
+)
+
+# Get LIB_DIR and NODE_MODULES_DIR from shared helpers
+LIB_DIR = get_lib_dir()
+NODE_MODULES_DIR = get_node_modules_dir()
 NPM_PREFIX = LIB_DIR / 'npm'
 
 # Chromium install location (relative to DATA_DIR)
 CHROMIUM_INSTALL_DIR = Path(os.environ.get('DATA_DIR', '.')).resolve() / 'chromium'
 
-def get_test_env():
-    """Get environment with NODE_MODULES_DIR and CHROME_BINARY set correctly."""
-    env = os.environ.copy()
-    env['NODE_MODULES_DIR'] = str(NODE_MODULES_DIR)
-    env['LIB_DIR'] = str(LIB_DIR)
-    env['MACHINE_TYPE'] = MACHINE_TYPE
-    # Ensure CHROME_BINARY is set to Chromium
-    if 'CHROME_BINARY' not in env:
-        chromium = find_chromium_binary()
-        if chromium:
-            env['CHROME_BINARY'] = chromium
-    return env
-
-
-def find_chromium_binary(data_dir=None):
-    """Find the Chromium binary using chrome_utils.js findChromium().
-
-    This uses the centralized findChromium() function which checks:
-    - CHROME_BINARY env var
-    - @puppeteer/browsers install locations (in data_dir/chromium)
-    - System Chromium locations
-    - Falls back to Chrome (with warning)
-
-    Args:
-        data_dir: Directory where chromium was installed (contains chromium/ subdir)
-    """
-    chrome_utils = PLUGIN_DIR / 'chrome_utils.js'
-    # Use provided data_dir, or fall back to env var, or current dir
-    search_dir = data_dir or os.environ.get('DATA_DIR', '.')
-    result = subprocess.run(
-        ['node', str(chrome_utils), 'findChromium', str(search_dir)],
-        capture_output=True,
-        text=True,
-        timeout=10
-    )
-    if result.returncode == 0 and result.stdout.strip():
-        return result.stdout.strip()
-    return None
-
 
 @pytest.fixture(scope="session", autouse=True)
 def ensure_chromium_and_puppeteer_installed():