|
|
@@ -0,0 +1,135 @@
|
|
|
+"""
|
|
|
+Tests for the SEO plugin.
|
|
|
+
|
|
|
+Tests the real SEO hook with an actual URL to verify
|
|
|
+meta tag extraction.
|
|
|
+"""
|
|
|
+
|
|
|
+import json
|
|
|
+import shutil
|
|
|
+import subprocess
|
|
|
+import sys
|
|
|
+import tempfile
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
+import pytest
|
|
|
+from django.test import TestCase
|
|
|
+
|
|
|
+# Import chrome test helpers
|
|
|
+sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
|
|
|
+from chrome_test_helpers import (
|
|
|
+ chrome_session,
|
|
|
+ get_test_env,
|
|
|
+ get_plugin_dir,
|
|
|
+ get_hook_script,
|
|
|
+)
|
|
|
+
|
|
|
+
|
|
|
def chrome_available() -> bool:
    """Report whether any known Chrome/Chromium executable is on PATH."""
    candidates = ('chromium', 'chromium-browser', 'google-chrome', 'chrome')
    # shutil.which() yields a path string when found and None otherwise.
    return any(shutil.which(binary) for binary in candidates)
|
|
|
+
|
|
|
+
|
|
|
# Resolve the plugin directory for this test file, then look up the SEO
# snapshot hook script inside it by glob pattern.
# NOTE(review): SEO_HOOK is presumably None when nothing matches the pattern
# (the existence test below asserts it is not None) -- confirm in the helper.
PLUGIN_DIR = get_plugin_dir(__file__)
SEO_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_seo.*')
|
|
|
+
|
|
|
+
|
|
|
class TestSEOPlugin(TestCase):
    """Checks on the SEO plugin that do not need a browser."""

    def test_seo_hook_exists(self):
        """The SEO hook must have been resolved and must exist on disk."""
        hook = SEO_HOOK
        # Check for None first so a failed lookup yields a readable message
        # instead of an AttributeError on .exists().
        self.assertIsNotNone(hook, "SEO hook not found in plugin directory")
        self.assertTrue(hook.exists(), f"Hook not found: {hook}")
|
|
|
+
|
|
|
+
|
|
|
@pytest.mark.skipif(not chrome_available(), reason="Chrome not installed")
class TestSEOWithChrome(TestCase):
    """Integration tests that run the SEO hook against a live Chrome session.

    The whole class is skipped when no Chrome/Chromium binary is on PATH.
    """

    def setUp(self):
        """Create a fresh scratch directory for the test's Chrome session."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Remove the scratch directory; cleanup errors are ignored."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @staticmethod
    def _load_seo_data(seo_output, stdout):
        """Return the SEO record produced by the hook, or None if not found.

        Args:
            seo_output: Path of the JSON file the hook is expected to write.
            stdout: Captured standard output of the hook process.

        The JSON file is preferred. If it is missing, unparsable, or empty,
        the first stdout line that parses as a JSON object and carries a
        recognisable SEO key is used instead.
        """
        if seo_output.exists():
            try:
                # Read explicitly as UTF-8 rather than the locale default.
                file_data = json.loads(seo_output.read_text(encoding='utf-8'))
            except json.JSONDecodeError:
                file_data = None
            if file_data:
                return file_data

        stdout_keys = ('title', 'description', 'og:title', 'canonical')
        for raw_line in stdout.split('\n'):
            line = raw_line.strip()
            if not line.startswith('{'):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # SEO data typically has title, description, or og: tags.
            if any(key in record for key in stdout_keys):
                return record
        return None

    def test_seo_extracts_meta_tags(self):
        """SEO hook should extract meta tags from a real URL."""
        test_url = 'https://example.com'
        snapshot_id = 'test-seo-snapshot'

        # Only session setup and the hook run sit inside the try: a
        # Chrome/CDP RuntimeError there means the environment is unusable,
        # so the test is skipped rather than failed.
        try:
            with chrome_session(
                self.temp_dir,
                crawl_id='test-seo-crawl',
                snapshot_id=snapshot_id,
                test_url=test_url,
                navigate=True,
                timeout=30,
            ) as (_chrome_process, _chrome_pid, snapshot_chrome_dir):
                env = get_test_env()
                env['CHROME_HEADLESS'] = 'true'

                # Run the SEO hook from the snapshot's Chrome directory while
                # the session is still alive.
                result = subprocess.run(
                    ['node', str(SEO_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
                    cwd=str(snapshot_chrome_dir),
                    capture_output=True,
                    text=True,
                    timeout=60,
                    env=env,
                )

                # Collect the output before the session context exits.
                seo_data = self._load_seo_data(
                    snapshot_chrome_dir / 'seo.json', result.stdout
                )
        except RuntimeError as e:
            if 'Chrome' in str(e) or 'CDP' in str(e):
                self.skipTest(f"Chrome session setup failed: {e}")
            raise

        # Verify hook ran successfully
        self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
        self.assertNotIn('Traceback', result.stderr)
        self.assertNotIn('Error:', result.stderr)

        # NOTE(review): a run that yields no SEO data at all still passes
        # here; consider asserting seo_data is present once the hook's output
        # location is confirmed.
        if seo_data:
            expected_keys = ('title', 'description', 'og:title', 'canonical', 'meta')
            has_seo_data = any(key in seo_data for key in expected_keys)
            # Previously `has_seo_data or seo_data`, which is always true
            # inside this branch and so could never fail.
            self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}")
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; hand it to the interpreter
    # so that running this file directly reports failures to the shell/CI.
    raise SystemExit(pytest.main([__file__, '-v']))
|