test_seo.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. """
  2. Tests for the SEO plugin.
  3. Tests the real SEO hook with an actual URL to verify
  4. meta tag extraction.
  5. """
  6. import json
  7. import subprocess
  8. import sys
  9. import tempfile
  10. import shutil
  11. from pathlib import Path
  12. from django.test import TestCase
  13. # Import chrome test helpers
  14. sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'chrome' / 'tests'))
  15. from chrome_test_helpers import (
  16. chrome_session,
  17. CHROME_NAVIGATE_HOOK,
  18. get_plugin_dir,
  19. get_hook_script,
  20. )
# Get the path to the SEO hook.
# PLUGIN_DIR is derived from this test file's own location via get_plugin_dir().
# SEO_HOOK is whatever get_hook_script() resolves for the wildcard pattern below.
# NOTE(review): SEO_HOOK is presumably a pathlib.Path, or None when no script
# matches (test_seo_hook_exists asserts both not-None and .exists()) — confirm
# against chrome_test_helpers.
PLUGIN_DIR = get_plugin_dir(__file__)
SEO_HOOK = get_hook_script(PLUGIN_DIR, 'on_Snapshot__*_seo.*')
  24. class TestSEOPlugin(TestCase):
  25. """Test the SEO plugin."""
  26. def test_seo_hook_exists(self):
  27. """SEO hook script should exist."""
  28. self.assertIsNotNone(SEO_HOOK, "SEO hook not found in plugin directory")
  29. self.assertTrue(SEO_HOOK.exists(), f"Hook not found: {SEO_HOOK}")
  30. class TestSEOWithChrome(TestCase):
  31. """Integration tests for SEO plugin with Chrome."""
  32. def setUp(self):
  33. """Set up test environment."""
  34. self.temp_dir = Path(tempfile.mkdtemp())
  35. def tearDown(self):
  36. """Clean up."""
  37. shutil.rmtree(self.temp_dir, ignore_errors=True)
  38. def test_seo_extracts_meta_tags(self):
  39. """SEO hook should extract meta tags from a real URL."""
  40. test_url = 'https://example.com'
  41. snapshot_id = 'test-seo-snapshot'
  42. with chrome_session(
  43. self.temp_dir,
  44. crawl_id='test-seo-crawl',
  45. snapshot_id=snapshot_id,
  46. test_url=test_url,
  47. navigate=False,
  48. timeout=30,
  49. ) as (chrome_process, chrome_pid, snapshot_chrome_dir, env):
  50. seo_dir = snapshot_chrome_dir.parent / 'seo'
  51. seo_dir.mkdir(exist_ok=True)
  52. nav_result = subprocess.run(
  53. ['node', str(CHROME_NAVIGATE_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
  54. cwd=str(snapshot_chrome_dir),
  55. capture_output=True,
  56. text=True,
  57. timeout=120,
  58. env=env
  59. )
  60. self.assertEqual(nav_result.returncode, 0, f"Navigation failed: {nav_result.stderr}")
  61. # Run SEO hook with the active Chrome session
  62. result = subprocess.run(
  63. ['node', str(SEO_HOOK), f'--url={test_url}', f'--snapshot-id={snapshot_id}'],
  64. cwd=str(seo_dir),
  65. capture_output=True,
  66. text=True,
  67. timeout=60,
  68. env=env
  69. )
  70. # Check for output file
  71. seo_output = seo_dir / 'seo.json'
  72. seo_data = None
  73. # Try parsing from file first
  74. if seo_output.exists():
  75. with open(seo_output) as f:
  76. try:
  77. seo_data = json.load(f)
  78. except json.JSONDecodeError:
  79. pass
  80. # Try parsing from stdout if not in file
  81. if not seo_data:
  82. for line in result.stdout.split('\n'):
  83. line = line.strip()
  84. if line.startswith('{'):
  85. try:
  86. record = json.loads(line)
  87. # SEO data typically has title, description, or og: tags
  88. if any(key in record for key in ['title', 'description', 'og:title', 'canonical']):
  89. seo_data = record
  90. break
  91. except json.JSONDecodeError:
  92. continue
  93. # Verify hook ran successfully
  94. self.assertEqual(result.returncode, 0, f"Hook failed: {result.stderr}")
  95. self.assertNotIn('Traceback', result.stderr)
  96. self.assertNotIn('Error:', result.stderr)
  97. # example.com has a title, so we MUST get SEO data
  98. self.assertIsNotNone(seo_data, "No SEO data extracted from file or stdout")
  99. # Verify we got some SEO data
  100. has_seo_data = any(key in seo_data for key in ['title', 'description', 'og:title', 'canonical', 'meta'])
  101. self.assertTrue(has_seo_data, f"No SEO data extracted: {seo_data}")
  102. if __name__ == '__main__':
  103. pytest.main([__file__, '-v'])