test_ytdlp.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. """
  2. Integration tests for ytdlp plugin
  3. Tests verify:
  4. 1. Hook script exists
  5. 2. Verify deps with abx-pkg
  6. 3. YT-DLP extraction works on video URLs
  7. 4. JSONL output is correct
  8. 5. Config options work (YTDLP_ENABLED, YTDLP_TIMEOUT)
  9. 6. Handles non-video URLs gracefully
  10. """
  11. import json
  12. import subprocess
  13. import sys
  14. import tempfile
  15. import time
  16. from pathlib import Path
  17. import pytest
  # Locations are derived from this file's own path: the plugin directory is
  # two levels above this file, and the plugins root is one level above that.
  _HERE = Path(__file__).parent
  PLUGIN_DIR = _HERE.parent
  PLUGINS_ROOT = PLUGIN_DIR.parent

  # First file in the plugin directory matching the on_Snapshot ytdlp hook
  # naming pattern, or None when nothing matches.
  _HOOK_PATTERN = 'on_Snapshot__*_ytdlp.*'
  YTDLP_HOOK = next(PLUGIN_DIR.glob(_HOOK_PATTERN), None)

  # Default URL passed to the hook by tests that do not need a real download.
  TEST_URL = 'https://example.com/video.mp4'
  def test_hook_script_exists():
      """Verify that the on_Snapshot ytdlp hook script was found on disk.

      ``YTDLP_HOOK`` is ``None`` when the glob in the plugin directory matched
      nothing, so that case is checked first: it yields a readable assertion
      failure instead of an ``AttributeError`` from ``None.exists()``.
      """
      assert YTDLP_HOOK is not None, (
          f"Hook not found: no file matching on_Snapshot__*_ytdlp.* in {PLUGIN_DIR}"
      )
      assert YTDLP_HOOK.exists(), f"Hook not found: {YTDLP_HOOK}"
  def test_verify_deps_with_abx_pkg():
      """Verify yt-dlp, node, and ffmpeg are available via abx-pkg.

      Each binary is looked up through its own list of abx-pkg providers and
      counts as missing when loading yields nothing or no absolute path. The
      test fails with the list of missing binaries; previously the list was
      collected and then ignored, so the test could never fail.
      """
      from abx_pkg import Binary, PipProvider, AptProvider, BrewProvider, EnvProvider

      # Binary name -> providers to search, in the order they are tried.
      # node is needed by yt-dlp for JS extraction, ffmpeg for video conversion.
      required_binaries = {
          'yt-dlp': [PipProvider(), EnvProvider()],
          'node': [AptProvider(), BrewProvider(), EnvProvider()],
          'ffmpeg': [AptProvider(), BrewProvider(), EnvProvider()],
      }

      missing_binaries = []
      for name, providers in required_binaries.items():
          loaded = Binary(name=name, binproviders=providers).load()
          if not (loaded and loaded.abspath):
              missing_binaries.append(name)

      assert not missing_binaries, (
          f"Required binaries not available: {', '.join(missing_binaries)}"
      )
  def test_handles_non_video_url():
      """Test that the ytdlp hook handles a non-video URL gracefully.

      Runs the hook script against ``https://example.com`` in a temporary
      working directory and expects exit code 0 plus an ``ArchiveResult``
      JSONL record on stdout whose status is ``succeeded``.
      """
      # Prerequisites checked by earlier test
      with tempfile.TemporaryDirectory() as tmpdir:
          tmpdir = Path(tmpdir)

          # Run ytdlp extraction hook on non-video URL
          result = subprocess.run(
              [sys.executable, str(YTDLP_HOOK), '--url', 'https://example.com', '--snapshot-id', 'test789'],
              cwd=tmpdir,
              capture_output=True,
              text=True,
              timeout=60,
          )

          # Should exit 0 even for non-media URL
          assert result.returncode == 0, f"Should handle non-media URL gracefully: {result.stderr}"

          # Parse JSONL output: only lines that look like JSON objects are
          # decoded, so plain log lines on stdout are never passed to
          # json.loads and a decoded value is always a dict before .get().
          result_json = None
          for line in result.stdout.strip().split('\n'):
              line = line.strip()
              if not line.startswith('{'):
                  continue
              try:
                  record = json.loads(line)
              except json.JSONDecodeError:
                  # Looks like JSON but is not; ignore and keep scanning.
                  continue
              if record.get('type') == 'ArchiveResult':
                  result_json = record
                  break

          assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
          assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"
  def test_config_ytdlp_enabled_false_skips():
      """Check that YTDLP_ENABLED=False makes the hook exit 0 without emitting JSONL.

      The hook is run with the current environment plus ``YTDLP_ENABLED=False``.
      It must exit with code 0, mention the skip on stderr, and print no line
      on stdout that starts with ``{``.
      """
      import os

      disabled_env = {**os.environ, 'YTDLP_ENABLED': 'False'}
      hook_cmd = [sys.executable, str(YTDLP_HOOK), '--url', TEST_URL, '--snapshot-id', 'test999']

      with tempfile.TemporaryDirectory() as tmpdir:
          result = subprocess.run(
              hook_cmd,
              cwd=tmpdir,
              capture_output=True,
              text=True,
              env=disabled_env,
              timeout=30,
          )

          assert result.returncode == 0, f"Should exit 0 when feature disabled: {result.stderr}"

          # A disabled feature is reported on stderr only, never as a JSONL record
          skip_logged = 'Skipping' in result.stderr or 'False' in result.stderr
          assert skip_logged, "Should log skip reason to stderr"

          # Anything on stdout that starts with '{' counts as JSONL output
          stdout_lines = result.stdout.strip().split('\n')
          jsonl_lines = [line for line in stdout_lines if line.strip().startswith('{')]
          assert len(jsonl_lines) == 0, f"Should not emit JSONL when feature disabled, but got: {jsonl_lines}"
  def test_config_timeout():
      """Test that the hook finishes promptly when YTDLP_TIMEOUT is set to 5 seconds.

      Runs the hook against ``https://example.com`` with ``YTDLP_TIMEOUT=5``
      and expects exit code 0 within 6 seconds. Only ``YTDLP_TIMEOUT`` is set
      by this test; a ``MEDIA_TIMEOUT`` alias is not exercised here.
      """
      import os

      with tempfile.TemporaryDirectory() as tmpdir:
          env = os.environ.copy()
          env['YTDLP_TIMEOUT'] = '5'

          # Use the monotonic clock for the interval: unlike time.time() it is
          # not affected by system clock adjustments during the run.
          start_time = time.monotonic()
          result = subprocess.run(
              [sys.executable, str(YTDLP_HOOK), '--url', 'https://example.com', '--snapshot-id', 'testtimeout'],
              cwd=tmpdir,
              capture_output=True,
              text=True,
              env=env,
              timeout=10,  # Should complete in 5s, use 10s as safety margin
          )
          elapsed_time = time.monotonic() - start_time

          assert result.returncode == 0, f"Should complete without hanging: {result.stderr}"
          # Allow 1 second overhead for subprocess startup and Python interpreter
          assert elapsed_time <= 6.0, f"Should complete within 6 seconds (5s timeout + 1s overhead), took {elapsed_time:.2f}s"
  def test_real_youtube_url():
      """Run the hook against a real YouTube URL and check that files were produced.

      The hook must exit 0, print an ``ArchiveResult`` JSONL record with status
      ``succeeded``, and leave at least one file with a recognised suffix
      somewhere under the temporary working directory.
      """
      import os

      # "Me at the zoo" - first YouTube video; short and long-lived
      youtube_url = 'https://www.youtube.com/watch?v=jNQXAC9IVRw'
      accepted_suffixes = ('.mp4', '.webm', '.mkv', '.m4a', '.mp3', '.json', '.jpg', '.webp')

      with tempfile.TemporaryDirectory() as tmpdir:
          tmpdir = Path(tmpdir)

          env = os.environ.copy()
          env['YTDLP_TIMEOUT'] = '120'  # Give it time to download

          hook_cmd = [sys.executable, str(YTDLP_HOOK), '--url', youtube_url, '--snapshot-id', 'testyoutube']
          start_time = time.time()
          result = subprocess.run(
              hook_cmd,
              cwd=tmpdir,
              capture_output=True,
              text=True,
              env=env,
              timeout=180,
          )
          elapsed_time = time.time() - start_time

          # Should succeed
          assert result.returncode == 0, f"Should extract video/audio successfully: {result.stderr}"

          # Find the first ArchiveResult record among the JSON-looking stdout lines
          result_json = None
          for raw_line in result.stdout.strip().split('\n'):
              candidate = raw_line.strip()
              if not candidate.startswith('{'):
                  continue
              try:
                  record = json.loads(candidate)
              except json.JSONDecodeError:
                  continue
              if record.get('type') == 'ArchiveResult':
                  result_json = record
                  break

          assert result_json, f"Should have ArchiveResult JSONL output. stdout: {result.stdout}"
          assert result_json['status'] == 'succeeded', f"Should succeed: {result_json}"

          # Collect every regular file under tmpdir whose suffix is in the accepted list
          output_files = list(tmpdir.rglob('*'))
          media_files = []
          for path in output_files:
              if path.is_file() and path.suffix.lower() in accepted_suffixes:
                  media_files.append(path)

          assert len(media_files) > 0, f"Should have downloaded at least one video/audio file. Files: {output_files}"
          print(f"Successfully extracted {len(media_files)} file(s) in {elapsed_time:.2f}s")
  if __name__ == '__main__':
      # pytest.main() returns the run's exit code; pass it to sys.exit() so that
      # running this file directly reports test failures to the calling shell
      # instead of always exiting with status 0.
      sys.exit(pytest.main([__file__, '-v']))