test_ripgrep_detection.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. #!/usr/bin/env python3
"""
Tests for ripgrep binary detection and archivebox install functionality.

Guards against regressions in:
1. Machine.config overrides not being used in version command
2. Ripgrep hook not resolving binary names via shutil.which()
3. SEARCH_BACKEND_ENGINE not being passed to hook environment
"""
  10. import os
  11. import sys
  12. import json
  13. import shutil
  14. import tempfile
  15. import subprocess
  16. from pathlib import Path
  17. from unittest.mock import patch, MagicMock
  18. import pytest
  19. def test_ripgrep_hook_detects_binary_from_path():
  20. """Test that ripgrep hook finds binary using abx-pkg when env var is just a name."""
  21. hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
  22. assert shutil.which('rg'), "ripgrep not installed"
  23. # Set SEARCH_BACKEND_ENGINE to enable the hook
  24. env = os.environ.copy()
  25. env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
  26. env['RIPGREP_BINARY'] = 'rg' # Just the name, not the full path (this was the bug)
  27. result = subprocess.run(
  28. [sys.executable, str(hook_path)],
  29. capture_output=True,
  30. text=True,
  31. env=env,
  32. timeout=10,
  33. )
  34. assert result.returncode == 0, f"Hook failed: {result.stderr}"
  35. # Parse JSONL output (filter out non-JSON lines)
  36. lines = [line for line in result.stdout.strip().split('\n') if line.strip() and line.strip().startswith('{')]
  37. assert len(lines) >= 1, "Expected at least 1 JSONL line (Binary)"
  38. binary = json.loads(lines[0])
  39. assert binary['type'] == 'Binary'
  40. assert binary['name'] == 'rg'
  41. assert 'binproviders' in binary, "Expected binproviders declaration"
  42. def test_ripgrep_hook_skips_when_backend_not_ripgrep():
  43. """Test that ripgrep hook exits silently when search backend is not ripgrep."""
  44. hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
  45. env = os.environ.copy()
  46. env['SEARCH_BACKEND_ENGINE'] = 'sqlite' # Different backend
  47. result = subprocess.run(
  48. [sys.executable, str(hook_path)],
  49. capture_output=True,
  50. text=True,
  51. env=env,
  52. timeout=10,
  53. )
  54. assert result.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
  55. assert result.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
  56. def test_ripgrep_hook_handles_absolute_path():
  57. """Test that ripgrep hook exits successfully when RIPGREP_BINARY is a valid absolute path."""
  58. hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
  59. rg_path = shutil.which('rg')
  60. assert rg_path, "ripgrep not installed"
  61. env = os.environ.copy()
  62. env['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
  63. env['RIPGREP_BINARY'] = rg_path # Full absolute path
  64. result = subprocess.run(
  65. [sys.executable, str(hook_path)],
  66. capture_output=True,
  67. text=True,
  68. env=env,
  69. timeout=10,
  70. )
  71. assert result.returncode == 0, f"Hook should exit successfully when binary already configured: {result.stderr}"
  72. lines = [line for line in result.stdout.strip().split('\n') if line.strip().startswith('{')]
  73. assert lines, "Expected Binary JSONL output when backend is ripgrep"
  74. @pytest.mark.django_db
  75. def test_machine_config_overrides_base_config():
  76. """
  77. Test that Machine.config overrides take precedence over base config.
  78. Guards against regression where archivebox version was showing binaries
  79. as "not installed" even though they were detected and stored in Machine.config.
  80. """
  81. from archivebox.machine.models import Machine, Binary
  82. import archivebox.machine.models as models
  83. models._CURRENT_MACHINE = None
  84. machine = Machine.current()
  85. # Simulate a hook detecting chrome and storing it with a different path than base config
  86. detected_chrome_path = '/custom/path/to/chrome'
  87. machine.config['CHROME_BINARY'] = detected_chrome_path
  88. machine.config['CHROME_VERSION'] = '143.0.7499.170'
  89. machine.save()
  90. # Create Binary record
  91. Binary.objects.create(
  92. machine=machine,
  93. name='chrome',
  94. abspath=detected_chrome_path,
  95. version='143.0.7499.170',
  96. binprovider='env',
  97. )
  98. # Verify Machine.config takes precedence
  99. from archivebox.config.configset import get_config
  100. config = get_config()
  101. # Machine.config should override the base config value
  102. assert machine.config.get('CHROME_BINARY') == detected_chrome_path
  103. # The version command should use Machine.config, not base config
  104. # (Base config might have 'chromium' while Machine.config has the full path)
  105. bin_value = machine.config.get('CHROME_BINARY') or config.get('CHROME_BINARY', '')
  106. assert bin_value == detected_chrome_path, \
  107. "Machine.config override should take precedence over base config"
  108. @pytest.mark.django_db
  109. def test_search_backend_engine_passed_to_hooks():
  110. """
  111. Test that SEARCH_BACKEND_ENGINE is configured properly.
  112. Guards against regression where hooks couldn't determine which search backend was active.
  113. """
  114. from archivebox.config.configset import get_config
  115. import os
  116. config = get_config()
  117. search_backend = config.get('SEARCH_BACKEND_ENGINE', 'ripgrep')
  118. # Verify config contains SEARCH_BACKEND_ENGINE
  119. assert search_backend in ('ripgrep', 'sqlite', 'sonic'), \
  120. f"SEARCH_BACKEND_ENGINE should be valid backend, got {search_backend}"
  121. # Verify it's accessible via environment (hooks read from os.environ)
  122. # Hooks receive environment variables, so this verifies the mechanism works
  123. assert 'SEARCH_BACKEND_ENGINE' in os.environ or search_backend == config.get('SEARCH_BACKEND_ENGINE'), \
  124. "SEARCH_BACKEND_ENGINE must be accessible to hooks"
  125. @pytest.mark.django_db
  126. def test_install_creates_binary_records():
  127. """
  128. Test that Binary records can be created and queried properly.
  129. This verifies the Binary model works correctly with the database.
  130. """
  131. from archivebox.machine.models import Machine, Binary
  132. import archivebox.machine.models as models
  133. models._CURRENT_MACHINE = None
  134. machine = Machine.current()
  135. initial_binary_count = Binary.objects.filter(machine=machine).count()
  136. # Create a test binary record
  137. test_binary = Binary.objects.create(
  138. machine=machine,
  139. name='test-binary',
  140. abspath='/usr/bin/test-binary',
  141. version='1.0.0',
  142. binprovider='env',
  143. status=Binary.StatusChoices.INSTALLED
  144. )
  145. # Verify Binary record was created
  146. final_binary_count = Binary.objects.filter(machine=machine).count()
  147. assert final_binary_count == initial_binary_count + 1, \
  148. "Binary record should be created"
  149. # Verify the binary can be queried
  150. found_binary = Binary.objects.filter(machine=machine, name='test-binary').first()
  151. assert found_binary is not None, "Binary should be found"
  152. assert found_binary.abspath == '/usr/bin/test-binary', "Binary path should match"
  153. assert found_binary.version == '1.0.0', "Binary version should match"
  154. # Clean up
  155. test_binary.delete()
  156. @pytest.mark.django_db
  157. def test_ripgrep_only_detected_when_backend_enabled():
  158. """
  159. Test ripgrep validation hook behavior with different SEARCH_BACKEND_ENGINE settings.
  160. Guards against ripgrep being detected when not needed.
  161. """
  162. import subprocess
  163. import sys
  164. from pathlib import Path
  165. assert shutil.which('rg'), "ripgrep not installed"
  166. hook_path = Path(__file__).parent.parent / 'on_Crawl__50_ripgrep_install.py'
  167. # Test 1: With ripgrep backend - should output Binary record
  168. env1 = os.environ.copy()
  169. env1['SEARCH_BACKEND_ENGINE'] = 'ripgrep'
  170. env1['RIPGREP_BINARY'] = 'rg'
  171. result1 = subprocess.run(
  172. [sys.executable, str(hook_path)],
  173. capture_output=True,
  174. text=True,
  175. env=env1,
  176. timeout=10,
  177. )
  178. assert result1.returncode == 0, f"Hook should succeed with ripgrep backend: {result1.stderr}"
  179. # Should output Binary JSONL when backend is ripgrep
  180. assert 'Binary' in result1.stdout, "Should output Binary when backend=ripgrep"
  181. # Test 2: With different backend - should output nothing
  182. env2 = os.environ.copy()
  183. env2['SEARCH_BACKEND_ENGINE'] = 'sqlite'
  184. env2['RIPGREP_BINARY'] = 'rg'
  185. result2 = subprocess.run(
  186. [sys.executable, str(hook_path)],
  187. capture_output=True,
  188. text=True,
  189. env=env2,
  190. timeout=10,
  191. )
  192. assert result2.returncode == 0, "Hook should exit successfully when backend is not ripgrep"
  193. assert result2.stdout.strip() == '', "Hook should produce no output when backend is not ripgrep"
  194. if __name__ == '__main__':
  195. pytest.main([__file__, '-v'])