test_chrome_test_helpers.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. """
  2. Tests for chrome_test_helpers.py functions.
  3. These tests verify the Python helper functions used across Chrome plugin tests.
  4. """
  5. import os
  6. import pytest
  7. import tempfile
  8. from pathlib import Path
  9. from archivebox.plugins.chrome.tests.chrome_test_helpers import (
  10. get_test_env,
  11. get_machine_type,
  12. get_lib_dir,
  13. get_node_modules_dir,
  14. get_extensions_dir,
  15. find_chromium_binary,
  16. get_plugin_dir,
  17. get_hook_script,
  18. parse_jsonl_output,
  19. )
  20. def test_get_machine_type():
  21. """Test get_machine_type() returns valid format."""
  22. machine_type = get_machine_type()
  23. assert isinstance(machine_type, str)
  24. assert '-' in machine_type, "Machine type should be in format: arch-os"
  25. # Should be one of the expected formats
  26. assert any(x in machine_type for x in ['arm64', 'x86_64']), "Should contain valid architecture"
  27. assert any(x in machine_type for x in ['darwin', 'linux', 'win32']), "Should contain valid OS"
  28. def test_get_lib_dir_with_env_var():
  29. """Test get_lib_dir() respects LIB_DIR env var."""
  30. with tempfile.TemporaryDirectory() as tmpdir:
  31. custom_lib = Path(tmpdir) / 'custom_lib'
  32. custom_lib.mkdir()
  33. old_lib_dir = os.environ.get('LIB_DIR')
  34. try:
  35. os.environ['LIB_DIR'] = str(custom_lib)
  36. lib_dir = get_lib_dir()
  37. assert lib_dir == custom_lib
  38. finally:
  39. if old_lib_dir:
  40. os.environ['LIB_DIR'] = old_lib_dir
  41. else:
  42. os.environ.pop('LIB_DIR', None)
  43. def test_get_node_modules_dir_with_env_var():
  44. """Test get_node_modules_dir() respects NODE_MODULES_DIR env var."""
  45. with tempfile.TemporaryDirectory() as tmpdir:
  46. custom_nm = Path(tmpdir) / 'node_modules'
  47. custom_nm.mkdir()
  48. old_nm_dir = os.environ.get('NODE_MODULES_DIR')
  49. try:
  50. os.environ['NODE_MODULES_DIR'] = str(custom_nm)
  51. nm_dir = get_node_modules_dir()
  52. assert nm_dir == custom_nm
  53. finally:
  54. if old_nm_dir:
  55. os.environ['NODE_MODULES_DIR'] = old_nm_dir
  56. else:
  57. os.environ.pop('NODE_MODULES_DIR', None)
  58. def test_get_extensions_dir_default():
  59. """Test get_extensions_dir() returns expected path format."""
  60. ext_dir = get_extensions_dir()
  61. assert isinstance(ext_dir, str)
  62. assert 'personas' in ext_dir
  63. assert 'chrome_extensions' in ext_dir
  64. def test_get_extensions_dir_with_custom_persona():
  65. """Test get_extensions_dir() respects ACTIVE_PERSONA env var."""
  66. old_persona = os.environ.get('ACTIVE_PERSONA')
  67. old_data_dir = os.environ.get('DATA_DIR')
  68. try:
  69. os.environ['ACTIVE_PERSONA'] = 'TestPersona'
  70. os.environ['DATA_DIR'] = '/tmp/test'
  71. ext_dir = get_extensions_dir()
  72. assert 'TestPersona' in ext_dir
  73. assert '/tmp/test' in ext_dir
  74. finally:
  75. if old_persona:
  76. os.environ['ACTIVE_PERSONA'] = old_persona
  77. else:
  78. os.environ.pop('ACTIVE_PERSONA', None)
  79. if old_data_dir:
  80. os.environ['DATA_DIR'] = old_data_dir
  81. else:
  82. os.environ.pop('DATA_DIR', None)
  83. def test_get_test_env_returns_dict():
  84. """Test get_test_env() returns properly formatted environment dict."""
  85. env = get_test_env()
  86. assert isinstance(env, dict)
  87. # Should include key paths
  88. assert 'MACHINE_TYPE' in env
  89. assert 'LIB_DIR' in env
  90. assert 'NODE_MODULES_DIR' in env
  91. assert 'NODE_PATH' in env # Critical for module resolution
  92. assert 'NPM_BIN_DIR' in env
  93. assert 'CHROME_EXTENSIONS_DIR' in env
  94. # Verify NODE_PATH equals NODE_MODULES_DIR (for Node.js module resolution)
  95. assert env['NODE_PATH'] == env['NODE_MODULES_DIR']
  96. def test_get_test_env_paths_are_absolute():
  97. """Test that get_test_env() returns absolute paths."""
  98. env = get_test_env()
  99. # All path-like values should be absolute
  100. assert Path(env['LIB_DIR']).is_absolute()
  101. assert Path(env['NODE_MODULES_DIR']).is_absolute()
  102. assert Path(env['NODE_PATH']).is_absolute()
  103. def test_find_chromium_binary():
  104. """Test find_chromium_binary() returns a path or None."""
  105. binary = find_chromium_binary()
  106. if binary:
  107. assert isinstance(binary, str)
  108. # Should be an absolute path if found
  109. assert os.path.isabs(binary)
  110. def test_get_plugin_dir():
  111. """Test get_plugin_dir() finds correct plugin directory."""
  112. # Use this test file's path
  113. test_file = __file__
  114. plugin_dir = get_plugin_dir(test_file)
  115. assert plugin_dir.exists()
  116. assert plugin_dir.is_dir()
  117. # Should be the chrome plugin directory
  118. assert plugin_dir.name == 'chrome'
  119. assert (plugin_dir.parent.name == 'plugins')
  120. def test_get_hook_script_finds_existing_hook():
  121. """Test get_hook_script() can find an existing hook."""
  122. from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
  123. # Try to find the chrome launch hook
  124. hook = get_hook_script(CHROME_PLUGIN_DIR, 'on_Crawl__*_chrome_launch.*')
  125. if hook: # May not exist in all test environments
  126. assert hook.exists()
  127. assert hook.is_file()
  128. assert 'chrome_launch' in hook.name
  129. def test_get_hook_script_returns_none_for_missing():
  130. """Test get_hook_script() returns None for non-existent hooks."""
  131. from archivebox.plugins.chrome.tests.chrome_test_helpers import CHROME_PLUGIN_DIR
  132. hook = get_hook_script(CHROME_PLUGIN_DIR, 'nonexistent_hook_*_pattern.*')
  133. assert hook is None
  134. def test_parse_jsonl_output_valid():
  135. """Test parse_jsonl_output() parses valid JSONL."""
  136. jsonl_output = '''{"type": "ArchiveResult", "status": "succeeded", "output": "test1"}
  137. {"type": "ArchiveResult", "status": "failed", "error": "test2"}
  138. '''
  139. # Returns first match only
  140. result = parse_jsonl_output(jsonl_output)
  141. assert result is not None
  142. assert result['type'] == 'ArchiveResult'
  143. assert result['status'] == 'succeeded'
  144. assert result['output'] == 'test1'
  145. def test_parse_jsonl_output_with_non_json_lines():
  146. """Test parse_jsonl_output() skips non-JSON lines."""
  147. mixed_output = '''Some non-JSON output
  148. {"type": "ArchiveResult", "status": "succeeded"}
  149. More non-JSON
  150. {"type": "ArchiveResult", "status": "failed"}
  151. '''
  152. result = parse_jsonl_output(mixed_output)
  153. assert result is not None
  154. assert result['type'] == 'ArchiveResult'
  155. assert result['status'] == 'succeeded'
  156. def test_parse_jsonl_output_empty():
  157. """Test parse_jsonl_output() handles empty input."""
  158. result = parse_jsonl_output('')
  159. assert result is None
  160. def test_parse_jsonl_output_filters_by_type():
  161. """Test parse_jsonl_output() can filter by record type."""
  162. jsonl_output = '''{"type": "LogEntry", "data": "log1"}
  163. {"type": "ArchiveResult", "data": "result1"}
  164. {"type": "ArchiveResult", "data": "result2"}
  165. '''
  166. # Should return first ArchiveResult, not LogEntry
  167. result = parse_jsonl_output(jsonl_output, record_type='ArchiveResult')
  168. assert result is not None
  169. assert result['type'] == 'ArchiveResult'
  170. assert result['data'] == 'result1' # First ArchiveResult
  171. def test_parse_jsonl_output_filters_custom_type():
  172. """Test parse_jsonl_output() can filter by custom record type."""
  173. jsonl_output = '''{"type": "ArchiveResult", "data": "result1"}
  174. {"type": "LogEntry", "data": "log1"}
  175. {"type": "ArchiveResult", "data": "result2"}
  176. '''
  177. result = parse_jsonl_output(jsonl_output, record_type='LogEntry')
  178. assert result is not None
  179. assert result['type'] == 'LogEntry'
  180. assert result['data'] == 'log1'
  181. def test_machine_type_consistency():
  182. """Test that machine type is consistent across calls."""
  183. mt1 = get_machine_type()
  184. mt2 = get_machine_type()
  185. assert mt1 == mt2, "Machine type should be stable across calls"
  186. def test_lib_dir_is_directory():
  187. """Test that lib_dir points to an actual directory when DATA_DIR is set."""
  188. with tempfile.TemporaryDirectory() as tmpdir:
  189. old_data_dir = os.environ.get('DATA_DIR')
  190. try:
  191. os.environ['DATA_DIR'] = tmpdir
  192. # Create the expected directory structure
  193. machine_type = get_machine_type()
  194. lib_dir = Path(tmpdir) / 'lib' / machine_type
  195. lib_dir.mkdir(parents=True, exist_ok=True)
  196. result = get_lib_dir()
  197. # Should return a Path object
  198. assert isinstance(result, Path)
  199. finally:
  200. if old_data_dir:
  201. os.environ['DATA_DIR'] = old_data_dir
  202. else:
  203. os.environ.pop('DATA_DIR', None)
  204. if __name__ == '__main__':
  205. pytest.main([__file__, '-v'])