test_cli_run_binary_worker.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. """
  2. Tests for BinaryWorker processing Binary queue.
  3. Tests cover:
  4. - BinaryWorker is spawned by Orchestrator when Binary queue has work
  5. - Binary hooks (on_Binary__*) actually run and install binaries
  6. - Binary status transitions from QUEUED -> INSTALLED
  7. - BinaryWorker exits after idle timeout
  8. """
  9. import json
  10. import sqlite3
  11. import time
  12. from archivebox.tests.conftest import (
  13. run_archivebox_cmd,
  14. parse_jsonl_output,
  15. )
  16. class TestBinaryWorkerSpawning:
  17. """Tests for BinaryWorker lifecycle."""
  18. def test_binary_worker_spawns_when_binary_queued(self, initialized_archive):
  19. """Orchestrator spawns BinaryWorker when Binary queue has work."""
  20. # Create a Binary record via CLI
  21. binary_record = {
  22. 'type': 'Binary',
  23. 'name': 'python3',
  24. 'binproviders': 'env', # Use env provider to detect system python
  25. }
  26. # Use `archivebox run` to create the Binary (this queues it)
  27. stdout, stderr, code = run_archivebox_cmd(
  28. ['run'],
  29. stdin=json.dumps(binary_record),
  30. data_dir=initialized_archive,
  31. timeout=60, # Increased timeout to allow for binary installation
  32. )
  33. assert code == 0, f"Failed to create Binary: {stderr}"
  34. # Verify Binary was created in DB
  35. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  36. c = conn.cursor()
  37. binaries = c.execute(
  38. "SELECT name, status, abspath FROM machine_binary WHERE name='python3'"
  39. ).fetchall()
  40. conn.close()
  41. assert len(binaries) >= 1, "Binary was not created in database"
  42. name, status, abspath = binaries[0]
  43. assert name == 'python3'
  44. # Status should be INSTALLED after BinaryWorker processed it
  45. # (or QUEUED if worker timed out before installing)
  46. assert status in ['installed', 'queued']
  47. def test_binary_hooks_actually_run(self, initialized_archive):
  48. """Binary installation hooks (on_Binary__*) run and update abspath."""
  49. # Create a Binary for python3 (guaranteed to exist on system)
  50. binary_record = {
  51. 'type': 'Binary',
  52. 'name': 'python3',
  53. 'binproviders': 'env',
  54. }
  55. stdout, stderr, code = run_archivebox_cmd(
  56. ['run'],
  57. stdin=json.dumps(binary_record),
  58. data_dir=initialized_archive,
  59. timeout=30,
  60. )
  61. assert code == 0, f"Failed to process Binary: {stderr}"
  62. # Query database to check if hooks ran and populated abspath
  63. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  64. c = conn.cursor()
  65. result = c.execute(
  66. "SELECT name, status, abspath, version FROM machine_binary WHERE name='python3'"
  67. ).fetchone()
  68. conn.close()
  69. assert result is not None, "Binary not found in database"
  70. name, status, abspath, version = result
  71. # If hooks ran successfully, abspath should be populated
  72. if status == 'installed':
  73. assert abspath, f"Binary installed but abspath is empty: {abspath}"
  74. assert '/python3' in abspath or '\\python3' in abspath, \
  75. f"abspath doesn't look like a python3 path: {abspath}"
  76. # Version should also be populated
  77. assert version, f"Binary installed but version is empty: {version}"
  78. def test_binary_status_transitions(self, initialized_archive):
  79. """Binary status correctly transitions QUEUED -> INSTALLED."""
  80. binary_record = {
  81. 'type': 'Binary',
  82. 'name': 'python3',
  83. 'binproviders': 'env',
  84. }
  85. # Create and process the Binary
  86. stdout, stderr, code = run_archivebox_cmd(
  87. ['run'],
  88. stdin=json.dumps(binary_record),
  89. data_dir=initialized_archive,
  90. timeout=30,
  91. )
  92. assert code == 0
  93. # Check final status
  94. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  95. c = conn.cursor()
  96. status = c.execute(
  97. "SELECT status FROM machine_binary WHERE name='python3'"
  98. ).fetchone()
  99. conn.close()
  100. assert status is not None
  101. # Should be installed (or queued if worker timed out)
  102. assert status[0] in ['installed', 'queued']
  103. class TestBinaryWorkerHooks:
  104. """Tests for specific Binary hook providers."""
  105. def test_env_provider_hook_detects_system_binary(self, initialized_archive):
  106. """on_Binary__15_env_install.py hook detects system binaries."""
  107. binary_record = {
  108. 'type': 'Binary',
  109. 'name': 'python3',
  110. 'binproviders': 'env',
  111. }
  112. stdout, stderr, code = run_archivebox_cmd(
  113. ['run'],
  114. stdin=json.dumps(binary_record),
  115. data_dir=initialized_archive,
  116. timeout=30,
  117. )
  118. assert code == 0
  119. # Check that env provider hook populated the Binary
  120. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  121. c = conn.cursor()
  122. result = c.execute(
  123. "SELECT binprovider, abspath FROM machine_binary WHERE name='python3' AND status='installed'"
  124. ).fetchone()
  125. conn.close()
  126. if result:
  127. binprovider, abspath = result
  128. assert binprovider == 'env', f"Expected env provider, got: {binprovider}"
  129. assert abspath, "abspath should be populated by env provider"
  130. def test_multiple_binaries_processed_in_batch(self, initialized_archive):
  131. """BinaryWorker processes multiple queued binaries."""
  132. # Create multiple Binary records
  133. binaries = [
  134. {'type': 'Binary', 'name': 'python3', 'binproviders': 'env'},
  135. {'type': 'Binary', 'name': 'curl', 'binproviders': 'env'},
  136. ]
  137. stdin = '\n'.join(json.dumps(b) for b in binaries)
  138. stdout, stderr, code = run_archivebox_cmd(
  139. ['run'],
  140. stdin=stdin,
  141. data_dir=initialized_archive,
  142. timeout=90, # Need more time for multiple binaries
  143. )
  144. assert code == 0
  145. # Both should be processed
  146. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  147. c = conn.cursor()
  148. installed = c.execute(
  149. "SELECT name FROM machine_binary WHERE name IN ('python3', 'curl')"
  150. ).fetchall()
  151. conn.close()
  152. assert len(installed) >= 1, "At least one binary should be created"
  153. class TestBinaryWorkerEdgeCases:
  154. """Tests for edge cases and error handling."""
  155. def test_nonexistent_binary_stays_queued(self, initialized_archive):
  156. """Binary that doesn't exist stays queued (doesn't fail permanently)."""
  157. binary_record = {
  158. 'type': 'Binary',
  159. 'name': 'nonexistent-binary-xyz-12345',
  160. 'binproviders': 'env',
  161. }
  162. stdout, stderr, code = run_archivebox_cmd(
  163. ['run'],
  164. stdin=json.dumps(binary_record),
  165. data_dir=initialized_archive,
  166. timeout=30,
  167. )
  168. # Command should still succeed (orchestrator doesn't fail on binary install failures)
  169. assert code == 0
  170. # Binary should remain queued (not installed)
  171. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  172. c = conn.cursor()
  173. result = c.execute(
  174. "SELECT status FROM machine_binary WHERE name='nonexistent-binary-xyz-12345'"
  175. ).fetchone()
  176. conn.close()
  177. if result:
  178. status = result[0]
  179. # Should stay queued since installation failed
  180. assert status == 'queued', f"Expected queued, got: {status}"
  181. def test_binary_worker_respects_machine_isolation(self, initialized_archive):
  182. """BinaryWorker only processes binaries for current machine."""
  183. # This is implicitly tested by other tests - Binary.objects.filter(machine=current)
  184. # ensures only current machine's binaries are processed
  185. binary_record = {
  186. 'type': 'Binary',
  187. 'name': 'python3',
  188. 'binproviders': 'env',
  189. }
  190. stdout, stderr, code = run_archivebox_cmd(
  191. ['run'],
  192. stdin=json.dumps(binary_record),
  193. data_dir=initialized_archive,
  194. timeout=30,
  195. )
  196. assert code == 0
  197. # Check that machine_id is set correctly
  198. conn = sqlite3.connect(initialized_archive / 'index.sqlite3')
  199. c = conn.cursor()
  200. result = c.execute(
  201. "SELECT machine_id FROM machine_binary WHERE name='python3'"
  202. ).fetchone()
  203. conn.close()
  204. assert result is not None
  205. machine_id = result[0]
  206. assert machine_id, "machine_id should be set on Binary"