2
0

test_cli_init.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. #!/usr/bin/env python3
  2. """
  3. Comprehensive tests for archivebox init command.
  4. Verify init creates correct database schema, filesystem structure, and config.
  5. """
  6. import os
  7. import subprocess
  8. import sqlite3
  9. from pathlib import Path
  10. from archivebox.config.common import STORAGE_CONFIG
  11. from .fixtures import *
  12. DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
  13. def test_init_creates_database_file(tmp_path):
  14. """Test that init creates index.sqlite3 database file."""
  15. os.chdir(tmp_path)
  16. result = subprocess.run(['archivebox', 'init'], capture_output=True)
  17. assert result.returncode == 0
  18. db_path = tmp_path / "index.sqlite3"
  19. assert db_path.exists()
  20. assert db_path.is_file()
  21. def test_init_creates_archive_directory(tmp_path):
  22. """Test that init creates archive directory."""
  23. os.chdir(tmp_path)
  24. subprocess.run(['archivebox', 'init'], capture_output=True)
  25. archive_dir = tmp_path / "archive"
  26. assert archive_dir.exists()
  27. assert archive_dir.is_dir()
  28. def test_init_creates_sources_directory(tmp_path):
  29. """Test that init creates sources directory."""
  30. os.chdir(tmp_path)
  31. subprocess.run(['archivebox', 'init'], capture_output=True)
  32. sources_dir = tmp_path / "sources"
  33. assert sources_dir.exists()
  34. assert sources_dir.is_dir()
  35. def test_init_creates_logs_directory(tmp_path):
  36. """Test that init creates logs directory."""
  37. os.chdir(tmp_path)
  38. subprocess.run(['archivebox', 'init'], capture_output=True)
  39. logs_dir = tmp_path / "logs"
  40. assert logs_dir.exists()
  41. assert logs_dir.is_dir()
  42. def test_init_creates_config_file(tmp_path):
  43. """Test that init creates ArchiveBox.conf config file."""
  44. os.chdir(tmp_path)
  45. subprocess.run(['archivebox', 'init'], capture_output=True)
  46. config_file = tmp_path / "ArchiveBox.conf"
  47. assert config_file.exists()
  48. assert config_file.is_file()
  49. def test_init_runs_migrations(tmp_path):
  50. """Test that init runs Django migrations and creates core tables."""
  51. os.chdir(tmp_path)
  52. subprocess.run(['archivebox', 'init'], capture_output=True)
  53. # Check that migrations were applied
  54. conn = sqlite3.connect("index.sqlite3")
  55. c = conn.cursor()
  56. # Check django_migrations table exists
  57. migrations = c.execute(
  58. "SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'"
  59. ).fetchall()
  60. assert len(migrations) == 1
  61. # Check that some migrations were applied
  62. migration_count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0]
  63. assert migration_count > 0
  64. conn.close()
  65. def test_init_creates_core_snapshot_table(tmp_path):
  66. """Test that init creates core_snapshot table."""
  67. os.chdir(tmp_path)
  68. subprocess.run(['archivebox', 'init'], capture_output=True)
  69. conn = sqlite3.connect("index.sqlite3")
  70. c = conn.cursor()
  71. # Check core_snapshot table exists
  72. tables = c.execute(
  73. "SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'"
  74. ).fetchall()
  75. assert len(tables) == 1
  76. conn.close()
  77. def test_init_creates_crawls_crawl_table(tmp_path):
  78. """Test that init creates crawls_crawl table."""
  79. os.chdir(tmp_path)
  80. subprocess.run(['archivebox', 'init'], capture_output=True)
  81. conn = sqlite3.connect("index.sqlite3")
  82. c = conn.cursor()
  83. # Check crawls_crawl table exists
  84. tables = c.execute(
  85. "SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'"
  86. ).fetchall()
  87. assert len(tables) == 1
  88. conn.close()
  89. def test_init_creates_core_archiveresult_table(tmp_path):
  90. """Test that init creates core_archiveresult table."""
  91. os.chdir(tmp_path)
  92. subprocess.run(['archivebox', 'init'], capture_output=True)
  93. conn = sqlite3.connect("index.sqlite3")
  94. c = conn.cursor()
  95. # Check core_archiveresult table exists
  96. tables = c.execute(
  97. "SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'"
  98. ).fetchall()
  99. assert len(tables) == 1
  100. conn.close()
  101. def test_init_sets_correct_file_permissions(tmp_path):
  102. """Test that init sets correct permissions on created files."""
  103. os.chdir(tmp_path)
  104. subprocess.run(['archivebox', 'init'], capture_output=True)
  105. # Check database permissions
  106. db_path = tmp_path / "index.sqlite3"
  107. assert oct(db_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
  108. # Check directory permissions
  109. archive_dir = tmp_path / "archive"
  110. assert oct(archive_dir.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
  111. def test_init_is_idempotent(tmp_path):
  112. """Test that running init multiple times is safe (idempotent)."""
  113. os.chdir(tmp_path)
  114. # First init
  115. result1 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
  116. assert result1.returncode == 0
  117. assert "Initializing a new ArchiveBox" in result1.stdout
  118. # Second init should update, not fail
  119. result2 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
  120. assert result2.returncode == 0
  121. assert "updating existing ArchiveBox" in result2.stdout or "up-to-date" in result2.stdout.lower()
  122. # Database should still be valid
  123. conn = sqlite3.connect("index.sqlite3")
  124. c = conn.cursor()
  125. count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0]
  126. assert count > 0
  127. conn.close()
  128. def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_extractors_dict):
  129. """Test that re-running init preserves existing snapshot data."""
  130. os.chdir(tmp_path)
  131. # Add a snapshot
  132. subprocess.run(
  133. ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
  134. capture_output=True,
  135. env=disable_extractors_dict,
  136. )
  137. # Check snapshot was created
  138. conn = sqlite3.connect("index.sqlite3")
  139. c = conn.cursor()
  140. count_before = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
  141. assert count_before == 1
  142. conn.close()
  143. # Run init again
  144. result = subprocess.run(['archivebox', 'init'], capture_output=True)
  145. assert result.returncode == 0
  146. # Snapshot should still exist
  147. conn = sqlite3.connect("index.sqlite3")
  148. c = conn.cursor()
  149. count_after = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
  150. assert count_after == count_before
  151. conn.close()
  152. def test_init_quick_flag_skips_checks(tmp_path):
  153. """Test that init --quick runs faster by skipping some checks."""
  154. os.chdir(tmp_path)
  155. result = subprocess.run(['archivebox', 'init', '--quick'], capture_output=True, text=True)
  156. assert result.returncode == 0
  157. # Database should still be created
  158. db_path = tmp_path / "index.sqlite3"
  159. assert db_path.exists()
  160. def test_init_creates_machine_table(tmp_path):
  161. """Test that init creates the machine_machine table."""
  162. os.chdir(tmp_path)
  163. subprocess.run(['archivebox', 'init'], capture_output=True)
  164. conn = sqlite3.connect("index.sqlite3")
  165. c = conn.cursor()
  166. # Check machine_machine table exists
  167. tables = c.execute(
  168. "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'"
  169. ).fetchall()
  170. conn.close()
  171. assert len(tables) == 1
  172. def test_init_output_shows_collection_info(tmp_path):
  173. """Test that init output shows helpful collection information."""
  174. os.chdir(tmp_path)
  175. result = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
  176. output = result.stdout
  177. # Should show some helpful info about the collection
  178. assert 'ArchiveBox' in output or 'collection' in output.lower() or 'Initializing' in output