| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- #!/usr/bin/env python3
- """
- Comprehensive tests for archivebox init command.
- Verify init creates correct database schema, filesystem structure, and config.
- """
- import os
- import subprocess
- import sqlite3
- from pathlib import Path
- from archivebox.config.common import STORAGE_CONFIG
- from .fixtures import *
- DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
- def test_init_creates_database_file(tmp_path):
- """Test that init creates index.sqlite3 database file."""
- os.chdir(tmp_path)
- result = subprocess.run(['archivebox', 'init'], capture_output=True)
- assert result.returncode == 0
- db_path = tmp_path / "index.sqlite3"
- assert db_path.exists()
- assert db_path.is_file()
- def test_init_creates_archive_directory(tmp_path):
- """Test that init creates archive directory."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- archive_dir = tmp_path / "archive"
- assert archive_dir.exists()
- assert archive_dir.is_dir()
- def test_init_creates_sources_directory(tmp_path):
- """Test that init creates sources directory."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- sources_dir = tmp_path / "sources"
- assert sources_dir.exists()
- assert sources_dir.is_dir()
- def test_init_creates_logs_directory(tmp_path):
- """Test that init creates logs directory."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- logs_dir = tmp_path / "logs"
- assert logs_dir.exists()
- assert logs_dir.is_dir()
- def test_init_creates_config_file(tmp_path):
- """Test that init creates ArchiveBox.conf config file."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- config_file = tmp_path / "ArchiveBox.conf"
- assert config_file.exists()
- assert config_file.is_file()
- def test_init_runs_migrations(tmp_path):
- """Test that init runs Django migrations and creates core tables."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- # Check that migrations were applied
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- # Check django_migrations table exists
- migrations = c.execute(
- "SELECT name FROM sqlite_master WHERE type='table' AND name='django_migrations'"
- ).fetchall()
- assert len(migrations) == 1
- # Check that some migrations were applied
- migration_count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0]
- assert migration_count > 0
- conn.close()
- def test_init_creates_core_snapshot_table(tmp_path):
- """Test that init creates core_snapshot table."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- # Check core_snapshot table exists
- tables = c.execute(
- "SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot'"
- ).fetchall()
- assert len(tables) == 1
- conn.close()
- def test_init_creates_crawls_crawl_table(tmp_path):
- """Test that init creates crawls_crawl table."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- # Check crawls_crawl table exists
- tables = c.execute(
- "SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'"
- ).fetchall()
- assert len(tables) == 1
- conn.close()
- def test_init_creates_core_archiveresult_table(tmp_path):
- """Test that init creates core_archiveresult table."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- # Check core_archiveresult table exists
- tables = c.execute(
- "SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'"
- ).fetchall()
- assert len(tables) == 1
- conn.close()
- def test_init_sets_correct_file_permissions(tmp_path):
- """Test that init sets correct permissions on created files."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- # Check database permissions
- db_path = tmp_path / "index.sqlite3"
- assert oct(db_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
- # Check directory permissions
- archive_dir = tmp_path / "archive"
- assert oct(archive_dir.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
- def test_init_is_idempotent(tmp_path):
- """Test that running init multiple times is safe (idempotent)."""
- os.chdir(tmp_path)
- # First init
- result1 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
- assert result1.returncode == 0
- assert "Initializing a new ArchiveBox" in result1.stdout
- # Second init should update, not fail
- result2 = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
- assert result2.returncode == 0
- assert "updating existing ArchiveBox" in result2.stdout or "up-to-date" in result2.stdout.lower()
- # Database should still be valid
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- count = c.execute("SELECT COUNT(*) FROM django_migrations").fetchone()[0]
- assert count > 0
- conn.close()
- def test_init_with_existing_data_preserves_snapshots(tmp_path, process, disable_extractors_dict):
- """Test that re-running init preserves existing snapshot data."""
- os.chdir(tmp_path)
- # Add a snapshot
- subprocess.run(
- ['archivebox', 'add', '--index-only', '--depth=0', 'https://example.com'],
- capture_output=True,
- env=disable_extractors_dict,
- )
- # Check snapshot was created
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- count_before = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
- assert count_before == 1
- conn.close()
- # Run init again
- result = subprocess.run(['archivebox', 'init'], capture_output=True)
- assert result.returncode == 0
- # Snapshot should still exist
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- count_after = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
- assert count_after == count_before
- conn.close()
- def test_init_quick_flag_skips_checks(tmp_path):
- """Test that init --quick runs faster by skipping some checks."""
- os.chdir(tmp_path)
- result = subprocess.run(['archivebox', 'init', '--quick'], capture_output=True, text=True)
- assert result.returncode == 0
- # Database should still be created
- db_path = tmp_path / "index.sqlite3"
- assert db_path.exists()
- def test_init_creates_machine_table(tmp_path):
- """Test that init creates the machine_machine table."""
- os.chdir(tmp_path)
- subprocess.run(['archivebox', 'init'], capture_output=True)
- conn = sqlite3.connect("index.sqlite3")
- c = conn.cursor()
- # Check machine_machine table exists
- tables = c.execute(
- "SELECT name FROM sqlite_master WHERE type='table' AND name='machine_machine'"
- ).fetchall()
- conn.close()
- assert len(tables) == 1
- def test_init_output_shows_collection_info(tmp_path):
- """Test that init output shows helpful collection information."""
- os.chdir(tmp_path)
- result = subprocess.run(['archivebox', 'init'], capture_output=True, text=True)
- output = result.stdout
- # Should show some helpful info about the collection
- assert 'ArchiveBox' in output or 'collection' in output.lower() or 'Initializing' in output
|