software
/
archival.ArchiveBox
同期ミラー https://github.com/ArchiveBox/ArchiveBox.git


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
							import os
import sqlite3

from .fixtures import *

def test_remove_single_snapshot(tmp_path, process, disable_extractors_dict):
    """Test removing a snapshot by URL pattern"""
    os.chdir(tmp_path)
    # Add a URL - creates source file snapshot
    subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)

    # Verify snapshot exists
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count_before = c.execute("SELECT COUNT() from archivebox.core.snapshot").fetchone()[0]
    conn.close()
    assert count_before >= 1

    # Remove all snapshots (including source file snapshots)
    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'], capture_output=True)
    # Check that it ran successfully (either output indicates success or return code 0)
    output = remove_process.stdout.decode("utf-8") + remove_process.stderr.decode("utf-8")
    assert remove_process.returncode == 0 or "removed" in output.lower() or "Found" in output

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count = c.execute("SELECT COUNT() from archivebox.core.snapshot").fetchone()[0]
    conn.close()

    assert count == 0


def test_remove_with_delete_flag(tmp_path, process, disable_extractors_dict):
    """Test removing snapshot with --delete also removes archive folder"""
    os.chdir(tmp_path)
    subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)

    # Get archives before delete
    archive_dir = tmp_path / "archive"
    archives_before = list(archive_dir.iterdir()) if archive_dir.exists() else []

    # Only run the rest of the test if archives were created
    if archives_before:
        subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
        archives_after = list(archive_dir.iterdir()) if archive_dir.exists() else []
        assert len(archives_after) < len(archives_before)
    else:
        # With --index-only, archive folders may not be created immediately
        # Just verify that remove command doesn't error
        remove_result = subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
        assert remove_result.returncode in (0, 1)  # 0 = success, 1 = no matches


def test_remove_regex(tmp_path, process, disable_extractors_dict):
    """Test removing snapshots by regex pattern"""
    os.chdir(tmp_path)
    subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True, env=disable_extractors_dict)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count_before = c.execute("SELECT COUNT() from archivebox.core.snapshot").fetchone()[0]
    conn.close()
    assert count_before >= 2

    subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count_after = c.execute("SELECT COUNT() from archivebox.core.snapshot").fetchone()[0]
    conn.close()
    assert count_after == 0


def test_add_creates_crawls(tmp_path, process, disable_extractors_dict):
    """Test that adding URLs creates crawls in database"""
    os.chdir(tmp_path)
    subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True, env=disable_extractors_dict)

    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    crawl_count = c.execute("SELECT COUNT() from archivebox.crawls.crawl").fetchone()[0]
    conn.close()

    assert crawl_count == 2