| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
import os
import sqlite3
import subprocess
from contextlib import closing

from .fixtures import *
def test_remove_single_snapshot(tmp_path, process, disable_extractors_dict):
    """Test that removing with a match-all regex leaves no snapshots behind.

    Adds one URL in index-only mode, checks that at least one snapshot row
    exists, runs ``archivebox remove`` with the regex ``.*`` and then checks
    that the snapshot table is empty.

    Args:
        tmp_path: pytest temporary directory used as the working directory.
        process: fixture from ``.fixtures`` (not referenced in the body).
        disable_extractors_dict: environment mapping passed to ``archivebox add``.
    """
    os.chdir(tmp_path)

    # Add a URL - creates source file snapshot
    subprocess.run(
        ['archivebox', 'add', '--index-only', 'https://example.com'],
        capture_output=True,
        env=disable_extractors_dict,
    )

    # Verify snapshot exists.
    # NOTE(review): the table name assumes Django's default
    # "<app_label>_<model>" naming for the core app's Snapshot model --
    # confirm against the schema. A three-part dotted name is not valid
    # in an SQLite FROM clause.
    # closing() guarantees the connection is released even if the query raises.
    with closing(sqlite3.connect("index.sqlite3")) as conn:
        count_before = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    assert count_before >= 1

    # Remove all snapshots (including source file snapshots)
    remove_process = subprocess.run(
        ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'],
        capture_output=True,
    )

    # Check that it ran successfully (either output indicates success or return code 0)
    output = remove_process.stdout.decode("utf-8") + remove_process.stderr.decode("utf-8")
    assert remove_process.returncode == 0 or "removed" in output.lower() or "Found" in output

    with closing(sqlite3.connect("index.sqlite3")) as conn:
        count = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    assert count == 0
def test_remove_with_delete_flag(tmp_path, process, disable_extractors_dict):
    """Check that ``archivebox remove --delete`` shrinks the archive folder.

    When the preceding index-only ``add`` produced no entries under
    ``archive/``, only the exit status of the remove command is checked.
    """
    os.chdir(tmp_path)

    add_cmd = ['archivebox', 'add', '--index-only', 'https://example.com']
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)

    def list_archive_entries():
        # A missing archive directory is treated the same as an empty one.
        archive_root = tmp_path / "archive"
        return list(archive_root.iterdir()) if archive_root.exists() else []

    # Snapshot of the archive folder before anything is deleted
    entries_before = list_archive_entries()

    # The same remove command is issued whether or not archives exist.
    remove_cmd = ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete']
    outcome = subprocess.run(remove_cmd, capture_output=True)

    if not entries_before:
        # With --index-only, archive folders may not be created immediately,
        # so just verify that the remove command doesn't error.
        assert outcome.returncode in (0, 1)  # 0 = success, 1 = no matches
        return

    entries_after = list_archive_entries()
    assert len(entries_after) < len(entries_before)
def test_remove_regex(tmp_path, process, disable_extractors_dict):
    """Test removing snapshots by regex pattern.

    Adds two URLs in index-only mode, checks that at least two snapshot rows
    exist, removes everything matching ``.*`` (with ``--delete``) and checks
    that no snapshot rows remain.

    Args:
        tmp_path: pytest temporary directory used as the working directory.
        process: fixture from ``.fixtures`` (not referenced in the body).
        disable_extractors_dict: environment mapping passed to ``archivebox add``.
    """
    os.chdir(tmp_path)

    for url in ('https://example.com', 'https://iana.org'):
        subprocess.run(
            ['archivebox', 'add', '--index-only', url],
            capture_output=True,
            env=disable_extractors_dict,
        )

    # NOTE(review): the table name assumes Django's default
    # "<app_label>_<model>" naming for the core app's Snapshot model --
    # confirm against the schema. A three-part dotted name is not valid
    # in an SQLite FROM clause.
    # closing() guarantees the connection is released even if the query raises.
    with closing(sqlite3.connect("index.sqlite3")) as conn:
        count_before = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    assert count_before >= 2

    subprocess.run(
        ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'],
        capture_output=True,
    )

    with closing(sqlite3.connect("index.sqlite3")) as conn:
        count_after = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    assert count_after == 0
def test_add_creates_crawls(tmp_path, process, disable_extractors_dict):
    """Test that adding URLs creates crawls in the database.

    Runs ``archivebox add --index-only`` once per URL and expects exactly
    one crawl row per invocation.

    Args:
        tmp_path: pytest temporary directory used as the working directory.
        process: fixture from ``.fixtures`` (not referenced in the body).
        disable_extractors_dict: environment mapping passed to ``archivebox add``.
    """
    os.chdir(tmp_path)

    for url in ('https://example.com', 'https://iana.org'):
        subprocess.run(
            ['archivebox', 'add', '--index-only', url],
            capture_output=True,
            env=disable_extractors_dict,
        )

    # NOTE(review): the table name assumes Django's default
    # "<app_label>_<model>" naming for the crawls app's Crawl model --
    # confirm against the schema. A three-part dotted name is not valid
    # in an SQLite FROM clause.
    # closing() guarantees the connection is released even if the query raises.
    with closing(sqlite3.connect("index.sqlite3")) as conn:
        crawl_count = conn.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()[0]
    assert crawl_count == 2
|