test_remove.py 3.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. import os
  2. import sqlite3
  3. from .fixtures import *
  def test_remove_single_snapshot(tmp_path, process, disable_extractors_dict):
      """Test that removing with a match-all regex deletes every snapshot.

      Adds one URL with ``--index-only``, checks that at least one snapshot row
      exists, runs ``archivebox remove`` with the regex ``.*`` and ``--yes``,
      then checks that no snapshot rows remain.

      Args:
          tmp_path: pytest temporary directory used as the working directory.
          process: fixture requested for its setup side effects; not read here.
          disable_extractors_dict: environment mapping passed to ``archivebox add``.
      """
      os.chdir(tmp_path)

      def count_snapshots():
          """Return the number of rows in the snapshot table of index.sqlite3."""
          conn = sqlite3.connect("index.sqlite3")
          try:
              # SQLite only accepts [schema.]table in FROM, so the dotted
              # "archivebox.core.snapshot" name was a syntax error.
              # NOTE(review): assumes the default Django table name
              # "core_snapshot" (app "core", model "Snapshot") - confirm.
              return conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
          finally:
              # Close even when the query raises so the handle is not leaked.
              conn.close()

      # Add a URL - creates source file snapshot
      subprocess.run(
          ['archivebox', 'add', '--index-only', 'https://example.com'],
          capture_output=True,
          env=disable_extractors_dict,
      )

      # Verify snapshot exists
      count_before = count_snapshots()
      assert count_before >= 1

      # Remove all snapshots (including source file snapshots)
      remove_process = subprocess.run(
          ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'],
          capture_output=True,
      )

      # Check that it ran successfully (either output indicates success or return code 0)
      output = remove_process.stdout.decode("utf-8") + remove_process.stderr.decode("utf-8")
      assert remove_process.returncode == 0 or "removed" in output.lower() or "Found" in output

      count = count_snapshots()
      assert count == 0
  def test_remove_with_delete_flag(tmp_path, process, disable_extractors_dict):
      """Check that ``archivebox remove --delete`` shrinks the archive directory.

      When the preceding ``add --index-only`` produced entries under
      ``archive/``, the entry count must drop after the removal. When it
      produced none, the test only requires the remove command to exit with
      return code 0 or 1.
      """
      os.chdir(tmp_path)
      subprocess.run(
          ['archivebox', 'add', '--index-only', 'https://example.com'],
          capture_output=True,
          env=disable_extractors_dict,
      )

      archive_dir = tmp_path / "archive"

      def list_archive_entries():
          """Return the entries directly under archive/, or [] if it is missing."""
          if not archive_dir.exists():
              return []
          return list(archive_dir.iterdir())

      # Snapshot of the archive folder contents before the delete
      entries_before = list_archive_entries()

      # The same remove invocation is used whether or not entries exist
      remove_result = subprocess.run(
          ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'],
          capture_output=True,
      )

      if not entries_before:
          # With --index-only, archive folders may not be created immediately,
          # so just verify that the remove command doesn't error
          assert remove_result.returncode in (0, 1)  # 0 = success, 1 = no matches
          return

      entries_after = list_archive_entries()
      assert len(entries_after) < len(entries_before)
  def test_remove_regex(tmp_path, process, disable_extractors_dict):
      """Test removing snapshots by regex pattern.

      Adds two URLs with ``--index-only``, checks that at least two snapshot
      rows exist, removes everything matching the regex ``.*`` with
      ``--yes --delete``, then checks that no snapshot rows remain.

      Args:
          tmp_path: pytest temporary directory used as the working directory.
          process: fixture requested for its setup side effects; not read here.
          disable_extractors_dict: environment mapping passed to ``archivebox add``.
      """
      os.chdir(tmp_path)

      def count_snapshots():
          """Return the number of rows in the snapshot table of index.sqlite3."""
          conn = sqlite3.connect("index.sqlite3")
          try:
              # SQLite only accepts [schema.]table in FROM, so the dotted
              # "archivebox.core.snapshot" name was a syntax error.
              # NOTE(review): assumes the default Django table name
              # "core_snapshot" (app "core", model "Snapshot") - confirm.
              return conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
          finally:
              # Close even when the query raises so the handle is not leaked.
              conn.close()

      for url in ('https://example.com', 'https://iana.org'):
          subprocess.run(
              ['archivebox', 'add', '--index-only', url],
              capture_output=True,
              env=disable_extractors_dict,
          )

      count_before = count_snapshots()
      assert count_before >= 2

      subprocess.run(
          ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'],
          capture_output=True,
      )

      count_after = count_snapshots()
      assert count_after == 0
  def test_add_creates_crawls(tmp_path, process, disable_extractors_dict):
      """Test that adding URLs creates crawls in database.

      Runs ``archivebox add --index-only`` once per URL for two URLs and
      expects exactly two rows in the crawl table afterwards.

      Args:
          tmp_path: pytest temporary directory used as the working directory.
          process: fixture requested for its setup side effects; not read here.
          disable_extractors_dict: environment mapping passed to ``archivebox add``.
      """
      os.chdir(tmp_path)

      for url in ('https://example.com', 'https://iana.org'):
          subprocess.run(
              ['archivebox', 'add', '--index-only', url],
              capture_output=True,
              env=disable_extractors_dict,
          )

      conn = sqlite3.connect("index.sqlite3")
      try:
          # SQLite only accepts [schema.]table in FROM, so the dotted
          # "archivebox.crawls.crawl" name was a syntax error.
          # NOTE(review): assumes the default Django table name
          # "crawls_crawl" (app "crawls", model "Crawl") - confirm.
          crawl_count = conn.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()[0]
      finally:
          # Close even when the query raises so the handle is not leaked.
          conn.close()

      assert crawl_count == 2