test_remove.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. import os
  2. import sqlite3
  3. from .fixtures import *
  4. def test_remove_single_page(tmp_path, process, disable_extractors_dict):
  5. os.chdir(tmp_path)
  6. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  7. remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
  8. assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")
  9. conn = sqlite3.connect("index.sqlite3")
  10. c = conn.cursor()
  11. count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
  12. conn.commit()
  13. conn.close()
  14. assert count == 0
  15. def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
  16. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  17. assert list((tmp_path / "archive").iterdir()) != []
  18. subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes', '--delete'], capture_output=True)
  19. assert list((tmp_path / "archive").iterdir()) == []
  20. def test_remove_regex(tmp_path, process, disable_extractors_dict):
  21. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  22. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  23. assert list((tmp_path / "archive").iterdir()) != []
  24. subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
  25. assert list((tmp_path / "archive").iterdir()) == []
  26. def test_remove_exact(tmp_path, process, disable_extractors_dict):
  27. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  28. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  29. assert list((tmp_path / "archive").iterdir()) != []
  30. remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)
  31. assert len(list((tmp_path / "archive").iterdir())) == 1
  32. def test_remove_substr(tmp_path, process, disable_extractors_dict):
  33. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  34. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  35. assert list((tmp_path / "archive").iterdir()) != []
  36. subprocess.run(['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete'], capture_output=True)
  37. assert len(list((tmp_path / "archive").iterdir())) == 1
  38. def test_remove_domain(tmp_path, process, disable_extractors_dict):
  39. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  40. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  41. assert list((tmp_path / "archive").iterdir()) != []
  42. remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)
  43. assert len(list((tmp_path / "archive").iterdir())) == 0
  44. conn = sqlite3.connect("index.sqlite3")
  45. c = conn.cursor()
  46. count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
  47. conn.commit()
  48. conn.close()
  49. assert count == 0
  50. def test_remove_tag(tmp_path, process, disable_extractors_dict):
  51. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  52. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  53. assert list((tmp_path / "archive").iterdir()) != []
  54. conn = sqlite3.connect("index.sqlite3")
  55. c = conn.cursor()
  56. c.execute("INSERT INTO core_tag (id, name, slug) VALUES (2, 'test-tag', 'test-tag')")
  57. snapshot_ids = c.execute("SELECT id from core_snapshot")
  58. c.executemany('INSERT INTO core_snapshot_tags (snapshot_id, tag_id) VALUES (?, 2)', list(snapshot_ids))
  59. conn.commit()
  60. remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=tag', 'test-tag', '--yes', '--delete'], capture_output=True)
  61. assert len(list((tmp_path / "archive").iterdir())) == 0
  62. count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
  63. conn.commit()
  64. conn.close()
  65. assert count == 0
  66. def test_remove_before(tmp_path, process, disable_extractors_dict):
  67. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  68. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  69. assert list((tmp_path / "archive").iterdir()) != []
  70. conn = sqlite3.connect("index.sqlite3")
  71. c = conn.cursor()
  72. timestamp = c.execute("SELECT timestamp FROM core_snapshot ORDER BY timestamp ASC").fetchall()
  73. conn.commit()
  74. conn.close()
  75. before = list(map(lambda x: int(x[0].split(".")[0]), timestamp))
  76. subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete', '--before', str(before[1])], capture_output=True)
  77. assert (tmp_path / "archive" / timestamp[0][0]).exists()
  78. assert not (tmp_path / "archive" / timestamp[1][0]).exists()
  79. def test_remove_after(tmp_path, process, disable_extractors_dict):
  80. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
  81. subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
  82. assert list((tmp_path / "archive").iterdir()) != []
  83. conn = sqlite3.connect("index.sqlite3")
  84. c = conn.cursor()
  85. timestamp = c.execute("SELECT timestamp FROM core_snapshot ORDER BY timestamp ASC").fetchall()
  86. conn.commit()
  87. conn.close()
  88. after = list(map(lambda x: int(x[0].split(".")[0]), timestamp))
  89. subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete', '--after', str(after[1])], capture_output=True)
  90. assert (tmp_path / "archive" / timestamp[1][0]).exists()
  91. assert not (tmp_path / "archive" / timestamp[0][0]).exists()