浏览代码

feat: Add tests for remove command

Cristian 5 年之前
父节点
当前提交
fe9604a772
共有 4 个文件被更改,包括 68 次插入和 8 次删除
  1. + 0 - 1
      archivebox/extractors/__init__.py
  2. + 1 - 2
      archivebox/index/__init__.py
  3. + 0 - 1
      archivebox/index/sql.py
  4. + 67 - 4
      tests/test_remove.py

+ 0 - 1
archivebox/extractors/__init__.py

@@ -9,7 +9,6 @@ from ..index.schema import Link
 from ..index import (
     load_link_details,
     write_link_details,
-    write_main_index,
 )
 from ..util import enforce_types
 from ..logging_util import (

+ 1 - 2
archivebox/index/__init__.py

@@ -1,6 +1,5 @@
 __package__ = 'archivebox.index'
 
-import re
 import os
 import shutil
 import json as pyjson
@@ -373,7 +372,7 @@ LINK_FILTERS = {
     'exact': lambda pattern: Q(url=pattern),
     'substring': lambda pattern: Q(url__icontains=pattern),
     'regex': lambda pattern: Q(url__iregex=pattern),
-    'domain': lambda pattern: Q(domain=pattern),
+    'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
 }
 
 @enforce_types

+ 0 - 1
archivebox/index/sql.py

@@ -24,7 +24,6 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
 @enforce_types
 def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
     setup_django(out_dir, check_db=True)
-    from core.models import Snapshot
     from django.db import transaction
 
     with transaction.atomic():

+ 67 - 4
tests/test_remove.py

@@ -1,8 +1,71 @@
+import os
+import sqlite3
+
 from .fixtures import *
 
-def test_remove_leaves_index_in_consistent_state(tmp_path, process, disable_extractors_dict):
+def test_remove_single_page(tmp_path, process, disable_extractors_dict):
     os.chdir(tmp_path)
     subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
-    remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
-    list_process = subprocess.run(['archivebox', 'list'], capture_output=True)
-    assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")
+    remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
+    assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
+    conn.commit()
+    conn.close()
+
+    assert count == 0
+
+
+def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []
+
+    subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes', '--delete'], capture_output=True)
+
+    assert list((tmp_path / "archive").iterdir()) == []
+
+def test_remove_regex(tmp_path, process, disable_extractors_dict):
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []
+
+    subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
+
+    assert list((tmp_path / "archive").iterdir()) == []
+
+def test_remove_exact(tmp_path, process, disable_extractors_dict):
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []
+
+    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)
+
+    assert len(list((tmp_path / "archive").iterdir())) == 1
+
+def test_remove_substr(tmp_path, process, disable_extractors_dict):
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []
+
+    subprocess.run(['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete'], capture_output=True)
+
+    assert len(list((tmp_path / "archive").iterdir())) == 1
+
+def test_remove_domain(tmp_path, process, disable_extractors_dict):
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []
+
+    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)
+
+    assert len(list((tmp_path / "archive").iterdir())) == 0
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
+    conn.commit()
+    conn.close()
+
+    assert count == 0