ソースを参照

Merge pull request #536 from jdcaballerov/tag-list-filter

Nick Sweeting 5 年 前
コミット
79a936835c

+ 1 - 1
archivebox/cli/archivebox_list.py

@@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex'),
+        choices=('exact', 'substring', 'domain', 'regex','tag'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )

+ 1 - 1
archivebox/cli/archivebox_remove.py

@@ -50,7 +50,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex'),
+        choices=('exact', 'substring', 'domain', 'regex','tag'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )

+ 1 - 0
archivebox/index/__init__.py

@@ -361,6 +361,7 @@ LINK_FILTERS = {
     'substring': lambda pattern: Q(url__icontains=pattern),
     'regex': lambda pattern: Q(url__iregex=pattern),
     'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
+    'tag': lambda pattern: Q(tags__name=pattern),
 }
 
 @enforce_types

+ 23 - 0
tests/test_remove.py

@@ -70,6 +70,29 @@ def test_remove_domain(tmp_path, process, disable_extractors_dict):
 
     assert count == 0
 
+
+def test_remove_tag(tmp_path, process, disable_extractors_dict):  # `archivebox remove --filter-type=tag` should leave no snapshots tagged 'test-tag'
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []  # precondition: archive/ is non-empty after the two adds
+    
+    conn = sqlite3.connect("index.sqlite3")  # relative path: resolved against the current working directory
+    c = conn.cursor()
+    c.execute("INSERT INTO core_tag (id, name, slug) VALUES (2, 'test-tag', 'test-tag')")  # create the tag row directly in the index DB
+    snapshot_ids = c.execute("SELECT id from core_snapshot")  # execute() returns the cursor itself; it yields one (id,) row per snapshot
+    c.executemany('INSERT INTO core_snapshot_tags (snapshot_id, tag_id) VALUES (?, 2)', list(snapshot_ids))  # list() drains the cursor before the same cursor is reused; links tag 2 to every snapshot
+    conn.commit()  # presumably so the archivebox subprocess below sees the inserted rows
+
+    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=tag', 'test-tag', '--yes', '--delete'], capture_output=True)  # result is captured but never inspected below
+
+    assert len(list((tmp_path / "archive").iterdir())) == 0  # archive/ must be empty after the removal
+
+    count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]  # number of snapshot rows left in the index
+    conn.commit()
+    conn.close()
+
+    assert count == 0  # every snapshot carried the tag, so none should remain
+
 def test_remove_before(tmp_path, process, disable_extractors_dict):
     subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
     subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)