Browse Source

Merge pull request #388 from cdvv7788/remove-index

fix: Remove link from sql index on remove command
Nick Sweeting 5 years ago
parent
commit
523c384e62
3 changed files with 27 additions and 3 deletions
  1. +10 -0
      archivebox/index/sql.py
  2. +9 -3
      archivebox/main.py
  3. +8 -0
      tests/test_remove.py

+ 10 - 0
archivebox/index/sql.py

@@ -20,6 +20,16 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
         for page in Snapshot.objects.all()
     )
 
+@enforce_types
+def remove_from_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:  # delete the Snapshot rows matching the given links; returns nothing
+    setup_django(out_dir, check_db=True)  # called before the model imports below (presumably required for them to work -- TODO confirm)
+    from core.models import Snapshot
+    from django.db import transaction
+
+    with transaction.atomic():  # every per-link delete below runs inside this one atomic block
+        for link in links:
+            Snapshot.objects.filter(url=link.url).delete()  # rows are matched on the link's URL
+
 @enforce_types
 def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
     setup_django(out_dir, check_db=True)

+ 9 - 3
archivebox/main.py

@@ -49,6 +49,7 @@ from .index.sql import (
     parse_sql_main_index,
     get_admins,
     apply_migrations,
+    remove_from_sql_main_index,
 )
 from .index.html import parse_html_main_index
 from .extractors import archive_links
@@ -600,6 +601,7 @@ def remove(filter_str: Optional[str]=None,
     timer = TimedProgress(360, prefix='      ')
     try:
         to_keep = []
+        to_delete = []
         all_links = load_main_index(out_dir=out_dir)
         for link in all_links:
             should_remove = (
@@ -607,13 +609,17 @@ def remove(filter_str: Optional[str]=None,
                 or (before is not None and float(link.timestamp) > before)
                 or link_matches_filter(link, filter_patterns, filter_type)
             )
-            if not should_remove:
+            if should_remove:
+                to_delete.append(link)
+
+                if delete:
+                    shutil.rmtree(link.link_dir, ignore_errors=True)
+            else:
                 to_keep.append(link)
-            elif should_remove and delete:
-                shutil.rmtree(link.link_dir, ignore_errors=True)
     finally:
         timer.end()
 
+    remove_from_sql_main_index(links=to_delete, out_dir=out_dir)
     write_main_index(links=to_keep, out_dir=out_dir, finished=True)
     log_removal_finished(len(all_links), len(to_keep))
     

+ 8 - 0
tests/test_remove.py

@@ -0,0 +1,8 @@
+from .fixtures import *
+
+def test_remove_leaves_index_in_consistent_state(tmp_path, process):  # add a link, remove it, then check `list` reports no SQL/JSON index mismatch
+    os.chdir(tmp_path)  # run the CLI commands from the temporary directory
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True)  # add one link so there is something to remove
+    remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)  # result is captured but not inspected by this test
+    list_process = subprocess.run(['archivebox', 'list'], capture_output=True)
+    assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")  # stderr of `archivebox list` must not contain the mismatch warning