|
|
@@ -21,19 +21,19 @@ from archivebox.tests.conftest import (
|
|
|
class TestArchiveResultCreate:
|
|
|
"""Tests for `archivebox archiveresult create`."""
|
|
|
|
|
|
- def test_create_from_snapshot_jsonl(self, cli_env, initialized_archive):
|
|
|
+ def test_create_from_snapshot_jsonl(self, initialized_archive):
|
|
|
"""Create archive results from Snapshot JSONL input."""
|
|
|
url = create_test_url()
|
|
|
|
|
|
# Create a snapshot first
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
|
|
|
# Pipe snapshot to archiveresult create
|
|
|
stdout2, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0, f"Command failed: {stderr}"
|
|
|
@@ -47,16 +47,16 @@ class TestArchiveResultCreate:
|
|
|
ar = next(r for r in records if r['type'] == 'ArchiveResult')
|
|
|
assert ar['plugin'] == 'title'
|
|
|
|
|
|
- def test_create_with_specific_plugin(self, cli_env, initialized_archive):
|
|
|
+ def test_create_with_specific_plugin(self, initialized_archive):
|
|
|
"""Create archive result for specific plugin."""
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
|
|
|
stdout2, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=screenshot'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -65,25 +65,25 @@ class TestArchiveResultCreate:
|
|
|
assert len(ar_records) >= 1
|
|
|
assert ar_records[0]['plugin'] == 'screenshot'
|
|
|
|
|
|
- def test_create_pass_through_crawl(self, cli_env, initialized_archive):
|
|
|
+ def test_create_pass_through_crawl(self, initialized_archive):
|
|
|
"""Pass-through Crawl records unchanged."""
|
|
|
url = create_test_url()
|
|
|
|
|
|
# Create crawl and snapshot
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['crawl', 'create', url], data_dir=initialized_archive)
|
|
|
crawl = parse_jsonl_output(stdout1)[0]
|
|
|
|
|
|
stdout2, _, _ = run_archivebox_cmd(
|
|
|
['snapshot', 'create'],
|
|
|
stdin=json.dumps(crawl),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
# Now pipe all to archiveresult create
|
|
|
stdout3, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=stdout2,
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -94,14 +94,14 @@ class TestArchiveResultCreate:
|
|
|
assert 'Snapshot' in types
|
|
|
assert 'ArchiveResult' in types
|
|
|
|
|
|
- def test_create_pass_through_only_when_no_snapshots(self, cli_env, initialized_archive):
|
|
|
+ def test_create_pass_through_only_when_no_snapshots(self, initialized_archive):
|
|
|
"""Only pass-through records but no new snapshots returns success."""
|
|
|
crawl_record = {'type': 'Crawl', 'id': 'fake-id', 'urls': 'https://example.com'}
|
|
|
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'create'],
|
|
|
stdin=json.dumps(crawl_record),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -111,31 +111,31 @@ class TestArchiveResultCreate:
|
|
|
class TestArchiveResultList:
|
|
|
"""Tests for `archivebox archiveresult list`."""
|
|
|
|
|
|
- def test_list_empty(self, cli_env, initialized_archive):
|
|
|
+ def test_list_empty(self, initialized_archive):
|
|
|
"""List with no archive results returns empty."""
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'list'],
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
assert 'Listed 0 archive results' in stderr
|
|
|
|
|
|
- def test_list_filter_by_status(self, cli_env, initialized_archive):
|
|
|
+ def test_list_filter_by_status(self, initialized_archive):
|
|
|
"""Filter archive results by status."""
|
|
|
# Create snapshot and archive result
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'list', '--status=queued'],
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -143,20 +143,20 @@ class TestArchiveResultList:
|
|
|
for r in records:
|
|
|
assert r['status'] == 'queued'
|
|
|
|
|
|
- def test_list_filter_by_plugin(self, cli_env, initialized_archive):
|
|
|
+ def test_list_filter_by_plugin(self, initialized_archive):
|
|
|
"""Filter archive results by plugin."""
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'list', '--plugin=title'],
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -164,22 +164,22 @@ class TestArchiveResultList:
|
|
|
for r in records:
|
|
|
assert r['plugin'] == 'title'
|
|
|
|
|
|
- def test_list_with_limit(self, cli_env, initialized_archive):
|
|
|
+ def test_list_with_limit(self, initialized_archive):
|
|
|
"""Limit number of results."""
|
|
|
# Create multiple archive results
|
|
|
for _ in range(3):
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'list', '--limit=2'],
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -190,23 +190,23 @@ class TestArchiveResultList:
|
|
|
class TestArchiveResultUpdate:
|
|
|
"""Tests for `archivebox archiveresult update`."""
|
|
|
|
|
|
- def test_update_status(self, cli_env, initialized_archive):
|
|
|
+ def test_update_status(self, initialized_archive):
|
|
|
"""Update archive result status."""
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
|
|
|
stdout2, _, _ = run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
|
|
|
|
|
stdout3, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'update', '--status=failed'],
|
|
|
stdin=json.dumps(ar),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|
|
|
@@ -219,45 +219,45 @@ class TestArchiveResultUpdate:
|
|
|
class TestArchiveResultDelete:
|
|
|
"""Tests for `archivebox archiveresult delete`."""
|
|
|
|
|
|
- def test_delete_requires_yes(self, cli_env, initialized_archive):
|
|
|
+ def test_delete_requires_yes(self, initialized_archive):
|
|
|
"""Delete requires --yes flag."""
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
|
|
|
stdout2, _, _ = run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
|
|
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'delete'],
|
|
|
stdin=json.dumps(ar),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 1
|
|
|
assert '--yes' in stderr
|
|
|
|
|
|
- def test_delete_with_yes(self, cli_env, initialized_archive):
|
|
|
+ def test_delete_with_yes(self, initialized_archive):
|
|
|
"""Delete with --yes flag works."""
|
|
|
url = create_test_url()
|
|
|
- stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], env=cli_env)
|
|
|
+ stdout1, _, _ = run_archivebox_cmd(['snapshot', 'create', url], data_dir=initialized_archive)
|
|
|
snapshot = parse_jsonl_output(stdout1)[0]
|
|
|
|
|
|
stdout2, _, _ = run_archivebox_cmd(
|
|
|
['archiveresult', 'create', '--plugin=title'],
|
|
|
stdin=json.dumps(snapshot),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
ar = next(r for r in parse_jsonl_output(stdout2) if r.get('type') == 'ArchiveResult')
|
|
|
|
|
|
stdout, stderr, code = run_archivebox_cmd(
|
|
|
['archiveresult', 'delete', '--yes'],
|
|
|
stdin=json.dumps(ar),
|
|
|
- env=cli_env,
|
|
|
+ data_dir=initialized_archive,
|
|
|
)
|
|
|
|
|
|
assert code == 0
|