test_init.py 4.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. # archivebox init
  2. # archivebox add
  3. import os
  4. import subprocess
  5. from pathlib import Path
  6. import json, shutil
  7. import sqlite3
  8. from archivebox.config.common import STORAGE_CONFIG
  9. from .fixtures import *
  10. DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
  11. def test_init(tmp_path, process):
  12. assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
  13. def test_update(tmp_path, process):
  14. os.chdir(tmp_path)
  15. update_process = subprocess.run(['archivebox', 'init'], capture_output=True)
  16. assert "updating existing ArchiveBox" in update_process.stdout.decode("utf-8")
  17. def test_add_link(tmp_path, process, disable_extractors_dict):
  18. os.chdir(tmp_path)
  19. add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'],
  20. capture_output=True, env=disable_extractors_dict)
  21. # In the new architecture, URLs are saved to source files
  22. # Check that a source file was created with the URL
  23. sources_dir = tmp_path / "sources"
  24. assert sources_dir.exists(), "Sources directory should be created"
  25. source_files = list(sources_dir.glob("*cli_add.txt"))
  26. assert len(source_files) >= 1, "Source file should be created"
  27. source_content = source_files[0].read_text()
  28. assert "https://example.com" in source_content
  29. def test_add_multiple_urls(tmp_path, process, disable_extractors_dict):
  30. """Test adding multiple URLs via command line arguments"""
  31. os.chdir(tmp_path)
  32. add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com', 'https://iana.org'],
  33. capture_output=True, env=disable_extractors_dict)
  34. # Check that a source file was created with both URLs
  35. sources_dir = tmp_path / "sources"
  36. assert sources_dir.exists(), "Sources directory should be created"
  37. source_files = list(sources_dir.glob("*cli_add.txt"))
  38. assert len(source_files) >= 1, "Source file should be created"
  39. source_content = source_files[-1].read_text()
  40. assert "https://example.com" in source_content
  41. assert "https://iana.org" in source_content
  42. def test_correct_permissions_output_folder(tmp_path, process):
  43. index_files = ['index.sqlite3', 'archive']
  44. for file in index_files:
  45. file_path = tmp_path / file
  46. assert oct(file_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
  47. def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
  48. os.chdir(tmp_path)
  49. add_process = subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
  50. env=disable_extractors_dict)
  51. # Check database permissions
  52. assert oct((tmp_path / "index.sqlite3").stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
  53. def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
  54. os.chdir(tmp_path)
  55. subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
  56. env=disable_extractors_dict)
  57. subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True,
  58. env=disable_extractors_dict)
  59. # Check both URLs are in database
  60. conn = sqlite3.connect("index.sqlite3")
  61. c = conn.cursor()
  62. count = c.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
  63. conn.close()
  64. assert count == 2
  65. def test_unrecognized_folders(tmp_path, process, disable_extractors_dict):
  66. os.chdir(tmp_path)
  67. subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True,
  68. env=disable_extractors_dict)
  69. (tmp_path / "archive" / "some_random_folder").mkdir(parents=True, exist_ok=True)
  70. init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
  71. # Just check that init completes successfully
  72. assert init_process.returncode == 0