test_add.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. import subprocess
  2. import json
  3. import sqlite3
  4. import os
  5. from .fixtures import *
  6. def test_depth_flag_is_accepted(process, disable_extractors_dict):
  7. arg_process = subprocess.run(["archivebox", "add", "--index-only", "https://example.com", "--depth=0"],
  8. capture_output=True, env=disable_extractors_dict)
  9. assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8")
  10. def test_depth_flag_fails_if_it_is_not_0_or_1(process, disable_extractors_dict):
  11. arg_process = subprocess.run(
  12. ["archivebox", "add", "--index-only", "--depth=5", "https://example.com"],
  13. capture_output=True,
  14. env=disable_extractors_dict,
  15. )
  16. # Error message may say "invalid choice" or "is not one of"
  17. stderr = arg_process.stderr.decode("utf-8")
  18. assert 'invalid' in stderr.lower() or 'not one of' in stderr.lower()
  19. arg_process = subprocess.run(
  20. ["archivebox", "add", "--index-only", "--depth=-1", "https://example.com"],
  21. capture_output=True,
  22. env=disable_extractors_dict,
  23. )
  24. stderr = arg_process.stderr.decode("utf-8")
  25. assert 'invalid' in stderr.lower() or 'not one of' in stderr.lower()
  26. def test_depth_flag_0_creates_source_file(tmp_path, process, disable_extractors_dict):
  27. os.chdir(tmp_path)
  28. arg_process = subprocess.run(
  29. ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
  30. capture_output=True,
  31. env=disable_extractors_dict,
  32. )
  33. # Check that source file was created with the URL
  34. sources_dir = tmp_path / "sources"
  35. assert sources_dir.exists()
  36. source_files = list(sources_dir.glob("*cli_add.txt"))
  37. assert len(source_files) >= 1
  38. source_content = source_files[0].read_text()
  39. assert "example.com" in source_content
  40. def test_overwrite_flag_is_accepted(process, disable_extractors_dict):
  41. subprocess.run(
  42. ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
  43. capture_output=True,
  44. env=disable_extractors_dict,
  45. )
  46. arg_process = subprocess.run(
  47. ["archivebox", "add", "--index-only", "--overwrite", "https://example.com"],
  48. capture_output=True,
  49. env=disable_extractors_dict,
  50. )
  51. assert 'unrecognized arguments: --overwrite' not in arg_process.stderr.decode("utf-8")
  52. def test_add_creates_crawl_in_database(tmp_path, process, disable_extractors_dict):
  53. os.chdir(tmp_path)
  54. subprocess.run(
  55. ["archivebox", "add", "--index-only", "--depth=0", "https://example.com"],
  56. capture_output=True,
  57. env=disable_extractors_dict,
  58. )
  59. # Check that a Crawl was created in database
  60. conn = sqlite3.connect("index.sqlite3")
  61. c = conn.cursor()
  62. count = c.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()[0]
  63. conn.close()
  64. assert count >= 1
  65. def test_add_with_tags(tmp_path, process, disable_extractors_dict):
  66. """Test adding URL with tags."""
  67. os.chdir(tmp_path)
  68. subprocess.run(
  69. ["archivebox", "add", "--index-only", "--depth=0", "--tag=test,example", "https://example.com"],
  70. capture_output=True,
  71. env=disable_extractors_dict,
  72. )
  73. # Check that tags were created in database
  74. conn = sqlite3.connect("index.sqlite3")
  75. c = conn.cursor()
  76. tags = c.execute("SELECT name FROM core_tag").fetchall()
  77. conn.close()
  78. tag_names = [t[0] for t in tags]
  79. assert 'test' in tag_names or 'example' in tag_names
  80. def test_add_multiple_urls_single_call(tmp_path, process, disable_extractors_dict):
  81. """Test adding multiple URLs in a single call creates multiple snapshots."""
  82. os.chdir(tmp_path)
  83. subprocess.run(
  84. ["archivebox", "add", "--index-only", "--depth=0",
  85. "https://example.com", "https://example.org"],
  86. capture_output=True,
  87. env=disable_extractors_dict,
  88. )
  89. # Check both URLs are in the source file
  90. sources_dir = tmp_path / "sources"
  91. source_files = list(sources_dir.glob("*cli_add.txt"))
  92. assert len(source_files) >= 1
  93. source_content = source_files[0].read_text()
  94. assert "example.com" in source_content
  95. assert "example.org" in source_content
  96. def test_add_from_file(tmp_path, process, disable_extractors_dict):
  97. """Test adding URLs from a file."""
  98. os.chdir(tmp_path)
  99. # Create a file with URLs
  100. urls_file = tmp_path / "urls.txt"
  101. urls_file.write_text("https://example.com\nhttps://example.org\n")
  102. subprocess.run(
  103. ["archivebox", "add", "--index-only", "--depth=0", str(urls_file)],
  104. capture_output=True,
  105. env=disable_extractors_dict,
  106. )
  107. # Check that a Crawl was created
  108. conn = sqlite3.connect("index.sqlite3")
  109. c = conn.cursor()
  110. count = c.execute("SELECT COUNT(*) FROM crawls_crawl").fetchone()[0]
  111. conn.close()
  112. assert count >= 1
  113. class TestAddCLI:
  114. """Test the CLI interface for add command."""
  115. def test_add_help(self, tmp_path, process):
  116. """Test that --help works for add command."""
  117. os.chdir(tmp_path)
  118. result = subprocess.run(
  119. ["archivebox", "add", "--help"],
  120. capture_output=True,
  121. text=True,
  122. )
  123. assert result.returncode == 0
  124. assert '--depth' in result.stdout or 'depth' in result.stdout
  125. assert '--tag' in result.stdout or 'tag' in result.stdout
  126. def test_add_no_args_shows_help(self, tmp_path, process):
  127. """Test that add with no args shows help or usage."""
  128. os.chdir(tmp_path)
  129. result = subprocess.run(
  130. ["archivebox", "add"],
  131. capture_output=True,
  132. text=True,
  133. )
  134. # Should either show help or error about missing URL
  135. combined = result.stdout + result.stderr
  136. assert 'usage' in combined.lower() or 'url' in combined.lower() or 'add' in combined.lower()