tests.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. import os
  4. import sys
  5. import shutil
  6. import unittest
  7. from pathlib import Path
  8. from contextlib import contextmanager
  9. TEST_CONFIG = {
  10. 'USE_COLOR': 'False',
  11. 'SHOW_PROGRESS': 'False',
  12. 'DATA_DIR': 'data.tests',
  13. 'SAVE_ARCHIVE_DOT_ORG': 'False',
  14. 'SAVE_TITLE': 'False',
  15. 'USE_CURL': 'False',
  16. 'USE_WGET': 'False',
  17. 'USE_GIT': 'False',
  18. 'USE_CHROME': 'False',
  19. 'USE_YOUTUBEDL': 'False',
  20. }
  21. DATA_DIR = 'data.tests'
  22. os.environ.update(TEST_CONFIG)
  23. from ..main import init
  24. from ..index import load_main_index
  25. from archivebox.config.constants import (
  26. SQL_INDEX_FILENAME,
  27. JSON_INDEX_FILENAME,
  28. HTML_INDEX_FILENAME,
  29. )
  30. from . import (
  31. archivebox_init,
  32. archivebox_add,
  33. archivebox_remove,
  34. )
# When true, output_hidden() redirects stdout/stderr away from the console.
# The __main__ block at the bottom of the file switches it off when
# -v/--verbose is passed on the command line.
HIDE_CLI_OUTPUT = True

# Sample input: URLs embedded in assorted surrounding text, plus a few
# entries without a valid scheme. The add tests expect exactly 12 links to be
# indexed from it.
test_urls = '''
https://example1.com/what/is/happening.html?what=1#how-about-this=1
https://example2.com/what/is/happening/?what=1#how-about-this=1
HTtpS://example3.com/what/is/happening/?what=1#how-about-this=1f
https://example4.com/what/is/happening.html
https://example5.com/
https://example6.com
<test>http://example7.com</test>
[https://example8.com/what/is/this.php?what=1]
[and http://example9.com?what=1&other=3#and-thing=2]
<what>https://example10.com#and-thing=2 "</about>
abc<this["https://subb.example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def
sdflkf[what](https://subb.example12.com/who/what.php?whoami=1#whatami=2)?am=hi
example13.bada
and example14.badb
<or>htt://example15.badc</that>
'''

# References to the process's stdout/stderr streams as they were when this
# module was imported.
stdout = sys.stdout
stderr = sys.stderr
  55. @contextmanager
  56. def output_hidden(show_failing=True):
  57. if not HIDE_CLI_OUTPUT:
  58. yield
  59. return
  60. sys.stdout = open('stdout.txt', 'w+', encoding='utf-8')
  61. sys.stderr = open('stderr.txt', 'w+', encoding='utf-8')
  62. try:
  63. yield
  64. sys.stdout.close()
  65. sys.stderr.close()
  66. sys.stdout = stdout
  67. sys.stderr = stderr
  68. except Exception:
  69. sys.stdout.close()
  70. sys.stderr.close()
  71. sys.stdout = stdout
  72. sys.stderr = stderr
  73. if show_failing:
  74. with open('stdout.txt', 'r', encoding='utf-8') as f:
  75. print(f.read())
  76. with open('stderr.txt', 'r', encoding='utf-8') as f:
  77. print(f.read())
  78. raise
  79. finally:
  80. os.remove('stdout.txt')
  81. os.remove('stderr.txt')
  82. class TestInit(unittest.TestCase):
  83. def setUp(self):
  84. os.makedirs(DATA_DIR, exist_ok=True)
  85. def tearDown(self):
  86. shutil.rmtree(DATA_DIR, ignore_errors=True)
  87. def test_basic_init(self):
  88. with output_hidden():
  89. archivebox_init.main([])
  90. assert (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
  91. assert (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
  92. assert (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
  93. assert len(load_main_index(out_dir=DATA_DIR)) == 0
  94. def test_conflicting_init(self):
  95. with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
  96. f.write('test')
  97. try:
  98. with output_hidden(show_failing=False):
  99. archivebox_init.main([])
  100. assert False, 'Init should have exited with an exception'
  101. except SystemExit:
  102. pass
  103. assert not (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
  104. assert not (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
  105. assert not (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
  106. try:
  107. load_main_index(out_dir=DATA_DIR)
  108. assert False, 'load_main_index should raise an exception when no index is present'
  109. except Exception:
  110. pass
  111. def test_no_dirty_state(self):
  112. with output_hidden():
  113. init()
  114. shutil.rmtree(DATA_DIR, ignore_errors=True)
  115. with output_hidden():
  116. init()
  117. class TestAdd(unittest.TestCase):
  118. def setUp(self):
  119. os.makedirs(DATA_DIR, exist_ok=True)
  120. with output_hidden():
  121. init()
  122. def tearDown(self):
  123. shutil.rmtree(DATA_DIR, ignore_errors=True)
  124. def test_add_arg_url(self):
  125. with output_hidden():
  126. archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
  127. all_links = load_main_index(out_dir=DATA_DIR)
  128. assert len(all_links) == 30
  129. def test_add_arg_file(self):
  130. test_file = Path(DATA_DIR) / 'test.txt'
  131. with open(test_file, 'w+', encoding='utf') as f:
  132. f.write(test_urls)
  133. with output_hidden():
  134. archivebox_add.main([test_file])
  135. all_links = load_main_index(out_dir=DATA_DIR)
  136. assert len(all_links) == 12
  137. os.remove(test_file)
  138. def test_add_stdin_url(self):
  139. with output_hidden():
  140. archivebox_add.main([], stdin=test_urls)
  141. all_links = load_main_index(out_dir=DATA_DIR)
  142. assert len(all_links) == 12
  143. class TestRemove(unittest.TestCase):
  144. def setUp(self):
  145. os.makedirs(DATA_DIR, exist_ok=True)
  146. with output_hidden():
  147. init()
  148. archivebox_add.main([], stdin=test_urls)
  149. # def tearDown(self):
  150. # shutil.rmtree(DATA_DIR, ignore_errors=True)
  151. def test_remove_exact(self):
  152. with output_hidden():
  153. archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
  154. all_links = load_main_index(out_dir=DATA_DIR)
  155. assert len(all_links) == 11
  156. def test_remove_regex(self):
  157. with output_hidden():
  158. archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])
  159. all_links = load_main_index(out_dir=DATA_DIR)
  160. assert len(all_links) == 4
  161. def test_remove_domain(self):
  162. with output_hidden():
  163. archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
  164. all_links = load_main_index(out_dir=DATA_DIR)
  165. assert len(all_links) == 10
  166. def test_remove_none(self):
  167. try:
  168. with output_hidden(show_failing=False):
  169. archivebox_remove.main(['--yes', '--delete', 'https://doesntexist.com'])
  170. assert False, 'Should raise if no URLs match'
  171. except Exception:
  172. pass
  173. if __name__ == '__main__':
  174. if '--verbose' in sys.argv or '-v' in sys.argv:
  175. HIDE_CLI_OUTPUT = False
  176. unittest.main()