tests.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #!/usr/bin/env python3
  2. __package__ = 'archivebox.cli'
  3. import os
  4. import sys
  5. import shutil
  6. import unittest
  7. from pathlib import Path
  8. from contextlib import contextmanager
  # Relative path of the throwaway archive directory the tests create and delete.
  DATA_DIR = 'data.tests'

  # Settings exported to the environment for the test run. Every value is a
  # string because environment variables can only hold strings.
  TEST_CONFIG = {
      'USE_COLOR': 'False',
      'SHOW_PROGRESS': 'False',
      'DATA_DIR': DATA_DIR,
      # Every SAVE_*/USE_* toggle below is switched off -- presumably so the
      # tests never shell out to external tools (TODO confirm).
      **dict.fromkeys(
          (
              'SAVE_ARCHIVEDOTORG',
              'SAVE_TITLE',
              'USE_CURL',
              'USE_WGET',
              'USE_GIT',
              'USE_CHROME',
              'USE_YOUTUBEDL',
          ),
          'False',
      ),
  }

  # Exported before the project imports that follow this statement --
  # presumably because the project reads its configuration at import time.
  os.environ.update(TEST_CONFIG)
  23. from ..main import init
  24. from archivebox.config.constants import (
  25. SQL_INDEX_FILENAME,
  26. JSON_INDEX_FILENAME,
  27. HTML_INDEX_FILENAME,
  28. )
  29. from . import (
  30. archivebox_init,
  31. archivebox_add,
  32. archivebox_remove,
  33. )
  34. HIDE_CLI_OUTPUT = True
  35. test_urls = '''
  36. https://example1.com/what/is/happening.html?what=1#how-about-this=1
  37. https://example2.com/what/is/happening/?what=1#how-about-this=1
  38. HTtpS://example3.com/what/is/happening/?what=1#how-about-this=1f
  39. https://example4.com/what/is/happening.html
  40. https://example5.com/
  41. https://example6.com
  42. <test>http://example7.com</test>
  43. [https://example8.com/what/is/this.php?what=1]
  44. [and http://example9.com?what=1&other=3#and-thing=2]
  45. <what>https://example10.com#and-thing=2 "</about>
  46. abc<this["https://subb.example11.com/what/is#and-thing=2?whoami=23&where=1"]that>def
  47. sdflkf[what](https://subb.example12.com/who/what.php?whoami=1#whatami=2)?am=hi
  48. example13.bada
  49. and example14.badb
  50. <or>htt://example15.badc</that>
  51. '''
  # Streams that were active when this module was imported. Kept as module
  # attributes for backward compatibility; output_hidden() no longer relies on
  # them and restores whatever streams were active when it was entered.
  stdout = sys.stdout
  stderr = sys.stderr


  @contextmanager
  def output_hidden(show_failing=True):
      """Silence ``sys.stdout`` / ``sys.stderr`` while the ``with`` body runs.

      When the module-level ``HIDE_CLI_OUTPUT`` flag is false this is a no-op.
      Otherwise both streams are redirected to ``stdout.txt`` and ``stderr.txt``
      in the current working directory. On exit the files are closed, the
      streams that were active on entry are restored, and the files are deleted.
      This happens on every exit path, including ``SystemExit``, which the CLI
      tests in this file deliberately trigger.

      Args:
          show_failing: If true and the body exits with an ``Exception`` or
              ``SystemExit``, print the captured output after the real streams
              are restored, so the failure can be diagnosed.

      Raises:
          Whatever the body raises, re-raised unchanged after cleanup.
      """
      if not HIDE_CLI_OUTPUT:
          yield
          return

      capture_files = ('stdout.txt', 'stderr.txt')
      # Restore the streams active *now*, not the import-time ones: a test
      # runner may have installed its own stream objects in the meantime.
      previous_out, previous_err = sys.stdout, sys.stderr
      try:
          with open(capture_files[0], 'w+', encoding='utf-8') as out_file:
              with open(capture_files[1], 'w+', encoding='utf-8') as err_file:
                  sys.stdout, sys.stderr = out_file, err_file
                  try:
                      yield
                  finally:
                      # Runs for every exception type, so the process is never
                      # left writing to a closed (or deleted) capture file.
                      sys.stdout, sys.stderr = previous_out, previous_err
      except (Exception, SystemExit):
          if show_failing:
              for name in capture_files:
                  try:
                      with open(name, 'r', encoding='utf-8') as f:
                          print(f.read())
                  except FileNotFoundError:
                      # The capture file was never created (opening it failed).
                      pass
          raise
      finally:
          for name in capture_files:
              try:
                  os.remove(name)
              except FileNotFoundError:
                  # Nothing to clean up; do not mask the original error.
                  pass
  class TestInit(unittest.TestCase):
      """Tests for initialising an archive inside the throwaway ``DATA_DIR``."""

      # NOTE(review): load_main_index is called below but no import for it is
      # visible in this file -- confirm it is brought into scope somewhere.

      def setUp(self):
          """Create an empty data directory for the test."""
          os.makedirs(DATA_DIR, exist_ok=True)

      def tearDown(self):
          """Delete the data directory so no state leaks into the next test."""
          shutil.rmtree(DATA_DIR, ignore_errors=True)

      def _index_paths(self):
          """Return the SQL, JSON and HTML index paths inside ``DATA_DIR``."""
          data_dir = Path(DATA_DIR)
          return [
              data_dir / filename
              for filename in (SQL_INDEX_FILENAME, JSON_INDEX_FILENAME, HTML_INDEX_FILENAME)
          ]

      def test_basic_init(self):
          """Init in an empty directory creates all three indexes with no links."""
          with output_hidden():
              archivebox_init.main([])

          for index_path in self._index_paths():
              self.assertTrue(index_path.exists(), f'{index_path} was not created')
          self.assertEqual(len(load_main_index(out_dir=DATA_DIR)), 0)

      def test_conflicting_init(self):
          """Init in a directory holding an unrelated file exits and writes nothing."""
          with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
              f.write('test')

          with self.assertRaises(SystemExit, msg='Init should have exited with an exception'):
              with output_hidden(show_failing=False):
                  archivebox_init.main([])

          for index_path in self._index_paths():
              self.assertFalse(index_path.exists(), f'{index_path} should not exist')

          # Resolve the name outside assertRaises so a missing import surfaces as
          # an error instead of being mistaken for the expected failure.
          load_index = load_main_index
          # assertRaises reports a missing exception from outside the guarded
          # body, so the failure cannot be swallowed the way an `assert False`
          # inside `try/except Exception: pass` would be.
          with self.assertRaises(
              Exception,
              msg='load_main_index should raise an exception when no index is present',
          ):
              load_index(out_dir=DATA_DIR)

      def test_no_dirty_state(self):
          """Init succeeds again after the data directory has been wiped."""
          with output_hidden():
              init()
          shutil.rmtree(DATA_DIR, ignore_errors=True)
          with output_hidden():
              init()
  class TestAdd(unittest.TestCase):
      """Tests for adding links to a freshly initialised archive in ``DATA_DIR``."""

      def setUp(self):
          """Create the data directory and initialise an archive in it."""
          os.makedirs(DATA_DIR, exist_ok=True)
          with output_hidden():
              init()

      def tearDown(self):
          """Delete the data directory so no state leaks into the next test."""
          shutil.rmtree(DATA_DIR, ignore_errors=True)

      def test_add_arg_url(self):
          """Adding a feed URL passed as an argument imports its links."""
          # NOTE(review): this expects a live feed to yield exactly 30 links, so
          # it presumably needs network access and a stable feed -- confirm.
          with output_hidden():
              archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])

          all_links = load_main_index(out_dir=DATA_DIR)
          self.assertEqual(len(all_links), 30)

      def test_add_arg_file(self):
          """Adding a file path passed as an argument imports the links it lists."""
          test_file = Path(DATA_DIR) / 'test.txt'
          with open(test_file, 'w+', encoding='utf-8') as f:
              f.write(test_urls)

          try:
              with output_hidden():
                  # Command-line argument lists hold strings, so pass the path as
                  # str rather than as a Path object.
                  archivebox_add.main([str(test_file)])

              all_links = load_main_index(out_dir=DATA_DIR)
              self.assertEqual(len(all_links), 12)
          finally:
              # Remove the input file even when the assertion above fails.
              os.remove(test_file)

      def test_add_stdin_url(self):
          """Adding links supplied through stdin imports them."""
          with output_hidden():
              archivebox_add.main([], stdin=test_urls)

          all_links = load_main_index(out_dir=DATA_DIR)
          self.assertEqual(len(all_links), 12)
  class TestRemove(unittest.TestCase):
      """Tests for removing links from an archive pre-filled with ``test_urls``."""

      def setUp(self):
          """Initialise an archive in ``DATA_DIR`` and add the sample links."""
          os.makedirs(DATA_DIR, exist_ok=True)
          with output_hidden():
              init()
              archivebox_add.main([], stdin=test_urls)

      def tearDown(self):
          """Delete the data directory so every test starts from the same archive.

          Matches the cleanup done by the other test classes in this file;
          without it, removals made by one test leak into later tests and runs.
          """
          shutil.rmtree(DATA_DIR, ignore_errors=True)

      def test_remove_exact(self):
          """Removing one exact URL leaves 11 links."""
          with output_hidden():
              archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])

          all_links = load_main_index(out_dir=DATA_DIR)
          self.assertEqual(len(all_links), 11)

      def test_remove_regex(self):
          """Removing by regular expression leaves 4 links."""
          with output_hidden():
              archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])

          all_links = load_main_index(out_dir=DATA_DIR)
          self.assertEqual(len(all_links), 4)

      def test_remove_domain(self):
          """Removing two domains leaves 10 links."""
          with output_hidden():
              archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])

          all_links = load_main_index(out_dir=DATA_DIR)
          self.assertEqual(len(all_links), 10)

      def test_remove_none(self):
          """Removing a URL that matches nothing must fail."""
          # SystemExit is accepted alongside Exception because it is not an
          # Exception subclass and a CLI entry point may exit that way
          # (NOTE(review): confirm which one archivebox_remove.main raises).
          # assertRaises reports a missing exception from outside the guarded
          # body, so the failure cannot be swallowed by a broad except.
          with self.assertRaises((Exception, SystemExit), msg='Should raise if no URLs match'):
              with output_hidden(show_failing=False):
                  archivebox_remove.main(['--yes', '--delete', 'https://doesntexist.com'])
  if __name__ == '__main__':
      # Show the CLI output instead of capturing it when verbosity is requested.
      verbose_requested = any(flag in sys.argv for flag in ('--verbose', '-v'))
      if verbose_requested:
          HIDE_CLI_OUTPUT = False

      unittest.main()