test_migrations_07_to_09.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. #!/usr/bin/env python3
  2. """
  3. Migration tests from 0.7.x to 0.9.x.
  4. 0.7.x schema includes:
  5. - Tag model with ManyToMany to Snapshot
  6. - ArchiveResult model with ForeignKey to Snapshot
  7. - AutoField primary keys
  8. """
  9. import shutil
  10. import sqlite3
  11. import tempfile
  12. import unittest
  13. from pathlib import Path
  14. from .test_migrations_helpers import (
  15. SCHEMA_0_7,
  16. seed_0_7_data,
  17. run_archivebox,
  18. create_data_dir_structure,
  19. verify_snapshot_count,
  20. verify_snapshot_urls,
  21. verify_snapshot_titles,
  22. verify_tag_count,
  23. verify_archiveresult_count,
  24. verify_foreign_keys,
  25. verify_all_snapshots_in_output,
  26. )
  27. class TestMigrationFrom07x(unittest.TestCase):
  28. """Test migration from 0.7.x schema to latest."""
  29. def setUp(self):
  30. """Create a temporary directory with 0.7.x schema and data."""
  31. self.work_dir = Path(tempfile.mkdtemp())
  32. self.db_path = self.work_dir / 'index.sqlite3'
  33. # Create directory structure
  34. create_data_dir_structure(self.work_dir)
  35. # Create database with 0.7.x schema
  36. conn = sqlite3.connect(str(self.db_path))
  37. conn.executescript(SCHEMA_0_7)
  38. conn.close()
  39. # Seed with test data
  40. self.original_data = seed_0_7_data(self.db_path)
  41. def tearDown(self):
  42. """Clean up temporary directory."""
  43. shutil.rmtree(self.work_dir, ignore_errors=True)
  44. def test_migration_preserves_snapshot_count(self):
  45. """Migration should preserve all snapshots."""
  46. expected_count = len(self.original_data['snapshots'])
  47. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  48. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  49. ok, msg = verify_snapshot_count(self.db_path, expected_count)
  50. self.assertTrue(ok, msg)
  51. def test_migration_preserves_snapshot_urls(self):
  52. """Migration should preserve all snapshot URLs."""
  53. expected_urls = [s['url'] for s in self.original_data['snapshots']]
  54. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  55. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  56. ok, msg = verify_snapshot_urls(self.db_path, expected_urls)
  57. self.assertTrue(ok, msg)
  58. def test_migration_preserves_snapshot_titles(self):
  59. """Migration should preserve all snapshot titles."""
  60. expected_titles = {s['url']: s['title'] for s in self.original_data['snapshots']}
  61. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  62. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  63. ok, msg = verify_snapshot_titles(self.db_path, expected_titles)
  64. self.assertTrue(ok, msg)
  65. def test_migration_preserves_tags(self):
  66. """Migration should preserve all tags."""
  67. expected_count = len(self.original_data['tags'])
  68. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  69. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  70. ok, msg = verify_tag_count(self.db_path, expected_count)
  71. self.assertTrue(ok, msg)
  72. def test_migration_preserves_archiveresults(self):
  73. """Migration should preserve all archive results."""
  74. expected_count = len(self.original_data['archiveresults'])
  75. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  76. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  77. ok, msg = verify_archiveresult_count(self.db_path, expected_count)
  78. self.assertTrue(ok, msg)
  79. def test_migration_preserves_foreign_keys(self):
  80. """Migration should maintain foreign key relationships."""
  81. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  82. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  83. ok, msg = verify_foreign_keys(self.db_path)
  84. self.assertTrue(ok, msg)
  85. def test_status_works_after_migration(self):
  86. """Status command should work after migration."""
  87. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  88. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  89. result = run_archivebox(self.work_dir, ['status'])
  90. self.assertEqual(result.returncode, 0, f"Status failed after migration: {result.stderr}")
  91. def test_search_works_after_migration(self):
  92. """Search command should find ALL migrated snapshots."""
  93. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  94. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  95. result = run_archivebox(self.work_dir, ['search'])
  96. self.assertEqual(result.returncode, 0, f"Search failed after migration: {result.stderr}")
  97. # Verify ALL snapshots appear in output
  98. output = result.stdout + result.stderr
  99. ok, msg = verify_all_snapshots_in_output(output, self.original_data['snapshots'])
  100. self.assertTrue(ok, msg)
  101. def test_list_works_after_migration(self):
  102. """List command should work and show ALL migrated data."""
  103. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  104. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  105. result = run_archivebox(self.work_dir, ['snapshot', 'list'])
  106. self.assertEqual(result.returncode, 0, f"List failed after migration: {result.stderr}")
  107. # Verify ALL snapshots appear in output
  108. output = result.stdout + result.stderr
  109. ok, msg = verify_all_snapshots_in_output(output, self.original_data['snapshots'])
  110. self.assertTrue(ok, msg)
  111. def test_new_schema_elements_created_after_migration(self):
  112. """Migration should create new 0.9.x schema elements (crawls_crawl, etc.)."""
  113. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  114. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  115. conn = sqlite3.connect(str(self.db_path))
  116. cursor = conn.cursor()
  117. # Check that new tables exist
  118. cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
  119. tables = {row[0] for row in cursor.fetchall()}
  120. conn.close()
  121. # 0.9.x should have crawls_crawl table
  122. self.assertIn('crawls_crawl', tables, "crawls_crawl table not created during migration")
  123. def test_snapshots_have_new_fields_after_migration(self):
  124. """Migrated snapshots should have new 0.9.x fields (status, depth, etc.)."""
  125. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  126. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  127. conn = sqlite3.connect(str(self.db_path))
  128. cursor = conn.cursor()
  129. # Check snapshot table has new columns
  130. cursor.execute('PRAGMA table_info(core_snapshot)')
  131. columns = {row[1] for row in cursor.fetchall()}
  132. conn.close()
  133. # 0.9.x snapshots should have status, depth, created_at, modified_at
  134. required_new_columns = {'status', 'depth', 'created_at', 'modified_at'}
  135. for col in required_new_columns:
  136. self.assertIn(col, columns, f"Snapshot missing new column: {col}")
  137. def test_add_works_after_migration(self):
  138. """Adding new URLs should work after migration from 0.7.x."""
  139. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  140. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  141. # Verify that init created the crawls_crawl table before proceeding
  142. conn = sqlite3.connect(str(self.db_path))
  143. cursor = conn.cursor()
  144. cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='crawls_crawl'")
  145. table_exists = cursor.fetchone() is not None
  146. conn.close()
  147. self.assertTrue(table_exists, f"Init failed to create crawls_crawl table. Init stderr: {result.stderr[-500:]}")
  148. # Try to add a new URL after migration (use --index-only for speed)
  149. result = run_archivebox(self.work_dir, ['add', '--index-only', 'https://example.com/new-page'], timeout=45)
  150. self.assertEqual(result.returncode, 0, f"Add failed after migration: {result.stderr}")
  151. # Verify a Crawl was created for the new URL
  152. conn = sqlite3.connect(str(self.db_path))
  153. cursor = conn.cursor()
  154. cursor.execute("SELECT COUNT(*) FROM crawls_crawl")
  155. crawl_count = cursor.fetchone()[0]
  156. conn.close()
  157. self.assertGreaterEqual(crawl_count, 1, f"No Crawl created when adding URL. Add stderr: {result.stderr[-500:]}")
  158. def test_archiveresult_status_preserved_after_migration(self):
  159. """Migration should preserve archive result status values."""
  160. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  161. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  162. conn = sqlite3.connect(str(self.db_path))
  163. cursor = conn.cursor()
  164. # Get status counts
  165. cursor.execute("SELECT status, COUNT(*) FROM core_archiveresult GROUP BY status")
  166. status_counts = dict(cursor.fetchall())
  167. conn.close()
  168. # Original data has known status distribution: succeeded, failed, skipped
  169. self.assertIn('succeeded', status_counts, "Should have succeeded results")
  170. self.assertIn('failed', status_counts, "Should have failed results")
  171. self.assertIn('skipped', status_counts, "Should have skipped results")
  172. def test_version_works_after_migration(self):
  173. """Version command should work after migration."""
  174. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  175. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  176. result = run_archivebox(self.work_dir, ['version'])
  177. self.assertEqual(result.returncode, 0, f"Version failed after migration: {result.stderr}")
  178. # Should show version info
  179. output = result.stdout + result.stderr
  180. self.assertTrue('ArchiveBox' in output or 'version' in output.lower(),
  181. f"Version output missing expected content: {output[:500]}")
  182. def test_help_works_after_migration(self):
  183. """Help command should work after migration."""
  184. result = run_archivebox(self.work_dir, ['init'], timeout=45)
  185. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  186. result = run_archivebox(self.work_dir, ['help'])
  187. self.assertEqual(result.returncode, 0, f"Help failed after migration: {result.stderr}")
  188. # Should show available commands
  189. output = result.stdout + result.stderr
  190. self.assertTrue('add' in output.lower() and 'status' in output.lower(),
  191. f"Help output missing expected commands: {output[:500]}")
  192. class TestMigrationDataIntegrity07x(unittest.TestCase):
  193. """Comprehensive data integrity tests for 0.7.x migrations."""
  194. def test_no_duplicate_snapshots_after_migration(self):
  195. """Migration should not create duplicate snapshots."""
  196. work_dir = Path(tempfile.mkdtemp())
  197. db_path = work_dir / 'index.sqlite3'
  198. try:
  199. create_data_dir_structure(work_dir)
  200. conn = sqlite3.connect(str(db_path))
  201. conn.executescript(SCHEMA_0_7)
  202. conn.close()
  203. seed_0_7_data(db_path)
  204. result = run_archivebox(work_dir, ['init'], timeout=45)
  205. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  206. # Check for duplicate URLs
  207. conn = sqlite3.connect(str(db_path))
  208. cursor = conn.cursor()
  209. cursor.execute("""
  210. SELECT url, COUNT(*) as cnt FROM core_snapshot
  211. GROUP BY url HAVING cnt > 1
  212. """)
  213. duplicates = cursor.fetchall()
  214. conn.close()
  215. self.assertEqual(len(duplicates), 0, f"Found duplicate URLs: {duplicates}")
  216. finally:
  217. shutil.rmtree(work_dir, ignore_errors=True)
  218. def test_no_orphaned_archiveresults_after_migration(self):
  219. """Migration should not leave orphaned ArchiveResults."""
  220. work_dir = Path(tempfile.mkdtemp())
  221. db_path = work_dir / 'index.sqlite3'
  222. try:
  223. create_data_dir_structure(work_dir)
  224. conn = sqlite3.connect(str(db_path))
  225. conn.executescript(SCHEMA_0_7)
  226. conn.close()
  227. seed_0_7_data(db_path)
  228. result = run_archivebox(work_dir, ['init'], timeout=45)
  229. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  230. ok, msg = verify_foreign_keys(db_path)
  231. self.assertTrue(ok, msg)
  232. finally:
  233. shutil.rmtree(work_dir, ignore_errors=True)
  234. def test_timestamps_preserved_after_migration(self):
  235. """Migration should preserve original timestamps."""
  236. work_dir = Path(tempfile.mkdtemp())
  237. db_path = work_dir / 'index.sqlite3'
  238. try:
  239. create_data_dir_structure(work_dir)
  240. conn = sqlite3.connect(str(db_path))
  241. conn.executescript(SCHEMA_0_7)
  242. conn.close()
  243. original_data = seed_0_7_data(db_path)
  244. original_timestamps = {s['url']: s['timestamp'] for s in original_data['snapshots']}
  245. result = run_archivebox(work_dir, ['init'], timeout=45)
  246. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  247. conn = sqlite3.connect(str(db_path))
  248. cursor = conn.cursor()
  249. cursor.execute("SELECT url, timestamp FROM core_snapshot")
  250. migrated_timestamps = {row[0]: row[1] for row in cursor.fetchall()}
  251. conn.close()
  252. for url, original_ts in original_timestamps.items():
  253. self.assertEqual(
  254. migrated_timestamps.get(url), original_ts,
  255. f"Timestamp changed for {url}: {original_ts} -> {migrated_timestamps.get(url)}"
  256. )
  257. finally:
  258. shutil.rmtree(work_dir, ignore_errors=True)
  259. def test_tag_associations_preserved_after_migration(self):
  260. """Migration should preserve snapshot-tag associations."""
  261. work_dir = Path(tempfile.mkdtemp())
  262. db_path = work_dir / 'index.sqlite3'
  263. try:
  264. create_data_dir_structure(work_dir)
  265. conn = sqlite3.connect(str(db_path))
  266. conn.executescript(SCHEMA_0_7)
  267. conn.close()
  268. seed_0_7_data(db_path)
  269. # Count tag associations before migration
  270. conn = sqlite3.connect(str(db_path))
  271. cursor = conn.cursor()
  272. cursor.execute("SELECT COUNT(*) FROM core_snapshot_tags")
  273. original_count = cursor.fetchone()[0]
  274. conn.close()
  275. result = run_archivebox(work_dir, ['init'], timeout=45)
  276. self.assertEqual(result.returncode, 0, f"Init failed: {result.stderr}")
  277. # Count tag associations after migration
  278. conn = sqlite3.connect(str(db_path))
  279. cursor = conn.cursor()
  280. cursor.execute("SELECT COUNT(*) FROM core_snapshot_tags")
  281. migrated_count = cursor.fetchone()[0]
  282. conn.close()
  283. self.assertEqual(migrated_count, original_count,
  284. f"Tag associations changed: {original_count} -> {migrated_count}")
  285. finally:
  286. shutil.rmtree(work_dir, ignore_errors=True)
# Allow running this test module directly: `python test_migrations_07_to_09.py`
if __name__ == '__main__':
    unittest.main()