json.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. __package__ = 'archivebox.legacy.storage'
  2. import os
  3. import json
  4. from datetime import datetime
  5. from typing import List, Optional, Iterator
  6. from ..schema import Link, ArchiveResult
  7. from ..config import (
  8. VERSION,
  9. OUTPUT_DIR,
  10. )
  11. from ..util import (
  12. enforce_types,
  13. atomic_write,
  14. )
  15. ### Main Links Index
  @enforce_types
  def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
      """Parse the main archive index JSON file and yield each link in it.

      Reads ``index.json`` inside *out_dir* and yields one ``Link`` per entry
      of its top-level ``'links'`` value, each built with ``Link.from_json``.
      Yields nothing when the index file does not exist.
      """
      index_path = os.path.join(out_dir, 'index.json')
      if not os.path.exists(index_path):
          return

      # Read and close the file before the first yield, so a consumer that
      # stops iterating early never leaves the file handle open.
      with open(index_path, 'r', encoding='utf-8') as f:
          links = json.load(f)['links']

      for link_json in links:
          yield Link.from_json(link_json)
  @enforce_types
  def write_json_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
      """Write the main JSON link index to ``index.json`` inside *out_dir*.

      The first link (if any) is spot-checked with assertions before the
      index dict is handed to ``atomic_write``.
      """
      assert isinstance(links, List), 'Links must be a list, not a generator.'

      if links:
          # Only the first entry is inspected as a cheap sanity check.
          assert isinstance(links[0].history, dict)
          assert isinstance(links[0].sources, list)
          if links[0].history.get('title'):
              assert isinstance(links[0].history['title'][0], ArchiveResult)
          if links[0].sources:
              assert isinstance(links[0].sources[0], str)

      target = os.path.join(out_dir, 'index.json')
      payload = {
          'info': 'ArchiveBox Index',
          'source': 'https://github.com/pirate/ArchiveBox',
          'docs': 'https://github.com/pirate/ArchiveBox/wiki',
          'version': VERSION,
          'num_links': len(links),
          'updated': datetime.now(),
          'links': links,
      }
      atomic_write(payload, target)
  47. ### Link Details Index
  @enforce_types
  def write_json_link_details(link: Link, out_dir: Optional[str]=None) -> None:
      """Write the link's extended dict form to ``index.json``.

      The file goes in *out_dir* when one is given (and truthy); otherwise
      in ``link.link_dir``.
      """
      target_dir = out_dir if out_dir else link.link_dir
      target = os.path.join(target_dir, 'index.json')
      details = link._asdict(extended=True)
      atomic_write(details, target)
  @enforce_types
  def parse_json_link_details(out_dir: str) -> Optional[Link]:
      """Load the per-link JSON index found in *out_dir*.

      Returns the ``Link`` built by ``Link.from_json`` from the contents of
      ``index.json``, or ``None`` when that file does not exist.
      """
      index_file = os.path.join(out_dir, 'index.json')
      if not os.path.exists(index_file):
          return None

      with open(index_file, 'r', encoding='utf-8') as fh:
          raw = json.load(fh)
      return Link.from_json(raw)