generic_jsonl.py 742 B

12345678910111213141516171819202122232425262728293031323334
  1. __package__ = 'archivebox.parsers'
  2. import json
  3. from typing import IO, Iterable
  4. from datetime import datetime, timezone
  5. from ..index.schema import Link
  6. from ..util import (
  7. htmldecode,
  8. enforce_types,
  9. )
  10. from .generic_json import jsonObjectToLink
  11. def parse_line(line: str):
  12. if line.strip() != "":
  13. return json.loads(line)
  14. @enforce_types
  15. def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
  16. """Parse JSONL format bookmarks export files"""
  17. json_file.seek(0)
  18. links = [ parse_line(line) for line in json_file ]
  19. for link in links:
  20. if link:
  21. yield jsonObjectToLink(link,json_file.name)
  22. KEY = 'jsonl'
  23. NAME = 'Generic JSONL'
  24. PARSER = parse_generic_jsonl_export