# readwise_reader_api.py (3.1 KB)
  1. __package__ = "archivebox.parsers"
  2. import re
  3. import requests
  4. import archivebox
  5. from datetime import datetime
  6. from typing import IO, Iterable, Optional
  7. from configparser import ConfigParser
  8. from ..index.schema import Link
  9. from ..util import enforce_types
  10. from ..system import atomic_write
  11. from ..config import READWISE_READER_TOKENS
  # INI-style file holding, per username, the last pagination cursor seen
  # (written by write_cursor and read back by read_cursor).
  API_DB_PATH = archivebox.DATA_DIR / "sources" / "readwise_reader_api.db"
  class ReadwiseReaderAPI:
      """Minimal client for the Readwise Reader v3 document-list endpoint.

      Attributes:
          api_token: Token sent in the ``Authorization`` header.
          cursor: Value sent as the ``pageCursor`` query parameter; callers
              update it between requests to walk through pages.
          timeout: Seconds ``requests`` waits on the server before raising.
      """

      cursor: Optional[str]

      def __init__(self, api_token, cursor=None, timeout=60) -> None:
          self.api_token = api_token
          self.cursor = cursor
          self.timeout = timeout

      def get_archive(self):
          """Fetch one page of documents whose location is ``archive``.

          Returns:
              The ``requests`` response object for the page at ``self.cursor``.

          Raises:
              requests.HTTPError: If the server answers with an error status.
              requests.Timeout: If the server does not respond within
                  ``self.timeout`` seconds.
          """
          response = requests.get(
              url="https://readwise.io/api/v3/list/",
              headers={"Authorization": f"Token {self.api_token}"},
              params={
                  "location": "archive",
                  "pageCursor": self.cursor,
              },
              # Without an explicit timeout a stalled connection would block
              # the whole import indefinitely.
              timeout=self.timeout,
          )
          response.raise_for_status()
          return response
  def get_readwise_reader_articles(api: "ReadwiseReaderAPI") -> Iterable[dict]:
      """Yield every article from the archive, following pagination cursors.

      Fetching starts at ``api.cursor``. Each time a page reports a non-empty
      ``nextPageCursor``, ``api.cursor`` is advanced to it and the next page
      is requested, so when the generator is exhausted ``api.cursor`` holds
      the cursor of the last page fetched (or is unchanged if only one page
      was needed).

      Pages are walked in a loop rather than by recursion, so the number of
      pages is not limited by the interpreter's recursion depth.

      Args:
          api: Client object exposing ``get_archive()`` and a mutable
              ``cursor`` attribute.

      Yields:
          Each element of every page's ``results`` list, in order.
      """
      while True:
          body = api.get_archive().json()
          yield from body["results"]
          # A missing or empty cursor both mean there is no further page.
          next_cursor = body.get("nextPageCursor")
          if not next_cursor:
              return
          api.cursor = next_cursor
  def link_from_article(article: dict, sources: list):
      """Build a ``Link`` from one article record.

      The link's title falls back to the article URL when the record's title
      is empty, its timestamp is ``updated_at`` converted to POSIX seconds and
      rendered as a string, and its tags are left empty.

      Args:
          article: Record providing ``source_url``, ``title`` and an
              ISO-8601 ``updated_at`` value.
          sources: Passed through unchanged as the link's ``sources``.
      """
      source_url: str = article['source_url']
      display_title = article["title"] or source_url
      updated_at = datetime.fromisoformat(article['updated_at'])
      epoch_seconds = updated_at.timestamp()
      return Link(
          url=source_url,
          timestamp=str(epoch_seconds),
          title=display_title,
          tags="",
          sources=sources,
      )
  def write_cursor(username: str, since: str):
      """Persist *since* as the stored cursor for *username*.

      The cursor database is an INI-style file with one section per username
      holding a single ``since`` option. Existing sections for other users
      are read first and written back alongside the updated one.
      """
      # Make sure there is a file to read before loading the current state.
      if not API_DB_PATH.exists():
          atomic_write(API_DB_PATH, "")

      store = ConfigParser()
      # Keep option names case-sensitive instead of lower-casing them.
      store.optionxform = str
      store.read(API_DB_PATH)

      store[username] = {"since": since}
      with open(API_DB_PATH, "w+") as db_file:
          store.write(db_file)
  def read_cursor(username: str) -> Optional[str]:
      """Return the cursor stored for *username*, or ``None`` if there is none.

      The cursor database file is created empty when it does not exist yet.
      """
      if not API_DB_PATH.exists():
          atomic_write(API_DB_PATH, "")

      store = ConfigParser()
      # Keep option names case-sensitive instead of lower-casing them.
      store.optionxform = str
      store.read(API_DB_PATH)

      stored_cursor = store.get(username, "since", fallback=None)
      return stored_cursor
  @enforce_types
  def should_parse_as_readwise_reader_api(text: str) -> bool:
      """Return True when *text* begins with the ``readwise-reader://`` scheme."""
      scheme = "readwise-reader://"
      return text[:len(scheme)] == scheme
  @enforce_types
  def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
      """Parse bookmarks from the Readwise Reader API

      Every input line of the form ``readwise-reader://<username>`` triggers
      a download of that user's archived articles, starting from the cursor
      stored for the username. Each article is yielded as a ``Link`` whose
      ``sources`` is the originating input line. Once a user's articles are
      exhausted, the client's final cursor (if any) is stored for the next run.

      Lines that do not start with the scheme, or that carry no username
      after it, are skipped.

      Raises:
          KeyError: Presumably, when ``READWISE_READER_TOKENS`` has no entry
              for the username (assumes a dict-like mapping; verify against
              the config module).
      """
      input_buffer.seek(0)
      pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
      for line in input_buffer:
          # The pattern is anchored on the scheme, so a single match both
          # selects the relevant lines and extracts the username. Checking
          # the result avoids calling .group() on None for a bare
          # "readwise-reader://" line.
          found = pattern.match(line)
          if found is None:
              continue
          username = found.group(1)

          api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
          for article in get_readwise_reader_articles(api):
              yield link_from_article(article, sources=[line])

          # Only reached after every page was consumed; remember where we
          # stopped so the next run can resume from there.
          if api.cursor:
              write_cursor(username, api.cursor)
  # Module-level parser metadata; presumably collected by the parser
  # registry in the parent package (verify against the importer).
  KEY = "readwise_reader_api"            # machine-readable identifier
  NAME = "Readwise Reader API"           # human-readable label
  PARSER = parse_readwise_reader_api_export  # entry point for this parser