# readwise_reader_api.py (3.2 KB)
  1. __package__ = "archivebox.parsers"
  2. import re
  3. import requests
  4. from datetime import datetime
  5. from typing import IO, Iterable, Optional
  6. from configparser import ConfigParser
  7. import abx
  8. from archivebox.misc.util import enforce_types
  9. from archivebox.misc.system import atomic_write
  10. from ..index.schema import Link
  11. class ReadwiseReaderAPI:
  12. cursor: Optional[str]
  13. def __init__(self, api_token, cursor=None) -> None:
  14. self.api_token = api_token
  15. self.cursor = cursor
  16. def get_archive(self):
  17. response = requests.get(
  18. url="https://readwise.io/api/v3/list/",
  19. headers={"Authorization": f"Token {self.api_token}"},
  20. params={
  21. "location": "archive",
  22. "pageCursor": self.cursor,
  23. }
  24. )
  25. response.raise_for_status()
  26. return response
  27. def get_readwise_reader_articles(api: ReadwiseReaderAPI):
  28. response = api.get_archive()
  29. body = response.json()
  30. articles = body["results"]
  31. yield from articles
  32. if body['nextPageCursor']:
  33. api.cursor = body["nextPageCursor"]
  34. yield from get_readwise_reader_articles(api)
  35. def link_from_article(article: dict, sources: list):
  36. url: str = article['source_url']
  37. title = article["title"] or url
  38. timestamp = datetime.fromisoformat(article['updated_at']).timestamp()
  39. return Link(
  40. url=url,
  41. timestamp=str(timestamp),
  42. title=title,
  43. tags="",
  44. sources=sources,
  45. )
  46. def write_cursor(username: str, since: str):
  47. READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH
  48. if not READWISE_DB_PATH.exists():
  49. atomic_write(READWISE_DB_PATH, "")
  50. since_file = ConfigParser()
  51. since_file.optionxform = str
  52. since_file.read(READWISE_DB_PATH)
  53. since_file[username] = {"since": since}
  54. with open(READWISE_DB_PATH, "w+") as new:
  55. since_file.write(new)
  56. def read_cursor(username: str) -> Optional[str]:
  57. READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH
  58. if not READWISE_DB_PATH.exists():
  59. atomic_write(READWISE_DB_PATH, "")
  60. config_file = ConfigParser()
  61. config_file.optionxform = str
  62. config_file.read(READWISE_DB_PATH)
  63. return config_file.get(username, "since", fallback=None)
  64. @enforce_types
  65. def should_parse_as_readwise_reader_api(text: str) -> bool:
  66. return text.startswith("readwise-reader://")
  67. @enforce_types
  68. def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
  69. """Parse bookmarks from the Readwise Reader API"""
  70. READWISE_READER_TOKENS = abx.pm.hook.get_CONFIG().READWISE_READER_TOKENS
  71. input_buffer.seek(0)
  72. pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
  73. for line in input_buffer:
  74. if should_parse_as_readwise_reader_api(line):
  75. username = pattern.search(line).group(1)
  76. api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
  77. for article in get_readwise_reader_articles(api):
  78. yield link_from_article(article, sources=[line])
  79. if api.cursor:
  80. write_cursor(username, api.cursor)
  81. KEY = "readwise_reader_api"
  82. NAME = "Readwise Reader API"
  83. PARSER = parse_readwise_reader_api_export