| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- __package__ = "archivebox.parsers"
- import re
- import requests
- from datetime import datetime
- from typing import IO, Iterable, Optional
- from configparser import ConfigParser
- from pathlib import Path
- from ..index.schema import Link
- from ..util import enforce_types
- from ..system import atomic_write
- from ..config import (
- SOURCES_DIR,
- READWISE_READER_TOKENS,
- )
- API_DB_PATH = Path(SOURCES_DIR) / "readwise_reader_api.db"
- class ReadwiseReaderAPI:
- cursor: Optional[str]
- def __init__(self, api_token, cursor=None) -> None:
- self.api_token = api_token
- self.cursor = cursor
- def get_archive(self):
- response = requests.get(
- url="https://readwise.io/api/v3/list/",
- headers={"Authorization": "Token s71gNtiNDWquEvlJFFUyDU10ao8fn99lGyNryvyllQcDSnrd7X"},
- params={
- "location": "archive",
- "pageCursor": self.cursor,
- }
- )
- response.raise_for_status()
- return response
- def get_readwise_reader_articles(api: ReadwiseReaderAPI):
- response = api.get_archive()
- body = response.json()
- articles = body["results"]
- yield from articles
- if body['nextPageCursor']:
- api.cursor = body["nextPageCursor"]
- yield from get_readwise_reader_articles(api)
- def link_from_article(article: dict, sources: list):
- url: str = article['source_url']
- title = article["title"] or url
- timestamp = datetime.fromisoformat(article['updated_at']).timestamp()
- return Link(
- url=url,
- timestamp=str(timestamp),
- title=title,
- tags="",
- sources=sources,
- )
- def write_cursor(username: str, since: str):
- if not API_DB_PATH.exists():
- atomic_write(API_DB_PATH, "")
- since_file = ConfigParser()
- since_file.optionxform = str
- since_file.read(API_DB_PATH)
- since_file[username] = {"since": since}
- with open(API_DB_PATH, "w+") as new:
- since_file.write(new)
- def read_cursor(username: str) -> Optional[str]:
- if not API_DB_PATH.exists():
- atomic_write(API_DB_PATH, "")
- config_file = ConfigParser()
- config_file.optionxform = str
- config_file.read(API_DB_PATH)
- return config_file.get(username, "since", fallback=None)
- @enforce_types
- def should_parse_as_readwise_reader_api(text: str) -> bool:
- return text.startswith("readwise-reader://")
- @enforce_types
- def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
- """Parse bookmarks from the Readwise Reader API"""
- input_buffer.seek(0)
- pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
- for line in input_buffer:
- if should_parse_as_readwise_reader_api(line):
- username = pattern.search(line).group(1)
- api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
- for article in get_readwise_reader_articles(api):
- yield link_from_article(article, sources=[line])
- if api.cursor:
- write_cursor(username, api.cursor)
- KEY = "readwise_reader_api"
- NAME = "Readwise Reader API"
- PARSER = parse_readwise_reader_api_export
|