Explorar o código

move utils and vendored libs into subfolders

Nick Sweeting hai 5 anos
pai
achega
a0a79cead8

+ 0 - 1
archivebox/core/admin.py

@@ -14,7 +14,6 @@ from django import forms
 from core.models import Snapshot, Tag
 from core.forms import AddLinkForm, TagField
 
-from core.utils import get_icons
 from core.mixins import SearchResultsAdminMixin
 
 from index.html import snapshot_icons

+ 1 - 1
archivebox/core/forms.py

@@ -3,7 +3,7 @@ __package__ = 'archivebox.core'
 from django import forms
 
 from ..util import URL_REGEX
-from .utils_taggit import edit_string_for_tags, parse_tags
+from ..vendor.taggit_utils import edit_string_for_tags, parse_tags
 
 CHOICES = (
     ('0', 'depth = 0 (archive just these URLs)'),

+ 35 - 37
archivebox/parsers/pocket_api.py

@@ -4,34 +4,35 @@ __package__ = 'archivebox.parsers'
 import re
 
 from typing import IO, Iterable, Optional
-from datetime import datetime
 from configparser import ConfigParser
 
 from pathlib import Path
-from pocket import Pocket
-import requests
+from ..vendor.pocket import Pocket
 
 from ..index.schema import Link
-from ..util import (
-    enforce_types,
-)
+from ..util import enforce_types
+from ..system import atomic_write
 from ..config import (
-    SOURCES_DIR
+    SOURCES_DIR,
+    POCKET_CONSUMER_KEY,
+    POCKET_ACCESS_TOKENS,
 )
 
-_COUNT_PER_PAGE = 500
-_API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
+
+COUNT_PER_PAGE = 500
+API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
 
 # search for broken protocols that sometimes come from the Pocket API
 _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
 
+
 def get_pocket_articles(api: Pocket, since=None, page=0):
     body, headers = api.get(
         state='archive',
         sort='oldest',
         since=since,
-        count=_COUNT_PER_PAGE,
-        offset=page * _COUNT_PER_PAGE,
+        count=COUNT_PER_PAGE,
+        offset=page * COUNT_PER_PAGE,
     )
 
     articles = body['list'].values() if isinstance(body['list'], dict) else body['list']
@@ -39,7 +40,7 @@ def get_pocket_articles(api: Pocket, since=None, page=0):
 
     yield from articles
 
-    if returned_count == _COUNT_PER_PAGE:
+    if returned_count == COUNT_PER_PAGE:
         yield from get_pocket_articles(api, since=since, page=page + 1)
     else:
         api.last_since = body['since']
@@ -60,56 +61,53 @@ def link_from_article(article: dict, sources: list):
         sources=sources
     )
 
-def write_since(username: str, since: str):
-    from ..system import atomic_write
 
-    if not _API_DB_PATH.exists():
-        atomic_write(_API_DB_PATH, '')
+def write_since(username: str, since: str):
+    if not API_DB_PATH.exists():
+        atomic_write(API_DB_PATH, '')
 
     since_file = ConfigParser()
     since_file.optionxform = str
-    since_file.read(_API_DB_PATH)
+    since_file.read(API_DB_PATH)
 
     since_file[username] = {
         'since': since
     }
 
-    with open(_API_DB_PATH, 'w+') as new:
+    with open(API_DB_PATH, 'w+') as new:
         since_file.write(new)
 
-def read_since(username: str) -> Optional[str]:
-    from ..system import atomic_write
 
-    if not _API_DB_PATH.exists():
-        atomic_write(_API_DB_PATH, '')
+def read_since(username: str) -> Optional[str]:
+    if not API_DB_PATH.exists():
+        atomic_write(API_DB_PATH, '')
 
     config_file = ConfigParser()
     config_file.optionxform = str
-    config_file.read(_API_DB_PATH)
+    config_file.read(API_DB_PATH)
 
     return config_file.get(username, 'since', fallback=None)
 
+
 @enforce_types
 def should_parse_as_pocket_api(text: str) -> bool:
     return text.startswith('pocket://')
 
+
 @enforce_types
 def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse bookmarks from the Pocket API"""
 
     input_buffer.seek(0)
-    pattern = re.compile("^pocket:\/\/(\w+)")
+    pattern = re.compile(r"^pocket:\/\/(\w+)")
     for line in input_buffer:
-      if should_parse_as_pocket_api(line):
-        from ..config import (
-          POCKET_CONSUMER_KEY,
-          POCKET_ACCESS_TOKENS,
-        )
-        username = pattern.search(line).group(1)
-        api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
-        api.last_since = None
-
-        for article in get_pocket_articles(api, since=read_since(username)):
-          yield link_from_article(article, sources=[line])
-
-        write_since(username, api.last_since)
+        if should_parse_as_pocket_api(line):
+            
+            username = pattern.search(line).group(1)
+            api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
+            api.last_since = None
+    
+            for article in get_pocket_articles(api, since=read_since(username)):
+                yield link_from_article(article, sources=[line])
+    
+            write_since(username, api.last_since)

+ 4 - 5
archivebox/util.py

@@ -1,11 +1,11 @@
 __package__ = 'archivebox'
 
 import re
-from pathlib import Path
+import requests
 import json as pyjson
 
-
 from typing import List, Optional, Any
+from pathlib import Path
 from inspect import signature
 from functools import wraps
 from hashlib import sha256
@@ -13,10 +13,9 @@ from urllib.parse import urlparse, quote, unquote
 from html import escape, unescape
 from datetime import datetime
 from dateparser import parse as dateparser
-
-import requests
 from requests.exceptions import RequestException, ReadTimeout
-from .base32_crockford import encode as base32_encode                            # type: ignore
+
+from .vendor.base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 
 try:

+ 0 - 0
archivebox/vendor/__init__.py


+ 0 - 0
archivebox/base32_crockford.py → archivebox/vendor/base32_crockford.py


+ 368 - 0
archivebox/vendor/pocket.py

@@ -0,0 +1,368 @@
+# https://github.com/tapanpandita/pocket/blob/master/pocket.py
+
+import requests
+import json
+from functools import wraps
+
+
+class PocketException(Exception):
+    '''
+    Root of the exception hierarchy used by this Pocket API wrapper; the
+    more specific error classes in this module derive from it.
+    http://getpocket.com/developer/docs/errors
+
+    '''
+
+
+class InvalidQueryException(PocketException):
+    '''Error type registered for HTTP 400 in the ``EXCEPTIONS`` table.'''
+
+
+class AuthException(PocketException):
+    '''Error type registered for HTTP 401 in the ``EXCEPTIONS`` table.'''
+
+
+class RateLimitException(PocketException):
+    '''
+    Error type registered for HTTP 403 in the ``EXCEPTIONS`` table.
+    http://getpocket.com/developer/docs/rate-limits
+
+    '''
+
+
+class ServerMaintenanceException(PocketException):
+    '''Error type registered for HTTP 503 in the ``EXCEPTIONS`` table.'''
+
+# Maps an HTTP error status code to the exception class raised for it.
+# Status codes missing from this table fall back to PocketException
+# (see Pocket._make_request).
+EXCEPTIONS = {
+    400: InvalidQueryException,
+    401: AuthException,
+    403: RateLimitException,
+    503: ServerMaintenanceException,
+}
+
+
+def method_wrapper(fn):
+    '''
+    Decorator that turns a method into a call to the API endpoint that
+    shares the method's name. The decorated function's body is never run;
+    only its name and parameter names are used.
+
+    The wrapper merges positional and keyword arguments, drops every
+    argument whose value is None, adds the entries from
+    ``self.get_payload()`` and returns
+    ``self.make_request(url, payload)``.
+
+    '''
+
+    @wraps(fn)
+    def wrapped(self, *args, **kwargs):
+        # co_varnames lists the parameters first and local variables after
+        # them, so stop at co_argcount; index 0 is the instance parameter.
+        code = fn.__code__
+        arg_names = code.co_varnames[1:code.co_argcount]
+        kwargs.update(zip(arg_names, args))
+
+        url = self.api_endpoints[fn.__name__]
+        # None means "not supplied", so it is left out of the request
+        payload = {k: v for k, v in kwargs.items() if v is not None}
+        payload.update(self.get_payload())
+
+        return self.make_request(url, payload)
+
+    return wrapped
+
+
+def bulk_wrapper(fn):
+    '''
+    Decorator that turns a method into a single action for the ``send``
+    endpoint. The decorated function's body is never run; its name becomes
+    the action name (``bulk_add`` is sent as ``add``) and its parameter
+    names become the action's fields.
+
+    With ``wait`` true (the default) the action is queued through
+    ``self.add_bulk_query`` and ``self`` is returned so calls can be
+    chained. With ``wait`` false the action is sent immediately as a JSON
+    body and the result of ``self.make_request`` is returned.
+
+    '''
+
+    @wraps(fn)
+    def wrapped(self, *args, **kwargs):
+        # co_varnames lists the parameters first and local variables after
+        # them, so stop at co_argcount; index 0 is the instance parameter.
+        code = fn.__code__
+        arg_names = code.co_varnames[1:code.co_argcount]
+        kwargs.update(zip(arg_names, args))
+
+        wait = kwargs.get('wait', True)
+        # NOTE(review): an explicitly passed ``wait`` stays in the action
+        # dict and is therefore sent along with it - confirm intended.
+        query = {k: v for k, v in kwargs.items() if v is not None}
+        # TODO: Fix this hack
+        query['action'] = 'add' if fn.__name__ == 'bulk_add' else fn.__name__
+
+        if wait:
+            self.add_bulk_query(query)
+            return self
+
+        url = self.api_endpoints['send']
+        payload = {
+            'actions': [query],
+        }
+        payload.update(self.get_payload())
+        return self.make_request(
+            url,
+            json.dumps(payload),
+            headers={'content-type': 'application/json'},
+        )
+
+    return wrapped
+
+
+class Pocket(object):
+    '''
+    This class implements a basic python wrapper around the pocket api. For
+    detailed documentation of the methods and what they do, please refer to
+    the official pocket api documentation at
+    http://getpocket.com/developer/docs/overview
+
+    The endpoint methods (``add``, ``get``, ``send``) and the bulk action
+    methods have docstring-only bodies on purpose: their decorators
+    (``method_wrapper`` / ``bulk_wrapper``) build the request from the call
+    arguments and the method name.
+
+    '''
+    # endpoint name -> full URL, for the three v3 endpoints used here
+    api_endpoints = dict(
+        (method, 'https://getpocket.com/v3/%s' % method)
+        for method in "add,send,get".split(",")
+    )
+
+    # human readable text used in the exception message for each status code
+    statuses = {
+        200: 'Request was successful',
+        400: 'Invalid request, please make sure you follow the '
+             'documentation for proper syntax',
+        401: 'Problem authenticating the user',
+        403: 'User was authenticated, but access denied due to lack of '
+             'permission or rate limiting',
+        503: 'Pocket\'s sync server is down for scheduled maintenance.',
+    }
+
+    def __init__(self, consumer_key, access_token):
+        '''
+        Store the credentials and start with an empty queue of bulk actions.
+
+        '''
+        self.consumer_key = consumer_key
+        self.access_token = access_token
+        self._bulk_query = []
+
+        # credentials merged into every request made through this instance
+        self._payload = {
+            'consumer_key': self.consumer_key,
+            'access_token': self.access_token,
+        }
+
+    def get_payload(self):
+        '''
+        Return the credentials dict that is merged into each request.
+        This is the instance's own dict, not a copy.
+
+        '''
+        return self._payload
+
+    def add_bulk_query(self, query):
+        '''
+        Queue one action dict; queued actions are sent by ``commit``.
+
+        '''
+        self._bulk_query.append(query)
+
+    @staticmethod
+    def _post_request(url, payload, headers):
+        '''
+        POST ``payload`` to ``url`` and return the ``requests`` response.
+
+        '''
+        r = requests.post(url, data=payload, headers=headers)
+        return r
+
+    @classmethod
+    def _make_request(cls, url, payload, headers=None):
+        '''
+        POST the payload and return a ``(body, headers)`` tuple, where body
+        is the decoded JSON response, or the raw response text when the
+        decoded JSON is empty.
+
+        Raises the exception class registered in ``EXCEPTIONS`` for the
+        response status (``PocketException`` when none is registered) for
+        any status code above 399.
+
+        '''
+        r = cls._post_request(url, payload, headers)
+
+        if r.status_code > 399:
+            error_msg = cls.statuses.get(r.status_code)
+            extra_info = r.headers.get('X-Error')
+            raise EXCEPTIONS.get(r.status_code, PocketException)(
+                '%s. %s' % (error_msg, extra_info)
+            )
+
+        return r.json() or r.text, r.headers
+
+    @classmethod
+    def make_request(cls, url, payload, headers=None):
+        '''
+        Public entry point used by the decorators; delegates to
+        ``_make_request`` and returns its result unchanged.
+
+        '''
+        return cls._make_request(url, payload, headers)
+
+    @method_wrapper
+    def add(self, url, title=None, tags=None, tweet_id=None):
+        '''
+        This method allows you to add a page to a user's list.
+        In order to use the /v3/add endpoint, your consumer key must have the
+        "Add" permission.
+        http://getpocket.com/developer/docs/v3/add
+
+        '''
+
+    @method_wrapper
+    def get(
+        self, state=None, favorite=None, tag=None, contentType=None,
+        sort=None, detailType=None, search=None, domain=None, since=None,
+        count=None, offset=None
+    ):
+        '''
+        This method allows you to retrieve a user's list. It supports
+        retrieving items changed since a specific time to allow for syncing.
+        http://getpocket.com/developer/docs/v3/retrieve
+
+        '''
+
+    @method_wrapper
+    def send(self, actions):
+        '''
+        This method allows you to make changes to a user's list. It supports
+        adding new pages, marking pages as read, changing titles, or updating
+        tags. Multiple changes to items can be made in one request.
+        http://getpocket.com/developer/docs/v3/modify
+
+        '''
+
+    @bulk_wrapper
+    def bulk_add(
+        self, item_id, ref_id=None, tags=None, time=None, title=None,
+        url=None, wait=True
+    ):
+        '''
+        Add a new item to the user's list
+        http://getpocket.com/developer/docs/v3/modify#action_add
+
+        '''
+
+    @bulk_wrapper
+    def archive(self, item_id, time=None, wait=True):
+        '''
+        Move an item to the user's archive
+        http://getpocket.com/developer/docs/v3/modify#action_archive
+
+        '''
+
+    @bulk_wrapper
+    def readd(self, item_id, time=None, wait=True):
+        '''
+        Re-add (unarchive) an item to the user's list
+        http://getpocket.com/developer/docs/v3/modify#action_readd
+
+        '''
+
+    @bulk_wrapper
+    def favorite(self, item_id, time=None, wait=True):
+        '''
+        Mark an item as a favorite
+        http://getpocket.com/developer/docs/v3/modify#action_favorite
+
+        '''
+
+    @bulk_wrapper
+    def unfavorite(self, item_id, time=None, wait=True):
+        '''
+        Remove an item from the user's favorites
+        http://getpocket.com/developer/docs/v3/modify#action_unfavorite
+
+        '''
+
+    @bulk_wrapper
+    def delete(self, item_id, time=None, wait=True):
+        '''
+        Permanently remove an item from the user's account
+        http://getpocket.com/developer/docs/v3/modify#action_delete
+
+        '''
+
+    @bulk_wrapper
+    def tags_add(self, item_id, tags, time=None, wait=True):
+        '''
+        Add one or more tags to an item
+        http://getpocket.com/developer/docs/v3/modify#action_tags_add
+
+        '''
+
+    @bulk_wrapper
+    def tags_remove(self, item_id, tags, time=None, wait=True):
+        '''
+        Remove one or more tags from an item
+        http://getpocket.com/developer/docs/v3/modify#action_tags_remove
+
+        '''
+
+    @bulk_wrapper
+    def tags_replace(self, item_id, tags, time=None, wait=True):
+        '''
+        Replace all of the tags for an item with one or more provided tags
+        http://getpocket.com/developer/docs/v3/modify#action_tags_replace
+
+        '''
+
+    @bulk_wrapper
+    def tags_clear(self, item_id, time=None, wait=True):
+        '''
+        Remove all tags from an item.
+        http://getpocket.com/developer/docs/v3/modify#action_tags_clear
+
+        '''
+
+    @bulk_wrapper
+    def tag_rename(self, item_id, old_tag, new_tag, time=None, wait=True):
+        '''
+        Rename a tag. This affects all items with this tag.
+        http://getpocket.com/developer/docs/v3/modify#action_tag_rename
+
+        '''
+
+    def commit(self):
+        '''
+        This method executes the bulk query, flushes stored queries and
+        returns the response
+
+        '''
+        url = self.api_endpoints['send']
+        payload = {
+            'actions': self._bulk_query,
+        }
+        payload.update(self._payload)
+        # the queue is reset before the request is made, so the queued
+        # actions are dropped even when the request raises
+        self._bulk_query = []
+
+        return self._make_request(
+            url,
+            json.dumps(payload),
+            headers={'content-type': 'application/json'},
+        )
+
+    @classmethod
+    def get_request_token(
+        cls, consumer_key, redirect_uri='http://example.com/', state=None
+    ):
+        '''
+        Returns the request token that can be used to fetch the access token
+
+        '''
+        headers = {
+            'X-Accept': 'application/json',
+        }
+        url = 'https://getpocket.com/v3/oauth/request'
+        payload = {
+            'consumer_key': consumer_key,
+            'redirect_uri': redirect_uri,
+        }
+
+        if state:
+            payload['state'] = state
+
+        return cls._make_request(url, payload, headers)[0]['code']
+
+    @classmethod
+    def get_credentials(cls, consumer_key, code):
+        '''
+        Fetches access token from using the request token and consumer key
+
+        '''
+        headers = {
+            'X-Accept': 'application/json',
+        }
+        url = 'https://getpocket.com/v3/oauth/authorize'
+        payload = {
+            'consumer_key': consumer_key,
+            'code': code,
+        }
+
+        return cls._make_request(url, payload, headers)[0]
+
+    @classmethod
+    def get_access_token(cls, consumer_key, code):
+        '''
+        Return only the ``access_token`` entry of ``get_credentials``.
+
+        '''
+        return cls.get_credentials(consumer_key, code)['access_token']
+
+    @classmethod
+    def get_auth_url(cls, code, redirect_uri='http://example.com'):
+        '''
+        Build the URL the user has to open to authorize the request token
+        ``code``. Both values are inserted as given, without URL-encoding.
+
+        '''
+        auth_url = ('https://getpocket.com/auth/authorize'
+                    '?request_token=%s&redirect_uri=%s' % (code, redirect_uri))
+        return auth_url
+
+    @classmethod
+    def auth(
+        cls, consumer_key, redirect_uri='http://example.com/', state=None,
+    ):
+        '''
+        This is a test method for verifying if oauth worked. It fetches a
+        request token, prompts on stdin until the user has opened the
+        authorization URL and pressed enter, then returns the access token.
+        http://getpocket.com/developer/docs/authentication
+
+        '''
+        code = cls.get_request_token(consumer_key, redirect_uri, state)
+
+        auth_url = cls.get_auth_url(code, redirect_uri)
+        # input() is the Python 3 name of Python 2's raw_input(), which no
+        # longer exists and raised NameError here
+        input(
+            'Please open %s in your browser to authorize the app and '
+            'press enter:' % auth_url
+        )
+
+        return cls.get_access_token(consumer_key, code)

+ 0 - 0
archivebox/core/utils_taggit.py → archivebox/vendor/taggit_utils.py


+ 5 - 8
setup.py

@@ -48,6 +48,11 @@ setuptools.setup(
         "wheel",
     ],
     install_requires=[
+        # only add things here that have corresponding apt python3-packages available
+        # anything added here also needs to be added to our package dependencies in
+        # stdeb.cfg (apt), archivebox.rb (brew), Dockerfile, etc.
+        # if there is no apt python3-package equivalent, then vendor it instead in
+        # ./archivebox/vendor/
         "requests==2.24.0",
         "atomicwrites==1.4.0",
         "mypy-extensions==0.4.3",
@@ -59,12 +64,6 @@ setuptools.setup(
         "python-crontab==2.5.1",
         "croniter==0.3.34",
         "w3lib==1.22.0",
-        "pocket==0.3.6",
-        # Some/all of these will likely be added in the future:
-        # wpull
-        # pywb
-        # pyppeteer
-        # archivenow
     ],
     extras_require={
         'dev': [
@@ -81,8 +80,6 @@ setuptools.setup(
             "bottle",
             "stdeb",
         ],
-        # 'redis': ['redis', 'django-redis'],
-        # 'pywb': ['pywb', 'redis'],
     },
     packages=[PKG_NAME],
     include_package_data=True,   # see MANIFEST.in