Browse Source

move utils and vendored libs into subfolders

Nick Sweeting 5 years ago
parent
commit
a0a79cead8

+ 0 - 1
archivebox/core/admin.py

@@ -14,7 +14,6 @@ from django import forms
 from core.models import Snapshot, Tag
 from core.models import Snapshot, Tag
 from core.forms import AddLinkForm, TagField
 from core.forms import AddLinkForm, TagField
 
 
-from core.utils import get_icons
 from core.mixins import SearchResultsAdminMixin
 from core.mixins import SearchResultsAdminMixin
 
 
 from index.html import snapshot_icons
 from index.html import snapshot_icons

+ 1 - 1
archivebox/core/forms.py

@@ -3,7 +3,7 @@ __package__ = 'archivebox.core'
 from django import forms
 from django import forms
 
 
 from ..util import URL_REGEX
 from ..util import URL_REGEX
-from .utils_taggit import edit_string_for_tags, parse_tags
+from ..vendor.taggit_utils import edit_string_for_tags, parse_tags
 
 
 CHOICES = (
 CHOICES = (
     ('0', 'depth = 0 (archive just these URLs)'),
     ('0', 'depth = 0 (archive just these URLs)'),

+ 35 - 37
archivebox/parsers/pocket_api.py

@@ -4,34 +4,35 @@ __package__ = 'archivebox.parsers'
 import re
 import re
 
 
 from typing import IO, Iterable, Optional
 from typing import IO, Iterable, Optional
-from datetime import datetime
 from configparser import ConfigParser
 from configparser import ConfigParser
 
 
 from pathlib import Path
 from pathlib import Path
-from pocket import Pocket
-import requests
+from ..vendor.pocket import Pocket
 
 
 from ..index.schema import Link
 from ..index.schema import Link
-from ..util import (
-    enforce_types,
-)
+from ..util import enforce_types
+from ..system import atomic_write
 from ..config import (
 from ..config import (
-    SOURCES_DIR
+    SOURCES_DIR,
+    POCKET_CONSUMER_KEY,
+    POCKET_ACCESS_TOKENS,
 )
 )
 
 
-_COUNT_PER_PAGE = 500
-_API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
+
+COUNT_PER_PAGE = 500
+API_DB_PATH = Path(SOURCES_DIR) / 'pocket_api.db'
 
 
 # search for broken protocols that sometimes come from the Pocket API
 # search for broken protocols that sometimes come from the Pocket API
 _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
 _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
 
 
+
 def get_pocket_articles(api: Pocket, since=None, page=0):
 def get_pocket_articles(api: Pocket, since=None, page=0):
     body, headers = api.get(
     body, headers = api.get(
         state='archive',
         state='archive',
         sort='oldest',
         sort='oldest',
         since=since,
         since=since,
-        count=_COUNT_PER_PAGE,
-        offset=page * _COUNT_PER_PAGE,
+        count=COUNT_PER_PAGE,
+        offset=page * COUNT_PER_PAGE,
     )
     )
 
 
     articles = body['list'].values() if isinstance(body['list'], dict) else body['list']
     articles = body['list'].values() if isinstance(body['list'], dict) else body['list']
@@ -39,7 +40,7 @@ def get_pocket_articles(api: Pocket, since=None, page=0):
 
 
     yield from articles
     yield from articles
 
 
-    if returned_count == _COUNT_PER_PAGE:
+    if returned_count == COUNT_PER_PAGE:
         yield from get_pocket_articles(api, since=since, page=page + 1)
         yield from get_pocket_articles(api, since=since, page=page + 1)
     else:
     else:
         api.last_since = body['since']
         api.last_since = body['since']
@@ -60,56 +61,53 @@ def link_from_article(article: dict, sources: list):
         sources=sources
         sources=sources
     )
     )
 
 
-def write_since(username: str, since: str):
-    from ..system import atomic_write
 
 
-    if not _API_DB_PATH.exists():
-        atomic_write(_API_DB_PATH, '')
+def write_since(username: str, since: str):
+    if not API_DB_PATH.exists():
+        atomic_write(API_DB_PATH, '')
 
 
     since_file = ConfigParser()
     since_file = ConfigParser()
     since_file.optionxform = str
     since_file.optionxform = str
-    since_file.read(_API_DB_PATH)
+    since_file.read(API_DB_PATH)
 
 
     since_file[username] = {
     since_file[username] = {
         'since': since
         'since': since
     }
     }
 
 
-    with open(_API_DB_PATH, 'w+') as new:
+    with open(API_DB_PATH, 'w+') as new:
         since_file.write(new)
         since_file.write(new)
 
 
-def read_since(username: str) -> Optional[str]:
-    from ..system import atomic_write
 
 
-    if not _API_DB_PATH.exists():
-        atomic_write(_API_DB_PATH, '')
+def read_since(username: str) -> Optional[str]:
+    if not API_DB_PATH.exists():
+        atomic_write(API_DB_PATH, '')
 
 
     config_file = ConfigParser()
     config_file = ConfigParser()
     config_file.optionxform = str
     config_file.optionxform = str
-    config_file.read(_API_DB_PATH)
+    config_file.read(API_DB_PATH)
 
 
     return config_file.get(username, 'since', fallback=None)
     return config_file.get(username, 'since', fallback=None)
 
 
+
 @enforce_types
 @enforce_types
 def should_parse_as_pocket_api(text: str) -> bool:
 def should_parse_as_pocket_api(text: str) -> bool:
     return text.startswith('pocket://')
     return text.startswith('pocket://')
 
 
+
 @enforce_types
 @enforce_types
 def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
 def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse bookmarks from the Pocket API"""
     """Parse bookmarks from the Pocket API"""
 
 
     input_buffer.seek(0)
     input_buffer.seek(0)
-    pattern = re.compile("^pocket:\/\/(\w+)")
+    pattern = re.compile(r"^pocket:\/\/(\w+)")
     for line in input_buffer:
     for line in input_buffer:
-      if should_parse_as_pocket_api(line):
-        from ..config import (
-          POCKET_CONSUMER_KEY,
-          POCKET_ACCESS_TOKENS,
-        )
-        username = pattern.search(line).group(1)
-        api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
-        api.last_since = None
-
-        for article in get_pocket_articles(api, since=read_since(username)):
-          yield link_from_article(article, sources=[line])
-
-        write_since(username, api.last_since)
+        if should_parse_as_pocket_api(line):
+            
+            username = pattern.search(line).group(1)
+            api = Pocket(POCKET_CONSUMER_KEY, POCKET_ACCESS_TOKENS[username])
+            api.last_since = None
+    
+            for article in get_pocket_articles(api, since=read_since(username)):
+                yield link_from_article(article, sources=[line])
+    
+            write_since(username, api.last_since)

+ 4 - 5
archivebox/util.py

@@ -1,11 +1,11 @@
 __package__ = 'archivebox'
 __package__ = 'archivebox'
 
 
 import re
 import re
-from pathlib import Path
+import requests
 import json as pyjson
 import json as pyjson
 
 
-
 from typing import List, Optional, Any
 from typing import List, Optional, Any
+from pathlib import Path
 from inspect import signature
 from inspect import signature
 from functools import wraps
 from functools import wraps
 from hashlib import sha256
 from hashlib import sha256
@@ -13,10 +13,9 @@ from urllib.parse import urlparse, quote, unquote
 from html import escape, unescape
 from html import escape, unescape
 from datetime import datetime
 from datetime import datetime
 from dateparser import parse as dateparser
 from dateparser import parse as dateparser
-
-import requests
 from requests.exceptions import RequestException, ReadTimeout
 from requests.exceptions import RequestException, ReadTimeout
-from .base32_crockford import encode as base32_encode                            # type: ignore
+
+from .vendor.base32_crockford import encode as base32_encode                            # type: ignore
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
 
 
 try:
 try:

+ 0 - 0
archivebox/vendor/__init__.py


+ 0 - 0
archivebox/base32_crockford.py → archivebox/vendor/base32_crockford.py


+ 368 - 0
archivebox/vendor/pocket.py

@@ -0,0 +1,368 @@
+# https://github.com/tapanpandita/pocket/blob/master/pocket.py
+
+import requests
+import json
+from functools import wraps
+
+
+class PocketException(Exception):
+    '''
+    Base class for all pocket exceptions
+    http://getpocket.com/developer/docs/errors
+
+    '''
+    pass
+
+
+class InvalidQueryException(PocketException):
+    '''
+    Raised by Pocket._make_request for HTTP 400 responses (invalid request).
+
+    '''
+    pass
+
+
+class AuthException(PocketException):
+    '''
+    Raised by Pocket._make_request for HTTP 401 responses (the user could not
+    be authenticated).
+
+    '''
+    pass
+
+
+class RateLimitException(PocketException):
+    '''
+    Raised by Pocket._make_request for HTTP 403 responses (access denied due
+    to lack of permission or rate limiting).
+    http://getpocket.com/developer/docs/rate-limits
+
+    '''
+    pass
+
+
+class ServerMaintenanceException(PocketException):
+    '''
+    Raised by Pocket._make_request for HTTP 503 responses (sync server down
+    for scheduled maintenance).
+
+    '''
+    pass
+
+# Maps an HTTP status code to the exception class Pocket._make_request raises
+# for it; any other status above 399 falls back to PocketException there.
+EXCEPTIONS = {
+    400: InvalidQueryException,
+    401: AuthException,
+    403: RateLimitException,
+    503: ServerMaintenanceException,
+}
+
+
+def method_wrapper(fn):
+    '''
+    Decorator that turns a docstring-only stub method into a request to the
+    endpoint registered in ``self.api_endpoints`` under the stub's name.
+
+    Positional arguments are matched to the stub's parameter names, arguments
+    whose value is None are left out, and ``self.get_payload()`` is merged in
+    before the result of ``self.make_request(url, payload)`` is returned.
+
+    '''
+
+    @wraps(fn)
+    def wrapped(self, *args, **kwargs):
+        # co_varnames starts with the parameter names; drop the instance slot
+        param_names = list(fn.__code__.co_varnames)
+        param_names.remove('self')
+        for name, value in zip(param_names, args):
+            kwargs[name] = value
+
+        url = self.api_endpoints[fn.__name__]
+        payload = {
+            key: value for key, value in kwargs.items()
+            if value is not None
+        }
+        payload.update(self.get_payload())
+
+        return self.make_request(url, payload)
+
+    return wrapped
+
+
+def bulk_wrapper(fn):
+    '''
+    Decorator that turns a docstring-only stub method into a bulk "action".
+
+    The call's arguments (minus any that are None) become the action dict,
+    with ``'action'`` set to the stub's name (``'add'`` for ``bulk_add``).
+    While ``wait`` is truthy -- which includes not passing it at all -- the
+    action is queued with ``self.add_bulk_query`` and ``self`` is returned so
+    calls can be chained. Otherwise the single action is posted as JSON to the
+    ``'send'`` endpoint and the result of ``self.make_request`` is returned.
+
+    '''
+
+    @wraps(fn)
+    def wrapped(self, *args, **kwargs):
+        # co_varnames starts with the parameter names; drop the instance slot
+        param_names = list(fn.__code__.co_varnames)
+        param_names.remove('self')
+        for name, value in zip(param_names, args):
+            kwargs[name] = value
+
+        # an explicitly passed ``wait`` stays part of the action dict
+        query = {
+            key: value for key, value in kwargs.items()
+            if value is not None
+        }
+        # TODO: Fix this hack
+        if fn.__name__ == 'bulk_add':
+            query['action'] = 'add'
+        else:
+            query['action'] = fn.__name__
+
+        if kwargs.get('wait', True):
+            self.add_bulk_query(query)
+            return self
+
+        url = self.api_endpoints['send']
+        payload = {'actions': [query]}
+        payload.update(self.get_payload())
+        return self.make_request(
+            url,
+            json.dumps(payload),
+            headers={'content-type': 'application/json'},
+        )
+
+    return wrapped
+
+
+class Pocket(object):
+    '''
+    This class implements a basic Python wrapper around the Pocket API. For
+    detailed documentation of the methods and what they do, please refer to the
+    official Pocket API documentation at
+    http://getpocket.com/developer/docs/overview
+
+    '''
+    api_endpoints = dict(
+        (method, 'https://getpocket.com/v3/%s' % method)
+        for method in "add,send,get".split(",")
+    )
+
+    statuses = {
+        200: 'Request was successful',
+        400: 'Invalid request, please make sure you follow the '
+             'documentation for proper syntax',
+        401: 'Problem authenticating the user',
+        403: 'User was authenticated, but access denied due to lack of '
+             'permission or rate limiting',
+        503: 'Pocket\'s sync server is down for scheduled maintenance.',
+    }
+
+    def __init__(self, consumer_key, access_token):
+        self.consumer_key = consumer_key
+        self.access_token = access_token
+        self._bulk_query = []
+
+        self._payload = {
+            'consumer_key': self.consumer_key,
+            'access_token': self.access_token,
+        }
+
+    def get_payload(self):
+        return self._payload
+
+    def add_bulk_query(self, query):
+        self._bulk_query.append(query)
+
+    @staticmethod
+    def _post_request(url, payload, headers):
+        r = requests.post(url, data=payload, headers=headers)
+        return r
+
+    @classmethod
+    def _make_request(cls, url, payload, headers=None):
+        '''
+        POST ``payload`` to ``url`` and return ``(body, response_headers)``.
+
+        ``body`` is the decoded JSON response when the body is valid,
+        non-empty JSON, otherwise the raw response text.
+
+        Raises the exception class registered in ``EXCEPTIONS`` for the
+        response status, or ``PocketException`` for any other status above
+        399.
+
+        '''
+        r = cls._post_request(url, payload, headers)
+
+        if r.status_code > 399:
+            # An unmapped status or a missing X-Error header used to show up
+            # as the literal text "None" in the exception message.
+            error_msg = cls.statuses.get(
+                r.status_code,
+                'Unexpected HTTP status %s' % r.status_code,
+            )
+            extra_info = r.headers.get('X-Error')
+            if extra_info:
+                message = '%s. %s' % (error_msg, extra_info)
+            else:
+                message = error_msg
+            raise EXCEPTIONS.get(r.status_code, PocketException)(message)
+
+        try:
+            body = r.json()
+        except ValueError:
+            # r.json() raises on a body that is not valid JSON, which made
+            # the intended fallback to the raw text unreachable.
+            body = None
+
+        return body or r.text, r.headers
+
+    @classmethod
+    def make_request(cls, url, payload, headers=None):
+        return cls._make_request(url, payload, headers)
+
+    @method_wrapper
+    def add(self, url, title=None, tags=None, tweet_id=None):
+        '''
+        This method allows you to add a page to a user's list.
+        In order to use the /v3/add endpoint, your consumer key must have the
+        "Add" permission.
+        http://getpocket.com/developer/docs/v3/add
+
+        '''
+
+    @method_wrapper
+    def get(
+        self, state=None, favorite=None, tag=None, contentType=None,
+        sort=None, detailType=None, search=None, domain=None, since=None,
+        count=None, offset=None
+    ):
+        '''
+        This method allows you to retrieve a user's list. It supports
+        retrieving items changed since a specific time to allow for syncing.
+        http://getpocket.com/developer/docs/v3/retrieve
+
+        '''
+
+    @method_wrapper
+    def send(self, actions):
+        '''
+        This method allows you to make changes to a user's list. It supports
+        adding new pages, marking pages as read, changing titles, or updating
+        tags. Multiple changes to items can be made in one request.
+        http://getpocket.com/developer/docs/v3/modify
+
+        '''
+
+    @bulk_wrapper
+    def bulk_add(
+        self, item_id, ref_id=None, tags=None, time=None, title=None,
+        url=None, wait=True
+    ):
+        '''
+        Add a new item to the user's list
+        http://getpocket.com/developer/docs/v3/modify#action_add
+
+        '''
+
+    @bulk_wrapper
+    def archive(self, item_id, time=None, wait=True):
+        '''
+        Move an item to the user's archive
+        http://getpocket.com/developer/docs/v3/modify#action_archive
+
+        '''
+
+    @bulk_wrapper
+    def readd(self, item_id, time=None, wait=True):
+        '''
+        Re-add (unarchive) an item to the user's list
+        http://getpocket.com/developer/docs/v3/modify#action_readd
+
+        '''
+
+    @bulk_wrapper
+    def favorite(self, item_id, time=None, wait=True):
+        '''
+        Mark an item as a favorite
+        http://getpocket.com/developer/docs/v3/modify#action_favorite
+
+        '''
+
+    @bulk_wrapper
+    def unfavorite(self, item_id, time=None, wait=True):
+        '''
+        Remove an item from the user's favorites
+        http://getpocket.com/developer/docs/v3/modify#action_unfavorite
+
+        '''
+
+    @bulk_wrapper
+    def delete(self, item_id, time=None, wait=True):
+        '''
+        Permanently remove an item from the user's account
+        http://getpocket.com/developer/docs/v3/modify#action_delete
+
+        '''
+
+    @bulk_wrapper
+    def tags_add(self, item_id, tags, time=None, wait=True):
+        '''
+        Add one or more tags to an item
+        http://getpocket.com/developer/docs/v3/modify#action_tags_add
+
+        '''
+
+    @bulk_wrapper
+    def tags_remove(self, item_id, tags, time=None, wait=True):
+        '''
+        Remove one or more tags from an item
+        http://getpocket.com/developer/docs/v3/modify#action_tags_remove
+
+        '''
+
+    @bulk_wrapper
+    def tags_replace(self, item_id, tags, time=None, wait=True):
+        '''
+        Replace all of the tags for an item with one or more provided tags
+        http://getpocket.com/developer/docs/v3/modify#action_tags_replace
+
+        '''
+
+    @bulk_wrapper
+    def tags_clear(self, item_id, time=None, wait=True):
+        '''
+        Remove all tags from an item.
+        http://getpocket.com/developer/docs/v3/modify#action_tags_clear
+
+        '''
+
+    @bulk_wrapper
+    def tag_rename(self, item_id, old_tag, new_tag, time=None, wait=True):
+        '''
+        Rename a tag. This affects all items with this tag.
+        http://getpocket.com/developer/docs/v3/modify#action_tag_rename
+
+        '''
+
+    def commit(self):
+        '''
+        This method executes the bulk query, flushes stored queries and
+        returns the response
+
+        '''
+        url = self.api_endpoints['send']
+        payload = {
+            'actions': self._bulk_query,
+        }
+        payload.update(self._payload)
+        self._bulk_query = []
+
+        return self._make_request(
+            url,
+            json.dumps(payload),
+            headers={'content-type': 'application/json'},
+        )
+
+    @classmethod
+    def get_request_token(
+        cls, consumer_key, redirect_uri='http://example.com/', state=None
+    ):
+        '''
+        Returns the request token that can be used to fetch the access token
+
+        '''
+        headers = {
+            'X-Accept': 'application/json',
+        }
+        url = 'https://getpocket.com/v3/oauth/request'
+        payload = {
+            'consumer_key': consumer_key,
+            'redirect_uri': redirect_uri,
+        }
+
+        if state:
+            payload['state'] = state
+
+        return cls._make_request(url, payload, headers)[0]['code']
+
+    @classmethod
+    def get_credentials(cls, consumer_key, code):
+        '''
+        Exchange the request token ``code`` and the consumer key for the
+        user's credentials by POSTing to the oauth/authorize endpoint.
+
+        Returns the response body as produced by ``_make_request``;
+        ``get_access_token`` reads its ``'access_token'`` entry.
+
+        '''
+        headers = {
+            'X-Accept': 'application/json',
+        }
+        url = 'https://getpocket.com/v3/oauth/authorize'
+        payload = {
+            'consumer_key': consumer_key,
+            'code': code,
+        }
+
+        return cls._make_request(url, payload, headers)[0]
+
+    @classmethod
+    def get_access_token(cls, consumer_key, code):
+        return cls.get_credentials(consumer_key, code)['access_token']
+
+    @classmethod
+    def get_auth_url(cls, code, redirect_uri='http://example.com'):
+        auth_url = ('https://getpocket.com/auth/authorize'
+                    '?request_token=%s&redirect_uri=%s' % (code, redirect_uri))
+        return auth_url
+
+    @classmethod
+    def auth(
+        cls, consumer_key, redirect_uri='http://example.com/', state=None,
+    ):
+        '''
+        This is a test method for verifying if oauth worked
+        http://getpocket.com/developer/docs/authentication
+
+        Requests a request token, prompts on stdin until the user has opened
+        the authorization URL and pressed enter, then returns the access
+        token obtained for that request token.
+
+        '''
+        code = cls.get_request_token(consumer_key, redirect_uri, state)
+
+        # same URL the inline format string used to build
+        auth_url = cls.get_auth_url(code, redirect_uri)
+        # raw_input() only exists on Python 2; on Python 3 it raised NameError
+        input(
+            'Please open %s in your browser to authorize the app and '
+            'press enter:' % auth_url
+        )
+
+        return cls.get_access_token(consumer_key, code)

+ 0 - 0
archivebox/core/utils_taggit.py → archivebox/vendor/taggit_utils.py


+ 5 - 8
setup.py

@@ -48,6 +48,11 @@ setuptools.setup(
         "wheel",
         "wheel",
     ],
     ],
     install_requires=[
     install_requires=[
+        # only add things here that have corresponding apt python3-packages available
+        # anything added here also needs to be added to our package dependencies in
+        # stdeb.cfg (apt), archivebox.rb (brew), Dockerfile, etc.
+        # if there is no apt python3-package equivalent, then vendor it instead in
+        # ./archivebox/vendor/
         "requests==2.24.0",
         "requests==2.24.0",
         "atomicwrites==1.4.0",
         "atomicwrites==1.4.0",
         "mypy-extensions==0.4.3",
         "mypy-extensions==0.4.3",
@@ -59,12 +64,6 @@ setuptools.setup(
         "python-crontab==2.5.1",
         "python-crontab==2.5.1",
         "croniter==0.3.34",
         "croniter==0.3.34",
         "w3lib==1.22.0",
         "w3lib==1.22.0",
-        "pocket==0.3.6",
-        # Some/all of these will likely be added in the future:
-        # wpull
-        # pywb
-        # pyppeteer
-        # archivenow
     ],
     ],
     extras_require={
     extras_require={
         'dev': [
         'dev': [
@@ -81,8 +80,6 @@ setuptools.setup(
             "bottle",
             "bottle",
             "stdeb",
             "stdeb",
         ],
         ],
-        # 'redis': ['redis', 'django-redis'],
-        # 'pywb': ['pywb', 'redis'],
     },
     },
     packages=[PKG_NAME],
     packages=[PKG_NAME],
     include_package_data=True,   # see MANIFEST.in
     include_package_data=True,   # see MANIFEST.in