Browse Source

big overhaul of REST API, split into auth, core, and cli methods

Nick Sweeting 1 year ago
parent
commit
75153252dc

+ 1 - 0
archivebox/api/__init__.py

@@ -0,0 +1 @@
+__package__ = 'archivebox.api'

+ 2 - 0
archivebox/api/apps.py

@@ -1,3 +1,5 @@
+__package__ = 'archivebox.api'
+
 from django.apps import AppConfig
 from django.apps import AppConfig
 
 
 
 

+ 0 - 184
archivebox/api/archive.py

@@ -1,184 +0,0 @@
-# archivebox_api.py
-from typing import List, Optional
-from enum import Enum
-from pydantic import BaseModel
-from ninja import Router
-from main import (
-    add,
-    remove,
-    update,
-    list_all,
-    ONLY_NEW,
-)  # Assuming these functions are defined in main.py
-
-
-# Schemas
-
-class StatusChoices(str, Enum):
-    indexed = 'indexed'
-    archived = 'archived'
-    unarchived = 'unarchived'
-    present = 'present'
-    valid = 'valid'
-    invalid = 'invalid'
-    duplicate = 'duplicate'
-    orphaned = 'orphaned'
-    corrupted = 'corrupted'
-    unrecognized = 'unrecognized'
-
-
-class AddURLSchema(BaseModel):
-    urls: List[str]
-    tag: str = ""
-    depth: int = 0
-    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
-    update_all: bool = False
-    index_only: bool = False
-    overwrite: bool = False
-    init: bool = False
-    extractors: str = ""
-    parser: str = "auto"
-
-
-class RemoveURLSchema(BaseModel):
-    yes: bool = False
-    delete: bool = False
-    before: Optional[float] = None
-    after: Optional[float] = None
-    filter_type: str = "exact"
-    filter_patterns: Optional[List[str]] = None
-
-
-class UpdateSchema(BaseModel):
-    resume: Optional[float] = None
-    only_new: Optional[bool] = None
-    index_only: Optional[bool] = False
-    overwrite: Optional[bool] = False
-    before: Optional[float] = None
-    after: Optional[float] = None
-    status: Optional[StatusChoices] = None
-    filter_type: Optional[str] = 'exact'
-    filter_patterns: Optional[List[str]] = None
-    extractors: Optional[str] = ""
-
-
-class ListAllSchema(BaseModel):
-    filter_patterns: Optional[List[str]] = None
-    filter_type: str = 'exact'
-    status: Optional[StatusChoices] = None
-    after: Optional[float] = None
-    before: Optional[float] = None
-    sort: Optional[str] = None
-    csv: Optional[str] = None
-    json: bool = False
-    html: bool = False
-    with_headers: bool = False
-
-
-# API Router
-router = Router()
-
-
-@router.post("/add", response={200: dict})
-def api_add(request, payload: AddURLSchema):
-    try:
-        result = add(
-            urls=payload.urls,
-            tag=payload.tag,
-            depth=payload.depth,
-            update=payload.update,
-            update_all=payload.update_all,
-            index_only=payload.index_only,
-            overwrite=payload.overwrite,
-            init=payload.init,
-            extractors=payload.extractors,
-            parser=payload.parser,
-        )
-        # Currently the add function returns a list of ALL items in the DB, ideally only return new items
-        return {
-            "status": "success",
-            "message": "URLs added successfully.",
-            "result": str(result),
-        }
-    except Exception as e:
-        # Handle exceptions raised by the add function or during processing
-        return {"status": "error", "message": str(e)}
-
-
-@router.post("/remove", response={200: dict})
-def api_remove(request, payload: RemoveURLSchema):
-    try:
-        result = remove(
-            yes=payload.yes,
-            delete=payload.delete,
-            before=payload.before,
-            after=payload.after,
-            filter_type=payload.filter_type,
-            filter_patterns=payload.filter_patterns,
-        )
-        return {
-            "status": "success",
-            "message": "URLs removed successfully.",
-            "result": result,
-        }
-    except Exception as e:
-        # Handle exceptions raised by the remove function or during processing
-        return {"status": "error", "message": str(e)}
-
-
-@router.post("/update", response={200: dict})
-def api_update(request, payload: UpdateSchema):
-    try:
-        result = update(
-            resume=payload.resume,
-            only_new=payload.only_new,
-            index_only=payload.index_only,
-            overwrite=payload.overwrite,
-            before=payload.before,
-            after=payload.after,
-            status=payload.status,
-            filter_type=payload.filter_type,
-            filter_patterns=payload.filter_patterns,
-            extractors=payload.extractors,
-        )
-        return {
-            "status": "success",
-            "message": "Archive updated successfully.",
-            "result": result,
-        }
-    except Exception as e:
-        # Handle exceptions raised by the update function or during processing
-        return {"status": "error", "message": str(e)}
-
-
-@router.post("/list_all", response={200: dict})
-def api_list_all(request, payload: ListAllSchema):
-    try:
-        result = list_all(
-            filter_patterns=payload.filter_patterns,
-            filter_type=payload.filter_type,
-            status=payload.status,
-            after=payload.after,
-            before=payload.before,
-            sort=payload.sort,
-            csv=payload.csv,
-            json=payload.json,
-            html=payload.html,
-            with_headers=payload.with_headers,
-        )
-        # TODO: This is kind of bad, make the format a choice field
-        if payload.json:
-            return {"status": "success", "format": "json", "data": result}
-        elif payload.html:
-            return {"status": "success", "format": "html", "data": result}
-        elif payload.csv:
-            return {"status": "success", "format": "csv", "data": result}
-        else:
-            return {
-                "status": "success",
-                "message": "List generated successfully.",
-                "data": result,
-            }
-    except Exception as e:
-        # Handle exceptions raised by the list_all function or during processing
-        return {"status": "error", "message": str(e)}

+ 92 - 33
archivebox/api/auth.py

@@ -1,48 +1,107 @@
+__package__ = 'archivebox.api'
+
+from typing import Optional
+
+from django.http import HttpRequest
+from django.contrib.auth import login
 from django.contrib.auth import authenticate
 from django.contrib.auth import authenticate
-from ninja import Form, Router, Schema
-from ninja.security import HttpBearer
+from django.contrib.auth.models import AbstractBaseUser
 
 
-from api.models import Token
+from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser
 
 
-router = Router()
 
 
+def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
+    """Given an API token string, check if a corresponding non-expired APIToken exists, and return its user"""
+    from api.models import APIToken        # lazy import model to avoid loading it at urls.py import time
+    
+    user = None
 
 
-class GlobalAuth(HttpBearer):
-    def authenticate(self, request, token):
+    submitted_empty_form = token in ('string', '', None)
+    if submitted_empty_form:
+        user = request.user       # see if user is authed via django session and use that as the default
+    else:
         try:
         try:
-            return Token.objects.get(token=token).user
-        except Token.DoesNotExist:
+            token = APIToken.objects.get(token=token)
+            if token.is_valid():
+                user = token.user
+        except APIToken.DoesNotExist:
             pass
             pass
 
 
+    if not user:
+        print('[❌] Failed to authenticate API user using API Key:', request)
 
 
-class AuthSchema(Schema):
-    email: str
-    password: str
-
+    return None
 
 
-@router.post("/authenticate", auth=None)  # overriding global auth
-def get_token(request, auth_data: AuthSchema):
-    user = authenticate(username=auth_data.email, password=auth_data.password)
-    if user:
-        # Assuming a user can have multiple tokens and you want to create a new one every time
-        new_token = Token.objects.create(user=user)
-        return {"token": new_token.token, "expires": new_token.expiry_as_iso8601}
+def auth_using_password(username, password, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
+    """Given a username and password, check if they are valid and return the corresponding user"""
+    user = None
+    
+    submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
+    if submitted_empty_form:
+        user = request.user       # see if user is authed via django session and use that as the default
     else:
     else:
-        return {"error": "Invalid credentials"}
+        user = authenticate(
+            username=username,
+            password=password,
+        )
+
+    if not user:
+        print('[❌] Failed to authenticate API user using API Key:', request)
+
+    return user
+
+
+### Base Auth Types
+
+class APITokenAuthCheck:
+    """The base class for authentication methods that use an api.models.APIToken"""
+    def authenticate(self, request: HttpRequest, key: Optional[str]=None) -> Optional[AbstractBaseUser]:
+        user = auth_using_token(
+            token=key,
+            request=request,
+        )
+        if user is not None:
+            login(request, user, backend='django.contrib.auth.backends.ModelBackend')
+        return user
+
+class UserPassAuthCheck:
+    """The base class for authentication methods that use a username & password"""
+    def authenticate(self, request: HttpRequest, username: Optional[str]=None, password: Optional[str]=None) -> Optional[AbstractBaseUser]:
+        user = auth_using_password(
+            username=username,
+            password=password,
+            request=request,
+        )
+        if user is not None:
+            login(request, user, backend='django.contrib.auth.backends.ModelBackend')
+        return user
+
+
+### Django-Ninja-Provided Auth Methods
+
+class UsernameAndPasswordAuth(UserPassAuthCheck, HttpBasicAuth):
+    """Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
+    pass
+
+class QueryParamTokenAuth(APITokenAuthCheck, APIKeyQuery):
+    """Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
+    param_name = "api_key"
+
+class HeaderTokenAuth(APITokenAuthCheck, APIKeyHeader):
+    """Allow authenticating by passing X-API-Key=xyz as a request header"""
+    param_name = "X-API-Key"
 
 
+class BearerTokenAuth(APITokenAuthCheck, HttpBearer):
+    """Allow authenticating by passing Bearer=xyz as a request header"""
+    pass
 
 
-class TokenValidationSchema(Schema):
-    token: str
 
 
+### Enabled Auth Methods
 
 
-@router.post("/validate_token", auth=None) # No authentication required for this endpoint
-def validate_token(request, token_data: TokenValidationSchema):
-    try:
-        # Attempt to authenticate using the provided token
-        user = GlobalAuth().authenticate(request, token_data.token)
-        if user:
-            return {"status": "valid"}
-        else:
-            return {"status": "invalid"}
-    except Token.DoesNotExist:
-        return {"status": "invalid"}
+API_AUTH_METHODS = [
+    QueryParamTokenAuth(), 
+    HeaderTokenAuth(),
+    BearerTokenAuth(),
+    django_auth_superuser,
+    UsernameAndPasswordAuth(),
+]

+ 7 - 6
archivebox/api/migrations/0001_initial.py

@@ -1,9 +1,10 @@
-# Generated by Django 3.1.14 on 2024-04-09 18:52
+# Generated by Django 4.2.11 on 2024-04-25 04:19
 
 
 import api.models
 import api.models
 from django.conf import settings
 from django.conf import settings
 from django.db import migrations, models
 from django.db import migrations, models
 import django.db.models.deletion
 import django.db.models.deletion
+import uuid
 
 
 
 
 class Migration(migrations.Migration):
 class Migration(migrations.Migration):
@@ -16,13 +17,13 @@ class Migration(migrations.Migration):
 
 
     operations = [
     operations = [
         migrations.CreateModel(
         migrations.CreateModel(
-            name='Token',
+            name='APIToken',
             fields=[
             fields=[
-                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('token', models.CharField(default=auth.models.hex_uuid, max_length=32, unique=True)),
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('token', models.CharField(default=api.models.generate_secret_token, max_length=32, unique=True)),
                 ('created', models.DateTimeField(auto_now_add=True)),
                 ('created', models.DateTimeField(auto_now_add=True)),
-                ('expiry', models.DateTimeField(blank=True, null=True)),
-                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tokens', to=settings.AUTH_USER_MODEL)),
+                ('expires', models.DateTimeField(blank=True, null=True)),
+                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
             ],
             ],
         ),
         ),
     ]
     ]

+ 47 - 15
archivebox/api/models.py

@@ -1,30 +1,62 @@
+__package__ = 'archivebox.api'
+
 import uuid
 import uuid
+import secrets
 from datetime import timedelta
 from datetime import timedelta
 
 
 from django.conf import settings
 from django.conf import settings
 from django.db import models
 from django.db import models
 from django.utils import timezone
 from django.utils import timezone
-from django.utils.translation import gettext_lazy as _
 
 
-def hex_uuid():
-    return uuid.uuid4().hex
 
 
 
 
-class Token(models.Model):
-    user = models.ForeignKey(
-        settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name="tokens"
-    )
-    token = models.CharField(max_length=32, default=hex_uuid, unique=True)
+def generate_secret_token() -> str:
+    # returns cryptographically secure string with len() == 32
+    return secrets.token_hex(16)
+
+
+class APIToken(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
+
+    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
+    token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
+    
     created = models.DateTimeField(auto_now_add=True)
     created = models.DateTimeField(auto_now_add=True)
-    expiry = models.DateTimeField(null=True, blank=True)
+    expires = models.DateTimeField(null=True, blank=True)
+
+    class Meta:
+        verbose_name = "API Key"
+        verbose_name_plural = "API Keys"
+
+    def __str__(self) -> str:
+        return self.token
+
+    def __repr__(self) -> str:
+        return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'
+
+    def __json__(self) -> dict:
+        return {
+            "TYPE":             "APIToken",    
+            "id":               str(self.id),
+            "user_id":          str(self.user.id),
+            "user_username":    self.user.username,
+            "token":            self.token,
+            "created":          self.created.isoformat(),
+            "expires":          self.expires_as_iso8601,
+        }
 
 
     @property
     @property
-    def expiry_as_iso8601(self):
+    def expires_as_iso8601(self):
         """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
         """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
-        expiry_date = (
-            self.expiry if self.expiry else timezone.now() + timedelta(days=365 * 100)
-        )
+        expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100))
+
         return expiry_date.isoformat()
         return expiry_date.isoformat()
 
 
-    def __str__(self):
-        return self.token
+    def is_valid(self, for_date=None):
+        for_date = for_date or timezone.now()
+
+        if self.expires and self.expires < for_date:
+            return False
+
+        return True
+

+ 53 - 0
archivebox/api/routes_auth.py

@@ -0,0 +1,53 @@
+__package__ = 'archivebox.api'
+
+from typing import Optional
+
+from django.contrib.auth import authenticate
+from ninja import Router, Schema
+
+from api.models import APIToken
+from api.auth import auth_using_token, auth_using_password
+
+
+router = Router(tags=['Authentication'])
+
+
+class PasswordAuthSchema(Schema):
+    """Schema for a /get_api_token request"""
+    username: Optional[str] = None
+    password: Optional[str] = None
+
+
+@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)')             # auth=None because they are not authed yet
+def get_api_token(request, auth_data: PasswordAuthSchema):
+    user = auth_using_password(
+        username=auth_data.username,
+        password=auth_data.password,
+        request=request,
+    )
+
+    if user:
+        # TODO: support multiple tokens in the future, for now we just have one per user
+        api_token, created = APIToken.objects.get_or_create(user=user)
+
+        return api_token.__json__()
+    
+    return {"success": False, "errors": ["Invalid credentials"]}
+
+
+
+class TokenAuthSchema(Schema):
+    """Schema for a /check_api_token request"""
+    token: str
+
+
+@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired')        # auth=None because they are not authed yet
+def check_api_token(request, token_data: TokenAuthSchema):
+    user = auth_using_token(
+        token=token_data.token,
+        request=request,
+    )
+    if user:
+        return {"success": True, "user_id": str(user.id)}
+    
+    return {"success": False, "user_id": None}

+ 236 - 0
archivebox/api/routes_cli.py

@@ -0,0 +1,236 @@
+__package__ = 'archivebox.api'
+
+from typing import List, Dict, Any, Optional
+from enum import Enum
+
+# from pydantic import BaseModel
+from archivebox.api.routes_core import paginate
+from ninja import Router, Schema
+
+from ..main import (
+    add,
+    remove,
+    update,
+    list_all,
+    schedule,
+)
+from ..util import ansi_to_html
+from ..config import ONLY_NEW
+
+
+# router for API that exposes archivebox cli subcommands as REST endpoints
+router = Router(tags=['ArchiveBox CLI Sub-Commands'])
+
+
+# Schemas
+
+JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
+
+class CLICommandResponseSchema(Schema):
+    success: bool
+    errors: List[str]
+    result: JSONType
+    stdout: str
+    stderr: str
+
+class FilterTypeChoices(str, Enum):
+    exact = 'exact'
+    substring = 'substring'
+    regex = 'regex'
+    domain = 'domain'
+    tag = 'tag'
+    timestamp = 'timestamp'
+
+class StatusChoices(str, Enum):
+    indexed = 'indexed'
+    archived = 'archived'
+    unarchived = 'unarchived'
+    present = 'present'
+    valid = 'valid'
+    invalid = 'invalid'
+    duplicate = 'duplicate'
+    orphaned = 'orphaned'
+    corrupted = 'corrupted'
+    unrecognized = 'unrecognized'
+
+
+class AddCommandSchema(Schema):
+    urls: List[str]
+    tag: str = ""
+    depth: int = 0
+    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
+    update_all: bool = False
+    index_only: bool = False
+    overwrite: bool = False
+    init: bool = False
+    extractors: str = ""
+    parser: str = "auto"
+
+class UpdateCommandSchema(Schema):
+    resume: Optional[float] = 0
+    only_new: bool = ONLY_NEW
+    index_only: bool = False
+    overwrite: bool = False
+    after: Optional[float] = 0
+    before: Optional[float] = 999999999999999
+    status: Optional[StatusChoices] = StatusChoices.unarchived
+    filter_type: Optional[str] = FilterTypeChoices.substring
+    filter_patterns: Optional[List[str]] = ['https://example.com']
+    extractors: Optional[str] = ""
+
+class ScheduleCommandSchema(Schema):
+    import_path: Optional[str] = None
+    add: bool = False
+    every: Optional[str] = None
+    tag: str = ''
+    depth: int = 0
+    overwrite: bool = False
+    update: bool = not ONLY_NEW
+    clear: bool = False
+
+class ListCommandSchema(Schema):
+    filter_patterns: Optional[List[str]] = ['https://example.com']
+    filter_type: str = FilterTypeChoices.substring
+    status: Optional[StatusChoices] = StatusChoices.indexed
+    after: Optional[float] = 0
+    before: Optional[float] = 999999999999999
+    sort: str = 'added'
+    as_json: bool = True
+    as_html: bool = False
+    as_csv: str | bool = 'timestamp,url'
+    with_headers: bool = False
+
+class RemoveCommandSchema(Schema):
+    delete: bool = True
+    after: Optional[float] = 0
+    before: Optional[float] = 999999999999999
+    filter_type: str = FilterTypeChoices.exact
+    filter_patterns: Optional[List[str]] = ['https://example.com']
+
+
+
+
+
+@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
+def cli_add(request, args: AddCommandSchema):
+    result = add(
+        urls=args.urls,
+        tag=args.tag,
+        depth=args.depth,
+        update=args.update,
+        update_all=args.update_all,
+        index_only=args.index_only,
+        overwrite=args.overwrite,
+        init=args.init,
+        extractors=args.extractors,
+        parser=args.parser,
+    )
+
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+
+
+@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
+def cli_update(request, args: UpdateCommandSchema):
+    result = update(
+        resume=args.resume,
+        only_new=args.only_new,
+        index_only=args.index_only,
+        overwrite=args.overwrite,
+        before=args.before,
+        after=args.after,
+        status=args.status,
+        filter_type=args.filter_type,
+        filter_patterns=args.filter_patterns,
+        extractors=args.extractors,
+    )
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+
+
+@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
+def cli_add(request, args: ScheduleCommandSchema):
+    result = schedule(
+        import_path=args.import_path,
+        add=args.add,
+        show=args.show,
+        clear=args.clear,
+        every=args.every,
+        tag=args.tag,
+        depth=args.depth,
+        overwrite=args.overwrite,
+        update=args.update,
+    )
+
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+
+
+
+@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
+def cli_list(request, args: ListCommandSchema):
+    result = list_all(
+        filter_patterns=args.filter_patterns,
+        filter_type=args.filter_type,
+        status=args.status,
+        after=args.after,
+        before=args.before,
+        sort=args.sort,
+        csv=args.as_csv,
+        json=args.as_json,
+        html=args.as_html,
+        with_headers=args.with_headers,
+    )
+
+    result_format = 'txt'
+    if args.as_json:
+        result_format = "json"
+    elif args.as_html:
+        result_format = "html"
+    elif args.as_csv:
+        result_format = "csv"
+
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "result_format": result_format,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+    
+
+
+@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
+def cli_remove(request, args: RemoveCommandSchema):
+    result = remove(
+        yes=True,            # no way to interactively ask for confirmation via API, so we force yes
+        delete=args.delete,
+        before=args.before,
+        after=args.after,
+        filter_type=args.filter_type,
+        filter_patterns=args.filter_patterns,
+    )
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+    

+ 210 - 0
archivebox/api/routes_core.py

@@ -0,0 +1,210 @@
+__package__ = 'archivebox.api'
+
+from uuid import UUID
+from typing import List, Optional, Union
+from datetime import datetime
+
+from django.shortcuts import get_object_or_404
+
+from ninja import Router, Schema, FilterSchema, Field, Query
+from ninja.pagination import paginate
+
+from core.models import Snapshot, ArchiveResult, Tag
+
+
+router = Router(tags=['Core Models'])
+
+
+
+
+### ArchiveResult #########################################################################
+
+class ArchiveResultSchema(Schema):
+    id: UUID
+
+    snapshot_id: UUID
+    snapshot_url: str
+    snapshot_tags: str
+
+    extractor: str
+    cmd: List[str]
+    pwd: str
+    cmd_version: str
+    output: str
+    status: str
+
+    created: datetime
+
+    @staticmethod
+    def resolve_id(obj):
+        return obj.uuid
+
+    @staticmethod
+    def resolve_created(obj):
+        return obj.start_ts
+
+    @staticmethod
+    def resolve_snapshot_url(obj):
+        return obj.snapshot.url
+
+    @staticmethod
+    def resolve_snapshot_tags(obj):
+        return obj.snapshot.tags_str()
+
+
+class ArchiveResultFilterSchema(FilterSchema):
+    id: Optional[UUID] = Field(None, q='uuid')
+
+    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
+    snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
+    snapshot_url: Optional[str] = Field(None, q='snapshot__url')
+    snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
+    
+    status: Optional[str] = Field(None, q='status')
+    output: Optional[str] = Field(None, q='output__icontains')
+    extractor: Optional[str] = Field(None, q='extractor__icontains')
+    cmd: Optional[str] = Field(None, q='cmd__0__icontains')
+    pwd: Optional[str] = Field(None, q='pwd__icontains')
+    cmd_version: Optional[str] = Field(None, q='cmd_version')
+
+    created: Optional[datetime] = Field(None, q='updated')
+    created__gte: Optional[datetime] = Field(None, q='updated__gte')
+    created__lt: Optional[datetime] = Field(None, q='updated__lt')
+
+
+@router.get("/archiveresults", response=List[ArchiveResultSchema])
+@paginate
+def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
+    qs = ArchiveResult.objects.all()
+    results = filters.filter(qs)
+    return results
+
+
+@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
+def get_archiveresult(request, archiveresult_id: str):
+    archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
+    return archiveresult
+
+
+# @router.post("/archiveresult", response=ArchiveResultSchema)
+# def create_archiveresult(request, payload: ArchiveResultSchema):
+#     archiveresult = ArchiveResult.objects.create(**payload.dict())
+#     return archiveresult
+#
+# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
+# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
+#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
+#   
+#     for attr, value in payload.dict().items():
+#         setattr(archiveresult, attr, value)
+#     archiveresult.save()
+#
+#     return archiveresult
+#
+# @router.delete("/archiveresult/{archiveresult_id}")
+# def delete_archiveresult(request, archiveresult_id: str):
+#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
+#     archiveresult.delete()
+#     return {"success": True}
+
+
+
+
+
+### Snapshot #########################################################################
+
+
+class SnapshotSchema(Schema):
+    id: UUID
+
+    url: str
+    tags: str
+    title: Optional[str]
+    timestamp: str
+    bookmarked: datetime
+    added: datetime
+    updated: datetime
+    archive_path: str
+
+    archiveresults: List[ArchiveResultSchema]
+
+    # @staticmethod
+    # def resolve_id(obj):
+    #     return str(obj.id)
+
+    @staticmethod
+    def resolve_tags(obj):
+        return obj.tags_str()
+
+    @staticmethod
+    def resolve_archiveresults(obj, context):
+        if context['request'].with_archiveresults:
+            return obj.archiveresult_set.all().distinct()
+        return ArchiveResult.objects.none()
+
+
+class SnapshotFilterSchema(FilterSchema):
+    id: Optional[UUID] = Field(None, q='id')
+
+    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains'])
+    url: Optional[str] = Field(None, q='url')
+    tag: Optional[str] = Field(None, q='tags__name')
+    title: Optional[str] = Field(None, q='title__icontains')
+    
+    timestamp: Optional[str] = Field(None, q='timestamp__startswith')
+    
+    added: Optional[datetime] = Field(None, q='added')
+    added__gte: Optional[datetime] = Field(None, q='added__gte')
+    added__lt: Optional[datetime] = Field(None, q='added__lt')
+
+
+@router.get("/snapshots", response=List[SnapshotSchema])
+@paginate
+def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
+    request.with_archiveresults = with_archiveresults
+
+    qs = Snapshot.objects.all()
+    results = filters.filter(qs)
+    return results
+
+@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
+def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
+    request.with_archiveresults = with_archiveresults
+    snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+    return snapshot
+
+
+# @router.post("/snapshot", response=SnapshotSchema)
+# def create_snapshot(request, payload: SnapshotSchema):
+#     snapshot = Snapshot.objects.create(**payload.dict())
+#     return snapshot
+#
+# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
+# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
+#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+#
+#     for attr, value in payload.dict().items():
+#         setattr(snapshot, attr, value)
+#     snapshot.save()
+#
+#     return snapshot
+#
+# @router.delete("/snapshot/{snapshot_id}")
+# def delete_snapshot(request, snapshot_id: str):
+#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+#     snapshot.delete()
+#     return {"success": True}
+
+
+
+### Tag #########################################################################
+
+
+class TagSchema(Schema):
+    name: str
+    slug: str
+
+
+@router.get("/tags", response=List[TagSchema])
+def list_tags(request):
+    return Tag.objects.all()

+ 11 - 8
archivebox/api/tests.py

@@ -1,27 +1,30 @@
+__package__ = 'archivebox.api'
+
 from django.test import TestCase
 from django.test import TestCase
 from ninja.testing import TestClient
 from ninja.testing import TestClient
-from archivebox.api.archive import router as archive_router
 
 
-class ArchiveBoxAPITestCase(TestCase):
+from .routes_cli import router
+
+class ArchiveBoxCLIAPITestCase(TestCase):
     def setUp(self):
     def setUp(self):
-        self.client = TestClient(archive_router)
+        self.client = TestClient(router)
 
 
     def test_add_endpoint(self):
     def test_add_endpoint(self):
-        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "test"})
+        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "testTag1,testTag2"})
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["status"], "success")
+        self.assertTrue(response.json()["success"])
 
 
     def test_remove_endpoint(self):
     def test_remove_endpoint(self):
         response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]})
         response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]})
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["status"], "success")
+        self.assertTrue(response.json()["success"])
 
 
     def test_update_endpoint(self):
     def test_update_endpoint(self):
         response = self.client.post("/update", json={})
         response = self.client.post("/update", json={})
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["status"], "success")
+        self.assertTrue(response.json()["success"])
 
 
     def test_list_all_endpoint(self):
     def test_list_all_endpoint(self):
         response = self.client.post("/list_all", json={})
         response = self.client.post("/list_all", json={})
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.status_code, 200)
-        self.assertTrue("success" in response.json()["status"])
+        self.assertTrue(response.json()["success"])

+ 111 - 0
archivebox/api/urls.py

@@ -0,0 +1,111 @@
+__package__ = 'archivebox.api'
+
+# import orjson
+
+from io import StringIO
+from traceback import format_exception
+from contextlib import redirect_stdout, redirect_stderr
+
+from django.urls import path
+from django.http import HttpRequest, HttpResponse
+from django.views.generic.base import RedirectView
+from django.core.exceptions import ObjectDoesNotExist, EmptyResultSet, PermissionDenied
+
+from ninja import NinjaAPI, Swagger
+
+# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
+
+from api.auth import API_AUTH_METHODS
+from ..config import VERSION, COMMIT_HASH
+
+# from ninja.renderers import BaseRenderer
+
+# class ORJSONRenderer(BaseRenderer):
+#     media_type = "application/json"
+
+#     def render(self, request, data, *, response_status):
+#         return {
+#             "success": True,
+#             "errors": [],
+#             "result": data,
+#             "stdout": ansi_to_html(stdout.getvalue().strip()),
+#             "stderr": ansi_to_html(stderr.getvalue().strip()),
+#         }
+#         return orjson.dumps(data)
+
+
+class NinjaAPIWithIOCapture(NinjaAPI):
+    """NinjaAPI subclass that attaches stdout/stderr capture buffers to each request."""
+
+    def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
+        """
+        Create the temporal response with stdout/stderr redirected into StringIO buffers.
+
+        The buffers are exposed as `request.stdout` and `request.stderr` so
+        later code handling the same request can read what was captured.
+
+        NOTE(review): the redirection is only active while the super() call
+        below runs; output written after this method returns goes to the real
+        streams. Confirm this is the intended capture window.
+        """
+        stdout, stderr = StringIO(), StringIO()
+        request.stdout = stdout
+        request.stderr = stderr
+
+        with redirect_stderr(stderr), redirect_stdout(stdout):
+            return super().create_temporal_response(request)
+
+# HTML blurb passed as the API `description` (rendered on the docs homepage);
+# interpolates the running ArchiveBox version and commit hash.
+html_description=f'''
+<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
+<br/>
+<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
+<br/>
+<ul>
+<li>⬅️ Manage your server: <a href="/admin/api/"><b>Setup API Keys</b></a>, <a href="/admin/">Go to your Server Admin UI</a>, <a href="/">Go to your Snapshots list</a></li>
+<li>💬 Ask questions and get help here: <a href="https://zulip.archivebox.io">ArchiveBox Chat Forum</a></li>
+<li>🐞 Report API bugs here: <a href="https://github.com/ArchiveBox/ArchiveBox/issues">Github Issues</a></li>
+<li>📚 ArchiveBox Documentation: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Github Wiki</a></li>
+<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
+</ul>
+<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
+'''
+
+# Root API object for ArchiveBox. API_AUTH_METHODS (from api.auth) is passed as
+# the API-wide `auth`, and the Swagger docs UI is configured to persist the
+# authorization entered by the user.
+api = NinjaAPIWithIOCapture(
+    title='ArchiveBox API',
+    description=html_description,
+    version='1.0.0',
+    csrf=False,
+    auth=API_AUTH_METHODS,
+    urls_namespace="api",
+    docs=Swagger(settings={"persistAuthorization": True}),
+    # docs_decorator=login_required,
+    # renderer=ORJSONRenderer(),
+)
+# Routers are registered by dotted-path string rather than by imported object,
+# one per API area: auth, core and cli.
+api.add_router('/auth/',     'api.routes_auth.router')
+api.add_router('/core/',     'api.routes_core.router')
+api.add_router('/cli/',      'api.routes_cli.router')
+
+
+@api.exception_handler(Exception)
+def generic_exception_handler(request, err):
+    """
+    Catch-all handler that turns any unhandled exception into a JSON error response.
+
+    Responds with 404 for ObjectDoesNotExist, EmptyResultSet and
+    PermissionDenied, and with 503 for every other exception. The full
+    traceback is printed to stdout and also returned to the client in "errors".
+    """
+    status = 503
+    if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
+        status = 404
+
+    # format the traceback once and reuse it for both the server output and the response body
+    traceback_text = ''.join(format_exception(err))
+    print(traceback_text)
+
+    return api.create_response(
+        request,
+        {
+            # "success" is the key the API tests read on responses (api/tests.py);
+            # "succeeded" is kept so clients already reading it keep working
+            "success": False,
+            "succeeded": False,
+            "errors": [
+                traceback_text,
+                # or send simpler exception-only summary without full traceback:
+                # f'{err.__class__.__name__}: {err}',
+                # *([str(err.__context__)] if getattr(err, '__context__', None) else []),
+            ],
+        },
+        status=status,
+    )
+
+
+# These patterns are included under "api/" by core/urls.py, so they resolve to /api/...
+urlpatterns = [
+    path("v1/",              api.urls),
+
+    # the bare /api/v1 and /api/ paths both redirect to the v1 docs page
+    path("v1",               RedirectView.as_view(url='/api/v1/docs')),
+    path("",                 RedirectView.as_view(url='/api/v1/docs')),
+]

+ 1 - 0
archivebox/config.py

@@ -1366,6 +1366,7 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C
         stderr('        archivebox init')
         stderr('        archivebox init')
         raise SystemExit(2)
         raise SystemExit(2)
 
 
+
 def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
 def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
     output_dir = out_dir or config['OUTPUT_DIR']
     output_dir = out_dir or config['OUTPUT_DIR']
     from .index.sql import list_migrations
     from .index.sql import list_migrations

+ 2 - 0
archivebox/core/admin.py

@@ -20,6 +20,7 @@ from core.models import Snapshot, ArchiveResult, Tag
 from core.forms import AddLinkForm
 from core.forms import AddLinkForm
 
 
 from core.mixins import SearchResultsAdminMixin
 from core.mixins import SearchResultsAdminMixin
+from api.models import APIToken
 
 
 from index.html import snapshot_icons
 from index.html import snapshot_icons
 from logging_util import printable_filesize
 from logging_util import printable_filesize
@@ -100,6 +101,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
 
 
 archivebox_admin = ArchiveBoxAdmin()
 archivebox_admin = ArchiveBoxAdmin()
 archivebox_admin.register(get_user_model())
 archivebox_admin.register(get_user_model())
+archivebox_admin.register(APIToken)
 archivebox_admin.disable_action('delete_selected')
 archivebox_admin.disable_action('delete_selected')
 
 
 class ArchiveResultInline(admin.TabularInline):
 class ArchiveResultInline(admin.TabularInline):

+ 2 - 0
archivebox/core/apps.py

@@ -1,3 +1,5 @@
+__package__ = 'archivebox.core'
+
 from django.apps import AppConfig
 from django.apps import AppConfig
 
 
 
 

+ 3 - 0
archivebox/core/auth.py

@@ -1,5 +1,8 @@
+__package__ = 'archivebox.core'
+
 import os
 import os
 from django.conf import settings
 from django.conf import settings
+
 from ..config import (
 from ..config import (
     LDAP
     LDAP
 )
 )

+ 5 - 10
archivebox/core/urls.py

@@ -1,4 +1,4 @@
-from .admin import archivebox_admin
+__package__ = 'archivebox.core'
 
 
 from django.urls import path, include
 from django.urls import path, include
 from django.views import static
 from django.views import static
@@ -6,14 +6,9 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
 from django.conf import settings
 from django.conf import settings
 from django.views.generic.base import RedirectView
 from django.views.generic.base import RedirectView
 
 
-from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
-
-from ninja import NinjaAPI
-from api.auth import GlobalAuth
+from .admin import archivebox_admin
+from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
 
 
-api = NinjaAPI(auth=GlobalAuth())
-api.add_router("/auth/", "api.auth.router")
-api.add_router("/archive/", "api.archive.router")
 
 
 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
 # from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
 # from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
@@ -43,10 +38,10 @@ urlpatterns = [
     path('accounts/', include('django.contrib.auth.urls')),
     path('accounts/', include('django.contrib.auth.urls')),
     path('admin/', archivebox_admin.urls),
     path('admin/', archivebox_admin.urls),
     
     
-    path("api/", api.urls),
+    path("api/",      include('api.urls')),
 
 
     path('health/', HealthCheckView.as_view(), name='healthcheck'),
     path('health/', HealthCheckView.as_view(), name='healthcheck'),
-    path('error/', lambda _: 1/0),
+    path('error/', lambda *_: 1/0),
 
 
     # path('jet_api/', include('jet_django.urls')),  Enable to use https://www.jetadmin.io/integrations/django
     # path('jet_api/', include('jet_django.urls')),  Enable to use https://www.jetadmin.io/integrations/django
 
 

+ 1 - 1
archivebox/main.py

@@ -695,7 +695,7 @@ def add(urls: Union[str, List[str]],
     if CAN_UPGRADE:
     if CAN_UPGRADE:
         hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
         hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
 
 
-    return all_links
+    return new_links
 
 
 @enforce_types
 @enforce_types
 def remove(filter_str: Optional[str]=None,
 def remove(filter_str: Optional[str]=None,

+ 2 - 1
archivebox/templates/core/navigation.html

@@ -6,6 +6,7 @@
     <a href="/admin/core/tag/">Tags</a> |
     <a href="/admin/core/tag/">Tags</a> |
     <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
     <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
     <a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> | 
     <a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> | 
+    <a href="/api">API</a> | 
     <a href="{% url 'public-index' %}">Public</a> | 
     <a href="{% url 'public-index' %}">Public</a> | 
     <a href="/admin/">Admin</a>
     <a href="/admin/">Admin</a>
      &nbsp; &nbsp;
      &nbsp; &nbsp;
@@ -16,7 +17,7 @@
         {% endblock %}
         {% endblock %}
         {% block userlinks %}
         {% block userlinks %}
             {% if user.has_usable_password %}
             {% if user.has_usable_password %}
-                <a href="{% url 'admin:password_change' %}">Account</a> /
+                <a href="{% url 'admin:password_change' %}" title="Change your account password">Account</a> /
             {% endif %}
             {% endif %}
             <a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
             <a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
         {% endblock %}
         {% endblock %}

+ 2 - 1
archivebox/util.py

@@ -358,7 +358,8 @@ def chrome_cleanup():
     if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
     if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
         remove_file("/home/archivebox/.config/chromium/SingletonLock")
         remove_file("/home/archivebox/.config/chromium/SingletonLock")
 
 
-def ansi_to_html(text):
+@enforce_types
+def ansi_to_html(text: str) -> str:
     """
     """
     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
     """
     """

+ 0 - 4
pyproject.toml

@@ -18,22 +18,18 @@ dependencies = [
     "django-ninja>=1.1.0",
     "django-ninja>=1.1.0",
     "django-extensions>=3.2.3",
     "django-extensions>=3.2.3",
     "mypy-extensions>=1.0.0",
     "mypy-extensions>=1.0.0",
-
     # Python Helper Libraries
     # Python Helper Libraries
     "requests>=2.31.0",
     "requests>=2.31.0",
     "dateparser>=1.0.0",
     "dateparser>=1.0.0",
     "feedparser>=6.0.11",
     "feedparser>=6.0.11",
     "w3lib>=1.22.0",
     "w3lib>=1.22.0",
-
     # Feature-Specific Dependencies
     # Feature-Specific Dependencies
     "python-crontab>=2.5.1",          # for: archivebox schedule
     "python-crontab>=2.5.1",          # for: archivebox schedule
     "croniter>=0.3.34",               # for: archivebox schedule
     "croniter>=0.3.34",               # for: archivebox schedule
     "ipython>5.0.0",                  # for: archivebox shell
     "ipython>5.0.0",                  # for: archivebox shell
-
     # Extractor Dependencies
     # Extractor Dependencies
     "yt-dlp>=2024.4.9",               # for: media
     "yt-dlp>=2024.4.9",               # for: media
     "playwright>=1.43.0; platform_machine != 'armv7l'",  # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
     "playwright>=1.43.0; platform_machine != 'armv7l'",  # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
-    
     # TODO: add more extractors
     # TODO: add more extractors
     #  - gallery-dl
     #  - gallery-dl
     #  - scihubdl
     #  - scihubdl