Browse Source

big overhaul of REST API, split into auth, core, and cli methods

Nick Sweeting 1 year ago
parent
commit
75153252dc

+ 1 - 0
archivebox/api/__init__.py

@@ -0,0 +1 @@
+__package__ = 'archivebox.api'

+ 2 - 0
archivebox/api/apps.py

@@ -1,3 +1,5 @@
+__package__ = 'archivebox.api'
+
 from django.apps import AppConfig
 
 

+ 0 - 184
archivebox/api/archive.py

@@ -1,184 +0,0 @@
-# archivebox_api.py
-from typing import List, Optional
-from enum import Enum
-from pydantic import BaseModel
-from ninja import Router
-from main import (
-    add,
-    remove,
-    update,
-    list_all,
-    ONLY_NEW,
-)  # Assuming these functions are defined in main.py
-
-
-# Schemas
-
-class StatusChoices(str, Enum):
-    indexed = 'indexed'
-    archived = 'archived'
-    unarchived = 'unarchived'
-    present = 'present'
-    valid = 'valid'
-    invalid = 'invalid'
-    duplicate = 'duplicate'
-    orphaned = 'orphaned'
-    corrupted = 'corrupted'
-    unrecognized = 'unrecognized'
-
-
-class AddURLSchema(BaseModel):
-    urls: List[str]
-    tag: str = ""
-    depth: int = 0
-    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
-    update_all: bool = False
-    index_only: bool = False
-    overwrite: bool = False
-    init: bool = False
-    extractors: str = ""
-    parser: str = "auto"
-
-
-class RemoveURLSchema(BaseModel):
-    yes: bool = False
-    delete: bool = False
-    before: Optional[float] = None
-    after: Optional[float] = None
-    filter_type: str = "exact"
-    filter_patterns: Optional[List[str]] = None
-
-
-class UpdateSchema(BaseModel):
-    resume: Optional[float] = None
-    only_new: Optional[bool] = None
-    index_only: Optional[bool] = False
-    overwrite: Optional[bool] = False
-    before: Optional[float] = None
-    after: Optional[float] = None
-    status: Optional[StatusChoices] = None
-    filter_type: Optional[str] = 'exact'
-    filter_patterns: Optional[List[str]] = None
-    extractors: Optional[str] = ""
-
-
-class ListAllSchema(BaseModel):
-    filter_patterns: Optional[List[str]] = None
-    filter_type: str = 'exact'
-    status: Optional[StatusChoices] = None
-    after: Optional[float] = None
-    before: Optional[float] = None
-    sort: Optional[str] = None
-    csv: Optional[str] = None
-    json: bool = False
-    html: bool = False
-    with_headers: bool = False
-
-
-# API Router
-router = Router()
-
-
-@router.post("/add", response={200: dict})
-def api_add(request, payload: AddURLSchema):
-    try:
-        result = add(
-            urls=payload.urls,
-            tag=payload.tag,
-            depth=payload.depth,
-            update=payload.update,
-            update_all=payload.update_all,
-            index_only=payload.index_only,
-            overwrite=payload.overwrite,
-            init=payload.init,
-            extractors=payload.extractors,
-            parser=payload.parser,
-        )
-        # Currently the add function returns a list of ALL items in the DB, ideally only return new items
-        return {
-            "status": "success",
-            "message": "URLs added successfully.",
-            "result": str(result),
-        }
-    except Exception as e:
-        # Handle exceptions raised by the add function or during processing
-        return {"status": "error", "message": str(e)}
-
-
-@router.post("/remove", response={200: dict})
-def api_remove(request, payload: RemoveURLSchema):
-    try:
-        result = remove(
-            yes=payload.yes,
-            delete=payload.delete,
-            before=payload.before,
-            after=payload.after,
-            filter_type=payload.filter_type,
-            filter_patterns=payload.filter_patterns,
-        )
-        return {
-            "status": "success",
-            "message": "URLs removed successfully.",
-            "result": result,
-        }
-    except Exception as e:
-        # Handle exceptions raised by the remove function or during processing
-        return {"status": "error", "message": str(e)}
-
-
-@router.post("/update", response={200: dict})
-def api_update(request, payload: UpdateSchema):
-    try:
-        result = update(
-            resume=payload.resume,
-            only_new=payload.only_new,
-            index_only=payload.index_only,
-            overwrite=payload.overwrite,
-            before=payload.before,
-            after=payload.after,
-            status=payload.status,
-            filter_type=payload.filter_type,
-            filter_patterns=payload.filter_patterns,
-            extractors=payload.extractors,
-        )
-        return {
-            "status": "success",
-            "message": "Archive updated successfully.",
-            "result": result,
-        }
-    except Exception as e:
-        # Handle exceptions raised by the update function or during processing
-        return {"status": "error", "message": str(e)}
-
-
-@router.post("/list_all", response={200: dict})
-def api_list_all(request, payload: ListAllSchema):
-    try:
-        result = list_all(
-            filter_patterns=payload.filter_patterns,
-            filter_type=payload.filter_type,
-            status=payload.status,
-            after=payload.after,
-            before=payload.before,
-            sort=payload.sort,
-            csv=payload.csv,
-            json=payload.json,
-            html=payload.html,
-            with_headers=payload.with_headers,
-        )
-        # TODO: This is kind of bad, make the format a choice field
-        if payload.json:
-            return {"status": "success", "format": "json", "data": result}
-        elif payload.html:
-            return {"status": "success", "format": "html", "data": result}
-        elif payload.csv:
-            return {"status": "success", "format": "csv", "data": result}
-        else:
-            return {
-                "status": "success",
-                "message": "List generated successfully.",
-                "data": result,
-            }
-    except Exception as e:
-        # Handle exceptions raised by the list_all function or during processing
-        return {"status": "error", "message": str(e)}

+ 92 - 33
archivebox/api/auth.py

@@ -1,48 +1,107 @@
+__package__ = 'archivebox.api'
+
+from typing import Optional
+
+from django.http import HttpRequest
+from django.contrib.auth import login
 from django.contrib.auth import authenticate
-from ninja import Form, Router, Schema
-from ninja.security import HttpBearer
+from django.contrib.auth.models import AbstractBaseUser
 
-from api.models import Token
+from ninja.security import HttpBearer, APIKeyQuery, APIKeyHeader, HttpBasicAuth, django_auth_superuser
 
-router = Router()
 
+def auth_using_token(token, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
+    """Given an API token string, check if a corresponding non-expired APIToken exists, and return its user.
+
+    When no token was submitted (None, '' or the placeholder value 'string'), fall back to the
+    user already authenticated on the request's Django session, if any.
+    Returns None when no authenticated user could be determined.
+    """
+    from api.models import APIToken        # lazy import model to avoid loading it at urls.py import time
+    
+    user = None
 
-class GlobalAuth(HttpBearer):
-    def authenticate(self, request, token):
+    submitted_empty_form = token in ('string', '', None)
+    if submitted_empty_form:
+        # request is optional, and Django's anonymous user object is truthy without being logged in,
+        # so only fall back to a session user that is actually authenticated
+        session_user = getattr(request, 'user', None)
+        if getattr(session_user, 'is_authenticated', False):
+            user = session_user
+    else:
         try:
-            return Token.objects.get(token=token).user
-        except Token.DoesNotExist:
+            api_token = APIToken.objects.get(token=token)   # separate name so the submitted string is not shadowed
+            if api_token.is_valid():
+                user = api_token.user
+        except APIToken.DoesNotExist:
             pass
 
+    if not user:
+        print('[❌] Failed to authenticate API user using API Key:', request)
 
-class AuthSchema(Schema):
-    email: str
-    password: str
-
+    return user
 
-@router.post("/authenticate", auth=None)  # overriding global auth
-def get_token(request, auth_data: AuthSchema):
-    user = authenticate(username=auth_data.email, password=auth_data.password)
-    if user:
-        # Assuming a user can have multiple tokens and you want to create a new one every time
-        new_token = Token.objects.create(user=user)
-        return {"token": new_token.token, "expires": new_token.expiry_as_iso8601}
+def auth_using_password(username, password, request: Optional[HttpRequest]=None) -> Optional[AbstractBaseUser]:
+    """Given a username and password, check if they are valid and return the corresponding user.
+
+    When both values are blank (None, '' or the placeholder value 'string'), fall back to the
+    user already authenticated on the request's Django session, if any.
+    Returns None when no authenticated user could be determined.
+    """
+    user = None
+    
+    submitted_empty_form = (username, password) in (('string', 'string'), ('', ''), (None, None))
+    if submitted_empty_form:
+        # request is optional, and Django's anonymous user object is truthy without being logged in,
+        # so only fall back to a session user that is actually authenticated
+        session_user = getattr(request, 'user', None)
+        if getattr(session_user, 'is_authenticated', False):
+            user = session_user
     else:
-        return {"error": "Invalid credentials"}
+        user = authenticate(
+            username=username,
+            password=password,
+        )
+
+    if not user:
+        print('[❌] Failed to authenticate API user using username & password:', request)
+
+    return user
+
+
+### Base Auth Types
+
+class APITokenAuthCheck:
+    """The base class for authentication methods that use an api.models.APIToken"""
+    def authenticate(self, request: HttpRequest, key: Optional[str]=None) -> Optional[AbstractBaseUser]:
+        user = auth_using_token(
+            token=key,
+            request=request,
+        )
+        if user is not None:
+            login(request, user, backend='django.contrib.auth.backends.ModelBackend')
+        return user
+
+class UserPassAuthCheck:
+    """The base class for authentication methods that use a username & password"""
+    def authenticate(self, request: HttpRequest, username: Optional[str]=None, password: Optional[str]=None) -> Optional[AbstractBaseUser]:
+        user = auth_using_password(
+            username=username,
+            password=password,
+            request=request,
+        )
+        if user is not None:
+            login(request, user, backend='django.contrib.auth.backends.ModelBackend')
+        return user
+
+
+### Django-Ninja-Provided Auth Methods
+
+class UsernameAndPasswordAuth(UserPassAuthCheck, HttpBasicAuth):
+    """Allow authenticating by passing username & password via HTTP Basic Authentication (not recommended)"""
+    pass
+
+class QueryParamTokenAuth(APITokenAuthCheck, APIKeyQuery):
+    """Allow authenticating by passing api_key=xyz as a GET/POST query parameter"""
+    param_name = "api_key"
+
+class HeaderTokenAuth(APITokenAuthCheck, APIKeyHeader):
+    """Allow authenticating by passing X-API-Key=xyz as a request header"""
+    param_name = "X-API-Key"
 
+class BearerTokenAuth(APITokenAuthCheck, HttpBearer):
+    """Allow authenticating by passing Bearer=xyz as a request header"""
+    pass
 
-class TokenValidationSchema(Schema):
-    token: str
 
+### Enabled Auth Methods
 
-@router.post("/validate_token", auth=None) # No authentication required for this endpoint
-def validate_token(request, token_data: TokenValidationSchema):
-    try:
-        # Attempt to authenticate using the provided token
-        user = GlobalAuth().authenticate(request, token_data.token)
-        if user:
-            return {"status": "valid"}
-        else:
-            return {"status": "invalid"}
-    except Token.DoesNotExist:
-        return {"status": "invalid"}
+API_AUTH_METHODS = [
+    QueryParamTokenAuth(), 
+    HeaderTokenAuth(),
+    BearerTokenAuth(),
+    django_auth_superuser,
+    UsernameAndPasswordAuth(),
+]

+ 7 - 6
archivebox/api/migrations/0001_initial.py

@@ -1,9 +1,10 @@
-# Generated by Django 3.1.14 on 2024-04-09 18:52
+# Generated by Django 4.2.11 on 2024-04-25 04:19
 
 import api.models
 from django.conf import settings
 from django.db import migrations, models
 import django.db.models.deletion
+import uuid
 
 
 class Migration(migrations.Migration):
@@ -16,13 +17,13 @@ class Migration(migrations.Migration):
 
     operations = [
         migrations.CreateModel(
-            name='Token',
+            name='APIToken',
             fields=[
-                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('token', models.CharField(default=auth.models.hex_uuid, max_length=32, unique=True)),
+                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
+                ('token', models.CharField(default=api.models.generate_secret_token, max_length=32, unique=True)),
                 ('created', models.DateTimeField(auto_now_add=True)),
-                ('expiry', models.DateTimeField(blank=True, null=True)),
-                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tokens', to=settings.AUTH_USER_MODEL)),
+                ('expires', models.DateTimeField(blank=True, null=True)),
+                ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
             ],
         ),
     ]

+ 47 - 15
archivebox/api/models.py

@@ -1,30 +1,62 @@
+__package__ = 'archivebox.api'
+
 import uuid
+import secrets
 from datetime import timedelta
 
 from django.conf import settings
 from django.db import models
 from django.utils import timezone
-from django.utils.translation import gettext_lazy as _
 
-def hex_uuid():
-    return uuid.uuid4().hex
 
 
-class Token(models.Model):
-    user = models.ForeignKey(
-        settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name="tokens"
-    )
-    token = models.CharField(max_length=32, default=hex_uuid, unique=True)
+def generate_secret_token() -> str:
+    # returns cryptographically secure string with len() == 32
+    return secrets.token_hex(16)
+
+
+class APIToken(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
+
+    user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
+    token = models.CharField(max_length=32, default=generate_secret_token, unique=True)
+    
     created = models.DateTimeField(auto_now_add=True)
-    expiry = models.DateTimeField(null=True, blank=True)
+    expires = models.DateTimeField(null=True, blank=True)
+
+    class Meta:
+        verbose_name = "API Key"
+        verbose_name_plural = "API Keys"
+
+    def __str__(self) -> str:
+        return self.token
+
+    def __repr__(self) -> str:
+        return f'<APIToken user={self.user.username} token=************{self.token[-4:]}>'
+
+    def __json__(self) -> dict:
+        return {
+            "TYPE":             "APIToken",    
+            "id":               str(self.id),
+            "user_id":          str(self.user.id),
+            "user_username":    self.user.username,
+            "token":            self.token,
+            "created":          self.created.isoformat(),
+            "expires":          self.expires_as_iso8601,
+        }
 
     @property
-    def expiry_as_iso8601(self):
+    def expires_as_iso8601(self):
         """Returns the expiry date of the token in ISO 8601 format or a date 100 years in the future if none."""
-        expiry_date = (
-            self.expiry if self.expiry else timezone.now() + timedelta(days=365 * 100)
-        )
+        expiry_date = self.expires or (timezone.now() + timedelta(days=365 * 100))
+
         return expiry_date.isoformat()
 
-    def __str__(self):
-        return self.token
+    def is_valid(self, for_date=None):
+        for_date = for_date or timezone.now()
+
+        if self.expires and self.expires < for_date:
+            return False
+
+        return True
+

+ 53 - 0
archivebox/api/routes_auth.py

@@ -0,0 +1,53 @@
+__package__ = 'archivebox.api'
+
+from typing import Optional
+
+from django.contrib.auth import authenticate
+from ninja import Router, Schema
+
+from api.models import APIToken
+from api.auth import auth_using_token, auth_using_password
+
+
+router = Router(tags=['Authentication'])
+
+
+class PasswordAuthSchema(Schema):
+    """Schema for a /get_api_token request"""
+    username: Optional[str] = None
+    password: Optional[str] = None
+
+
+@router.post("/get_api_token", auth=None, summary='Generate an API token for a given username & password (or currently logged-in user)')             # auth=None because they are not authed yet
+def get_api_token(request, auth_data: PasswordAuthSchema):
+    """Return the JSON form of an API token for the user identified by auth_data.
+
+    The user is resolved with auth_using_password() (which may fall back to the session user).
+    On failure a {"success": False, "errors": [...]} dict is returned instead.
+    """
+    user = auth_using_password(
+        username=auth_data.username,
+        password=auth_data.password,
+        request=request,
+    )
+
+    # an anonymous session user is a truthy object, so also require that the user is really authenticated
+    if user and getattr(user, 'is_authenticated', False):
+        # TODO: support multiple tokens in the future, for now we just hand out one per user
+        # get_or_create(user=user) raises MultipleObjectsReturned once a user owns several tokens and
+        # can hand back an expired one, so reuse the newest still-valid token or create a new one
+        api_token = next(
+            (existing for existing in APIToken.objects.filter(user=user).order_by('-created') if existing.is_valid()),
+            None,
+        )
+        if api_token is None:
+            api_token = APIToken.objects.create(user=user)
+
+        return api_token.__json__()
+    
+    return {"success": False, "errors": ["Invalid credentials"]}
+
+
+
+class TokenAuthSchema(Schema):
+    """Schema for a /check_api_token request"""
+    token: str
+
+
+@router.post("/check_api_token", auth=None, summary='Validate an API token to make sure its valid and non-expired')        # auth=None because they are not authed yet
+def check_api_token(request, token_data: TokenAuthSchema):
+    user = auth_using_token(
+        token=token_data.token,
+        request=request,
+    )
+    if user:
+        return {"success": True, "user_id": str(user.id)}
+    
+    return {"success": False, "user_id": None}

+ 236 - 0
archivebox/api/routes_cli.py

@@ -0,0 +1,236 @@
+__package__ = 'archivebox.api'
+
+from typing import List, Dict, Any, Optional
+from enum import Enum
+
+# from pydantic import BaseModel
+from archivebox.api.routes_core import paginate
+from ninja import Router, Schema
+
+from ..main import (
+    add,
+    remove,
+    update,
+    list_all,
+    schedule,
+)
+from ..util import ansi_to_html
+from ..config import ONLY_NEW
+
+
+# router for API that exposes archivebox cli subcommands as REST endpoints
+router = Router(tags=['ArchiveBox CLI Sub-Commands'])
+
+
+# Schemas
+
+JSONType = List[Any] | Dict[str, Any] | bool | int | str | None
+
+class CLICommandResponseSchema(Schema):
+    success: bool
+    errors: List[str]
+    result: JSONType
+    stdout: str
+    stderr: str
+
+class FilterTypeChoices(str, Enum):
+    exact = 'exact'
+    substring = 'substring'
+    regex = 'regex'
+    domain = 'domain'
+    tag = 'tag'
+    timestamp = 'timestamp'
+
+class StatusChoices(str, Enum):
+    indexed = 'indexed'
+    archived = 'archived'
+    unarchived = 'unarchived'
+    present = 'present'
+    valid = 'valid'
+    invalid = 'invalid'
+    duplicate = 'duplicate'
+    orphaned = 'orphaned'
+    corrupted = 'corrupted'
+    unrecognized = 'unrecognized'
+
+
+class AddCommandSchema(Schema):
+    urls: List[str]
+    tag: str = ""
+    depth: int = 0
+    update: bool = not ONLY_NEW  # Default to the opposite of ONLY_NEW
+    update_all: bool = False
+    index_only: bool = False
+    overwrite: bool = False
+    init: bool = False
+    extractors: str = ""
+    parser: str = "auto"
+
+class UpdateCommandSchema(Schema):
+    resume: Optional[float] = 0
+    only_new: bool = ONLY_NEW
+    index_only: bool = False
+    overwrite: bool = False
+    after: Optional[float] = 0
+    before: Optional[float] = 999999999999999
+    status: Optional[StatusChoices] = StatusChoices.unarchived
+    filter_type: Optional[str] = FilterTypeChoices.substring
+    filter_patterns: Optional[List[str]] = ['https://example.com']
+    extractors: Optional[str] = ""
+
+# Request body for the /schedule endpoint; each field is forwarded to schedule() as a keyword argument
+class ScheduleCommandSchema(Schema):
+    import_path: Optional[str] = None
+    add: bool = False
+    show: bool = False                 # the /schedule endpoint reads args.show, so the schema must declare it
+    every: Optional[str] = None
+    tag: str = ''
+    depth: int = 0
+    overwrite: bool = False
+    update: bool = not ONLY_NEW        # default to the opposite of ONLY_NEW, same as the add command
+    clear: bool = False
+
+class ListCommandSchema(Schema):
+    filter_patterns: Optional[List[str]] = ['https://example.com']
+    filter_type: str = FilterTypeChoices.substring
+    status: Optional[StatusChoices] = StatusChoices.indexed
+    after: Optional[float] = 0
+    before: Optional[float] = 999999999999999
+    sort: str = 'added'
+    as_json: bool = True
+    as_html: bool = False
+    as_csv: str | bool = 'timestamp,url'
+    with_headers: bool = False
+
+class RemoveCommandSchema(Schema):
+    delete: bool = True
+    after: Optional[float] = 0
+    before: Optional[float] = 999999999999999
+    filter_type: str = FilterTypeChoices.exact
+    filter_patterns: Optional[List[str]] = ['https://example.com']
+
+
+
+
+
+@router.post("/add", response=CLICommandResponseSchema, summary='archivebox add [args] [urls]')
+def cli_add(request, args: AddCommandSchema):
+    result = add(
+        urls=args.urls,
+        tag=args.tag,
+        depth=args.depth,
+        update=args.update,
+        update_all=args.update_all,
+        index_only=args.index_only,
+        overwrite=args.overwrite,
+        init=args.init,
+        extractors=args.extractors,
+        parser=args.parser,
+    )
+
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+
+
+@router.post("/update", response=CLICommandResponseSchema, summary='archivebox update [args] [filter_patterns]')
+def cli_update(request, args: UpdateCommandSchema):
+    result = update(
+        resume=args.resume,
+        only_new=args.only_new,
+        index_only=args.index_only,
+        overwrite=args.overwrite,
+        before=args.before,
+        after=args.after,
+        status=args.status,
+        filter_type=args.filter_type,
+        filter_patterns=args.filter_patterns,
+        extractors=args.extractors,
+    )
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+
+
+@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
+def cli_schedule(request, args: ScheduleCommandSchema):
+    """Run schedule() with the submitted arguments and return its result plus the captured output.
+
+    Named cli_schedule (not cli_add) so it no longer replaces the /add handler's module-level name.
+    """
+    result = schedule(
+        import_path=args.import_path,
+        add=args.add,
+        show=getattr(args, 'show', False),     # tolerate a schema that does not declare `show`
+        clear=args.clear,
+        every=args.every,
+        tag=args.tag,
+        depth=args.depth,
+        overwrite=args.overwrite,
+        update=args.update,
+    )
+
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        # request.stdout / request.stderr are the per-request StringIO buffers attached by the API class
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+
+
+
+@router.post("/list", response=CLICommandResponseSchema, summary='archivebox list [args] [filter_patterns]')
+def cli_list(request, args: ListCommandSchema):
+    result = list_all(
+        filter_patterns=args.filter_patterns,
+        filter_type=args.filter_type,
+        status=args.status,
+        after=args.after,
+        before=args.before,
+        sort=args.sort,
+        csv=args.as_csv,
+        json=args.as_json,
+        html=args.as_html,
+        with_headers=args.with_headers,
+    )
+
+    result_format = 'txt'
+    if args.as_json:
+        result_format = "json"
+    elif args.as_html:
+        result_format = "html"
+    elif args.as_csv:
+        result_format = "csv"
+
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "result_format": result_format,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+    
+
+
+@router.post("/remove", response=CLICommandResponseSchema, summary='archivebox remove [args] [filter_patterns]')
+def cli_remove(request, args: RemoveCommandSchema):
+    result = remove(
+        yes=True,            # no way to interactively ask for confirmation via API, so we force yes
+        delete=args.delete,
+        before=args.before,
+        after=args.after,
+        filter_type=args.filter_type,
+        filter_patterns=args.filter_patterns,
+    )
+    return {
+        "success": True,
+        "errors": [],
+        "result": result,
+        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
+        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
+    }
+    

+ 210 - 0
archivebox/api/routes_core.py

@@ -0,0 +1,210 @@
+__package__ = 'archivebox.api'
+
+from uuid import UUID
+from typing import List, Optional, Union
+from datetime import datetime
+
+from django.shortcuts import get_object_or_404
+
+from ninja import Router, Schema, FilterSchema, Field, Query
+from ninja.pagination import paginate
+
+from core.models import Snapshot, ArchiveResult, Tag
+
+
+router = Router(tags=['Core Models'])
+
+
+
+
+### ArchiveResult #########################################################################
+
+class ArchiveResultSchema(Schema):
+    id: UUID
+
+    snapshot_id: UUID
+    snapshot_url: str
+    snapshot_tags: str
+
+    extractor: str
+    cmd: List[str]
+    pwd: str
+    cmd_version: str
+    output: str
+    status: str
+
+    created: datetime
+
+    @staticmethod
+    def resolve_id(obj):
+        return obj.uuid
+
+    @staticmethod
+    def resolve_created(obj):
+        return obj.start_ts
+
+    @staticmethod
+    def resolve_snapshot_url(obj):
+        return obj.snapshot.url
+
+    @staticmethod
+    def resolve_snapshot_tags(obj):
+        return obj.snapshot.tags_str()
+
+
+# Query-string filters for listing ArchiveResults; each field's `q` names the ORM lookup(s) it applies
+class ArchiveResultFilterSchema(FilterSchema):
+    id: Optional[UUID] = Field(None, q='uuid')
+
+    search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains'])
+    snapshot_id: Optional[UUID] = Field(None, q='snapshot_id')
+    snapshot_url: Optional[str] = Field(None, q='snapshot__url')
+    snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name')
+    
+    status: Optional[str] = Field(None, q='status')
+    output: Optional[str] = Field(None, q='output__icontains')
+    extractor: Optional[str] = Field(None, q='extractor__icontains')
+    cmd: Optional[str] = Field(None, q='cmd__0__icontains')
+    pwd: Optional[str] = Field(None, q='pwd__icontains')
+    cmd_version: Optional[str] = Field(None, q='cmd_version')
+
+    # ArchiveResultSchema.resolve_created() reports obj.start_ts as `created`, so filter on that same
+    # column rather than `updated` (NOTE(review): confirm ArchiveResult has no separate `updated` field)
+    created: Optional[datetime] = Field(None, q='start_ts')
+    created__gte: Optional[datetime] = Field(None, q='start_ts__gte')
+    created__lt: Optional[datetime] = Field(None, q='start_ts__lt')
+
+
+@router.get("/archiveresults", response=List[ArchiveResultSchema])
+@paginate
+def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)):
+    qs = ArchiveResult.objects.all()
+    results = filters.filter(qs)
+    return results
+
+
+@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
+def get_archiveresult(request, archiveresult_id: str):
+    """Return a single ArchiveResult, or respond with 404 if none matches.
+
+    ArchiveResultSchema exposes the model's `uuid` as its public `id`, so a UUID-shaped
+    archiveresult_id is looked up by `uuid`; any other value is looked up by `id` as before.
+    """
+    try:
+        lookup = {'uuid': UUID(archiveresult_id)}
+    except ValueError:
+        # not a well-formed UUID: keep the previous lookup by the `id` column
+        lookup = {'id': archiveresult_id}
+
+    return get_object_or_404(ArchiveResult, **lookup)
+
+
+# @router.post("/archiveresult", response=ArchiveResultSchema)
+# def create_archiveresult(request, payload: ArchiveResultSchema):
+#     archiveresult = ArchiveResult.objects.create(**payload.dict())
+#     return archiveresult
+#
+# @router.put("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema)
+# def update_archiveresult(request, archiveresult_id: str, payload: ArchiveResultSchema):
+#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
+#   
+#     for attr, value in payload.dict().items():
+#         setattr(archiveresult, attr, value)
+#     archiveresult.save()
+#
+#     return archiveresult
+#
+# @router.delete("/archiveresult/{archiveresult_id}")
+# def delete_archiveresult(request, archiveresult_id: str):
+#     archiveresult = get_object_or_404(ArchiveResult, id=archiveresult_id)
+#     archiveresult.delete()
+#     return {"success": True}
+
+
+
+
+
+### Snapshot #########################################################################
+
+
+class SnapshotSchema(Schema):
+    id: UUID
+
+    url: str
+    tags: str
+    title: Optional[str]
+    timestamp: str
+    bookmarked: datetime
+    added: datetime
+    updated: datetime
+    archive_path: str
+
+    archiveresults: List[ArchiveResultSchema]
+
+    # @staticmethod
+    # def resolve_id(obj):
+    #     return str(obj.id)
+
+    @staticmethod
+    def resolve_tags(obj):
+        return obj.tags_str()
+
+    @staticmethod
+    def resolve_archiveresults(obj, context):
+        if context['request'].with_archiveresults:
+            return obj.archiveresult_set.all().distinct()
+        return ArchiveResult.objects.none()
+
+
+class SnapshotFilterSchema(FilterSchema):
+    id: Optional[UUID] = Field(None, q='id')
+
+    search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains'])
+    url: Optional[str] = Field(None, q='url')
+    tag: Optional[str] = Field(None, q='tags__name')
+    title: Optional[str] = Field(None, q='title__icontains')
+    
+    timestamp: Optional[str] = Field(None, q='timestamp__startswith')
+    
+    added: Optional[datetime] = Field(None, q='added')
+    added__gte: Optional[datetime] = Field(None, q='added__gte')
+    added__lt: Optional[datetime] = Field(None, q='added__lt')
+
+
+@router.get("/snapshots", response=List[SnapshotSchema])
+@paginate
+def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True):
+    request.with_archiveresults = with_archiveresults
+
+    qs = Snapshot.objects.all()
+    results = filters.filter(qs)
+    return results
+
+@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema)
+def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True):
+    request.with_archiveresults = with_archiveresults
+    snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+    return snapshot
+
+
+# @router.post("/snapshot", response=SnapshotSchema)
+# def create_snapshot(request, payload: SnapshotSchema):
+#     snapshot = Snapshot.objects.create(**payload.dict())
+#     return snapshot
+#
+# @router.put("/snapshot/{snapshot_id}", response=SnapshotSchema)
+# def update_snapshot(request, snapshot_id: str, payload: SnapshotSchema):
+#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+#
+#     for attr, value in payload.dict().items():
+#         setattr(snapshot, attr, value)
+#     snapshot.save()
+#
+#     return snapshot
+#
+# @router.delete("/snapshot/{snapshot_id}")
+# def delete_snapshot(request, snapshot_id: str):
+#     snapshot = get_object_or_404(Snapshot, id=snapshot_id)
+#     snapshot.delete()
+#     return {"success": True}
+
+
+
+### Tag #########################################################################
+
+
+class TagSchema(Schema):
+    name: str
+    slug: str
+
+
+@router.get("/tags", response=List[TagSchema])
+def list_tags(request):
+    return Tag.objects.all()

+ 11 - 8
archivebox/api/tests.py

@@ -1,27 +1,30 @@
+__package__ = 'archivebox.api'
+
 from django.test import TestCase
 from ninja.testing import TestClient
-from archivebox.api.archive import router as archive_router
 
-class ArchiveBoxAPITestCase(TestCase):
+from .routes_cli import router
+
+class ArchiveBoxCLIAPITestCase(TestCase):
     def setUp(self):
-        self.client = TestClient(archive_router)
+        self.client = TestClient(router)
 
     def test_add_endpoint(self):
-        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "test"})
+        response = self.client.post("/add", json={"urls": ["http://example.com"], "tag": "testTag1,testTag2"})
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["status"], "success")
+        self.assertTrue(response.json()["success"])
 
     def test_remove_endpoint(self):
         response = self.client.post("/remove", json={"filter_patterns": ["http://example.com"]})
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["status"], "success")
+        self.assertTrue(response.json()["success"])
 
     def test_update_endpoint(self):
         response = self.client.post("/update", json={})
         self.assertEqual(response.status_code, 200)
-        self.assertEqual(response.json()["status"], "success")
+        self.assertTrue(response.json()["success"])
 
     def test_list_all_endpoint(self):
-        response = self.client.post("/list_all", json={})
+        # the CLI router registers this command as "/list"; it has no "/list_all" route
+        response = self.client.post("/list", json={})
         self.assertEqual(response.status_code, 200)
-        self.assertTrue("success" in response.json()["status"])
+        self.assertTrue(response.json()["success"])

+ 111 - 0
archivebox/api/urls.py

@@ -0,0 +1,111 @@
+__package__ = 'archivebox.api'
+
+# import orjson
+
+from io import StringIO
+from traceback import format_exception
+from contextlib import redirect_stdout, redirect_stderr
+
+from django.urls import path
+from django.http import HttpRequest, HttpResponse
+from django.views.generic.base import RedirectView
+from django.core.exceptions import ObjectDoesNotExist, EmptyResultSet, PermissionDenied
+
+from ninja import NinjaAPI, Swagger
+
+# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
+
+from api.auth import API_AUTH_METHODS
+from ..config import VERSION, COMMIT_HASH
+
+# from ninja.renderers import BaseRenderer
+
+# class ORJSONRenderer(BaseRenderer):
+#     media_type = "application/json"
+
+#     def render(self, request, data, *, response_status):
+#         return {
+#             "success": True,
+#             "errors": [],
+#             "result": data,
+#             "stdout": ansi_to_html(stdout.getvalue().strip()),
+#             "stderr": ansi_to_html(stderr.getvalue().strip()),
+#         }
+#         return orjson.dumps(data)
+
+
+class NinjaAPIWithIOCapture(NinjaAPI):
+    """NinjaAPI subclass that attaches stdout/stderr capture buffers to each request."""
+
+    def create_temporal_response(self, request: HttpRequest) -> HttpResponse:
+        """Create the temporal response with stdout/stderr redirected into buffers.
+
+        Two fresh StringIO buffers are stored on the request as
+        ``request.stdout`` and ``request.stderr``; the parent implementation
+        is then called while both streams are redirected into them.
+        """
+        captured_out = StringIO()
+        captured_err = StringIO()
+        request.stdout = captured_out
+        request.stderr = captured_err
+
+        # Only the parent call runs with the streams redirected; the print
+        # below executes after the redirection has ended.
+        with redirect_stderr(captured_err), redirect_stdout(captured_out):
+            temporal = super().create_temporal_response(request)
+
+        print('RESPONDING NOW', temporal)
+
+        return temporal
+
+# HTML blurb used as the `description` of the API instance created below.
+# It is an f-string: VERSION and COMMIT_HASH (imported from ..config) are interpolated at import time.
+html_description=f'''
+<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
+<br/>
+<i><b>WARNING: This API is still in an early development stage and may change!</b></i>
+<br/>
+<ul>
+<li>⬅️ Manage your server: <a href="/admin/api/"><b>Setup API Keys</b></a>, <a href="/admin/">Go to your Server Admin UI</a>, <a href="/">Go to your Snapshots list</a> 
+<li>💬 Ask questions and get help here: <a href="https://zulip.archivebox.io">ArchiveBox Chat Forum</a></li>
+<li>🐞 Report API bugs here: <a href="https://github.com/ArchiveBox/ArchiveBox/issues">Github Issues</a></li>
+<li>📚 ArchiveBox Documentation: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki">Github Wiki</a></li>
+<li>📜 See the API source code: <a href="https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/api"><code>archivebox/api/</code></a></li>
+</ul>
+<small>Served by ArchiveBox v{VERSION} (<a href="https://github.com/ArchiveBox/ArchiveBox/commit/{COMMIT_HASH}"><code>{COMMIT_HASH[:8]}</code></a>), API powered by <a href="https://django-ninja.dev/"><code>django-ninja</code></a>.</small>
+'''
+
+# The API instance for this module; its URLs are mounted under "v1/" in urlpatterns.
+api = NinjaAPIWithIOCapture(
+    title='ArchiveBox API',
+    description=html_description,
+    version='1.0.0',
+    csrf=False,
+    auth=API_AUTH_METHODS,  # auth methods imported from api.auth, passed as the API-wide auth setting
+    urls_namespace="api",
+    docs=Swagger(settings={"persistAuthorization": True}),  # Swagger UI option: keep entered credentials across page reloads
+    # docs_decorator=login_required,
+    # renderer=ORJSONRenderer(),
+)
+# Routers are registered by dotted import path (as strings) rather than by importing the router objects here.
+api.add_router('/auth/',     'api.routes_auth.router')
+api.add_router('/core/',     'api.routes_core.router')
+api.add_router('/cli/',      'api.routes_cli.router')
+
+
+@api.exception_handler(Exception)
+def generic_exception_handler(request, err):
+    """Turn any uncaught exception raised by an API view into a JSON error response.
+
+    Responds with HTTP 404 when ``err`` is an ObjectDoesNotExist, EmptyResultSet,
+    or PermissionDenied, and with HTTP 503 for every other exception. The full
+    traceback is printed and also returned in the response's ``errors`` list.
+    """
+    status = 503
+    if isinstance(err, (ObjectDoesNotExist, EmptyResultSet, PermissionDenied)):
+        status = 404
+
+    # Render the traceback once and reuse it for both the log line and the response.
+    # The explicit (type, value, tb) form also works on Python versions older than 3.10.
+    traceback_text = ''.join(format_exception(type(err), err, err.__traceback__))
+    print(traceback_text)
+
+    # NOTE(review): the full traceback is sent to the client; confirm this is acceptable outside of development.
+    return api.create_response(
+        request,
+        {
+            "succeeded": False,
+            "errors": [
+                traceback_text,
+                # or send simpler exception-only summary without full traceback:
+                # f'{err.__class__.__name__}: {err}',
+                # *([str(err.__context__)] if getattr(err, '__context__', None) else []),
+            ],
+        },
+        status=status,
+    )
+
+
+# URL routes for the API app: the versioned API is served under "v1/", everything else redirects to its docs page.
+urlpatterns = [
+    path("v1/",              api.urls),
+
+    # "v1" without a trailing slash and the bare mount point both redirect to the docs page.
+    # NOTE(review): the redirect target hard-codes the "/api" prefix, so it assumes these urls are included under "api/".
+    path("v1",               RedirectView.as_view(url='/api/v1/docs')),
+    path("",                 RedirectView.as_view(url='/api/v1/docs')),
+]

+ 1 - 0
archivebox/config.py

@@ -1366,6 +1366,7 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C
         stderr('        archivebox init')
         raise SystemExit(2)
 
+
 def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CONFIG):
     output_dir = out_dir or config['OUTPUT_DIR']
     from .index.sql import list_migrations

+ 2 - 0
archivebox/core/admin.py

@@ -20,6 +20,7 @@ from core.models import Snapshot, ArchiveResult, Tag
 from core.forms import AddLinkForm
 
 from core.mixins import SearchResultsAdminMixin
+from api.models import APIToken
 
 from index.html import snapshot_icons
 from logging_util import printable_filesize
@@ -100,6 +101,7 @@ class ArchiveBoxAdmin(admin.AdminSite):
 
 archivebox_admin = ArchiveBoxAdmin()
 archivebox_admin.register(get_user_model())
+archivebox_admin.register(APIToken)
 archivebox_admin.disable_action('delete_selected')
 
 class ArchiveResultInline(admin.TabularInline):

+ 2 - 0
archivebox/core/apps.py

@@ -1,3 +1,5 @@
+__package__ = 'archivebox.core'
+
 from django.apps import AppConfig
 
 

+ 3 - 0
archivebox/core/auth.py

@@ -1,5 +1,8 @@
+__package__ = 'archivebox.core'
+
 import os
 from django.conf import settings
+
 from ..config import (
     LDAP
 )

+ 5 - 10
archivebox/core/urls.py

@@ -1,4 +1,4 @@
-from .admin import archivebox_admin
+__package__ = 'archivebox.core'
 
 from django.urls import path, include
 from django.views import static
@@ -6,14 +6,9 @@ from django.contrib.staticfiles.urls import staticfiles_urlpatterns
 from django.conf import settings
 from django.views.generic.base import RedirectView
 
-from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
-
-from ninja import NinjaAPI
-from api.auth import GlobalAuth
+from .admin import archivebox_admin
+from .views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
 
-api = NinjaAPI(auth=GlobalAuth())
-api.add_router("/auth/", "api.auth.router")
-api.add_router("/archive/", "api.archive.router")
 
 # GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
 # from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
@@ -43,10 +38,10 @@ urlpatterns = [
     path('accounts/', include('django.contrib.auth.urls')),
     path('admin/', archivebox_admin.urls),
     
-    path("api/", api.urls),
+    path("api/",      include('api.urls')),
 
     path('health/', HealthCheckView.as_view(), name='healthcheck'),
-    path('error/', lambda _: 1/0),
+    path('error/', lambda *_: 1/0),
 
     # path('jet_api/', include('jet_django.urls')),  Enable to use https://www.jetadmin.io/integrations/django
 

+ 1 - 1
archivebox/main.py

@@ -695,7 +695,7 @@ def add(urls: Union[str, List[str]],
     if CAN_UPGRADE:
         hint(f"There's a new version of ArchiveBox available! Your current version is {VERSION}. You can upgrade to {VERSIONS_AVAILABLE['recommended_version']['tag_name']} ({VERSIONS_AVAILABLE['recommended_version']['html_url']}). For more on how to upgrade: https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives\n")
 
-    return all_links
+    return new_links
 
 @enforce_types
 def remove(filter_str: Optional[str]=None,

+ 2 - 1
archivebox/templates/core/navigation.html

@@ -6,6 +6,7 @@
     <a href="/admin/core/tag/">Tags</a> |
     <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
     <a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> | 
+    <a href="/api">API</a> | 
     <a href="{% url 'public-index' %}">Public</a> | 
     <a href="/admin/">Admin</a>
      &nbsp; &nbsp;
@@ -16,7 +17,7 @@
         {% endblock %}
         {% block userlinks %}
             {% if user.has_usable_password %}
-                <a href="{% url 'admin:password_change' %}">Account</a> /
+                <a href="{% url 'admin:password_change' %}" title="Change your account password">Account</a> /
             {% endif %}
             <a href="{% url 'admin:logout' %}">{% trans 'Log out' %}</a>
         {% endblock %}

+ 2 - 1
archivebox/util.py

@@ -358,7 +358,8 @@ def chrome_cleanup():
     if IN_DOCKER and lexists("/home/archivebox/.config/chromium/SingletonLock"):
         remove_file("/home/archivebox/.config/chromium/SingletonLock")
 
-def ansi_to_html(text):
+@enforce_types
+def ansi_to_html(text: str) -> str:
     """
     Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
     """

+ 0 - 4
pyproject.toml

@@ -18,22 +18,18 @@ dependencies = [
     "django-ninja>=1.1.0",
     "django-extensions>=3.2.3",
     "mypy-extensions>=1.0.0",
-
     # Python Helper Libraries
     "requests>=2.31.0",
     "dateparser>=1.0.0",
     "feedparser>=6.0.11",
     "w3lib>=1.22.0",
-
     # Feature-Specific Dependencies
     "python-crontab>=2.5.1",          # for: archivebox schedule
     "croniter>=0.3.34",               # for: archivebox schedule
     "ipython>5.0.0",                  # for: archivebox shell
-
     # Extractor Dependencies
     "yt-dlp>=2024.4.9",               # for: media
     "playwright>=1.43.0; platform_machine != 'armv7l'",  # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
-    
     # TODO: add more extractors
     #  - gallery-dl
     #  - scihubdl