Browse Source

Merge branch 'dev' into fix-URL_REGEX

Nick Sweeting 1 year ago
parent
commit
17f40f3ada
67 changed files with 4116 additions and 1348 deletions
  1. 1 0
      .dockerignore
  2. 7 0
      .github/workflows/docker.yml
  3. 1 1
      .github/workflows/pip.yml
  4. 5 0
      .gitignore
  5. 39 14
      Dockerfile
  6. 1 1
      README.md
  7. 86 39
      archivebox/config.py
  8. 1 0
      archivebox/core/__init__.py
  9. 115 77
      archivebox/core/admin.py
  10. 5 2
      archivebox/core/apps.py
  11. 13 0
      archivebox/core/auth.py
  12. 12 0
      archivebox/core/auth_ldap.py
  13. 1 1
      archivebox/core/mixins.py
  14. 0 4
      archivebox/core/settings.py
  15. 2 10
      archivebox/core/urls.py
  16. 1 1
      archivebox/core/views.py
  17. 9 12
      archivebox/extractors/__init__.py
  18. 9 2
      archivebox/extractors/archive_org.py
  19. 13 3
      archivebox/extractors/favicon.py
  20. 9 3
      archivebox/extractors/headers.py
  21. 3 2
      archivebox/extractors/htmltotext.py
  22. 9 2
      archivebox/extractors/media.py
  23. 10 4
      archivebox/extractors/mercury.py
  24. 11 22
      archivebox/extractors/singlefile.py
  25. 10 3
      archivebox/extractors/title.py
  26. 15 3
      archivebox/extractors/wget.py
  27. 4 4
      archivebox/index/__init__.py
  28. 6 2
      archivebox/index/schema.py
  29. 8 8
      archivebox/logging_util.py
  30. 9 2
      archivebox/main.py
  31. 2371 0
      archivebox/package-lock.json
  32. 2 2
      archivebox/package.json
  33. 2 0
      archivebox/parsers/__init__.py
  34. 66 46
      archivebox/parsers/generic_json.py
  35. 34 0
      archivebox/parsers/generic_jsonl.py
  36. 20 28
      archivebox/parsers/generic_rss.py
  37. 16 25
      archivebox/parsers/pinboard_rss.py
  38. 19 16
      archivebox/system.py
  39. 1 1
      archivebox/templates/core/navigation.html
  40. 0 0
      archivebox/templates/core/snapshot.html
  41. 54 10
      archivebox/util.py
  42. 6 0
      archivebox/vendor/requirements.txt
  43. 2 2
      bin/build_brew.sh
  44. 14 0
      bin/build_deb.sh
  45. 15 1
      bin/build_dev.sh
  46. 2 4
      bin/build_docker.sh
  47. 4 11
      bin/build_pip.sh
  48. 61 14
      bin/docker_entrypoint.sh
  49. 100 0
      bin/lock_pkgs.sh
  50. 1 1
      bin/setup.sh
  51. 1 1
      bin/test.sh
  52. 107 75
      docker-compose.yml
  53. 1 0
      etc/sonic.cfg
  54. 231 253
      package-lock.json
  55. 2 2
      package.json
  56. 178 560
      pdm.lock
  57. 1 1
      pip_dist
  58. 92 45
      pyproject.toml
  59. 33 27
      requirements.txt
  60. 1 1
      tests/mock_server/server.py
  61. 1 0
      tests/mock_server/templates/example-single.jsonl
  62. 24 0
      tests/mock_server/templates/example.atom
  63. 6 0
      tests/mock_server/templates/example.json
  64. 2 0
      tests/mock_server/templates/example.json.bad
  65. 4 0
      tests/mock_server/templates/example.jsonl
  66. 32 0
      tests/mock_server/templates/example.rss
  67. 195 0
      tests/test_add.py

+ 1 - 0
.dockerignore

@@ -28,4 +28,5 @@ assets/
 docker/
 docker/
 
 
 data/
 data/
+data*/
 output/
 output/

+ 7 - 0
.github/workflows/docker.yml

@@ -81,6 +81,13 @@ jobs:
 
 
       - name: Image digest
       - name: Image digest
         run: echo ${{ steps.docker_build.outputs.digest }}
         run: echo ${{ steps.docker_build.outputs.digest }}
+
+      - name: Update README
+        uses: peter-evans/dockerhub-description@v4
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+          repository: archivebox/archivebox
        
        
       # This ugly bit is necessary if you don't want your cache to grow forever
       # This ugly bit is necessary if you don't want your cache to grow forever
       # until it hits GitHub's limit of 5GB.
       # until it hits GitHub's limit of 5GB.

+ 1 - 1
.github/workflows/pip.yml

@@ -35,7 +35,7 @@ jobs:
           cache: true
           cache: true
 
 
       - name: Install dependencies
       - name: Install dependencies
-        run: pdm install --fail-fast --no-lock --group :all --no-self
+        run: pdm install --fail-fast --no-lock --dev --group=':all' --no-self
 
 
       - name: Build package
       - name: Build package
         run: |
         run: |

+ 5 - 0
.gitignore

@@ -12,6 +12,10 @@ venv/
 .docker-venv/
 .docker-venv/
 node_modules/
 node_modules/
 
 
+# Ignore dev lockfiles (should always be built fresh)
+requirements-dev.txt
+pdm.dev.lock
+
 # Packaging artifacts
 # Packaging artifacts
 .pdm-python
 .pdm-python
 .pdm-build
 .pdm-build
@@ -25,6 +29,7 @@ data/
 data1/
 data1/
 data2/
 data2/
 data3/
 data3/
+data*/
 output/
 output/
 
 
 # vim
 # vim

+ 39 - 14
Dockerfile

@@ -10,7 +10,7 @@
 #     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
 #     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
 # Multi-arch build:
 # Multi-arch build:
 #     docker buildx create --use
 #     docker buildx create --use
-#     docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
+#     docker buildx build . --platform=linux/amd64,linux/arm64--push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
 #
 #
 # Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
 # Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
 
 
@@ -20,10 +20,24 @@ FROM python:3.11-slim-bookworm
 
 
 LABEL name="archivebox" \
 LABEL name="archivebox" \
     maintainer="Nick Sweeting <[email protected]>" \
     maintainer="Nick Sweeting <[email protected]>" \
-    description="All-in-one personal internet archiving container" \
+    description="All-in-one self-hosted internet archiving solution" \
     homepage="https://github.com/ArchiveBox/ArchiveBox" \
     homepage="https://github.com/ArchiveBox/ArchiveBox" \
-    documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
-
+    documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker" \
+    org.opencontainers.image.title="ArchiveBox" \
+    org.opencontainers.image.vendor="ArchiveBox" \
+    org.opencontainers.image.description="All-in-one self-hosted internet archiving solution" \
+    org.opencontainers.image.source="https://github.com/ArchiveBox/ArchiveBox" \
+    com.docker.image.source.entrypoint="Dockerfile" \
+    # TODO: release ArchiveBox as a Docker Desktop extension (requires these labels):
+    # https://docs.docker.com/desktop/extensions-sdk/architecture/metadata/
+    com.docker.desktop.extension.api.version=">= 1.4.7" \
+    com.docker.desktop.extension.icon="https://archivebox.io/icon.png" \
+    com.docker.extension.publisher-url="https://archivebox.io" \
+    com.docker.extension.screenshots='[{"alt": "Screenshot of Admin UI", "url": "https://github.com/ArchiveBox/ArchiveBox/assets/511499/e8e0b6f8-8fdf-4b7f-8124-c10d8699bdb2"}]' \
+    com.docker.extension.detailed-description='See here for detailed documentation: https://wiki.archivebox.io' \
+    com.docker.extension.changelog='See here for release notes: https://github.com/ArchiveBox/ArchiveBox/releases' \
+    com.docker.extension.categories='database,utility-tools'
+    
 ARG TARGETPLATFORM
 ARG TARGETPLATFORM
 ARG TARGETOS
 ARG TARGETOS
 ARG TARGETARCH
 ARG TARGETARCH
@@ -167,7 +181,6 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
         curl wget git yt-dlp ffmpeg ripgrep \
         curl wget git yt-dlp ffmpeg ripgrep \
         # Packages we have also needed in the past:
         # Packages we have also needed in the past:
         # youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
         # youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
-        # fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
     && rm -rf /var/lib/apt/lists/* \
     && rm -rf /var/lib/apt/lists/* \
     # Save version info
     # Save version info
     && ( \
     && ( \
@@ -183,6 +196,11 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/ms-playwright,sharing=locked,id=browsers-$TARGETARCH$TARGETVARIANT \
     echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
     echo "[+] Installing Browser binary dependencies to $PLAYWRIGHT_BROWSERS_PATH..." \
     && apt-get update -qq \
     && apt-get update -qq \
+    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-khmeros fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
+        # chrome can run without dbus/upower technically, it complains about missing dbus but should run ok anyway
+        # libxss1 dbus dbus-x11 upower \
+    # && service dbus start \
     && if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
     && if [[ "$TARGETPLATFORM" == *amd64* || "$TARGETPLATFORM" == *arm64* ]]; then \
         # install Chromium using playwright
         # install Chromium using playwright
         pip install playwright \
         pip install playwright \
@@ -190,10 +208,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
         && playwright install --with-deps chromium \
         && playwright install --with-deps chromium \
         && export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
         && export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
     else \
     else \
-        # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.) 
-        apt-get install -qq -y -t bookworm-backports --no-install-recommends \
-            chromium fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
-        && export CHROME_BINARY="$(which chromium)"; \
+        # fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
+        # apt-get install -qq -y -t bookworm-backports --no-install-recommends \
+        #     chromium \
+        # && export CHROME_BINARY="$(which chromium)"; \
+        echo 'armv7 no longer supported in versions after v0.7.3' \
+        exit 1; \
     fi \
     fi \
     && rm -rf /var/lib/apt/lists/* \
     && rm -rf /var/lib/apt/lists/* \
     && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
     && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
@@ -262,9 +282,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
 
 
 # Setup ArchiveBox runtime config
 # Setup ArchiveBox runtime config
 WORKDIR "$DATA_DIR"
 WORKDIR "$DATA_DIR"
-ENV IN_DOCKER=True
+ENV IN_DOCKER=True \
+    DISPLAY=novnc:0.0 \
+    CUSTOM_TEMPLATES_DIR=/data/templates \
+    CHROME_USER_DATA_DIR=/data/personas/Default/chromium \
+    GOOGLE_API_KEY=no \
+    GOOGLE_DEFAULT_CLIENT_ID=no \
+    GOOGLE_DEFAULT_CLIENT_SECRET=no \
+    ALLOWED_HOSTS=*
     ## No need to set explicitly, these values will be autodetected by archivebox in docker:
     ## No need to set explicitly, these values will be autodetected by archivebox in docker:
-    # CHROME_SANDBOX=False \
     # WGET_BINARY="wget" \
     # WGET_BINARY="wget" \
     # YOUTUBEDL_BINARY="yt-dlp" \
     # YOUTUBEDL_BINARY="yt-dlp" \
     # CHROME_BINARY="/usr/bin/chromium-browser" \
     # CHROME_BINARY="/usr/bin/chromium-browser" \
@@ -289,9 +315,8 @@ WORKDIR "$DATA_DIR"
 VOLUME "$DATA_DIR"
 VOLUME "$DATA_DIR"
 EXPOSE 8000
 EXPOSE 8000
 
 
-# Optional:
-# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
-#     CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1
+HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
+    CMD curl --silent 'http://localhost:8000/health/' | grep -q 'OK'
 
 
 ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
 ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
 CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]
 CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]

+ 1 - 1
README.md

@@ -1076,7 +1076,7 @@ Because ArchiveBox is designed to ingest a large volume of URLs with multiple co
 <li><strong>Don't store large collections on older filesystems like EXT3/FAT</strong> as they may not be able to handle more than 50k directory entries in the <code>data/archive/</code> folder.
 <li><strong>Don't store large collections on older filesystems like EXT3/FAT</strong> as they may not be able to handle more than 50k directory entries in the <code>data/archive/</code> folder.
 </li>
 </li>
 <li><strong>Try to keep the <code>data/index.sqlite3</code> file on local drive (not a network mount)</strong> or SSD for maximum performance, however the <code>data/archive/</code> folder can be on a network mount or slower HDD.</li>
 <li><strong>Try to keep the <code>data/index.sqlite3</code> file on local drive (not a network mount)</strong> or SSD for maximum performance, however the <code>data/archive/</code> folder can be on a network mount or slower HDD.</li>
-<li>If using Docker or NFS/SMB/FUSE for the `data/archive/` folder, you may need to set <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid"><code>PUID</code> & <code>PGID</code></a> and <a href="https://github.com/ArchiveBox/ArchiveBox/issues/1304">disable <code>root_squash</code></a> on your fileshare server.
+<li>If using Docker or NFS/SMB/FUSE for the <code>data/archive/</code> folder, you may need to set <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid"><code>PUID</code> & <code>PGID</code></a> and <a href="https://github.com/ArchiveBox/ArchiveBox/issues/1304">disable <code>root_squash</code></a> on your fileshare server.
 </li>
 </li>
 </ul>
 </ul>
 
 

+ 86 - 39
archivebox/config.py

@@ -112,6 +112,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'LDAP_FIRSTNAME_ATTR':       {'type': str,   'default': None},
         'LDAP_FIRSTNAME_ATTR':       {'type': str,   'default': None},
         'LDAP_LASTNAME_ATTR':        {'type': str,   'default': None},
         'LDAP_LASTNAME_ATTR':        {'type': str,   'default': None},
         'LDAP_EMAIL_ATTR':           {'type': str,   'default': None},
         'LDAP_EMAIL_ATTR':           {'type': str,   'default': None},
+        'LDAP_CREATE_SUPERUSER':      {'type': bool,  'default': False},
     },
     },
 
 
     'ARCHIVE_METHOD_TOGGLES': {
     'ARCHIVE_METHOD_TOGGLES': {
@@ -136,14 +137,15 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
     },
     },
 
 
     'ARCHIVE_METHOD_OPTIONS': {
     'ARCHIVE_METHOD_OPTIONS': {
-        'RESOLUTION':               {'type': str,   'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
-        'GIT_DOMAINS':              {'type': str,   'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com'},
+        'RESOLUTION':               {'type': str,   'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION','WINDOW_SIZE')},
+        'GIT_DOMAINS':              {'type': str,   'default': 'github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht'},
         'CHECK_SSL_VALIDITY':       {'type': bool,  'default': True},
         'CHECK_SSL_VALIDITY':       {'type': bool,  'default': True},
         'MEDIA_MAX_SIZE':           {'type': str,   'default': '750m'},
         'MEDIA_MAX_SIZE':           {'type': str,   'default': '750m'},
 
 
-        'CURL_USER_AGENT':          {'type': str,   'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
-        'WGET_USER_AGENT':          {'type': str,   'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
-        'CHROME_USER_AGENT':        {'type': str,   'default': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
+        'USER_AGENT':               {'type': str,   'default': None},
+        'CURL_USER_AGENT':          {'type': str,   'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) curl/{CURL_VERSION}'},
+        'WGET_USER_AGENT':          {'type': str,   'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/) wget/{WGET_VERSION}'},
+        'CHROME_USER_AGENT':        {'type': str,   'default': lambda c: c['USER_AGENT'] or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)'},
 
 
         'COOKIES_FILE':             {'type': str,   'default': None},
         'COOKIES_FILE':             {'type': str,   'default': None},
         'CHROME_USER_DATA_DIR':     {'type': str,   'default': None},
         'CHROME_USER_DATA_DIR':     {'type': str,   'default': None},
@@ -151,7 +153,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
         'CHROME_TIMEOUT':           {'type': int,   'default': 0},
         'CHROME_TIMEOUT':           {'type': int,   'default': 0},
         'CHROME_HEADLESS':          {'type': bool,  'default': True},
         'CHROME_HEADLESS':          {'type': bool,  'default': True},
         'CHROME_SANDBOX':           {'type': bool,  'default': lambda c: not c['IN_DOCKER']},
         'CHROME_SANDBOX':           {'type': bool,  'default': lambda c: not c['IN_DOCKER']},
+        'CHROME_EXTRA_ARGS':        {'type': list,  'default': None},
+
         'YOUTUBEDL_ARGS':           {'type': list,  'default': lambda c: [
         'YOUTUBEDL_ARGS':           {'type': list,  'default': lambda c: [
+                                                                '--restrict-filenames',
+                                                                '--trim-filenames', '128',
                                                                 '--write-description',
                                                                 '--write-description',
                                                                 '--write-info-json',
                                                                 '--write-info-json',
                                                                 '--write-annotations',
                                                                 '--write-annotations',
@@ -173,6 +179,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
                                                                 '--add-metadata',
                                                                 '--add-metadata',
                                                                 '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
                                                                 '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
                                                                 ]},
                                                                 ]},
+        'YOUTUBEDL_EXTRA_ARGS':     {'type': list,  'default': None},
 
 
 
 
         'WGET_ARGS':                {'type': list,  'default': ['--no-verbose',
         'WGET_ARGS':                {'type': list,  'default': ['--no-verbose',
@@ -184,12 +191,17 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
                                                                 '--no-parent',
                                                                 '--no-parent',
                                                                 '-e', 'robots=off',
                                                                 '-e', 'robots=off',
                                                                 ]},
                                                                 ]},
+        'WGET_EXTRA_ARGS':          {'type': list,  'default': None},
         'CURL_ARGS':                {'type': list,  'default': ['--silent',
         'CURL_ARGS':                {'type': list,  'default': ['--silent',
                                                                 '--location',
                                                                 '--location',
                                                                 '--compressed'
                                                                 '--compressed'
                                                                ]},
                                                                ]},
+        'CURL_EXTRA_ARGS':          {'type': list,  'default': None},
         'GIT_ARGS':                 {'type': list,  'default': ['--recursive']},
         'GIT_ARGS':                 {'type': list,  'default': ['--recursive']},
-        'SINGLEFILE_ARGS':          {'type': list,  'default' : None},
+        'SINGLEFILE_ARGS':          {'type': list,  'default': None},
+        'SINGLEFILE_EXTRA_ARGS':    {'type': list,  'default': None},
+        'MERCURY_ARGS':             {'type': list,  'default': ['--format=text']},
+        'MERCURY_EXTRA_ARGS':       {'type': list,  'default': None},
         'FAVICON_PROVIDER':         {'type': str,   'default': 'https://www.google.com/s2/favicons?domain={}'},
         'FAVICON_PROVIDER':         {'type': str,   'default': 'https://www.google.com/s2/favicons?domain={}'},
     },
     },
 
 
@@ -269,6 +281,7 @@ TEMPLATES_DIR_NAME = 'templates'
 ARCHIVE_DIR_NAME = 'archive'
 ARCHIVE_DIR_NAME = 'archive'
 SOURCES_DIR_NAME = 'sources'
 SOURCES_DIR_NAME = 'sources'
 LOGS_DIR_NAME = 'logs'
 LOGS_DIR_NAME = 'logs'
+PERSONAS_DIR_NAME = 'personas'
 SQL_INDEX_FILENAME = 'index.sqlite3'
 SQL_INDEX_FILENAME = 'index.sqlite3'
 JSON_INDEX_FILENAME = 'index.json'
 JSON_INDEX_FILENAME = 'index.json'
 HTML_INDEX_FILENAME = 'index.html'
 HTML_INDEX_FILENAME = 'index.html'
@@ -342,9 +355,11 @@ ALLOWED_IN_OUTPUT_DIR = {
     'static',
     'static',
     'sonic',
     'sonic',
     'search.sqlite3',
     'search.sqlite3',
+    'crontabs',
     ARCHIVE_DIR_NAME,
     ARCHIVE_DIR_NAME,
     SOURCES_DIR_NAME,
     SOURCES_DIR_NAME,
     LOGS_DIR_NAME,
     LOGS_DIR_NAME,
+    PERSONAS_DIR_NAME,
     SQL_INDEX_FILENAME,
     SQL_INDEX_FILENAME,
     f'{SQL_INDEX_FILENAME}-wal',
     f'{SQL_INDEX_FILENAME}-wal',
     f'{SQL_INDEX_FILENAME}-shm',
     f'{SQL_INDEX_FILENAME}-shm',
@@ -363,24 +378,32 @@ ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
 
 
 ############################## Version Config ##################################
 ############################## Version Config ##################################
 
 
-def get_system_user():
-    SYSTEM_USER = getpass.getuser() or os.getlogin()
+def get_system_user() -> str:
+    # some host OS's are unable to provide a username (k3s, Windows), making this complicated
+    # uid 999 is especially problematic and breaks many attempts
+    SYSTEM_USER = None
+    FALLBACK_USER_PLACHOLDER = f'user_{os.getuid()}'
+
+    # Option 1
     try:
     try:
         import pwd
         import pwd
-        return pwd.getpwuid(os.geteuid()).pw_name or SYSTEM_USER
-    except KeyError:
-        # Process' UID might not map to a user in cases such as running the Docker image
-        # (where `archivebox` is 999) as a different UID.
+        SYSTEM_USER = SYSTEM_USER or pwd.getpwuid(os.geteuid()).pw_name
+    except (ModuleNotFoundError, Exception):
         pass
         pass
-    except ModuleNotFoundError:
-        # pwd doesn't exist on windows
+
+    # Option 2
+    try:
+        SYSTEM_USER = SYSTEM_USER or getpass.getuser()
+    except Exception:
         pass
         pass
+
+    # Option 3
+    try:
+        SYSTEM_USER = SYSTEM_USER or os.getlogin()
     except Exception:
     except Exception:
-        # this should never happen, uncomment to debug
-        # raise
         pass
         pass
 
 
-    return SYSTEM_USER
+    return SYSTEM_USER or FALLBACK_USER_PLACHOLDER
 
 
 def get_version(config):
 def get_version(config):
     try:
     try:
@@ -487,9 +510,10 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'ARCHIVE_DIR':              {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
     'ARCHIVE_DIR':              {'default': lambda c: c['OUTPUT_DIR'] / ARCHIVE_DIR_NAME},
     'SOURCES_DIR':              {'default': lambda c: c['OUTPUT_DIR'] / SOURCES_DIR_NAME},
     'SOURCES_DIR':              {'default': lambda c: c['OUTPUT_DIR'] / SOURCES_DIR_NAME},
     'LOGS_DIR':                 {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
     'LOGS_DIR':                 {'default': lambda c: c['OUTPUT_DIR'] / LOGS_DIR_NAME},
+    'PERSONAS_DIR':             {'default': lambda c: c['OUTPUT_DIR'] / PERSONAS_DIR_NAME},
     'CONFIG_FILE':              {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
     'CONFIG_FILE':              {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
     'COOKIES_FILE':             {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
     'COOKIES_FILE':             {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
-    'CHROME_USER_DATA_DIR':     {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)},   # None means unset, so we autodetect it with find_chrome_Data_dir(), but emptystring '' means user manually set it to '', and we should store it as None
+    'CHROME_USER_DATA_DIR':     {'default': lambda c: Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None},
     'URL_DENYLIST_PTN':         {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'URL_DENYLIST_PTN':         {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'URL_ALLOWLIST_PTN':        {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'URL_ALLOWLIST_PTN':        {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', ALLOWDENYLIST_REGEX_FLAGS)},
     'DIR_OUTPUT_PERMISSIONS':   {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},  # exec is always needed to list directories
     'DIR_OUTPUT_PERMISSIONS':   {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},  # exec is always needed to list directories
@@ -519,6 +543,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'CURL_VERSION':             {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
     'CURL_VERSION':             {'default': lambda c: bin_version(c['CURL_BINARY']) if c['USE_CURL'] else None},
     'CURL_USER_AGENT':          {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
     'CURL_USER_AGENT':          {'default': lambda c: c['CURL_USER_AGENT'].format(**c)},
     'CURL_ARGS':                {'default': lambda c: c['CURL_ARGS'] or []},
     'CURL_ARGS':                {'default': lambda c: c['CURL_ARGS'] or []},
+    'CURL_EXTRA_ARGS':          {'default': lambda c: c['CURL_EXTRA_ARGS'] or []},
     'SAVE_FAVICON':             {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']},
     'SAVE_FAVICON':             {'default': lambda c: c['USE_CURL'] and c['SAVE_FAVICON']},
     'SAVE_ARCHIVE_DOT_ORG':     {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']},
     'SAVE_ARCHIVE_DOT_ORG':     {'default': lambda c: c['USE_CURL'] and c['SAVE_ARCHIVE_DOT_ORG']},
 
 
@@ -529,18 +554,22 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'SAVE_WGET':                {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
     'SAVE_WGET':                {'default': lambda c: c['USE_WGET'] and c['SAVE_WGET']},
     'SAVE_WARC':                {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
     'SAVE_WARC':                {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
     'WGET_ARGS':                {'default': lambda c: c['WGET_ARGS'] or []},
     'WGET_ARGS':                {'default': lambda c: c['WGET_ARGS'] or []},
+    'WGET_EXTRA_ARGS':          {'default': lambda c: c['WGET_EXTRA_ARGS'] or []},
 
 
     'RIPGREP_VERSION':          {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
     'RIPGREP_VERSION':          {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
 
 
     'USE_SINGLEFILE':           {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
     'USE_SINGLEFILE':           {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
     'SINGLEFILE_VERSION':       {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
     'SINGLEFILE_VERSION':       {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
     'SINGLEFILE_ARGS':          {'default': lambda c: c['SINGLEFILE_ARGS'] or []},
     'SINGLEFILE_ARGS':          {'default': lambda c: c['SINGLEFILE_ARGS'] or []},
+    'SINGLEFILE_EXTRA_ARGS':    {'default': lambda c: c['SINGLEFILE_EXTRA_ARGS'] or []},
 
 
     'USE_READABILITY':          {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']},
     'USE_READABILITY':          {'default': lambda c: c['USE_READABILITY'] and c['SAVE_READABILITY']},
     'READABILITY_VERSION':      {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
     'READABILITY_VERSION':      {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
 
 
     'USE_MERCURY':              {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
     'USE_MERCURY':              {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
     'MERCURY_VERSION':          {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None},  # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
     'MERCURY_VERSION':          {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None},  # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
+    'MERCURY_ARGS':             {'default': lambda c: c['MERCURY_ARGS'] or []},
+    'MERCURY_EXTRA_ARGS':       {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []},
 
 
     'USE_GIT':                  {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
     'USE_GIT':                  {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
     'GIT_VERSION':              {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
     'GIT_VERSION':              {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},
@@ -550,6 +579,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'YOUTUBEDL_VERSION':        {'default': lambda c: bin_version(c['YOUTUBEDL_BINARY']) if c['USE_YOUTUBEDL'] else None},
     'YOUTUBEDL_VERSION':        {'default': lambda c: bin_version(c['YOUTUBEDL_BINARY']) if c['USE_YOUTUBEDL'] else None},
     'SAVE_MEDIA':               {'default': lambda c: c['USE_YOUTUBEDL'] and c['SAVE_MEDIA']},
     'SAVE_MEDIA':               {'default': lambda c: c['USE_YOUTUBEDL'] and c['SAVE_MEDIA']},
     'YOUTUBEDL_ARGS':           {'default': lambda c: c['YOUTUBEDL_ARGS'] or []},
     'YOUTUBEDL_ARGS':           {'default': lambda c: c['YOUTUBEDL_ARGS'] or []},
+    'YOUTUBEDL_EXTRA_ARGS':     {'default': lambda c: c['YOUTUBEDL_EXTRA_ARGS'] or []},
 
 
     'CHROME_BINARY':            {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
     'CHROME_BINARY':            {'default': lambda c: c['CHROME_BINARY'] or find_chrome_binary()},
     'USE_CHROME':               {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
     'USE_CHROME':               {'default': lambda c: c['USE_CHROME'] and c['CHROME_BINARY'] and (c['SAVE_PDF'] or c['SAVE_SCREENSHOT'] or c['SAVE_DOM'] or c['SAVE_SINGLEFILE'])},
@@ -571,6 +601,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
     'EXTERNAL_LOCATIONS':       {'default': lambda c: get_external_locations(c)},
     'EXTERNAL_LOCATIONS':       {'default': lambda c: get_external_locations(c)},
     'DATA_LOCATIONS':           {'default': lambda c: get_data_locations(c)},
     'DATA_LOCATIONS':           {'default': lambda c: get_data_locations(c)},
     'CHROME_OPTIONS':           {'default': lambda c: get_chrome_info(c)},
     'CHROME_OPTIONS':           {'default': lambda c: get_chrome_info(c)},
+    'CHROME_EXTRA_ARGS':        {'default': lambda c: c['CHROME_EXTRA_ARGS'] or []},
     'SAVE_ALLOWLIST_PTN':       {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
     'SAVE_ALLOWLIST_PTN':       {'default': lambda c: c['SAVE_ALLOWLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_ALLOWLIST'].items()}},
     'SAVE_DENYLIST_PTN':        {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
     'SAVE_DENYLIST_PTN':        {'default': lambda c: c['SAVE_DENYLIST'] and {re.compile(k, ALLOWDENYLIST_REGEX_FLAGS): v for k, v in c['SAVE_DENYLIST'].items()}},
 }
 }
@@ -899,27 +930,36 @@ def find_chrome_binary() -> Optional[str]:
 
 
 def find_chrome_data_dir() -> Optional[str]:
 def find_chrome_data_dir() -> Optional[str]:
     """find any installed chrome user data directories in the default locations"""
     """find any installed chrome user data directories in the default locations"""
-    # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
-    # make sure data dir finding precedence order always matches binary finding order
-    default_profile_paths = (
-        '~/.config/chromium',
-        '~/Library/Application Support/Chromium',
-        '~/AppData/Local/Chromium/User Data',
-        '~/.config/chrome',
-        '~/.config/google-chrome',
-        '~/Library/Application Support/Google/Chrome',
-        '~/AppData/Local/Google/Chrome/User Data',
-        '~/.config/google-chrome-stable',
-        '~/.config/google-chrome-beta',
-        '~/Library/Application Support/Google/Chrome Canary',
-        '~/AppData/Local/Google/Chrome SxS/User Data',
-        '~/.config/google-chrome-unstable',
-        '~/.config/google-chrome-dev',
-    )
-    for path in default_profile_paths:
-        full_path = Path(path).resolve()
-        if full_path.exists():
-            return full_path
+    # deprecated because this is DANGEROUS, do not re-implement/uncomment this behavior.
+
+    # Going forward we want to discourage people from using their main chrome profile for archiving.
+    # Session tokens, personal data, and cookies are often returned in server responses,
+    # when they get archived, they are essentially burned as anyone who can view the archive
+    # can use that data to masquerade as the logged-in user that did the archiving.
+    # For this reason users should always create dedicated burner profiles for archiving and not use
+    # their daily driver main accounts.
+
+    # # Precedence: Chromium, Chrome, Beta, Canary, Unstable, Dev
+    # # make sure data dir finding precedence order always matches binary finding order
+    # default_profile_paths = (
+    #     '~/.config/chromium',
+    #     '~/Library/Application Support/Chromium',
+    #     '~/AppData/Local/Chromium/User Data',
+    #     '~/.config/chrome',
+    #     '~/.config/google-chrome',
+    #     '~/Library/Application Support/Google/Chrome',
+    #     '~/AppData/Local/Google/Chrome/User Data',
+    #     '~/.config/google-chrome-stable',
+    #     '~/.config/google-chrome-beta',
+    #     '~/Library/Application Support/Google/Chrome Canary',
+    #     '~/AppData/Local/Google/Chrome SxS/User Data',
+    #     '~/.config/google-chrome-unstable',
+    #     '~/.config/google-chrome-dev',
+    # )
+    # for path in default_profile_paths:
+    #     full_path = Path(path).resolve()
+    #     if full_path.exists():
+    #         return full_path
     return None
     return None
 
 
 def wget_supports_compression(config):
 def wget_supports_compression(config):
@@ -990,6 +1030,11 @@ def get_data_locations(config: ConfigDict) -> ConfigValue:
             'enabled': True,
             'enabled': True,
             'is_valid': config['LOGS_DIR'].exists(),
             'is_valid': config['LOGS_DIR'].exists(),
         },
         },
+        'PERSONAS_DIR': {
+            'path': config['PERSONAS_DIR'].resolve(),
+            'enabled': True,
+            'is_valid': config['PERSONAS_DIR'].exists(),
+        },
         'ARCHIVE_DIR': {
         'ARCHIVE_DIR': {
             'path': config['ARCHIVE_DIR'].resolve(),
             'path': config['ARCHIVE_DIR'].resolve(),
             'enabled': True,
             'enabled': True,
@@ -1337,6 +1382,8 @@ def check_migrations(out_dir: Union[str, Path, None]=None, config: ConfigDict=CO
 
 
     (Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
     (Path(output_dir) / SOURCES_DIR_NAME).mkdir(exist_ok=True)
     (Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
     (Path(output_dir) / LOGS_DIR_NAME).mkdir(exist_ok=True)
+    (Path(output_dir) / PERSONAS_DIR_NAME).mkdir(exist_ok=True)
+    (Path(output_dir) / PERSONAS_DIR_NAME / 'Default').mkdir(exist_ok=True)
 
 
 
 
 
 

+ 1 - 0
archivebox/core/__init__.py

@@ -1 +1,2 @@
 __package__ = 'archivebox.core'
 __package__ = 'archivebox.core'
+

+ 115 - 77
archivebox/core/admin.py

@@ -6,6 +6,7 @@ from contextlib import redirect_stdout
 from datetime import datetime, timezone
 from datetime import datetime, timezone
 
 
 from django.contrib import admin
 from django.contrib import admin
+from django.db.models import Count
 from django.urls import path
 from django.urls import path
 from django.utils.html import format_html
 from django.utils.html import format_html
 from django.utils.safestring import mark_safe
 from django.utils.safestring import mark_safe
@@ -23,8 +24,16 @@ from core.mixins import SearchResultsAdminMixin
 from index.html import snapshot_icons
 from index.html import snapshot_icons
 from logging_util import printable_filesize
 from logging_util import printable_filesize
 from main import add, remove
 from main import add, remove
-from config import OUTPUT_DIR, SNAPSHOTS_PER_PAGE
 from extractors import archive_links
 from extractors import archive_links
+from config import (
+    OUTPUT_DIR,
+    SNAPSHOTS_PER_PAGE,
+    VERSION,
+    VERSIONS_AVAILABLE,
+    CAN_UPGRADE
+)
+
+GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
 
 
 # Admin URLs
 # Admin URLs
 # /admin/
 # /admin/
@@ -39,6 +48,60 @@ from extractors import archive_links
 # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
 # TODO: https://stackoverflow.com/questions/40760880/add-custom-button-to-django-admin-panel
 
 
 
 
+class ArchiveBoxAdmin(admin.AdminSite):
+    site_header = 'ArchiveBox'
+    index_title = 'Links'
+    site_title = 'Index'
+    namespace = 'admin'
+
+    def get_urls(self):
+        return [
+            path('core/snapshot/add/', self.add_view, name='Add'),
+        ] + super().get_urls()
+
+    def add_view(self, request):
+        if not request.user.is_authenticated:
+            return redirect(f'/admin/login/?next={request.path}')
+
+        request.current_app = self.name
+        context = {
+            **self.each_context(request),
+            'title': 'Add URLs',
+        }
+
+        if request.method == 'GET':
+            context['form'] = AddLinkForm()
+
+        elif request.method == 'POST':
+            form = AddLinkForm(request.POST)
+            if form.is_valid():
+                url = form.cleaned_data["url"]
+                print(f'[+] Adding URL: {url}')
+                depth = 0 if form.cleaned_data["depth"] == "0" else 1
+                input_kwargs = {
+                    "urls": url,
+                    "depth": depth,
+                    "update_all": False,
+                    "out_dir": OUTPUT_DIR,
+                }
+                add_stdout = StringIO()
+                with redirect_stdout(add_stdout):
+                   add(**input_kwargs)
+                print(add_stdout.getvalue())
+
+                context.update({
+                    "stdout": ansi_to_html(add_stdout.getvalue().strip()),
+                    "form": AddLinkForm()
+                })
+            else:
+                context["form"] = form
+
+        return render(template_name='add.html', request=request, context=context)
+
+archivebox_admin = ArchiveBoxAdmin()
+archivebox_admin.register(get_user_model())
+archivebox_admin.disable_action('delete_selected')
+
 class ArchiveResultInline(admin.TabularInline):
 class ArchiveResultInline(admin.TabularInline):
     model = ArchiveResult
     model = ArchiveResult
 
 
@@ -48,11 +111,11 @@ class TagInline(admin.TabularInline):
 from django.contrib.admin.helpers import ActionForm
 from django.contrib.admin.helpers import ActionForm
 from django.contrib.admin.widgets import AutocompleteSelectMultiple
 from django.contrib.admin.widgets import AutocompleteSelectMultiple
 
 
-# WIP: broken by Django 3.1.2 -> 4.0 migration
 class AutocompleteTags:
 class AutocompleteTags:
     model = Tag
     model = Tag
     search_fields = ['name']
     search_fields = ['name']
     name = 'tags'
     name = 'tags'
+    remote_field = TagInline
 
 
 class AutocompleteTagsAdminStub:
 class AutocompleteTagsAdminStub:
     name = 'admin'
     name = 'admin'
@@ -62,7 +125,6 @@ class SnapshotActionForm(ActionForm):
     tags = forms.ModelMultipleChoiceField(
     tags = forms.ModelMultipleChoiceField(
         queryset=Tag.objects.all(),
         queryset=Tag.objects.all(),
         required=False,
         required=False,
-        # WIP: broken by Django 3.1.2 -> 4.0 migration
         widget=AutocompleteSelectMultiple(
         widget=AutocompleteSelectMultiple(
             AutocompleteTags(),
             AutocompleteTags(),
             AutocompleteTagsAdminStub(),
             AutocompleteTagsAdminStub(),
@@ -81,6 +143,7 @@ class SnapshotActionForm(ActionForm):
     # )
     # )
 
 
 
 
[email protected](Snapshot, site=archivebox_admin)
 class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
 class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
     list_display = ('added', 'title_str', 'files', 'size', 'url_str')
     list_display = ('added', 'title_str', 'files', 'size', 'url_str')
     sort_fields = ('title_str', 'url_str', 'added', 'files')
     sort_fields = ('title_str', 'url_str', 'added', 'files')
@@ -96,6 +159,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
 
 
     action_form = SnapshotActionForm
     action_form = SnapshotActionForm
 
 
+    def changelist_view(self, request, extra_context=None):
+        extra_context = extra_context or {}
+        return super().changelist_view(request, extra_context | GLOBAL_CONTEXT)
+
     def get_urls(self):
     def get_urls(self):
         urls = super().get_urls()
         urls = super().get_urls()
         custom_urls = [
         custom_urls = [
@@ -105,7 +172,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
 
 
     def get_queryset(self, request):
     def get_queryset(self, request):
         self.request = request
         self.request = request
-        return super().get_queryset(request).prefetch_related('tags')
+        return super().get_queryset(request).prefetch_related('tags').annotate(archiveresult_count=Count('archiveresult'))
 
 
     def tag_list(self, obj):
     def tag_list(self, obj):
         return ', '.join(obj.tags.values_list('name', flat=True))
         return ', '.join(obj.tags.values_list('name', flat=True))
@@ -163,6 +230,10 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
             obj.id,
             obj.id,
         )
         )
 
 
+    @admin.display(
+        description='Title',
+        ordering='title',
+    )
     def title_str(self, obj):
     def title_str(self, obj):
         canon = obj.as_link().canonical_outputs()
         canon = obj.as_link().canonical_outputs()
         tags = ''.join(
         tags = ''.join(
@@ -184,12 +255,17 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
             urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
             urldecode(htmldecode(obj.latest_title or obj.title or ''))[:128] or 'Pending...'
         ) + mark_safe(f' <span class="tags">{tags}</span>')
         ) + mark_safe(f' <span class="tags">{tags}</span>')
 
 
+    @admin.display(
+        description='Files Saved',
+        ordering='archiveresult_count',
+    )
     def files(self, obj):
     def files(self, obj):
         return snapshot_icons(obj)
         return snapshot_icons(obj)
 
 
-    files.admin_order_field = 'updated'
-    files.short_description = 'Files Saved'
 
 
+    @admin.display(
+        ordering='archiveresult_count'
+    )
     def size(self, obj):
     def size(self, obj):
         archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
         archive_size = (Path(obj.link_dir) / 'index.html').exists() and obj.archive_size
         if archive_size:
         if archive_size:
@@ -204,8 +280,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
             size_txt,
             size_txt,
         )
         )
 
 
-    size.admin_order_field = 'archiveresult__count'
 
 
+    @admin.display(
+        description='Original URL',
+        ordering='url',
+    )
     def url_str(self, obj):
     def url_str(self, obj):
         return format_html(
         return format_html(
             '<a href="{}"><code style="user-select: all;">{}</code></a>',
             '<a href="{}"><code style="user-select: all;">{}</code></a>',
@@ -242,65 +321,76 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin):
     #     print('[*] Got request', request.method, request.POST)
     #     print('[*] Got request', request.method, request.POST)
     #     return super().changelist_view(request, extra_context=None)
     #     return super().changelist_view(request, extra_context=None)
 
 
+    @admin.action(
+        description="Pull"
+    )
     def update_snapshots(self, request, queryset):
     def update_snapshots(self, request, queryset):
         archive_links([
         archive_links([
             snapshot.as_link()
             snapshot.as_link()
             for snapshot in queryset
             for snapshot in queryset
         ], out_dir=OUTPUT_DIR)
         ], out_dir=OUTPUT_DIR)
-    update_snapshots.short_description = "Pull"
 
 
+    @admin.action(
+        description="⬇️ Title"
+    )
     def update_titles(self, request, queryset):
     def update_titles(self, request, queryset):
         archive_links([
         archive_links([
             snapshot.as_link()
             snapshot.as_link()
             for snapshot in queryset
             for snapshot in queryset
         ], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
         ], overwrite=True, methods=('title','favicon'), out_dir=OUTPUT_DIR)
-    update_titles.short_description = "⬇️ Title"
 
 
+    @admin.action(
+        description="Re-Snapshot"
+    )
     def resnapshot_snapshot(self, request, queryset):
     def resnapshot_snapshot(self, request, queryset):
         for snapshot in queryset:
         for snapshot in queryset:
             timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
             timestamp = datetime.now(timezone.utc).isoformat('T', 'seconds')
             new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
             new_url = snapshot.url.split('#')[0] + f'#{timestamp}'
             add(new_url, tag=snapshot.tags_str())
             add(new_url, tag=snapshot.tags_str())
-    resnapshot_snapshot.short_description = "Re-Snapshot"
 
 
+    @admin.action(
+        description="Reset"
+    )
     def overwrite_snapshots(self, request, queryset):
     def overwrite_snapshots(self, request, queryset):
         archive_links([
         archive_links([
             snapshot.as_link()
             snapshot.as_link()
             for snapshot in queryset
             for snapshot in queryset
         ], overwrite=True, out_dir=OUTPUT_DIR)
         ], overwrite=True, out_dir=OUTPUT_DIR)
-    overwrite_snapshots.short_description = "Reset"
 
 
+    @admin.action(
+        description="Delete"
+    )
     def delete_snapshots(self, request, queryset):
     def delete_snapshots(self, request, queryset):
         remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
         remove(snapshots=queryset, yes=True, delete=True, out_dir=OUTPUT_DIR)
 
 
-    delete_snapshots.short_description = "Delete"
 
 
+    @admin.action(
+        description="+"
+    )
     def add_tags(self, request, queryset):
     def add_tags(self, request, queryset):
         tags = request.POST.getlist('tags')
         tags = request.POST.getlist('tags')
         print('[+] Adding tags', tags, 'to Snapshots', queryset)
         print('[+] Adding tags', tags, 'to Snapshots', queryset)
         for obj in queryset:
         for obj in queryset:
             obj.tags.add(*tags)
             obj.tags.add(*tags)
 
 
-    add_tags.short_description = "+"
 
 
+    @admin.action(
+        description="–"
+    )
     def remove_tags(self, request, queryset):
     def remove_tags(self, request, queryset):
         tags = request.POST.getlist('tags')
         tags = request.POST.getlist('tags')
         print('[-] Removing tags', tags, 'to Snapshots', queryset)
         print('[-] Removing tags', tags, 'to Snapshots', queryset)
         for obj in queryset:
         for obj in queryset:
             obj.tags.remove(*tags)
             obj.tags.remove(*tags)
 
 
-    remove_tags.short_description = "–"
 
 
         
         
 
 
-    title_str.short_description = 'Title'
-    url_str.short_description = 'Original URL'
 
 
-    title_str.admin_order_field = 'title'
-    url_str.admin_order_field = 'url'
 
 
 
 
 
 
[email protected](Tag, site=archivebox_admin)
 class TagAdmin(admin.ModelAdmin):
 class TagAdmin(admin.ModelAdmin):
     list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
     list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'id')
     sort_fields = ('id', 'name', 'slug')
     sort_fields = ('id', 'name', 'slug')
@@ -331,6 +421,7 @@ class TagAdmin(admin.ModelAdmin):
         ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...<a>' if obj.snapshot_set.count() > 10 else ''))
         ) + (f'<br/><a href="/admin/core/snapshot/?tags__id__exact={obj.id}">and {total_count-10} more...<a>' if obj.snapshot_set.count() > 10 else ''))
 
 
 
 
[email protected](ArchiveResult, site=archivebox_admin)
 class ArchiveResultAdmin(admin.ModelAdmin):
 class ArchiveResultAdmin(admin.ModelAdmin):
     list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
     list_display = ('id', 'start_ts', 'extractor', 'snapshot_str', 'tags_str', 'cmd_str', 'status', 'output_str')
     sort_fields = ('start_ts', 'extractor', 'status')
     sort_fields = ('start_ts', 'extractor', 'status')
@@ -343,6 +434,9 @@ class ArchiveResultAdmin(admin.ModelAdmin):
     ordering = ['-start_ts']
     ordering = ['-start_ts']
     list_per_page = SNAPSHOTS_PER_PAGE
     list_per_page = SNAPSHOTS_PER_PAGE
 
 
+    @admin.display(
+        description='snapshot'
+    )
     def snapshot_str(self, obj):
     def snapshot_str(self, obj):
         return format_html(
         return format_html(
             '<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
             '<a href="/archive/{}/index.html"><b><code>[{}]</code></b></a><br/>'
@@ -352,6 +446,9 @@ class ArchiveResultAdmin(admin.ModelAdmin):
             obj.snapshot.url[:128],
             obj.snapshot.url[:128],
         )
         )
 
 
+    @admin.display(
+        description='tags'
+    )
     def tags_str(self, obj):
     def tags_str(self, obj):
         return obj.snapshot.tags_str()
         return obj.snapshot.tags_str()
 
 
@@ -368,62 +465,3 @@ class ArchiveResultAdmin(admin.ModelAdmin):
             obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
             obj.output if (obj.status == 'succeeded') and obj.extractor not in ('title', 'archive_org') else 'index.html',
             obj.output,
             obj.output,
         )
         )
-
-    tags_str.short_description = 'tags'
-    snapshot_str.short_description = 'snapshot'
-
-class ArchiveBoxAdmin(admin.AdminSite):
-    site_header = 'ArchiveBox'
-    index_title = 'Links'
-    site_title = 'Index'
-
-    def get_urls(self):
-        return [
-            path('core/snapshot/add/', self.add_view, name='Add'),
-        ] + super().get_urls()
-
-    def add_view(self, request):
-        if not request.user.is_authenticated:
-            return redirect(f'/admin/login/?next={request.path}')
-
-        request.current_app = self.name
-        context = {
-            **self.each_context(request),
-            'title': 'Add URLs',
-        }
-
-        if request.method == 'GET':
-            context['form'] = AddLinkForm()
-
-        elif request.method == 'POST':
-            form = AddLinkForm(request.POST)
-            if form.is_valid():
-                url = form.cleaned_data["url"]
-                print(f'[+] Adding URL: {url}')
-                depth = 0 if form.cleaned_data["depth"] == "0" else 1
-                input_kwargs = {
-                    "urls": url,
-                    "depth": depth,
-                    "update_all": False,
-                    "out_dir": OUTPUT_DIR,
-                }
-                add_stdout = StringIO()
-                with redirect_stdout(add_stdout):
-                   add(**input_kwargs)
-                print(add_stdout.getvalue())
-
-                context.update({
-                    "stdout": ansi_to_html(add_stdout.getvalue().strip()),
-                    "form": AddLinkForm()
-                })
-            else:
-                context["form"] = form
-
-        return render(template_name='add.html', request=request, context=context)
-
-admin.site = ArchiveBoxAdmin()
-admin.site.register(get_user_model())
-admin.site.register(Snapshot, SnapshotAdmin)
-admin.site.register(Tag, TagAdmin)
-admin.site.register(ArchiveResult, ArchiveResultAdmin)
-admin.site.disable_action('delete_selected')

+ 5 - 2
archivebox/core/apps.py

@@ -3,5 +3,8 @@ from django.apps import AppConfig
 
 
 class CoreConfig(AppConfig):
 class CoreConfig(AppConfig):
     name = 'core'
     name = 'core'
-    # WIP: broken by Django 3.1.2 -> 4.0 migration
-    default_auto_field = 'django.db.models.UUIDField'
+
+    def ready(self):
+        from .auth import register_signals
+
+        register_signals()

+ 13 - 0
archivebox/core/auth.py

@@ -0,0 +1,13 @@
+import os
+from django.conf import settings
+from ..config import (
+    LDAP
+)
+
+def register_signals():
+
+    if LDAP:
+        import django_auth_ldap.backend
+        from .auth_ldap import create_user
+
+        django_auth_ldap.backend.populate_user.connect(create_user)

+ 12 - 0
archivebox/core/auth_ldap.py

@@ -0,0 +1,12 @@
+from django.conf import settings
+from ..config import (
+    LDAP_CREATE_SUPERUSER
+)
+
+def create_user(sender, user=None, ldap_user=None, **kwargs):
+
+    if not user.id and LDAP_CREATE_SUPERUSER:
+        user.is_superuser = True
+
+    user.is_staff = True
+    print(f'[!] WARNING: Creating new user {user} based on LDAP user {ldap_user} (is_staff={user.is_staff}, is_superuser={user.is_superuser})')

+ 1 - 1
archivebox/core/mixins.py

@@ -18,4 +18,4 @@ class SearchResultsAdminMixin:
             print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
             print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
             messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
             messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
         
         
-        return qs, use_distinct
+        return qs.distinct(), use_distinct

+ 0 - 4
archivebox/core/settings.py

@@ -269,9 +269,6 @@ AUTH_PASSWORD_VALIDATORS = [
     {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
     {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
 ]
 ]
 
 
-# WIP: broken by Django 3.1.2 -> 4.0 migration
-DEFAULT_AUTO_FIELD = 'django.db.models.UUIDField'
-
 ################################################################################
 ################################################################################
 ### Shell Settings
 ### Shell Settings
 ################################################################################
 ################################################################################
@@ -290,7 +287,6 @@ if IS_SHELL:
 
 
 LANGUAGE_CODE = 'en-us'
 LANGUAGE_CODE = 'en-us'
 USE_I18N = True
 USE_I18N = True
-USE_L10N = True
 USE_TZ = True
 USE_TZ = True
 DATETIME_FORMAT = 'Y-m-d g:iA'
 DATETIME_FORMAT = 'Y-m-d g:iA'
 SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'
 SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'

+ 2 - 10
archivebox/core/urls.py

@@ -1,4 +1,4 @@
-from django.contrib import admin
+from .admin import archivebox_admin
 
 
 from django.urls import path, include
 from django.urls import path, include
 from django.views import static
 from django.views import static
@@ -8,11 +8,6 @@ from django.views.generic.base import RedirectView
 
 
 from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
 from core.views import HomepageView, SnapshotView, PublicIndexView, AddView, HealthCheckView
 
 
-# GLOBAL_CONTEXT doesn't work as-is, disabled for now: https://github.com/ArchiveBox/ArchiveBox/discussions/1306
-# from config import VERSION, VERSIONS_AVAILABLE, CAN_UPGRADE
-# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': VERSIONS_AVAILABLE, 'CAN_UPGRADE': CAN_UPGRADE}
-
-
 # print('DEBUG', settings.DEBUG)
 # print('DEBUG', settings.DEBUG)
 
 
 urlpatterns = [
 urlpatterns = [
@@ -34,11 +29,8 @@ urlpatterns = [
 
 
 
 
     path('accounts/', include('django.contrib.auth.urls')),
     path('accounts/', include('django.contrib.auth.urls')),
-    path('admin/', admin.site.urls),
+    path('admin/', archivebox_admin.urls),
     
     
-    # do not add extra_context like this as not all admin views (e.g. ModelAdmin.autocomplete_view accept extra kwargs)
-    # path('admin/', admin.site.urls, {'extra_context': GLOBAL_CONTEXT}),
-
     path('health/', HealthCheckView.as_view(), name='healthcheck'),
     path('health/', HealthCheckView.as_view(), name='healthcheck'),
     path('error/', lambda _: 1/0),
     path('error/', lambda _: 1/0),
 
 

+ 1 - 1
archivebox/core/views.py

@@ -231,7 +231,7 @@ class PublicIndexView(ListView):
                 qs = qs | query_search_index(query)
                 qs = qs | query_search_index(query)
             except Exception as err:
             except Exception as err:
                 print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
                 print(f'[!] Error while using search backend: {err.__class__.__name__} {err}')
-        return qs
+        return qs.distinct()
 
 
     def get(self, *args, **kwargs):
     def get(self, *args, **kwargs):
         if PUBLIC_INDEX or self.request.user.is_authenticated:
         if PUBLIC_INDEX or self.request.user.is_authenticated:

+ 9 - 12
archivebox/extractors/__init__.py

@@ -131,7 +131,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
 
 
         link = load_link_details(link, out_dir=out_dir)
         link = load_link_details(link, out_dir=out_dir)
         write_link_details(link, out_dir=out_dir, skip_sql_index=False)
         write_link_details(link, out_dir=out_dir, skip_sql_index=False)
-        log_link_archiving_started(link, out_dir, is_new)
+        log_link_archiving_started(link, str(out_dir), is_new)
         link = link.overwrite(updated=datetime.now(timezone.utc))
         link = link.overwrite(updated=datetime.now(timezone.utc))
         stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
         stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
         start_ts = datetime.now(timezone.utc)
         start_ts = datetime.now(timezone.utc)
@@ -165,16 +165,6 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     stats['skipped'] += 1
                     stats['skipped'] += 1
             except Exception as e:
             except Exception as e:
-                # Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
-                # and https://github.com/ArchiveBox/ArchiveBox/issues/1014
-                # are fixed.
-                """
-                raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
-                    method_name,
-                    link.url,
-                )) from e
-                """
-                # Instead, use the kludgy workaround from
                 # https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627
                 # https://github.com/ArchiveBox/ArchiveBox/issues/984#issuecomment-1150541627
                 with open(ERROR_LOG, "a", encoding='utf-8') as f:
                 with open(ERROR_LOG, "a", encoding='utf-8') as f:
                     command = ' '.join(sys.argv)
                     command = ' '.join(sys.argv)
@@ -186,6 +176,13 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                         ts
                         ts
                     ) + "\n" + str(e) + "\n"))
                     ) + "\n" + str(e) + "\n"))
                     #f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
                     #f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
+               
+                # print(f'        ERROR: {method_name} {e.__class__.__name__}: {e} {getattr(e, "hints", "")}', ts, link.url, command)
+                raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
+                    method_name,
+                    link.url,
+                )) from e
+
 
 
         # print('    ', stats)
         # print('    ', stats)
 
 
@@ -218,7 +215,7 @@ def archive_links(all_links: Union[Iterable[Link], QuerySet], overwrite: bool=Fa
 
 
     if type(all_links) is QuerySet:
     if type(all_links) is QuerySet:
         num_links: int = all_links.count()
         num_links: int = all_links.count()
-        get_link = lambda x: x.as_link()
+        get_link = lambda x: x.as_link_with_details()
         all_links = all_links.iterator()
         all_links = all_links.iterator()
     else:
     else:
         num_links: int = len(all_links)
         num_links: int = len(all_links)

+ 9 - 2
archivebox/extractors/archive_org.py

@@ -10,10 +10,12 @@ from ..system import run, chmod_file
 from ..util import (
 from ..util import (
     enforce_types,
     enforce_types,
     is_static_file,
     is_static_file,
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
     CURL_ARGS,
     CURL_ARGS,
+    CURL_EXTRA_ARGS,
     CHECK_SSL_VALIDITY,
     CHECK_SSL_VALIDITY,
     SAVE_ARCHIVE_DOT_ORG,
     SAVE_ARCHIVE_DOT_ORG,
     CURL_BINARY,
     CURL_BINARY,
@@ -44,13 +46,18 @@ def save_archive_dot_org(link: Link, out_dir: Optional[Path]=None, timeout: int=
     output: ArchiveOutput = 'archive.org.txt'
     output: ArchiveOutput = 'archive.org.txt'
     archive_org_url = None
     archive_org_url = None
     submit_url = 'https://web.archive.org/save/{}'.format(link.url)
     submit_url = 'https://web.archive.org/save/{}'.format(link.url)
-    cmd = [
-        CURL_BINARY,
+    # later options take precedence
+    options = [
         *CURL_ARGS,
         *CURL_ARGS,
+        *CURL_EXTRA_ARGS,
         '--head',
         '--head',
         '--max-time', str(timeout),
         '--max-time', str(timeout),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
+    ]
+    cmd = [
+        CURL_BINARY,
+        *dedupe(options),
         submit_url,
         submit_url,
     ]
     ]
     status = 'succeeded'
     status = 'succeeded'

+ 13 - 3
archivebox/extractors/favicon.py

@@ -6,13 +6,18 @@ from typing import Optional
 
 
 from ..index.schema import Link, ArchiveResult, ArchiveOutput
 from ..index.schema import Link, ArchiveResult, ArchiveOutput
 from ..system import chmod_file, run
 from ..system import chmod_file, run
-from ..util import enforce_types, domain
+from ..util import (
+    enforce_types,
+     domain,
+     dedupe,
+)
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
     SAVE_FAVICON,
     SAVE_FAVICON,
     FAVICON_PROVIDER,
     FAVICON_PROVIDER,
     CURL_BINARY,
     CURL_BINARY,
     CURL_ARGS,
     CURL_ARGS,
+    CURL_EXTRA_ARGS,
     CURL_VERSION,
     CURL_VERSION,
     CHECK_SSL_VALIDITY,
     CHECK_SSL_VALIDITY,
     CURL_USER_AGENT,
     CURL_USER_AGENT,
@@ -34,13 +39,18 @@ def save_favicon(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
 
 
     out_dir = out_dir or link.link_dir
     out_dir = out_dir or link.link_dir
     output: ArchiveOutput = 'favicon.ico'
     output: ArchiveOutput = 'favicon.ico'
-    cmd = [
-        CURL_BINARY,
+    # later options take precedence
+    options = [
         *CURL_ARGS,
         *CURL_ARGS,
+        *CURL_EXTRA_ARGS,
         '--max-time', str(timeout),
         '--max-time', str(timeout),
         '--output', str(output),
         '--output', str(output),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
+    ]
+    cmd = [
+        CURL_BINARY,
+        *dedupe(options),
         FAVICON_PROVIDER.format(domain(link.url)),
         FAVICON_PROVIDER.format(domain(link.url)),
     ]
     ]
     status = 'failed'
     status = 'failed'

+ 9 - 3
archivebox/extractors/headers.py

@@ -9,11 +9,13 @@ from ..system import atomic_write
 from ..util import (
 from ..util import (
     enforce_types,
     enforce_types,
     get_headers,
     get_headers,
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
     CURL_BINARY,
     CURL_BINARY,
     CURL_ARGS,
     CURL_ARGS,
+    CURL_EXTRA_ARGS,
     CURL_USER_AGENT,
     CURL_USER_AGENT,
     CURL_VERSION,
     CURL_VERSION,
     CHECK_SSL_VALIDITY,
     CHECK_SSL_VALIDITY,
@@ -40,14 +42,18 @@ def save_headers(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOUT)
 
 
     status = 'succeeded'
     status = 'succeeded'
     timer = TimedProgress(timeout, prefix='      ')
     timer = TimedProgress(timeout, prefix='      ')
-
-    cmd = [
-        CURL_BINARY,
+    # later options take precedence
+    options = [
         *CURL_ARGS,
         *CURL_ARGS,
+        *CURL_EXTRA_ARGS,
         '--head',
         '--head',
         '--max-time', str(timeout),
         '--max-time', str(timeout),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
+    ]
+    cmd = [
+        CURL_BINARY,
+        *dedupe(options),
         link.url,
         link.url,
     ]
     ]
     try:
     try:

+ 3 - 2
archivebox/extractors/htmltotext.py

@@ -121,9 +121,11 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
 
 
     out_dir = Path(out_dir or link.link_dir)
     out_dir = Path(out_dir or link.link_dir)
     output = "htmltotext.txt"
     output = "htmltotext.txt"
+    cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html']
 
 
     timer = TimedProgress(timeout, prefix='      ')
     timer = TimedProgress(timeout, prefix='      ')
     extracted_text = None
     extracted_text = None
+    status = 'failed'
     try:
     try:
         extractor = HTMLTextExtractor()
         extractor = HTMLTextExtractor()
         document = get_html(link, out_dir)
         document = get_html(link, out_dir)
@@ -136,10 +138,9 @@ def save_htmltotext(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
         extracted_text = str(extractor)
         extracted_text = str(extractor)
 
 
         atomic_write(str(out_dir / output), extracted_text)
         atomic_write(str(out_dir / output), extracted_text)
+        status = 'succeeded'
     except (Exception, OSError) as err:
     except (Exception, OSError) as err:
-        status = 'failed'
         output = err
         output = err
-        cmd = ['(internal) archivebox.extractors.htmltotext', './{singlefile,dom}.html']
     finally:
     finally:
         timer.end()
         timer.end()
 
 

+ 9 - 2
archivebox/extractors/media.py

@@ -8,11 +8,13 @@ from ..system import run, chmod_file
 from ..util import (
 from ..util import (
     enforce_types,
     enforce_types,
     is_static_file,
     is_static_file,
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     MEDIA_TIMEOUT,
     MEDIA_TIMEOUT,
     SAVE_MEDIA,
     SAVE_MEDIA,
     YOUTUBEDL_ARGS,
     YOUTUBEDL_ARGS,
+    YOUTUBEDL_EXTRA_ARGS,
     YOUTUBEDL_BINARY,
     YOUTUBEDL_BINARY,
     YOUTUBEDL_VERSION,
     YOUTUBEDL_VERSION,
     CHECK_SSL_VALIDITY
     CHECK_SSL_VALIDITY
@@ -39,11 +41,16 @@ def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIME
     output: ArchiveOutput = 'media'
     output: ArchiveOutput = 'media'
     output_path = out_dir / output
     output_path = out_dir / output
     output_path.mkdir(exist_ok=True)
     output_path.mkdir(exist_ok=True)
-    cmd = [
-        YOUTUBEDL_BINARY,
+    # later options take precedence
+    options = [
         *YOUTUBEDL_ARGS,
         *YOUTUBEDL_ARGS,
+        *YOUTUBEDL_EXTRA_ARGS,
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
         # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
         # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
+    ]
+    cmd = [
+        YOUTUBEDL_BINARY,
+        *dedupe(options),
         link.url,
         link.url,
     ]
     ]
     status = 'succeeded'
     status = 'succeeded'

+ 10 - 4
archivebox/extractors/mercury.py

@@ -11,13 +11,15 @@ from ..system import run, atomic_write
 from ..util import (
 from ..util import (
     enforce_types,
     enforce_types,
     is_static_file,
     is_static_file,
-
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
     SAVE_MERCURY,
     SAVE_MERCURY,
     DEPENDENCIES,
     DEPENDENCIES,
     MERCURY_VERSION,
     MERCURY_VERSION,
+    MERCURY_ARGS,
+    MERCURY_EXTRA_ARGS,
 )
 )
 from ..logging_util import TimedProgress
 from ..logging_util import TimedProgress
 
 
@@ -60,12 +62,16 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
     timer = TimedProgress(timeout, prefix='      ')
     timer = TimedProgress(timeout, prefix='      ')
     try:
     try:
         output_folder.mkdir(exist_ok=True)
         output_folder.mkdir(exist_ok=True)
-
-        # Get plain text version of article
+        # later options take precedence
+        options = [
+            *MERCURY_ARGS,
+            *MERCURY_EXTRA_ARGS,
+        ]
+        # By default, get plain text version of article
         cmd = [
         cmd = [
             DEPENDENCIES['MERCURY_BINARY']['path'],
             DEPENDENCIES['MERCURY_BINARY']['path'],
             link.url,
             link.url,
-            "--format=text"
+            *dedupe(options)
         ]
         ]
         result = run(cmd, cwd=out_dir, timeout=timeout)
         result = run(cmd, cwd=out_dir, timeout=timeout)
         try:
         try:

+ 11 - 22
archivebox/extractors/singlefile.py

@@ -11,6 +11,7 @@ from ..util import (
     enforce_types,
     enforce_types,
     is_static_file,
     is_static_file,
     chrome_args,
     chrome_args,
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
@@ -18,6 +19,7 @@ from ..config import (
     DEPENDENCIES,
     DEPENDENCIES,
     SINGLEFILE_VERSION,
     SINGLEFILE_VERSION,
     SINGLEFILE_ARGS,
     SINGLEFILE_ARGS,
+    SINGLEFILE_EXTRA_ARGS,
     CHROME_BINARY,
     CHROME_BINARY,
     COOKIES_FILE,
     COOKIES_FILE,
 )
 )
@@ -47,38 +49,24 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
 
 
     # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
     # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
     browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:]))
     browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:]))
+    # later options take precedence
     options = [
     options = [
-        *SINGLEFILE_ARGS,
-        *(["--browser-cookies-file={}".format(COOKIES_FILE)] if COOKIES_FILE else []),
         '--browser-executable-path={}'.format(CHROME_BINARY),
         '--browser-executable-path={}'.format(CHROME_BINARY),
+        *(["--browser-cookies-file={}".format(COOKIES_FILE)] if COOKIES_FILE else []),
         browser_args,
         browser_args,
+        *SINGLEFILE_ARGS,
+        *SINGLEFILE_EXTRA_ARGS,
     ]
     ]
-
-    # Deduplicate options (single-file doesn't like when you use the same option two times)
-    #
-    # NOTE: Options names that come first clobber conflicting names that come later
-    # My logic is SINGLEFILE_ARGS is the option that affects the singlefile command with most 
-    # specificity, therefore the user sets it with a lot intent, therefore it should take precedence 
-    # kind of like the ergonomic principle of lexical scope in programming languages.
-    seen_option_names = []
-    def test_seen(argument):
-        option_name = argument.split("=")[0]
-        if option_name in seen_option_names:
-            return False
-        else:
-            seen_option_names.append(option_name)
-            return True
-    deduped_options = list(filter(test_seen, options))
-
     cmd = [
     cmd = [
         DEPENDENCIES['SINGLEFILE_BINARY']['path'],
         DEPENDENCIES['SINGLEFILE_BINARY']['path'],
-        *deduped_options,
+        *dedupe(options),
         link.url,
         link.url,
         output,
         output,
     ]
     ]
 
 
     status = 'succeeded'
     status = 'succeeded'
     timer = TimedProgress(timeout, prefix='      ')
     timer = TimedProgress(timeout, prefix='      ')
+    result = None
     try:
     try:
         result = run(cmd, cwd=str(out_dir), timeout=timeout)
         result = run(cmd, cwd=str(out_dir), timeout=timeout)
 
 
@@ -86,7 +74,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
         #  "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
         #  "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
         output_tail = [
         output_tail = [
             line.strip()
             line.strip()
-            for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
+            for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
             if line.strip()
             if line.strip()
         ]
         ]
         hints = (
         hints = (
@@ -96,12 +84,13 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
 
 
         # Check for common failure cases
         # Check for common failure cases
         if (result.returncode > 0) or not (out_dir / output).is_file():
         if (result.returncode > 0) or not (out_dir / output).is_file():
-            raise ArchiveError('SingleFile was not able to archive the page', hints)
+            raise ArchiveError(f'SingleFile was not able to archive the page (status={result.returncode})', hints)
         chmod_file(output, cwd=str(out_dir))
         chmod_file(output, cwd=str(out_dir))
     except (Exception, OSError) as err:
     except (Exception, OSError) as err:
         status = 'failed'
         status = 'failed'
         # TODO: Make this prettier. This is necessary to run the command (escape JSON internal quotes).
         # TODO: Make this prettier. This is necessary to run the command (escape JSON internal quotes).
         cmd[2] = browser_args.replace('"', "\\\"")
         cmd[2] = browser_args.replace('"', "\\\"")
+        err.hints = (result.stdout + result.stderr).decode().split('\n')
         output = err
         output = err
     finally:
     finally:
         timer.end()
         timer.end()

+ 10 - 3
archivebox/extractors/title.py

@@ -10,6 +10,7 @@ from ..util import (
     enforce_types,
     enforce_types,
     download_url,
     download_url,
     htmldecode,
     htmldecode,
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     TIMEOUT,
     TIMEOUT,
@@ -17,6 +18,7 @@ from ..config import (
     SAVE_TITLE,
     SAVE_TITLE,
     CURL_BINARY,
     CURL_BINARY,
     CURL_ARGS,
     CURL_ARGS,
+    CURL_EXTRA_ARGS,
     CURL_VERSION,
     CURL_VERSION,
     CURL_USER_AGENT,
     CURL_USER_AGENT,
 )
 )
@@ -75,7 +77,7 @@ def get_html(link: Link, path: Path, timeout: int=TIMEOUT) -> str:
             with open(abs_path / source, "r", encoding="utf-8") as f:
             with open(abs_path / source, "r", encoding="utf-8") as f:
                 document = f.read()
                 document = f.read()
                 break
                 break
-        except (FileNotFoundError, TypeError):
+        except (FileNotFoundError, TypeError, UnicodeDecodeError):
             continue
             continue
     if document is None:
     if document is None:
         return download_url(link.url, timeout=timeout)
         return download_url(link.url, timeout=timeout)
@@ -102,12 +104,17 @@ def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -
     from core.models import Snapshot
     from core.models import Snapshot
 
 
     output: ArchiveOutput = None
     output: ArchiveOutput = None
-    cmd = [
-        CURL_BINARY,
+    # later options take precedence
+    options = [
         *CURL_ARGS,
         *CURL_ARGS,
+        *CURL_EXTRA_ARGS,
         '--max-time', str(timeout),
         '--max-time', str(timeout),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *(['--user-agent', '{}'.format(CURL_USER_AGENT)] if CURL_USER_AGENT else []),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
         *([] if CHECK_SSL_VALIDITY else ['--insecure']),
+    ]
+    cmd = [
+        CURL_BINARY,
+        *dedupe(options),
         link.url,
         link.url,
     ]
     ]
     status = 'succeeded'
     status = 'succeeded'

+ 15 - 3
archivebox/extractors/wget.py

@@ -15,9 +15,11 @@ from ..util import (
     path,
     path,
     domain,
     domain,
     urldecode,
     urldecode,
+    dedupe,
 )
 )
 from ..config import (
 from ..config import (
     WGET_ARGS,
     WGET_ARGS,
+    WGET_EXTRA_ARGS,
     TIMEOUT,
     TIMEOUT,
     SAVE_WGET,
     SAVE_WGET,
     SAVE_WARC,
     SAVE_WARC,
@@ -55,10 +57,10 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
 
 
     # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html
     # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html
     output: ArchiveOutput = None
     output: ArchiveOutput = None
-    cmd = [
-        WGET_BINARY,
-        # '--server-response',  # print headers for better error parsing
+    # later options take precedence
+    options = [
         *WGET_ARGS,
         *WGET_ARGS,
+        *WGET_EXTRA_ARGS,
         '--timeout={}'.format(timeout),
         '--timeout={}'.format(timeout),
         *(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
         *(['--restrict-file-names={}'.format(RESTRICT_FILE_NAMES)] if RESTRICT_FILE_NAMES else []),
         *(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
         *(['--warc-file={}'.format(str(warc_path))] if SAVE_WARC else []),
@@ -68,6 +70,11 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
         *(['--compression=auto'] if WGET_AUTO_COMPRESSION else []),
         *(['--compression=auto'] if WGET_AUTO_COMPRESSION else []),
         *([] if SAVE_WARC else ['--timestamping']),
         *([] if SAVE_WARC else ['--timestamping']),
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']),
         *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate', '--no-hsts']),
+        # '--server-response',  # print headers for better error parsing
+    ]
+    cmd = [
+        WGET_BINARY,
+        *dedupe(options),
         link.url,
         link.url,
     ]
     ]
 
 
@@ -202,4 +209,9 @@ def wget_output_path(link: Link) -> Optional[str]:
     if search_dir.is_dir():
     if search_dir.is_dir():
         return domain(link.url).replace(":", "+")
         return domain(link.url).replace(":", "+")
 
 
+    # fallback to just the domain dir without port
+    search_dir = Path(link.link_dir) / domain(link.url).split(":", 1)[0]
+    if search_dir.is_dir():
+        return domain(link.url).split(":", 1)[0]
+
     return None
     return None

+ 4 - 4
archivebox/index/__init__.py

@@ -250,7 +250,7 @@ def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
     """parse and load existing index with any new links from import_path merged in"""
     from core.models import Snapshot
     from core.models import Snapshot
     try:
     try:
-        return Snapshot.objects.all()
+        return Snapshot.objects.all().only('id')
 
 
     except (KeyboardInterrupt, SystemExit):
     except (KeyboardInterrupt, SystemExit):
         raise SystemExit(0)
         raise SystemExit(0)
@@ -407,7 +407,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
 
 
 def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
 def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links without checking archive status or data directory validity"""
     """indexed links without checking archive status or data directory validity"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
+    links = (snapshot.as_link() for snapshot in snapshots.iterator())
     return {
     return {
         link.link_dir: link
         link.link_dir: link
         for link in links
         for link in links
@@ -415,7 +415,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
 
 
 def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
 def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are archived with a valid data directory"""
     """indexed links that are archived with a valid data directory"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
+    links = (snapshot.as_link() for snapshot in snapshots.iterator())
     return {
     return {
         link.link_dir: link
         link.link_dir: link
         for link in filter(is_archived, links)
         for link in filter(is_archived, links)
@@ -423,7 +423,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
 
 
 def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
 def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are unarchived with no data directory or an empty data directory"""
     """indexed links that are unarchived with no data directory or an empty data directory"""
-    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
+    links = (snapshot.as_link() for snapshot in snapshots.iterator())
     return {
     return {
         link.link_dir: link
         link.link_dir: link
         for link in filter(is_unarchived, links)
         for link in filter(is_unarchived, links)

+ 6 - 2
archivebox/index/schema.py

@@ -379,11 +379,15 @@ class Link:
 
 
         output_paths = (
         output_paths = (
             domain(self.url),
             domain(self.url),
+            'output.html',
             'output.pdf',
             'output.pdf',
             'screenshot.png',
             'screenshot.png',
-            'output.html',
+            'singlefile.html',
+            'readability/content.html',
+            'mercury/content.html',
+            'htmltotext.txt',
             'media',
             'media',
-            'singlefile.html'
+            'git',
         )
         )
 
 
         return any(
         return any(

+ 8 - 8
archivebox/logging_util.py

@@ -432,12 +432,14 @@ def log_archive_method_finished(result: "ArchiveResult"):
                     **ANSI,
                     **ANSI,
                 ),
                 ),
             ]
             ]
+        
+        # import pudb; pudb.set_trace()
 
 
         # Prettify error output hints string and limit to five lines
         # Prettify error output hints string and limit to five lines
         hints = getattr(result.output, 'hints', None) or ()
         hints = getattr(result.output, 'hints', None) or ()
         if hints:
         if hints:
             if isinstance(hints, (list, tuple, type(_ for _ in ()))):
             if isinstance(hints, (list, tuple, type(_ for _ in ()))):
-                hints = [hint.decode() for hint in hints if isinstance(hint, bytes)]
+                hints = [hint.decode() if isinstance(hint, bytes) else str(hint) for hint in hints]
             else:
             else:
                 if isinstance(hints, bytes):
                 if isinstance(hints, bytes):
                     hints = hints.decode()
                     hints = hints.decode()
@@ -492,12 +494,12 @@ def log_removal_started(links: List["Link"], yes: bool, delete: bool):
     if delete:
     if delete:
         file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
         file_counts = [link.num_outputs for link in links if Path(link.link_dir).exists()]
         print(
         print(
-            f'    {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n'
+            f'    {len(links)} Links will be de-listed from the main index, and their archived content folders will be deleted from disk.\n' +
             f'    ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
             f'    ({len(file_counts)} data folders with {sum(file_counts)} archived files will be deleted!)'
         )
         )
     else:
     else:
         print(
         print(
-            '    Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n'
+            '    Matching links will be de-listed from the main index, but their archived content folders will remain in place on disk.\n' +
             '    (Pass --delete if you also want to permanently delete the data folders)'
             '    (Pass --delete if you also want to permanently delete the data folders)'
         )
         )
 
 
@@ -636,17 +638,15 @@ def printable_folder_status(name: str, folder: Dict) -> str:
 
 
 @enforce_types
 @enforce_types
 def printable_dependency_version(name: str, dependency: Dict) -> str:
 def printable_dependency_version(name: str, dependency: Dict) -> str:
-    version = None
+    color, symbol, note, version = 'red', 'X', 'invalid', '?'
+
     if dependency['enabled']:
     if dependency['enabled']:
         if dependency['is_valid']:
         if dependency['is_valid']:
-            color, symbol, note, version = 'green', '√', 'valid', ''
+            color, symbol, note = 'green', '√', 'valid'
 
 
             parsed_version_num = re.search(r'[\d\.]+', dependency['version'])
             parsed_version_num = re.search(r'[\d\.]+', dependency['version'])
             if parsed_version_num:
             if parsed_version_num:
                 version = f'v{parsed_version_num[0]}'
                 version = f'v{parsed_version_num[0]}'
-
-        if not version:
-            color, symbol, note, version = 'red', 'X', 'invalid', '?'
     else:
     else:
         color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
         color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
 
 

+ 9 - 2
archivebox/main.py

@@ -791,6 +791,8 @@ def update(resume: Optional[float]=None,
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
 
 
+    from core.models import ArchiveResult
+
     check_data_folder(out_dir=out_dir)
     check_data_folder(out_dir=out_dir)
     check_dependencies()
     check_dependencies()
     new_links: List[Link] = [] # TODO: Remove input argument: only_new
     new_links: List[Link] = [] # TODO: Remove input argument: only_new
@@ -798,19 +800,23 @@ def update(resume: Optional[float]=None,
     extractors = extractors.split(",") if extractors else []
     extractors = extractors.split(",") if extractors else []
 
 
     # Step 1: Filter for selected_links
     # Step 1: Filter for selected_links
+    print('[*] Finding matching Snapshots to update...')
+    print(f'    - Filtering by {" ".join(filter_patterns)} ({filter_type}) {before=} {after=} {status=}...')
     matching_snapshots = list_links(
     matching_snapshots = list_links(
         filter_patterns=filter_patterns,
         filter_patterns=filter_patterns,
         filter_type=filter_type,
         filter_type=filter_type,
         before=before,
         before=before,
         after=after,
         after=after,
     )
     )
-
+    print(f'    - Checking {matching_snapshots.count()} snapshot folders for existing data with {status=}...')
     matching_folders = list_folders(
     matching_folders = list_folders(
         links=matching_snapshots,
         links=matching_snapshots,
         status=status,
         status=status,
         out_dir=out_dir,
         out_dir=out_dir,
     )
     )
-    all_links = [link for link in matching_folders.values() if link]
+    all_links = (link for link in matching_folders.values() if link)
+    print('    - Sorting by most unfinished -> least unfinished + date archived...')
+    all_links = sorted(all_links, key=lambda link: (ArchiveResult.objects.filter(snapshot__url=link.url).count(), link.timestamp))
 
 
     if index_only:
     if index_only:
         for link in all_links:
         for link in all_links:
@@ -836,6 +842,7 @@ def update(resume: Optional[float]=None,
     if extractors:
     if extractors:
         archive_kwargs["methods"] = extractors
         archive_kwargs["methods"] = extractors
 
 
+
     archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
     archive_links(to_archive, overwrite=overwrite, **archive_kwargs)
 
 
     # Step 4: Re-write links index with updated titles, icons, and resources
     # Step 4: Re-write links index with updated titles, icons, and resources

+ 2371 - 0
archivebox/package-lock.json

@@ -0,0 +1,2371 @@
+{
+  "name": "archivebox",
+  "version": "0.8.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "archivebox",
+      "version": "0.8.0",
+      "license": "MIT",
+      "dependencies": {
+        "@postlight/parser": "^2.2.3",
+        "readability-extractor": "github:ArchiveBox/readability-extractor",
+        "single-file-cli": "^1.1.54"
+      }
+    },
+    "node_modules/@asamuzakjp/dom-selector": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-2.0.2.tgz",
+      "integrity": "sha512-x1KXOatwofR6ZAYzXRBL5wrdV0vwNxlTCK9NCuLqAzQYARqGcvFwiJA6A1ERuh+dgeA4Dxm3JBYictIes+SqUQ==",
+      "dependencies": {
+        "bidi-js": "^1.0.3",
+        "css-tree": "^2.3.1",
+        "is-potential-custom-element-name": "^1.0.1"
+      }
+    },
+    "node_modules/@babel/runtime-corejs2": {
+      "version": "7.24.4",
+      "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.4.tgz",
+      "integrity": "sha512-ZCKqyUKt/Coimg+3Kafu43yNetgYnTXzNbEGAgxc81J5sI0qFNbQ613w7PNny+SmijAmGVroL0GDvx5rG/JI5Q==",
+      "dependencies": {
+        "core-js": "^2.6.12",
+        "regenerator-runtime": "^0.14.0"
+      },
+      "engines": {
+        "node": ">=6.9.0"
+      }
+    },
+    "node_modules/@mozilla/readability": {
+      "version": "0.5.0",
+      "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz",
+      "integrity": "sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==",
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
+    "node_modules/@postlight/ci-failed-test-reporter": {
+      "version": "1.0.26",
+      "resolved": "https://registry.npmjs.org/@postlight/ci-failed-test-reporter/-/ci-failed-test-reporter-1.0.26.tgz",
+      "integrity": "sha512-xfXzxyOiKhco7Gx2OLTe9b66b0dFJw0elg94KGHoQXf5F8JqqFvdo35J8wayGOor64CSMvn+4Bjlu2NKV+yTGA==",
+      "dependencies": {
+        "dotenv": "^6.2.0",
+        "node-fetch": "^2.3.0"
+      },
+      "bin": {
+        "ciftr": "cli.js"
+      }
+    },
+    "node_modules/@postlight/parser": {
+      "version": "2.2.3",
+      "resolved": "https://registry.npmjs.org/@postlight/parser/-/parser-2.2.3.tgz",
+      "integrity": "sha512-4/syRvqJARgLN4yH8qtl634WO0+KINjkijU/SmhCJqqh8/aOfv5uQf+SquFpA+JwsAsbGzYQkIxSum29riOreg==",
+      "bundleDependencies": [
+        "jquery",
+        "moment-timezone",
+        "browser-request"
+      ],
+      "dependencies": {
+        "@babel/runtime-corejs2": "^7.2.0",
+        "@postlight/ci-failed-test-reporter": "^1.0",
+        "browser-request": "*",
+        "cheerio": "^0.22.0",
+        "difflib": "github:postlight/difflib.js",
+        "ellipsize": "0.1.0",
+        "iconv-lite": "0.5.0",
+        "jquery": "*",
+        "moment": "^2.23.0",
+        "moment-parseformat": "3.0.0",
+        "moment-timezone": "*",
+        "postman-request": "^2.88.1-postman.31",
+        "string-direction": "^0.1.2",
+        "turndown": "^7.1.1",
+        "valid-url": "^1.0.9",
+        "wuzzy": "^0.1.4",
+        "yargs-parser": "^15.0.1"
+      },
+      "bin": {
+        "mercury-parser": "cli.js",
+        "postlight-parser": "cli.js"
+      },
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/@postlight/parser/node_modules/browser-request": {
+      "version": "0.3.2",
+      "engines": [
+        "node"
+      ],
+      "inBundle": true,
+      "dependencies": {
+        "http-headers": "^3.0.1"
+      }
+    },
+    "node_modules/@postlight/parser/node_modules/http-headers": {
+      "version": "3.0.2",
+      "inBundle": true,
+      "license": "MIT",
+      "dependencies": {
+        "next-line": "^1.1.0"
+      }
+    },
+    "node_modules/@postlight/parser/node_modules/jquery": {
+      "version": "3.6.0",
+      "inBundle": true,
+      "license": "MIT"
+    },
+    "node_modules/@postlight/parser/node_modules/moment": {
+      "version": "2.29.4",
+      "inBundle": true,
+      "license": "MIT",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/@postlight/parser/node_modules/moment-timezone": {
+      "version": "0.5.37",
+      "inBundle": true,
+      "license": "MIT",
+      "dependencies": {
+        "moment": ">= 2.9.0"
+      },
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/@postlight/parser/node_modules/next-line": {
+      "version": "1.1.0",
+      "inBundle": true,
+      "license": "MIT"
+    },
+    "node_modules/@postman/form-data": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/@postman/form-data/-/form-data-3.1.1.tgz",
+      "integrity": "sha512-vjh8Q2a8S6UCm/KKs31XFJqEEgmbjBmpPNVV2eVav6905wyFAwaUOBGA1NPBI4ERH9MMZc6w0umFgM6WbEPMdg==",
+      "dependencies": {
+        "asynckit": "^0.4.0",
+        "combined-stream": "^1.0.8",
+        "mime-types": "^2.1.12"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/@postman/tough-cookie": {
+      "version": "4.1.3-postman.1",
+      "resolved": "https://registry.npmjs.org/@postman/tough-cookie/-/tough-cookie-4.1.3-postman.1.tgz",
+      "integrity": "sha512-txpgUqZOnWYnUHZpHjkfb0IwVH4qJmyq77pPnJLlfhMtdCLMFTEeQHlzQiK906aaNCe4NEB5fGJHo9uzGbFMeA==",
+      "dependencies": {
+        "psl": "^1.1.33",
+        "punycode": "^2.1.1",
+        "universalify": "^0.2.0",
+        "url-parse": "^1.5.3"
+      },
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/@postman/tunnel-agent": {
+      "version": "0.6.3",
+      "resolved": "https://registry.npmjs.org/@postman/tunnel-agent/-/tunnel-agent-0.6.3.tgz",
+      "integrity": "sha512-k57fzmAZ2PJGxfOA4SGR05ejorHbVAa/84Hxh/2nAztjNXc4ZjOm9NUIk6/Z6LCrBvJZqjRZbN8e/nROVUPVdg==",
+      "dependencies": {
+        "safe-buffer": "^5.0.1"
+      },
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/@puppeteer/browsers": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.0.0.tgz",
+      "integrity": "sha512-3PS82/5+tnpEaUWonjAFFvlf35QHF15xqyGd34GBa5oP5EPVfFXRsbSxIGYf1M+vZlqBZ3oxT1kRg9OYhtt8ng==",
+      "dependencies": {
+        "debug": "4.3.4",
+        "extract-zip": "2.0.1",
+        "progress": "2.0.3",
+        "proxy-agent": "6.3.1",
+        "tar-fs": "3.0.4",
+        "unbzip2-stream": "1.4.3",
+        "yargs": "17.7.2"
+      },
+      "bin": {
+        "browsers": "lib/cjs/main-cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@tootallnate/quickjs-emscripten": {
+      "version": "0.23.0",
+      "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
+      "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA=="
+    },
+    "node_modules/@types/node": {
+      "version": "20.12.7",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
+      "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
+      "optional": true,
+      "dependencies": {
+        "undici-types": "~5.26.4"
+      }
+    },
+    "node_modules/@types/yauzl": {
+      "version": "2.10.3",
+      "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz",
+      "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==",
+      "optional": true,
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
+    "node_modules/agent-base": {
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
+      "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
+      "dependencies": {
+        "debug": "^4.3.4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/ajv": {
+      "version": "6.12.6",
+      "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
+      "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
+      "dependencies": {
+        "fast-deep-equal": "^3.1.1",
+        "fast-json-stable-stringify": "^2.0.0",
+        "json-schema-traverse": "^0.4.1",
+        "uri-js": "^4.2.2"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/epoberezkin"
+      }
+    },
+    "node_modules/ansi-regex": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
+      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/ansi-styles": {
+      "version": "4.3.0",
+      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
+      "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
+      "dependencies": {
+        "color-convert": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
+      }
+    },
+    "node_modules/asn1": {
+      "version": "0.2.6",
+      "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz",
+      "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==",
+      "dependencies": {
+        "safer-buffer": "~2.1.0"
+      }
+    },
+    "node_modules/assert-plus": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz",
+      "integrity": "sha512-NfJ4UzBCcQGLDlQq7nHxH+tv3kyZ0hHQqF5BO6J7tNJeP5do1llPr8dZ8zHonfhAu0PHAdMkSo+8o0wxg9lZWw==",
+      "engines": {
+        "node": ">=0.8"
+      }
+    },
+    "node_modules/ast-types": {
+      "version": "0.13.4",
+      "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz",
+      "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==",
+      "dependencies": {
+        "tslib": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/asynckit": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+      "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
+    },
+    "node_modules/aws-sign2": {
+      "version": "0.7.0",
+      "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz",
+      "integrity": "sha512-08kcGqnYf/YmjoRhfxyu+CLxBjUtHLXLXX/vUfx9l2LYzG3c1m61nrpyFUZI6zeS+Li/wWMMidD9KgrqtGq3mA==",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/aws4": {
+      "version": "1.12.0",
+      "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.12.0.tgz",
+      "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg=="
+    },
+    "node_modules/b4a": {
+      "version": "1.6.6",
+      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz",
+      "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg=="
+    },
+    "node_modules/bare-events": {
+      "version": "2.2.2",
+      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.2.tgz",
+      "integrity": "sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==",
+      "optional": true
+    },
+    "node_modules/base64-js": {
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
+      "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ]
+    },
+    "node_modules/basic-ftp": {
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
+      "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==",
+      "engines": {
+        "node": ">=10.0.0"
+      }
+    },
+    "node_modules/bcrypt-pbkdf": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz",
+      "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==",
+      "dependencies": {
+        "tweetnacl": "^0.14.3"
+      }
+    },
+    "node_modules/bidi-js": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz",
+      "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==",
+      "dependencies": {
+        "require-from-string": "^2.0.2"
+      }
+    },
+    "node_modules/bluebird": {
+      "version": "2.11.0",
+      "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-2.11.0.tgz",
+      "integrity": "sha512-UfFSr22dmHPQqPP9XWHRhq+gWnHCYguQGkXQlbyPtW5qTnhFWA8/iXg765tH0cAjy7l/zPJ1aBTO0g5XgA7kvQ=="
+    },
+    "node_modules/boolbase": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
+      "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
+    },
+    "node_modules/brotli": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/brotli/-/brotli-1.3.3.tgz",
+      "integrity": "sha512-oTKjJdShmDuGW94SyyaoQvAjf30dZaHnjJ8uAF+u2/vGJkJbJPJAT1gDiOJP5v1Zb6f9KEyW/1HpuaWIXtGHPg==",
+      "dependencies": {
+        "base64-js": "^1.1.2"
+      }
+    },
+    "node_modules/buffer": {
+      "version": "5.7.1",
+      "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
+      "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ],
+      "dependencies": {
+        "base64-js": "^1.3.1",
+        "ieee754": "^1.1.13"
+      }
+    },
+    "node_modules/buffer-crc32": {
+      "version": "0.2.13",
+      "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
+      "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/camelcase": {
+      "version": "5.3.1",
+      "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz",
+      "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/caseless": {
+      "version": "0.12.0",
+      "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz",
+      "integrity": "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="
+    },
+    "node_modules/cheerio": {
+      "version": "0.22.0",
+      "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-0.22.0.tgz",
+      "integrity": "sha512-8/MzidM6G/TgRelkzDG13y3Y9LxBjCb+8yOEZ9+wwq5gVF2w2pV0wmHvjfT0RvuxGyR7UEuK36r+yYMbT4uKgA==",
+      "dependencies": {
+        "css-select": "~1.2.0",
+        "dom-serializer": "~0.1.0",
+        "entities": "~1.1.1",
+        "htmlparser2": "^3.9.1",
+        "lodash.assignin": "^4.0.9",
+        "lodash.bind": "^4.1.4",
+        "lodash.defaults": "^4.0.1",
+        "lodash.filter": "^4.4.0",
+        "lodash.flatten": "^4.2.0",
+        "lodash.foreach": "^4.3.0",
+        "lodash.map": "^4.4.0",
+        "lodash.merge": "^4.4.0",
+        "lodash.pick": "^4.2.1",
+        "lodash.reduce": "^4.4.0",
+        "lodash.reject": "^4.4.0",
+        "lodash.some": "^4.4.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/chromium-bidi": {
+      "version": "0.5.8",
+      "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.8.tgz",
+      "integrity": "sha512-blqh+1cEQbHBKmok3rVJkBlBxt9beKBgOsxbFgs7UJcoVbbeZ+K7+6liAsjgpc8l1Xd55cQUy14fXZdGSb4zIw==",
+      "dependencies": {
+        "mitt": "3.0.1",
+        "urlpattern-polyfill": "10.0.0"
+      },
+      "peerDependencies": {
+        "devtools-protocol": "*"
+      }
+    },
+    "node_modules/cliui": {
+      "version": "8.0.1",
+      "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
+      "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==",
+      "dependencies": {
+        "string-width": "^4.2.0",
+        "strip-ansi": "^6.0.1",
+        "wrap-ansi": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/color-convert": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
+      "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
+      "dependencies": {
+        "color-name": "~1.1.4"
+      },
+      "engines": {
+        "node": ">=7.0.0"
+      }
+    },
+    "node_modules/color-name": {
+      "version": "1.1.4",
+      "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
+      "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
+    },
+    "node_modules/combined-stream": {
+      "version": "1.0.8",
+      "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+      "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+      "dependencies": {
+        "delayed-stream": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/core-js": {
+      "version": "2.6.12",
+      "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.12.tgz",
+      "integrity": "sha512-Kb2wC0fvsWfQrgk8HU5lW6U/Lcs8+9aaYcy4ZFc6DDlo4nZ7n70dEgE5rtR0oG6ufKDUnrwfWL1mXR5ljDatrQ==",
+      "deprecated": "core-js@<3.23.3 is no longer maintained and not recommended for usage due to the number of issues. Because of the V8 engine whims, feature detection in old core-js versions could cause a slowdown up to 100x even if nothing is polyfilled. Some versions have web compatibility issues. Please, upgrade your dependencies to the actual version of core-js.",
+      "hasInstallScript": true
+    },
+    "node_modules/core-util-is": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
+      "integrity": "sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ=="
+    },
+    "node_modules/cross-fetch": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.0.0.tgz",
+      "integrity": "sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==",
+      "dependencies": {
+        "node-fetch": "^2.6.12"
+      }
+    },
+    "node_modules/css-select": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz",
+      "integrity": "sha512-dUQOBoqdR7QwV90WysXPLXG5LO7nhYBgiWVfxF80DKPF8zx1t/pUd2FYy73emg3zrjtM6dzmYgbHKfV2rxiHQA==",
+      "dependencies": {
+        "boolbase": "~1.0.0",
+        "css-what": "2.1",
+        "domutils": "1.5.1",
+        "nth-check": "~1.0.1"
+      }
+    },
+    "node_modules/css-tree": {
+      "version": "2.3.1",
+      "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-2.3.1.tgz",
+      "integrity": "sha512-6Fv1DV/TYw//QF5IzQdqsNDjx/wc8TrMBZsqjL9eW01tWb7R7k/mq+/VXfJCl7SoD5emsJop9cOByJZfs8hYIw==",
+      "dependencies": {
+        "mdn-data": "2.0.30",
+        "source-map-js": "^1.0.1"
+      },
+      "engines": {
+        "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0"
+      }
+    },
+    "node_modules/css-what": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
+      "integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/cssstyle": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.0.1.tgz",
+      "integrity": "sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==",
+      "dependencies": {
+        "rrweb-cssom": "^0.6.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/dashdash": {
+      "version": "1.14.1",
+      "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz",
+      "integrity": "sha512-jRFi8UDGo6j+odZiEpjazZaWqEal3w/basFjQHQEwVtZJGDpxbH1MeYluwCS8Xq5wmLJooDlMgvVarmWfGM44g==",
+      "dependencies": {
+        "assert-plus": "^1.0.0"
+      },
+      "engines": {
+        "node": ">=0.10"
+      }
+    },
+    "node_modules/data-uri-to-buffer": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
+      "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==",
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/data-urls": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-5.0.0.tgz",
+      "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==",
+      "dependencies": {
+        "whatwg-mimetype": "^4.0.0",
+        "whatwg-url": "^14.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/data-urls/node_modules/tr46": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz",
+      "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==",
+      "dependencies": {
+        "punycode": "^2.3.1"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/data-urls/node_modules/whatwg-url": {
+      "version": "14.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.0.0.tgz",
+      "integrity": "sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==",
+      "dependencies": {
+        "tr46": "^5.0.0",
+        "webidl-conversions": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/debug": {
+      "version": "4.3.4",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
+      "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
+      "dependencies": {
+        "ms": "2.1.2"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "peerDependenciesMeta": {
+        "supports-color": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/decamelize": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
+      "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/decimal.js": {
+      "version": "10.4.3",
+      "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.4.3.tgz",
+      "integrity": "sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA=="
+    },
+    "node_modules/degenerator": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
+      "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==",
+      "dependencies": {
+        "ast-types": "^0.13.4",
+        "escodegen": "^2.1.0",
+        "esprima": "^4.0.1"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/delayed-stream": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+      "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+      "engines": {
+        "node": ">=0.4.0"
+      }
+    },
+    "node_modules/devtools-protocol": {
+      "version": "0.0.1232444",
+      "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz",
+      "integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg=="
+    },
+    "node_modules/difflib": {
+      "version": "0.2.6",
+      "resolved": "git+ssh://[email protected]/postlight/difflib.js.git#32e8e38c7fcd935241b9baab71bb432fd9b166ed",
+      "dependencies": {
+        "heap": ">= 0.2.0"
+      }
+    },
+    "node_modules/dom-serializer": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.1.tgz",
+      "integrity": "sha512-l0IU0pPzLWSHBcieZbpOKgkIn3ts3vAh7ZuFyXNwJxJXk/c4Gwj9xaTJwIDVQCXawWD0qb3IzMGH5rglQaO0XA==",
+      "dependencies": {
+        "domelementtype": "^1.3.0",
+        "entities": "^1.1.1"
+      }
+    },
+    "node_modules/domelementtype": {
+      "version": "1.3.1",
+      "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.1.tgz",
+      "integrity": "sha512-BSKB+TSpMpFI/HOxCNr1O8aMOTZ8hT3pM3GQ0w/mWRmkhEDSFJkkyzz4XQsBV44BChwGkrDfMyjVD0eA2aFV3w=="
+    },
+    "node_modules/domhandler": {
+      "version": "2.4.2",
+      "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz",
+      "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==",
+      "dependencies": {
+        "domelementtype": "1"
+      }
+    },
+    "node_modules/domino": {
+      "version": "2.1.6",
+      "resolved": "https://registry.npmjs.org/domino/-/domino-2.1.6.tgz",
+      "integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ=="
+    },
+    "node_modules/dompurify": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.0.tgz",
+      "integrity": "sha512-yoU4rhgPKCo+p5UrWWWNKiIq+ToGqmVVhk0PmMYBK4kRsR3/qhemNFL8f6CFmBd4gMwm3F4T7HBoydP5uY07fA=="
+    },
+    "node_modules/domutils": {
+      "version": "1.5.1",
+      "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz",
+      "integrity": "sha512-gSu5Oi/I+3wDENBsOWBiRK1eoGxcywYSqg3rR960/+EfY0CF4EX1VPkgHOZ3WiS/Jg2DtliF6BhWcHlfpYUcGw==",
+      "dependencies": {
+        "dom-serializer": "0",
+        "domelementtype": "1"
+      }
+    },
+    "node_modules/dotenv": {
+      "version": "6.2.0",
+      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-6.2.0.tgz",
+      "integrity": "sha512-HygQCKUBSFl8wKQZBSemMywRWcEDNidvNbjGVyZu3nbZ8qq9ubiPoGLMdRDpfSrpkkm9BXYFkpKxxFX38o/76w==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/ecc-jsbn": {
+      "version": "0.1.2",
+      "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz",
+      "integrity": "sha512-eh9O+hwRHNbG4BLTjEl3nw044CkGm5X6LoaCf7LPp7UU8Qrt47JYNi6nPX8xjW97TKGKm1ouctg0QSpZe9qrnw==",
+      "dependencies": {
+        "jsbn": "~0.1.0",
+        "safer-buffer": "^2.1.0"
+      }
+    },
+    "node_modules/ecc-jsbn/node_modules/jsbn": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
+      "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg=="
+    },
+    "node_modules/ellipsize": {
+      "version": "0.1.0",
+      "resolved": "https://registry.npmjs.org/ellipsize/-/ellipsize-0.1.0.tgz",
+      "integrity": "sha512-5gxbEjcb/Z2n6TTmXZx9wVi3N/DOzE7RXY3Xg9dakDuhX/izwumB9rGjeWUV6dTA0D0+juvo+JonZgNR9sgA5A=="
+    },
+    "node_modules/emoji-regex": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
+      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
+    },
+    "node_modules/end-of-stream": {
+      "version": "1.4.4",
+      "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
+      "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==",
+      "dependencies": {
+        "once": "^1.4.0"
+      }
+    },
+    "node_modules/entities": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz",
+      "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
+    },
+    "node_modules/escalade": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz",
+      "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/escodegen": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz",
+      "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==",
+      "dependencies": {
+        "esprima": "^4.0.1",
+        "estraverse": "^5.2.0",
+        "esutils": "^2.0.2"
+      },
+      "bin": {
+        "escodegen": "bin/escodegen.js",
+        "esgenerate": "bin/esgenerate.js"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "optionalDependencies": {
+        "source-map": "~0.6.1"
+      }
+    },
+    "node_modules/esprima": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz",
+      "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==",
+      "bin": {
+        "esparse": "bin/esparse.js",
+        "esvalidate": "bin/esvalidate.js"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/estraverse": {
+      "version": "5.3.0",
+      "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz",
+      "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==",
+      "engines": {
+        "node": ">=4.0"
+      }
+    },
+    "node_modules/esutils": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz",
+      "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/extend": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
+      "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="
+    },
+    "node_modules/extract-zip": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
+      "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
+      "dependencies": {
+        "debug": "^4.1.1",
+        "get-stream": "^5.1.0",
+        "yauzl": "^2.10.0"
+      },
+      "bin": {
+        "extract-zip": "cli.js"
+      },
+      "engines": {
+        "node": ">= 10.17.0"
+      },
+      "optionalDependencies": {
+        "@types/yauzl": "^2.9.1"
+      }
+    },
+    "node_modules/extsprintf": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
+      "integrity": "sha512-11Ndz7Nv+mvAC1j0ktTa7fAb0vLyGGX+rMHNBYQviQDGU0Hw7lhctJANqbPhu9nV9/izT/IntTgZ7Im/9LJs9g==",
+      "engines": [
+        "node >=0.6.0"
+      ]
+    },
+    "node_modules/fast-deep-equal": {
+      "version": "3.1.3",
+      "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
+      "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
+    },
+    "node_modules/fast-fifo": {
+      "version": "1.3.2",
+      "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
+      "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
+    },
+    "node_modules/fast-json-stable-stringify": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",
+      "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="
+    },
+    "node_modules/fd-slicer": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz",
+      "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==",
+      "dependencies": {
+        "pend": "~1.2.0"
+      }
+    },
+    "node_modules/file-url": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/file-url/-/file-url-3.0.0.tgz",
+      "integrity": "sha512-g872QGsHexznxkIAdK8UiZRe7SkE6kvylShU4Nsj8NvfvZag7S0QuQ4IgvPDkk75HxgjIVDwycFTDAgIiO4nDA==",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/forever-agent": {
+      "version": "0.6.1",
+      "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz",
+      "integrity": "sha512-j0KLYPhm6zeac4lz3oJ3o65qvgQCcPubiyotZrXqEaG4hNagNYO8qdlUrX5vwqv9ohqeT/Z3j6+yW067yWWdUw==",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/form-data": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
+      "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
+      "dependencies": {
+        "asynckit": "^0.4.0",
+        "combined-stream": "^1.0.8",
+        "mime-types": "^2.1.12"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/fs-extra": {
+      "version": "11.2.0",
+      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz",
+      "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==",
+      "dependencies": {
+        "graceful-fs": "^4.2.0",
+        "jsonfile": "^6.0.1",
+        "universalify": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=14.14"
+      }
+    },
+    "node_modules/fs-extra/node_modules/universalify": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
+      "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
+      "engines": {
+        "node": ">= 10.0.0"
+      }
+    },
+    "node_modules/get-caller-file": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
+      "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==",
+      "engines": {
+        "node": "6.* || 8.* || >= 10.*"
+      }
+    },
+    "node_modules/get-stream": {
+      "version": "5.2.0",
+      "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz",
+      "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==",
+      "dependencies": {
+        "pump": "^3.0.0"
+      },
+      "engines": {
+        "node": ">=8"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/get-uri": {
+      "version": "6.0.3",
+      "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz",
+      "integrity": "sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==",
+      "dependencies": {
+        "basic-ftp": "^5.0.2",
+        "data-uri-to-buffer": "^6.0.2",
+        "debug": "^4.3.4",
+        "fs-extra": "^11.2.0"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/getpass": {
+      "version": "0.1.7",
+      "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz",
+      "integrity": "sha512-0fzj9JxOLfJ+XGLhR8ze3unN0KZCgZwiSSDz168VERjK8Wl8kVSdcu2kspd4s4wtAa1y/qrVRiAA0WclVsu0ng==",
+      "dependencies": {
+        "assert-plus": "^1.0.0"
+      }
+    },
+    "node_modules/graceful-fs": {
+      "version": "4.2.11",
+      "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
+      "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="
+    },
+    "node_modules/har-schema": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz",
+      "integrity": "sha512-Oqluz6zhGX8cyRaTQlFMPw80bSJVG2x/cFb8ZPhUILGgHka9SsokCCOQgpveePerqidZOrT14ipqfJb7ILcW5Q==",
+      "engines": {
+        "node": ">=4"
+      }
+    },
+    "node_modules/har-validator": {
+      "version": "5.1.5",
+      "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.5.tgz",
+      "integrity": "sha512-nmT2T0lljbxdQZfspsno9hgrG3Uir6Ks5afism62poxqBM6sDnMEuPmzTq8XN0OEwqKLLdh1jQI3qyE66Nzb3w==",
+      "deprecated": "this library is no longer supported",
+      "dependencies": {
+        "ajv": "^6.12.3",
+        "har-schema": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/heap": {
+      "version": "0.2.7",
+      "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz",
+      "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg=="
+    },
+    "node_modules/html-encoding-sniffer": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz",
+      "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==",
+      "dependencies": {
+        "whatwg-encoding": "^3.1.1"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/htmlparser2": {
+      "version": "3.10.1",
+      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.1.tgz",
+      "integrity": "sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==",
+      "dependencies": {
+        "domelementtype": "^1.3.1",
+        "domhandler": "^2.3.0",
+        "domutils": "^1.5.1",
+        "entities": "^1.1.1",
+        "inherits": "^2.0.1",
+        "readable-stream": "^3.1.1"
+      }
+    },
+    "node_modules/http-proxy-agent": {
+      "version": "7.0.2",
+      "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
+      "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
+      "dependencies": {
+        "agent-base": "^7.1.0",
+        "debug": "^4.3.4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/http-signature": {
+      "version": "1.3.6",
+      "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.3.6.tgz",
+      "integrity": "sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw==",
+      "dependencies": {
+        "assert-plus": "^1.0.0",
+        "jsprim": "^2.0.2",
+        "sshpk": "^1.14.1"
+      },
+      "engines": {
+        "node": ">=0.10"
+      }
+    },
+    "node_modules/https-proxy-agent": {
+      "version": "7.0.4",
+      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz",
+      "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==",
+      "dependencies": {
+        "agent-base": "^7.0.2",
+        "debug": "4"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/iconv-lite": {
+      "version": "0.5.0",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.5.0.tgz",
+      "integrity": "sha512-NnEhI9hIEKHOzJ4f697DMz9IQEXr/MMJ5w64vN2/4Ai+wRnvV7SBrL0KLoRlwaKVghOc7LQ5YkPLuX146b6Ydw==",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/ieee754": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
+      "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ]
+    },
+    "node_modules/immediate": {
+      "version": "3.0.6",
+      "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
+      "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ=="
+    },
+    "node_modules/inherits": {
+      "version": "2.0.4",
+      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
+      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
+    },
+    "node_modules/ip-address": {
+      "version": "9.0.5",
+      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz",
+      "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==",
+      "dependencies": {
+        "jsbn": "1.1.0",
+        "sprintf-js": "^1.1.3"
+      },
+      "engines": {
+        "node": ">= 12"
+      }
+    },
+    "node_modules/is-fullwidth-code-point": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
+      "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/is-potential-custom-element-name": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz",
+      "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ=="
+    },
+    "node_modules/is-typedarray": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz",
+      "integrity": "sha512-cyA56iCMHAh5CdzjJIa4aohJyeO1YbwLi3Jc35MmRU6poroFjIGZzUzupGiRPOjgHg9TLu43xbpwXk523fMxKA=="
+    },
+    "node_modules/isarray": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
+      "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
+    },
+    "node_modules/isstream": {
+      "version": "0.1.2",
+      "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz",
+      "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
+    },
+    "node_modules/jsbn": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz",
+      "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A=="
+    },
+    "node_modules/jsdom": {
+      "version": "23.2.0",
+      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.2.0.tgz",
+      "integrity": "sha512-L88oL7D/8ufIES+Zjz7v0aes+oBMh2Xnh3ygWvL0OaICOomKEPKuPnIfBJekiXr+BHbbMjrWn/xqrDQuxFTeyA==",
+      "dependencies": {
+        "@asamuzakjp/dom-selector": "^2.0.1",
+        "cssstyle": "^4.0.1",
+        "data-urls": "^5.0.0",
+        "decimal.js": "^10.4.3",
+        "form-data": "^4.0.0",
+        "html-encoding-sniffer": "^4.0.0",
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.2",
+        "is-potential-custom-element-name": "^1.0.1",
+        "parse5": "^7.1.2",
+        "rrweb-cssom": "^0.6.0",
+        "saxes": "^6.0.0",
+        "symbol-tree": "^3.2.4",
+        "tough-cookie": "^4.1.3",
+        "w3c-xmlserializer": "^5.0.0",
+        "webidl-conversions": "^7.0.0",
+        "whatwg-encoding": "^3.1.1",
+        "whatwg-mimetype": "^4.0.0",
+        "whatwg-url": "^14.0.0",
+        "ws": "^8.16.0",
+        "xml-name-validator": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "canvas": "^2.11.2"
+      },
+      "peerDependenciesMeta": {
+        "canvas": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/jsdom/node_modules/tr46": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz",
+      "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==",
+      "dependencies": {
+        "punycode": "^2.3.1"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/jsdom/node_modules/whatwg-url": {
+      "version": "14.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.0.0.tgz",
+      "integrity": "sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==",
+      "dependencies": {
+        "tr46": "^5.0.0",
+        "webidl-conversions": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/json-schema": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
+      "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="
+    },
+    "node_modules/json-schema-traverse": {
+      "version": "0.4.1",
+      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
+      "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg=="
+    },
+    "node_modules/json-stringify-safe": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
+      "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="
+    },
+    "node_modules/jsonfile": {
+      "version": "6.1.0",
+      "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
+      "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
+      "dependencies": {
+        "universalify": "^2.0.0"
+      },
+      "optionalDependencies": {
+        "graceful-fs": "^4.1.6"
+      }
+    },
+    "node_modules/jsonfile/node_modules/universalify": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
+      "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
+      "engines": {
+        "node": ">= 10.0.0"
+      }
+    },
+    "node_modules/jsprim": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-2.0.2.tgz",
+      "integrity": "sha512-gqXddjPqQ6G40VdnI6T6yObEC+pDNvyP95wdQhkWkg7crHH3km5qP1FsOXEkzEQwnz6gz5qGTn1c2Y52wP3OyQ==",
+      "engines": [
+        "node >=0.6.0"
+      ],
+      "dependencies": {
+        "assert-plus": "1.0.0",
+        "extsprintf": "1.3.0",
+        "json-schema": "0.4.0",
+        "verror": "1.10.0"
+      }
+    },
+    "node_modules/jszip": {
+      "version": "3.10.1",
+      "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
+      "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==",
+      "dependencies": {
+        "lie": "~3.3.0",
+        "pako": "~1.0.2",
+        "readable-stream": "~2.3.6",
+        "setimmediate": "^1.0.5"
+      }
+    },
+    "node_modules/jszip/node_modules/readable-stream": {
+      "version": "2.3.8",
+      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
+      "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
+      "dependencies": {
+        "core-util-is": "~1.0.0",
+        "inherits": "~2.0.3",
+        "isarray": "~1.0.0",
+        "process-nextick-args": "~2.0.0",
+        "safe-buffer": "~5.1.1",
+        "string_decoder": "~1.1.1",
+        "util-deprecate": "~1.0.1"
+      }
+    },
+    "node_modules/jszip/node_modules/safe-buffer": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+      "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
+    },
+    "node_modules/jszip/node_modules/string_decoder": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
+      "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
+      "dependencies": {
+        "safe-buffer": "~5.1.0"
+      }
+    },
+    "node_modules/lie": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz",
+      "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==",
+      "dependencies": {
+        "immediate": "~3.0.5"
+      }
+    },
+    "node_modules/lodash": {
+      "version": "4.17.21",
+      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
+      "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
+    },
+    "node_modules/lodash.assignin": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/lodash.assignin/-/lodash.assignin-4.2.0.tgz",
+      "integrity": "sha512-yX/rx6d/UTVh7sSVWVSIMjfnz95evAgDFdb1ZozC35I9mSFCkmzptOzevxjgbQUsc78NR44LVHWjsoMQXy9FDg=="
+    },
+    "node_modules/lodash.bind": {
+      "version": "4.2.1",
+      "resolved": "https://registry.npmjs.org/lodash.bind/-/lodash.bind-4.2.1.tgz",
+      "integrity": "sha512-lxdsn7xxlCymgLYo1gGvVrfHmkjDiyqVv62FAeF2i5ta72BipE1SLxw8hPEPLhD4/247Ijw07UQH7Hq/chT5LA=="
+    },
+    "node_modules/lodash.defaults": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
+      "integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="
+    },
+    "node_modules/lodash.filter": {
+      "version": "4.6.0",
+      "resolved": "https://registry.npmjs.org/lodash.filter/-/lodash.filter-4.6.0.tgz",
+      "integrity": "sha512-pXYUy7PR8BCLwX5mgJ/aNtyOvuJTdZAo9EQFUvMIYugqmJxnrYaANvTbgndOzHSCSR0wnlBBfRXJL5SbWxo3FQ=="
+    },
+    "node_modules/lodash.flatten": {
+      "version": "4.4.0",
+      "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz",
+      "integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g=="
+    },
+    "node_modules/lodash.foreach": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/lodash.foreach/-/lodash.foreach-4.5.0.tgz",
+      "integrity": "sha512-aEXTF4d+m05rVOAUG3z4vZZ4xVexLKZGF0lIxuHZ1Hplpk/3B6Z1+/ICICYRLm7c41Z2xiejbkCkJoTlypoXhQ=="
+    },
+    "node_modules/lodash.map": {
+      "version": "4.6.0",
+      "resolved": "https://registry.npmjs.org/lodash.map/-/lodash.map-4.6.0.tgz",
+      "integrity": "sha512-worNHGKLDetmcEYDvh2stPCrrQRkP20E4l0iIS7F8EvzMqBBi7ltvFN5m1HvTf1P7Jk1txKhvFcmYsCr8O2F1Q=="
+    },
+    "node_modules/lodash.merge": {
+      "version": "4.6.2",
+      "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
+      "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ=="
+    },
+    "node_modules/lodash.pick": {
+      "version": "4.4.0",
+      "resolved": "https://registry.npmjs.org/lodash.pick/-/lodash.pick-4.4.0.tgz",
+      "integrity": "sha512-hXt6Ul/5yWjfklSGvLQl8vM//l3FtyHZeuelpzK6mm99pNvN9yTDruNZPEJZD1oWrqo+izBmB7oUfWgcCX7s4Q=="
+    },
+    "node_modules/lodash.reduce": {
+      "version": "4.6.0",
+      "resolved": "https://registry.npmjs.org/lodash.reduce/-/lodash.reduce-4.6.0.tgz",
+      "integrity": "sha512-6raRe2vxCYBhpBu+B+TtNGUzah+hQjVdu3E17wfusjyrXBka2nBS8OH/gjVZ5PvHOhWmIZTYri09Z6n/QfnNMw=="
+    },
+    "node_modules/lodash.reject": {
+      "version": "4.6.0",
+      "resolved": "https://registry.npmjs.org/lodash.reject/-/lodash.reject-4.6.0.tgz",
+      "integrity": "sha512-qkTuvgEzYdyhiJBx42YPzPo71R1aEr0z79kAv7Ixg8wPFEjgRgJdUsGMG3Hf3OYSF/kHI79XhNlt+5Ar6OzwxQ=="
+    },
+    "node_modules/lodash.some": {
+      "version": "4.6.0",
+      "resolved": "https://registry.npmjs.org/lodash.some/-/lodash.some-4.6.0.tgz",
+      "integrity": "sha512-j7MJE+TuT51q9ggt4fSgVqro163BEFjAt3u97IqU+JA2DkWl80nFTrowzLpZ/BnpN7rrl0JA/593NAdd8p/scQ=="
+    },
+    "node_modules/lru-cache": {
+      "version": "7.18.3",
+      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz",
+      "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==",
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/mdn-data": {
+      "version": "2.0.30",
+      "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
+      "integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA=="
+    },
+    "node_modules/mime-db": {
+      "version": "1.52.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
+      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/mime-types": {
+      "version": "2.1.35",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
+      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
+      "dependencies": {
+        "mime-db": "1.52.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/mitt": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
+      "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw=="
+    },
+    "node_modules/mkdirp-classic": {
+      "version": "0.5.3",
+      "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
+      "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
+    },
+    "node_modules/moment-parseformat": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/moment-parseformat/-/moment-parseformat-3.0.0.tgz",
+      "integrity": "sha512-dVgXe6b6DLnv4CHG7a1zUe5mSXaIZ3c6lSHm/EKeVeQI2/4pwe0VRde8OyoCE1Ro2lKT5P6uT9JElF7KDLV+jw=="
+    },
+    "node_modules/ms": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
+      "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
+    },
+    "node_modules/netmask": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz",
+      "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==",
+      "engines": {
+        "node": ">= 0.4.0"
+      }
+    },
+    "node_modules/node-fetch": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
+      "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
+      "dependencies": {
+        "whatwg-url": "^5.0.0"
+      },
+      "engines": {
+        "node": "4.x || >=6.0.0"
+      },
+      "peerDependencies": {
+        "encoding": "^0.1.0"
+      },
+      "peerDependenciesMeta": {
+        "encoding": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/nth-check": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz",
+      "integrity": "sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==",
+      "dependencies": {
+        "boolbase": "~1.0.0"
+      }
+    },
+    "node_modules/nwsapi": {
+      "version": "2.2.9",
+      "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.9.tgz",
+      "integrity": "sha512-2f3F0SEEer8bBu0dsNCFF50N0cTThV1nWFYcEYFZttdW0lDAoybv9cQoK7X7/68Z89S7FoRrVjP1LPX4XRf9vg=="
+    },
+    "node_modules/oauth-sign": {
+      "version": "0.9.0",
+      "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz",
+      "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/once": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
+      "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
+      "dependencies": {
+        "wrappy": "1"
+      }
+    },
+    "node_modules/pac-proxy-agent": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.0.1.tgz",
+      "integrity": "sha512-ASV8yU4LLKBAjqIPMbrgtaKIvxQri/yh2OpI+S6hVa9JRkUI3Y3NPFbfngDtY7oFtSMD3w31Xns89mDa3Feo5A==",
+      "dependencies": {
+        "@tootallnate/quickjs-emscripten": "^0.23.0",
+        "agent-base": "^7.0.2",
+        "debug": "^4.3.4",
+        "get-uri": "^6.0.1",
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.2",
+        "pac-resolver": "^7.0.0",
+        "socks-proxy-agent": "^8.0.2"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/pac-resolver": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz",
+      "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==",
+      "dependencies": {
+        "degenerator": "^5.0.0",
+        "netmask": "^2.0.2"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/pako": {
+      "version": "1.0.11",
+      "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
+      "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="
+    },
+    "node_modules/parse5": {
+      "version": "7.1.2",
+      "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
+      "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
+      "dependencies": {
+        "entities": "^4.4.0"
+      },
+      "funding": {
+        "url": "https://github.com/inikulin/parse5?sponsor=1"
+      }
+    },
+    "node_modules/parse5/node_modules/entities": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
+      "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
+      "engines": {
+        "node": ">=0.12"
+      },
+      "funding": {
+        "url": "https://github.com/fb55/entities?sponsor=1"
+      }
+    },
+    "node_modules/pend": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
+      "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg=="
+    },
+    "node_modules/performance-now": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
+      "integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow=="
+    },
+    "node_modules/postman-request": {
+      "version": "2.88.1-postman.33",
+      "resolved": "https://registry.npmjs.org/postman-request/-/postman-request-2.88.1-postman.33.tgz",
+      "integrity": "sha512-uL9sCML4gPH6Z4hreDWbeinKU0p0Ke261nU7OvII95NU22HN6Dk7T/SaVPaj6T4TsQqGKIFw6/woLZnH7ugFNA==",
+      "dependencies": {
+        "@postman/form-data": "~3.1.1",
+        "@postman/tough-cookie": "~4.1.3-postman.1",
+        "@postman/tunnel-agent": "^0.6.3",
+        "aws-sign2": "~0.7.0",
+        "aws4": "^1.12.0",
+        "brotli": "^1.3.3",
+        "caseless": "~0.12.0",
+        "combined-stream": "~1.0.6",
+        "extend": "~3.0.2",
+        "forever-agent": "~0.6.1",
+        "har-validator": "~5.1.3",
+        "http-signature": "~1.3.1",
+        "is-typedarray": "~1.0.0",
+        "isstream": "~0.1.2",
+        "json-stringify-safe": "~5.0.1",
+        "mime-types": "^2.1.35",
+        "oauth-sign": "~0.9.0",
+        "performance-now": "^2.1.0",
+        "qs": "~6.5.3",
+        "safe-buffer": "^5.1.2",
+        "stream-length": "^1.0.2",
+        "uuid": "^8.3.2"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/process-nextick-args": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
+      "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag=="
+    },
+    "node_modules/progress": {
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
+      "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
+      "engines": {
+        "node": ">=0.4.0"
+      }
+    },
+    "node_modules/proxy-agent": {
+      "version": "6.3.1",
+      "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.3.1.tgz",
+      "integrity": "sha512-Rb5RVBy1iyqOtNl15Cw/llpeLH8bsb37gM1FUfKQ+Wck6xHlbAhWGUFiTRHtkjqGTA5pSHz6+0hrPW/oECihPQ==",
+      "dependencies": {
+        "agent-base": "^7.0.2",
+        "debug": "^4.3.4",
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.2",
+        "lru-cache": "^7.14.1",
+        "pac-proxy-agent": "^7.0.1",
+        "proxy-from-env": "^1.1.0",
+        "socks-proxy-agent": "^8.0.2"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/proxy-from-env": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
+      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
+    },
+    "node_modules/psl": {
+      "version": "1.9.0",
+      "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz",
+      "integrity": "sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag=="
+    },
+    "node_modules/pump": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
+      "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
+      "dependencies": {
+        "end-of-stream": "^1.1.0",
+        "once": "^1.3.1"
+      }
+    },
+    "node_modules/punycode": {
+      "version": "2.3.1",
+      "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
+      "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/puppeteer-core": {
+      "version": "22.0.0",
+      "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.0.0.tgz",
+      "integrity": "sha512-S3s91rLde0A86PWVeNY82h+P0fdS7CTiNWAicCVH/bIspRP4nS2PnO5j+VTFqCah0ZJizGzpVPAmxVYbLxTc9w==",
+      "dependencies": {
+        "@puppeteer/browsers": "2.0.0",
+        "chromium-bidi": "0.5.8",
+        "cross-fetch": "4.0.0",
+        "debug": "4.3.4",
+        "devtools-protocol": "0.0.1232444",
+        "ws": "8.16.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/qs": {
+      "version": "6.5.3",
+      "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz",
+      "integrity": "sha512-qxXIEh4pCGfHICj1mAJQ2/2XVZkjCDTcEgfoSQxc/fYivUZxTkk7L3bDBJSoNrEzXI17oUO5Dp07ktqE5KzczA==",
+      "engines": {
+        "node": ">=0.6"
+      }
+    },
+    "node_modules/querystringify": {
+      "version": "2.2.0",
+      "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
+      "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ=="
+    },
+    "node_modules/queue-tick": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz",
+      "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag=="
+    },
+    "node_modules/readability-extractor": {
+      "version": "0.0.11",
+      "resolved": "git+ssh://[email protected]/ArchiveBox/readability-extractor.git#057f2046f9535cfc6df7b8d551aaad32a9e6226c",
+      "dependencies": {
+        "@mozilla/readability": "^0.5.0",
+        "dompurify": "^3.0.6",
+        "jsdom": "^23.0.1"
+      },
+      "bin": {
+        "readability-extractor": "readability-extractor"
+      }
+    },
+    "node_modules/readable-stream": {
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
+      "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
+      "dependencies": {
+        "inherits": "^2.0.3",
+        "string_decoder": "^1.1.1",
+        "util-deprecate": "^1.0.1"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/regenerator-runtime": {
+      "version": "0.14.1",
+      "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz",
+      "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw=="
+    },
+    "node_modules/require-directory": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
+      "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/require-from-string": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
+      "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/requires-port": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
+      "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ=="
+    },
+    "node_modules/rrweb-cssom": {
+      "version": "0.6.0",
+      "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz",
+      "integrity": "sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw=="
+    },
+    "node_modules/safe-buffer": {
+      "version": "5.2.1",
+      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
+      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ]
+    },
+    "node_modules/safer-buffer": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
+    },
+    "node_modules/saxes": {
+      "version": "6.0.0",
+      "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
+      "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==",
+      "dependencies": {
+        "xmlchars": "^2.2.0"
+      },
+      "engines": {
+        "node": ">=v12.22.7"
+      }
+    },
+    "node_modules/selenium-webdriver": {
+      "version": "4.17.0",
+      "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.17.0.tgz",
+      "integrity": "sha512-e2E+2XBlGepzwgFbyQfSwo9Cbj6G5fFfs9MzAS00nC99EewmcS2rwn2MwtgfP7I5p1e7DYv4HQJXtWedsu6DvA==",
+      "dependencies": {
+        "jszip": "^3.10.1",
+        "tmp": "^0.2.1",
+        "ws": ">=8.14.2"
+      },
+      "engines": {
+        "node": ">= 14.20.0"
+      }
+    },
+    "node_modules/setimmediate": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
+      "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA=="
+    },
+    "node_modules/single-file-cli": {
+      "version": "1.1.54",
+      "resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.1.54.tgz",
+      "integrity": "sha512-wnVPg7BklhswwFVrtuFXbmluI4piHxg2dC0xATxYTeXAld6PnRPlnp7ufallRKArjFBZdP2u+ihMkOIp7A38XA==",
+      "dependencies": {
+        "file-url": "3.0.0",
+        "iconv-lite": "0.6.3",
+        "jsdom": "24.0.0",
+        "puppeteer-core": "22.0.0",
+        "selenium-webdriver": "4.17.0",
+        "single-file-core": "1.3.24",
+        "strong-data-uri": "1.0.6",
+        "yargs": "17.7.2"
+      },
+      "bin": {
+        "single-file": "single-file"
+      }
+    },
+    "node_modules/single-file-cli/node_modules/iconv-lite": {
+      "version": "0.6.3",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
+      "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3.0.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/single-file-cli/node_modules/jsdom": {
+      "version": "24.0.0",
+      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-24.0.0.tgz",
+      "integrity": "sha512-UDS2NayCvmXSXVP6mpTj+73JnNQadZlr9N68189xib2tx5Mls7swlTNao26IoHv46BZJFvXygyRtyXd1feAk1A==",
+      "dependencies": {
+        "cssstyle": "^4.0.1",
+        "data-urls": "^5.0.0",
+        "decimal.js": "^10.4.3",
+        "form-data": "^4.0.0",
+        "html-encoding-sniffer": "^4.0.0",
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.2",
+        "is-potential-custom-element-name": "^1.0.1",
+        "nwsapi": "^2.2.7",
+        "parse5": "^7.1.2",
+        "rrweb-cssom": "^0.6.0",
+        "saxes": "^6.0.0",
+        "symbol-tree": "^3.2.4",
+        "tough-cookie": "^4.1.3",
+        "w3c-xmlserializer": "^5.0.0",
+        "webidl-conversions": "^7.0.0",
+        "whatwg-encoding": "^3.1.1",
+        "whatwg-mimetype": "^4.0.0",
+        "whatwg-url": "^14.0.0",
+        "ws": "^8.16.0",
+        "xml-name-validator": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "canvas": "^2.11.2"
+      },
+      "peerDependenciesMeta": {
+        "canvas": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/single-file-cli/node_modules/tr46": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/tr46/-/tr46-5.0.0.tgz",
+      "integrity": "sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==",
+      "dependencies": {
+        "punycode": "^2.3.1"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/single-file-cli/node_modules/whatwg-url": {
+      "version": "14.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-14.0.0.tgz",
+      "integrity": "sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==",
+      "dependencies": {
+        "tr46": "^5.0.0",
+        "webidl-conversions": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/single-file-core": {
+      "version": "1.3.24",
+      "resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.3.24.tgz",
+      "integrity": "sha512-1B256mKBbNV8jXAV+hRyEv0aMa7tn0C0Ci+zx7Ya4ZXZB3b9/1MgKsB/fxVwDiL28WJSU0pxzh8ftIYubCNn9w=="
+    },
+    "node_modules/smart-buffer": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz",
+      "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==",
+      "engines": {
+        "node": ">= 6.0.0",
+        "npm": ">= 3.0.0"
+      }
+    },
+    "node_modules/socks": {
+      "version": "2.8.3",
+      "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.3.tgz",
+      "integrity": "sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==",
+      "dependencies": {
+        "ip-address": "^9.0.5",
+        "smart-buffer": "^4.2.0"
+      },
+      "engines": {
+        "node": ">= 10.0.0",
+        "npm": ">= 3.0.0"
+      }
+    },
+    "node_modules/socks-proxy-agent": {
+      "version": "8.0.3",
+      "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz",
+      "integrity": "sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==",
+      "dependencies": {
+        "agent-base": "^7.1.1",
+        "debug": "^4.3.4",
+        "socks": "^2.7.1"
+      },
+      "engines": {
+        "node": ">= 14"
+      }
+    },
+    "node_modules/source-map": {
+      "version": "0.6.1",
+      "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
+      "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
+      "optional": true,
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/source-map-js": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz",
+      "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/sprintf-js": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
+      "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="
+    },
+    "node_modules/sshpk": {
+      "version": "1.18.0",
+      "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz",
+      "integrity": "sha512-2p2KJZTSqQ/I3+HX42EpYOa2l3f8Erv8MWKsy2I9uf4wA7yFIkXRffYdsx86y6z4vHtV8u7g+pPlr8/4ouAxsQ==",
+      "dependencies": {
+        "asn1": "~0.2.3",
+        "assert-plus": "^1.0.0",
+        "bcrypt-pbkdf": "^1.0.0",
+        "dashdash": "^1.12.0",
+        "ecc-jsbn": "~0.1.1",
+        "getpass": "^0.1.1",
+        "jsbn": "~0.1.0",
+        "safer-buffer": "^2.0.2",
+        "tweetnacl": "~0.14.0"
+      },
+      "bin": {
+        "sshpk-conv": "bin/sshpk-conv",
+        "sshpk-sign": "bin/sshpk-sign",
+        "sshpk-verify": "bin/sshpk-verify"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/sshpk/node_modules/jsbn": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
+      "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg=="
+    },
+    "node_modules/stream-length": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/stream-length/-/stream-length-1.0.2.tgz",
+      "integrity": "sha512-aI+qKFiwoDV4rsXiS7WRoCt+v2RX1nUj17+KJC5r2gfh5xoSJIfP6Y3Do/HtvesFcTSWthIuJ3l1cvKQY/+nZg==",
+      "dependencies": {
+        "bluebird": "^2.6.2"
+      }
+    },
+    "node_modules/streamx": {
+      "version": "2.16.1",
+      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz",
+      "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==",
+      "dependencies": {
+        "fast-fifo": "^1.1.0",
+        "queue-tick": "^1.0.1"
+      },
+      "optionalDependencies": {
+        "bare-events": "^2.2.0"
+      }
+    },
+    "node_modules/string_decoder": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
+      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
+      "dependencies": {
+        "safe-buffer": "~5.2.0"
+      }
+    },
+    "node_modules/string-direction": {
+      "version": "0.1.2",
+      "resolved": "https://registry.npmjs.org/string-direction/-/string-direction-0.1.2.tgz",
+      "integrity": "sha512-NJHQRg6GlOEMLA6jEAlSy21KaXvJDNoAid/v6fBAJbqdvOEIiPpCrIPTHnl4636wUF/IGyktX5A9eddmETb1Cw=="
+    },
+    "node_modules/string-width": {
+      "version": "4.2.3",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
+      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+      "dependencies": {
+        "emoji-regex": "^8.0.0",
+        "is-fullwidth-code-point": "^3.0.0",
+        "strip-ansi": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/strip-ansi": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
+      "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
+      "dependencies": {
+        "ansi-regex": "^5.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/strong-data-uri": {
+      "version": "1.0.6",
+      "resolved": "https://registry.npmjs.org/strong-data-uri/-/strong-data-uri-1.0.6.tgz",
+      "integrity": "sha512-zhzBZev0uhT2IrFUerenXhfaE0vFUYwAZsnG0gIKGpfM/Gi6jOUQ3cmcvyTsXeDLIPiTubHESeO7EbD6FoPmzw==",
+      "dependencies": {
+        "truncate": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=0.8.0"
+      }
+    },
+    "node_modules/symbol-tree": {
+      "version": "3.2.4",
+      "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz",
+      "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw=="
+    },
+    "node_modules/tar-fs": {
+      "version": "3.0.4",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz",
+      "integrity": "sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==",
+      "dependencies": {
+        "mkdirp-classic": "^0.5.2",
+        "pump": "^3.0.0",
+        "tar-stream": "^3.1.5"
+      }
+    },
+    "node_modules/tar-stream": {
+      "version": "3.1.7",
+      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
+      "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
+      "dependencies": {
+        "b4a": "^1.6.4",
+        "fast-fifo": "^1.2.0",
+        "streamx": "^2.15.0"
+      }
+    },
+    "node_modules/through": {
+      "version": "2.3.8",
+      "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
+      "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg=="
+    },
+    "node_modules/tmp": {
+      "version": "0.2.3",
+      "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz",
+      "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==",
+      "engines": {
+        "node": ">=14.14"
+      }
+    },
+    "node_modules/tough-cookie": {
+      "version": "4.1.3",
+      "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.3.tgz",
+      "integrity": "sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==",
+      "dependencies": {
+        "psl": "^1.1.33",
+        "punycode": "^2.1.1",
+        "universalify": "^0.2.0",
+        "url-parse": "^1.5.3"
+      },
+      "engines": {
+        "node": ">=6"
+      }
+    },
+    "node_modules/tr46": {
+      "version": "0.0.3",
+      "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
+      "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
+    },
+    "node_modules/truncate": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/truncate/-/truncate-2.1.0.tgz",
+      "integrity": "sha512-em3E3SUDONOjTBcZ36DTm3RvDded3IRU9rX32oHwwXNt3rJD5MVaFlJTQvs8tJoHRoeYP36OuQ1eL/Q7bNEWIQ==",
+      "engines": {
+        "node": "*"
+      }
+    },
+    "node_modules/tslib": {
+      "version": "2.6.2",
+      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
+      "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
+    },
+    "node_modules/turndown": {
+      "version": "7.1.3",
+      "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.3.tgz",
+      "integrity": "sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==",
+      "dependencies": {
+        "domino": "^2.1.6"
+      }
+    },
+    "node_modules/tweetnacl": {
+      "version": "0.14.5",
+      "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz",
+      "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA=="
+    },
+    "node_modules/unbzip2-stream": {
+      "version": "1.4.3",
+      "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz",
+      "integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==",
+      "dependencies": {
+        "buffer": "^5.2.1",
+        "through": "^2.3.8"
+      }
+    },
+    "node_modules/undici-types": {
+      "version": "5.26.5",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+      "optional": true
+    },
+    "node_modules/universalify": {
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz",
+      "integrity": "sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==",
+      "engines": {
+        "node": ">= 4.0.0"
+      }
+    },
+    "node_modules/uri-js": {
+      "version": "4.4.1",
+      "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
+      "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
+      "dependencies": {
+        "punycode": "^2.1.0"
+      }
+    },
+    "node_modules/url-parse": {
+      "version": "1.5.10",
+      "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.10.tgz",
+      "integrity": "sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==",
+      "dependencies": {
+        "querystringify": "^2.1.1",
+        "requires-port": "^1.0.0"
+      }
+    },
+    "node_modules/urlpattern-polyfill": {
+      "version": "10.0.0",
+      "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
+      "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
+    },
+    "node_modules/util-deprecate": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
+    },
+    "node_modules/uuid": {
+      "version": "8.3.2",
+      "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
+      "integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==",
+      "bin": {
+        "uuid": "dist/bin/uuid"
+      }
+    },
+    "node_modules/valid-url": {
+      "version": "1.0.9",
+      "resolved": "https://registry.npmjs.org/valid-url/-/valid-url-1.0.9.tgz",
+      "integrity": "sha512-QQDsV8OnSf5Uc30CKSwG9lnhMPe6exHtTXLRYX8uMwKENy640pU+2BgBL0LRbDh/eYRahNCS7aewCx0wf3NYVA=="
+    },
+    "node_modules/verror": {
+      "version": "1.10.0",
+      "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz",
+      "integrity": "sha512-ZZKSmDAEFOijERBLkmYfJ+vmk3w+7hOLYDNkRCuRuMJGEmqYNCNLyBBFwWKVMhfwaEF3WOd0Zlw86U/WC/+nYw==",
+      "engines": [
+        "node >=0.6.0"
+      ],
+      "dependencies": {
+        "assert-plus": "^1.0.0",
+        "core-util-is": "1.0.2",
+        "extsprintf": "^1.2.0"
+      }
+    },
+    "node_modules/w3c-xmlserializer": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz",
+      "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==",
+      "dependencies": {
+        "xml-name-validator": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/webidl-conversions": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz",
+      "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==",
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/whatwg-encoding": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
+      "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==",
+      "dependencies": {
+        "iconv-lite": "0.6.3"
+      },
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/whatwg-encoding/node_modules/iconv-lite": {
+      "version": "0.6.3",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
+      "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3.0.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/whatwg-mimetype": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz",
+      "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==",
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/whatwg-url": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
+      "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
+      "dependencies": {
+        "tr46": "~0.0.3",
+        "webidl-conversions": "^3.0.0"
+      }
+    },
+    "node_modules/whatwg-url/node_modules/webidl-conversions": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
+      "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
+    },
+    "node_modules/wrap-ansi": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
+      "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
+      "dependencies": {
+        "ansi-styles": "^4.0.0",
+        "string-width": "^4.1.0",
+        "strip-ansi": "^6.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+      }
+    },
+    "node_modules/wrappy": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
+      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
+    },
+    "node_modules/ws": {
+      "version": "8.16.0",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.16.0.tgz",
+      "integrity": "sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/wuzzy": {
+      "version": "0.1.8",
+      "resolved": "https://registry.npmjs.org/wuzzy/-/wuzzy-0.1.8.tgz",
+      "integrity": "sha512-FUzKQepFSTnANsDYwxpIzGJ/dIJaqxuMre6tzzbvWwFAiUHPsI1nVQVCLK4Xqr67KO7oYAK0kaCcI/+WYj/7JA==",
+      "dependencies": {
+        "lodash": "^4.17.15"
+      }
+    },
+    "node_modules/xml-name-validator": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz",
+      "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==",
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/xmlchars": {
+      "version": "2.2.0",
+      "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",
+      "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw=="
+    },
+    "node_modules/y18n": {
+      "version": "5.0.8",
+      "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
+      "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==",
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/yargs": {
+      "version": "17.7.2",
+      "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
+      "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==",
+      "dependencies": {
+        "cliui": "^8.0.1",
+        "escalade": "^3.1.1",
+        "get-caller-file": "^2.0.5",
+        "require-directory": "^2.1.1",
+        "string-width": "^4.2.3",
+        "y18n": "^5.0.5",
+        "yargs-parser": "^21.1.1"
+      },
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/yargs-parser": {
+      "version": "15.0.3",
+      "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-15.0.3.tgz",
+      "integrity": "sha512-/MVEVjTXy/cGAjdtQf8dW3V9b97bPN7rNn8ETj6BmAQL7ibC7O1Q9SPJbGjgh3SlwoBNXMzj/ZGIj8mBgl12YA==",
+      "dependencies": {
+        "camelcase": "^5.0.0",
+        "decamelize": "^1.2.0"
+      }
+    },
+    "node_modules/yargs/node_modules/yargs-parser": {
+      "version": "21.1.1",
+      "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz",
+      "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==",
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/yauzl": {
+      "version": "2.10.0",
+      "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",
+      "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==",
+      "dependencies": {
+        "buffer-crc32": "~0.2.3",
+        "fd-slicer": "~1.1.0"
+      }
+    }
+  }
+}

+ 2 - 2
archivebox/package.json

@@ -1,6 +1,6 @@
 {
 {
   "name": "archivebox",
   "name": "archivebox",
-  "version": "0.7.2",
+  "version": "0.8.0",
   "description": "ArchiveBox: The self-hosted internet archive",
   "description": "ArchiveBox: The self-hosted internet archive",
   "author": "Nick Sweeting <[email protected]>",
   "author": "Nick Sweeting <[email protected]>",
   "repository": "github:ArchiveBox/ArchiveBox",
   "repository": "github:ArchiveBox/ArchiveBox",
@@ -8,6 +8,6 @@
   "dependencies": {
   "dependencies": {
     "@postlight/parser": "^2.2.3",
     "@postlight/parser": "^2.2.3",
     "readability-extractor": "github:ArchiveBox/readability-extractor",
     "readability-extractor": "github:ArchiveBox/readability-extractor",
-    "single-file-cli": "^1.1.46"
+    "single-file-cli": "^1.1.54"
   }
   }
 }
 }

+ 2 - 0
archivebox/parsers/__init__.py

@@ -44,6 +44,7 @@ from . import medium_rss
 from . import netscape_html
 from . import netscape_html
 from . import generic_rss
 from . import generic_rss
 from . import generic_json
 from . import generic_json
+from . import generic_jsonl
 from . import generic_html
 from . import generic_html
 from . import generic_txt
 from . import generic_txt
 from . import url_list
 from . import url_list
@@ -63,6 +64,7 @@ PARSERS = {
     netscape_html.KEY:  (netscape_html.NAME,    netscape_html.PARSER),
     netscape_html.KEY:  (netscape_html.NAME,    netscape_html.PARSER),
     generic_rss.KEY:    (generic_rss.NAME,      generic_rss.PARSER),
     generic_rss.KEY:    (generic_rss.NAME,      generic_rss.PARSER),
     generic_json.KEY:   (generic_json.NAME,     generic_json.PARSER),
     generic_json.KEY:   (generic_json.NAME,     generic_json.PARSER),
+    generic_jsonl.KEY:  (generic_jsonl.NAME,    generic_jsonl.PARSER),
     generic_html.KEY:   (generic_html.NAME,     generic_html.PARSER),
     generic_html.KEY:   (generic_html.NAME,     generic_html.PARSER),
 
 
     # Catchall fallback parser
     # Catchall fallback parser

+ 66 - 46
archivebox/parsers/generic_json.py

@@ -11,6 +11,60 @@ from ..util import (
     enforce_types,
     enforce_types,
 )
 )
 
 
+# This gets used by generic_jsonl, too
+def jsonObjectToLink(link: str, source: str):
+    json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z')
+
+    # example line
+    # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}]
+    # Parse URL
+    url = link.get('href') or link.get('url') or link.get('URL')
+    if not url:
+        raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]')
+
+    # Parse the timestamp
+    ts_str = str(datetime.now(timezone.utc).timestamp())
+    if link.get('timestamp'):
+        # chrome/ff histories use a very precise timestamp
+        ts_str = str(link['timestamp'] / 10000000)
+    elif link.get('time'):
+        ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp())
+    elif link.get('created_at'):
+        ts_str = str(json_date(link['created_at']).timestamp())
+    elif link.get('created'):
+        ts_str = str(json_date(link['created']).timestamp())
+    elif link.get('date'):
+        ts_str = str(json_date(link['date']).timestamp())
+    elif link.get('bookmarked'):
+        ts_str = str(json_date(link['bookmarked']).timestamp())
+    elif link.get('saved'):
+        ts_str = str(json_date(link['saved']).timestamp())
+
+    # Parse the title
+    title = None
+    if link.get('title'):
+        title = link['title'].strip()
+    elif link.get('description'):
+        title = link['description'].replace(' — Readability', '').strip()
+    elif link.get('name'):
+        title = link['name'].strip()
+
+    # if we have a list, join it with commas
+    tags = link.get('tags')
+    if type(tags) == list:
+        tags = ','.join(tags)
+    elif type(tags) == str:
+        # if there's no comma, assume it was space-separated
+        if ',' not in tags:
+            tags = tags.replace(' ', ',')
+
+    return Link(
+        url=htmldecode(url),
+        timestamp=ts_str,
+        title=htmldecode(title) or None,
+        tags=htmldecode(tags),
+        sources=[source],
+    )
 
 
 @enforce_types
 @enforce_types
 def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
 def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
@@ -18,55 +72,21 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
 
 
     json_file.seek(0)
     json_file.seek(0)
 
 
-    # sometimes the first line is a comment or filepath, so we get everything after the first {
-    json_file_json_str = '{' + json_file.read().split('{', 1)[-1]
-    links = json.loads(json_file_json_str)
-    json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z')
+    try:
+        links = json.load(json_file)
+        if type(links) != list:
+            raise Exception('JSON parser expects list of objects, maybe this is JSONL?')
+    except json.decoder.JSONDecodeError:
+        # sometimes the first line is a comment or other junk, so try without
+        json_file.seek(0)
+        first_line = json_file.readline()
+        #print('      > Trying JSON parser without first line: "', first_line.strip(), '"', sep= '')
+        links = json.load(json_file)
+        # we may fail again, which means we really don't know what to do
 
 
     for link in links:
     for link in links:
-        # example line
-        # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}]
         if link:
         if link:
-            # Parse URL
-            url = link.get('href') or link.get('url') or link.get('URL')
-            if not url:
-                raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]')
-
-            # Parse the timestamp
-            ts_str = str(datetime.now(timezone.utc).timestamp())
-            if link.get('timestamp'):
-                # chrome/ff histories use a very precise timestamp
-                ts_str = str(link['timestamp'] / 10000000)  
-            elif link.get('time'):
-                ts_str = str(json_date(link['time'].split(',', 1)[0]).timestamp())
-            elif link.get('created_at'):
-                ts_str = str(json_date(link['created_at']).timestamp())
-            elif link.get('created'):
-                ts_str = str(json_date(link['created']).timestamp())
-            elif link.get('date'):
-                ts_str = str(json_date(link['date']).timestamp())
-            elif link.get('bookmarked'):
-                ts_str = str(json_date(link['bookmarked']).timestamp())
-            elif link.get('saved'):
-                ts_str = str(json_date(link['saved']).timestamp())
-            
-            # Parse the title
-            title = None
-            if link.get('title'):
-                title = link['title'].strip()
-            elif link.get('description'):
-                title = link['description'].replace(' — Readability', '').strip()
-            elif link.get('name'):
-                title = link['name'].strip()
-
-            yield Link(
-                url=htmldecode(url),
-                timestamp=ts_str,
-                title=htmldecode(title) or None,
-                tags=htmldecode(link.get('tags')) or '',
-                sources=[json_file.name],
-            )
-
+            yield jsonObjectToLink(link,json_file.name)
 
 
 KEY = 'json'
 KEY = 'json'
 NAME = 'Generic JSON'
 NAME = 'Generic JSON'

+ 34 - 0
archivebox/parsers/generic_jsonl.py

@@ -0,0 +1,34 @@
+__package__ = 'archivebox.parsers'
+
+import json
+
+from typing import IO, Iterable
+from datetime import datetime, timezone
+
+from ..index.schema import Link
+from ..util import (
+    htmldecode,
+    enforce_types,
+)
+
+from .generic_json import jsonObjectToLink
+
+def parse_line(line: str):
+    if line.strip() != "":
+        return json.loads(line)
+
+@enforce_types
+def parse_generic_jsonl_export(json_file: IO[str], **_kwargs) -> Iterable[Link]:
+    """Parse JSONL format bookmarks export files"""
+
+    json_file.seek(0)
+
+    links = [ parse_line(line) for line in json_file ]
+
+    for link in links:
+        if link:
+            yield jsonObjectToLink(link,json_file.name)
+
+KEY = 'jsonl'
+NAME = 'Generic JSONL'
+PARSER = parse_generic_jsonl_export

+ 20 - 28
archivebox/parsers/generic_rss.py

@@ -2,13 +2,13 @@ __package__ = 'archivebox.parsers'
 
 
 
 
 from typing import IO, Iterable
 from typing import IO, Iterable
-from datetime import datetime
+from time import mktime
+from feedparser import parse as feedparser
 
 
 from ..index.schema import Link
 from ..index.schema import Link
 from ..util import (
 from ..util import (
     htmldecode,
     htmldecode,
-    enforce_types,
-    str_between,
+    enforce_types
 )
 )
 
 
 @enforce_types
 @enforce_types
@@ -16,35 +16,27 @@ def parse_generic_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse RSS XML-format files into links"""
     """Parse RSS XML-format files into links"""
 
 
     rss_file.seek(0)
     rss_file.seek(0)
-    items = rss_file.read().split('<item>')
-    items = items[1:] if items else []
-    for item in items:
-        # example item:
-        # <item>
-        # <title><![CDATA[How JavaScript works: inside the V8 engine]]></title>
-        # <category>Unread</category>
-        # <link>https://blog.sessionstack.com/how-javascript-works-inside</link>
-        # <guid>https://blog.sessionstack.com/how-javascript-works-inside</guid>
-        # <pubDate>Mon, 21 Aug 2017 14:21:58 -0500</pubDate>
-        # </item>
-
-        trailing_removed = item.split('</item>', 1)[0]
-        leading_removed = trailing_removed.split('<item>', 1)[-1].strip()
-        rows = leading_removed.split('\n')
-
-        def get_row(key):
-            return [r for r in rows if r.strip().startswith('<{}>'.format(key))][0]
-
-        url = str_between(get_row('link'), '<link>', '</link>')
-        ts_str = str_between(get_row('pubDate'), '<pubDate>', '</pubDate>')
-        time = datetime.strptime(ts_str, "%a, %d %b %Y %H:%M:%S %z")
-        title = str_between(get_row('title'), '<![CDATA[', ']]').strip()
+    feed = feedparser(rss_file.read())
+    for item in feed.entries:
+        url = item.link
+        title = item.title
+        time = mktime(item.updated_parsed)
+
+        try:
+            tags = ','.join(map(lambda tag: tag.term, item.tags))
+        except AttributeError:
+            tags = ''
+
+        if url is None:
+            # Yielding a Link with no URL will
+            # crash on a URL validation assertion
+            continue
 
 
         yield Link(
         yield Link(
             url=htmldecode(url),
             url=htmldecode(url),
-            timestamp=str(time.timestamp()),
+            timestamp=str(time),
             title=htmldecode(title) or None,
             title=htmldecode(title) or None,
-            tags=None,
+            tags=tags,
             sources=[rss_file.name],
             sources=[rss_file.name],
         )
         )
 
 

+ 16 - 25
archivebox/parsers/pinboard_rss.py

@@ -2,50 +2,41 @@ __package__ = 'archivebox.parsers'
 
 
 
 
 from typing import IO, Iterable
 from typing import IO, Iterable
-from datetime import datetime, timezone
-
-from xml.etree import ElementTree
+from time import mktime
+from feedparser import parse as feedparser
 
 
 from ..index.schema import Link
 from ..index.schema import Link
 from ..util import (
 from ..util import (
     htmldecode,
     htmldecode,
-    enforce_types,
+    enforce_types
 )
 )
 
 
-
 @enforce_types
 @enforce_types
 def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
 def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]:
     """Parse Pinboard RSS feed files into links"""
     """Parse Pinboard RSS feed files into links"""
 
 
     rss_file.seek(0)
     rss_file.seek(0)
-    root = ElementTree.parse(rss_file).getroot()
-    items = root.findall("{http://purl.org/rss/1.0/}item")
-    for item in items:
-        find = lambda p: item.find(p).text.strip() if item.find(p) is not None else None    # type: ignore
-
-        url = find("{http://purl.org/rss/1.0/}link")
-        tags = find("{http://purl.org/dc/elements/1.1/}subject")
-        title = find("{http://purl.org/rss/1.0/}title")
-        ts_str = find("{http://purl.org/dc/elements/1.1/}date")
+    feed = feedparser(rss_file.read())
+    for item in feed.entries:
+        url = item.link
+        # title will start with "[priv] " if pin was marked private. useful?
+        title = item.title
+        time = mktime(item.updated_parsed)
+
+        # all tags are in one entry.tags with spaces in it. annoying!
+        try:
+            tags = item.tags[0].term.replace(' ', ',')
+        except AttributeError:
+            tags = ''
         
         
         if url is None:
         if url is None:
             # Yielding a Link with no URL will
             # Yielding a Link with no URL will
             # crash on a URL validation assertion
             # crash on a URL validation assertion
             continue
             continue
 
 
-        # Pinboard includes a colon in its date stamp timezone offsets, which
-        # Python can't parse. Remove it:
-        if ts_str and ts_str[-3:-2] == ":":
-            ts_str = ts_str[:-3]+ts_str[-2:]
-
-        if ts_str:
-            time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
-        else:
-            time = datetime.now(timezone.utc)
-
         yield Link(
         yield Link(
             url=htmldecode(url),
             url=htmldecode(url),
-            timestamp=str(time.timestamp()),
+            timestamp=str(time),
             title=htmldecode(title) or None,
             title=htmldecode(title) or None,
             tags=htmldecode(tags) or None,
             tags=htmldecode(tags) or None,
             sources=[rss_file.name],
             sources=[rss_file.name],

+ 19 - 16
archivebox/system.py

@@ -30,8 +30,7 @@ def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False,
 
 
     if capture_output:
     if capture_output:
         if ('stdout' in kwargs) or ('stderr' in kwargs):
         if ('stdout' in kwargs) or ('stderr' in kwargs):
-            raise ValueError('stdout and stderr arguments may not be used '
-                             'with capture_output.')
+            raise ValueError('stdout and stderr arguments may not be used with capture_output.')
         kwargs['stdout'] = PIPE
         kwargs['stdout'] = PIPE
         kwargs['stderr'] = PIPE
         kwargs['stderr'] = PIPE
 
 
@@ -146,20 +145,24 @@ def get_dir_size(path: Union[str, Path], recursive: bool=True, pattern: Optional
        recursively and limiting to a given filter list
        recursively and limiting to a given filter list
     """
     """
     num_bytes, num_dirs, num_files = 0, 0, 0
     num_bytes, num_dirs, num_files = 0, 0, 0
-    for entry in os.scandir(path):
-        if (pattern is not None) and (pattern not in entry.path):
-            continue
-        if entry.is_dir(follow_symlinks=False):
-            if not recursive:
+    try:
+        for entry in os.scandir(path):
+            if (pattern is not None) and (pattern not in entry.path):
                 continue
                 continue
-            num_dirs += 1
-            bytes_inside, dirs_inside, files_inside = get_dir_size(entry.path)
-            num_bytes += bytes_inside
-            num_dirs += dirs_inside
-            num_files += files_inside
-        else:
-            num_bytes += entry.stat(follow_symlinks=False).st_size
-            num_files += 1
+            if entry.is_dir(follow_symlinks=False):
+                if not recursive:
+                    continue
+                num_dirs += 1
+                bytes_inside, dirs_inside, files_inside = get_dir_size(entry.path)
+                num_bytes += bytes_inside
+                num_dirs += dirs_inside
+                num_files += files_inside
+            else:
+                num_bytes += entry.stat(follow_symlinks=False).st_size
+                num_files += 1
+    except OSError:
+        # e.g. FileNameTooLong or other error while trying to read dir
+        pass
     return num_bytes, num_dirs, num_files
     return num_bytes, num_dirs, num_files
 
 
 
 
@@ -171,7 +174,7 @@ def dedupe_cron_jobs(cron: CronTab) -> CronTab:
     deduped: Set[Tuple[str, str]] = set()
     deduped: Set[Tuple[str, str]] = set()
 
 
     for job in list(cron):
     for job in list(cron):
-        unique_tuple = (str(job.slices), job.command)
+        unique_tuple = (str(job.slices), str(job.command))
         if unique_tuple not in deduped:
         if unique_tuple not in deduped:
             deduped.add(unique_tuple)
             deduped.add(unique_tuple)
         cron.remove(job)
         cron.remove(job)

+ 1 - 1
archivebox/templates/core/navigation.html

@@ -5,7 +5,7 @@
     <a href="{% url 'Home' %}">Snapshots</a> |
     <a href="{% url 'Home' %}">Snapshots</a> |
     <a href="/admin/core/tag/">Tags</a> |
     <a href="/admin/core/tag/">Tags</a> |
     <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
     <a href="/admin/core/archiveresult/?o=-1">Log</a> &nbsp; &nbsp;
-    <a href="{% url 'Docs' %}">Docs</a> | 
+    <a href="{% url 'Docs' %}" target="_blank" rel="noopener noreferrer">Docs</a> | 
     <a href="{% url 'public-index' %}">Public</a> | 
     <a href="{% url 'public-index' %}">Public</a> | 
     <a href="/admin/">Admin</a>
     <a href="/admin/">Admin</a>
      &nbsp; &nbsp;
      &nbsp; &nbsp;

File diff suppressed because it is too large
+ 0 - 0
archivebox/templates/core/snapshot.html


+ 54 - 10
archivebox/util.py

@@ -3,6 +3,7 @@ __package__ = 'archivebox'
 import re
 import re
 import requests
 import requests
 import json as pyjson
 import json as pyjson
+import http.cookiejar
 
 
 from typing import List, Optional, Any
 from typing import List, Optional, Any
 from pathlib import Path
 from pathlib import Path
@@ -200,9 +201,22 @@ def parse_date(date: Any) -> Optional[datetime]:
 @enforce_types
 @enforce_types
 def download_url(url: str, timeout: int=None) -> str:
 def download_url(url: str, timeout: int=None) -> str:
     """Download the contents of a remote url and return the text"""
     """Download the contents of a remote url and return the text"""
-    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT
+    from .config import (
+        TIMEOUT,
+        CHECK_SSL_VALIDITY,
+        WGET_USER_AGENT,
+        COOKIES_FILE,
+    )
     timeout = timeout or TIMEOUT
     timeout = timeout or TIMEOUT
-    response = requests.get(
+    session = requests.Session()
+
+    if COOKIES_FILE and Path(COOKIES_FILE).is_file():
+        cookie_jar = http.cookiejar.MozillaCookieJar(COOKIES_FILE)
+        cookie_jar.load(ignore_discard=True, ignore_expires=True)
+        for cookie in cookie_jar:
+            session.cookies.set(cookie.name, cookie.value, domain=cookie.domain, path=cookie.path)
+
+    response = session.get(
         url,
         url,
         headers={'User-Agent': WGET_USER_AGENT},
         headers={'User-Agent': WGET_USER_AGENT},
         verify=CHECK_SSL_VALIDITY,
         verify=CHECK_SSL_VALIDITY,
@@ -215,7 +229,11 @@ def download_url(url: str, timeout: int=None) -> str:
     if encoding is not None:
     if encoding is not None:
         response.encoding = encoding
         response.encoding = encoding
 
 
-    return response.text
+    try:
+        return response.text
+    except UnicodeDecodeError:
+        # if response is non-test (e.g. image or other binary files), just return the filename instead
+        return url.rsplit('/', 1)[-1]
 
 
 @enforce_types
 @enforce_types
 def get_headers(url: str, timeout: int=None) -> str:
 def get_headers(url: str, timeout: int=None) -> str:
@@ -257,7 +275,13 @@ def get_headers(url: str, timeout: int=None) -> str:
 def chrome_args(**options) -> List[str]:
 def chrome_args(**options) -> List[str]:
     """helper to build up a chrome shell command with arguments"""
     """helper to build up a chrome shell command with arguments"""
 
 
-    from .config import CHROME_OPTIONS, CHROME_VERSION
+    # Chrome CLI flag documentation: https://peter.sh/experiments/chromium-command-line-switches/
+
+    from .config import (
+        CHROME_OPTIONS,
+        CHROME_VERSION,
+        CHROME_EXTRA_ARGS,
+    )
 
 
     options = {**CHROME_OPTIONS, **options}
     options = {**CHROME_OPTIONS, **options}
 
 
@@ -266,6 +290,8 @@ def chrome_args(**options) -> List[str]:
 
 
     cmd_args = [options['CHROME_BINARY']]
     cmd_args = [options['CHROME_BINARY']]
 
 
+    cmd_args += CHROME_EXTRA_ARGS
+
     if options['CHROME_HEADLESS']:
     if options['CHROME_HEADLESS']:
         chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
         chrome_major_version = int(re.search(r'\s(\d+)\.\d', CHROME_VERSION)[1])
         if chrome_major_version >= 111:
         if chrome_major_version >= 111:
@@ -284,14 +310,19 @@ def chrome_args(**options) -> List[str]:
             "--disable-software-rasterizer",
             "--disable-software-rasterizer",
             "--run-all-compositor-stages-before-draw",
             "--run-all-compositor-stages-before-draw",
             "--hide-scrollbars",
             "--hide-scrollbars",
-            "--window-size=1440,2000",
             "--autoplay-policy=no-user-gesture-required",
             "--autoplay-policy=no-user-gesture-required",
             "--no-first-run",
             "--no-first-run",
             "--use-fake-ui-for-media-stream",
             "--use-fake-ui-for-media-stream",
             "--use-fake-device-for-media-stream",
             "--use-fake-device-for-media-stream",
             "--disable-sync",
             "--disable-sync",
+            # "--password-store=basic",
         )
         )
+    
+    # disable automatic updating when running headless, as there's no user to see the upgrade prompts
+    cmd_args += ("--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'",)
 
 
+    # set window size for screenshot/pdf/etc. rendering
+    cmd_args += ('--window-size={}'.format(options['RESOLUTION']),)
 
 
     if not options['CHECK_SSL_VALIDITY']:
     if not options['CHECK_SSL_VALIDITY']:
         cmd_args += ('--disable-web-security', '--ignore-certificate-errors')
         cmd_args += ('--disable-web-security', '--ignore-certificate-errors')
@@ -299,16 +330,15 @@ def chrome_args(**options) -> List[str]:
     if options['CHROME_USER_AGENT']:
     if options['CHROME_USER_AGENT']:
         cmd_args += ('--user-agent={}'.format(options['CHROME_USER_AGENT']),)
         cmd_args += ('--user-agent={}'.format(options['CHROME_USER_AGENT']),)
 
 
-    if options['RESOLUTION']:
-        cmd_args += ('--window-size={}'.format(options['RESOLUTION']),)
-
     if options['CHROME_TIMEOUT']:
     if options['CHROME_TIMEOUT']:
        cmd_args += ('--timeout={}'.format(options['CHROME_TIMEOUT'] * 1000),)
        cmd_args += ('--timeout={}'.format(options['CHROME_TIMEOUT'] * 1000),)
 
 
     if options['CHROME_USER_DATA_DIR']:
     if options['CHROME_USER_DATA_DIR']:
         cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))
         cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))
-    
-    return cmd_args
+        cmd_args.append('--profile-directory=Default')
+
+    return dedupe(cmd_args)
+
 
 
 def chrome_cleanup():
 def chrome_cleanup():
     """
     """
@@ -345,6 +375,20 @@ def ansi_to_html(text):
     return COLOR_REGEX.sub(single_sub, text)
     return COLOR_REGEX.sub(single_sub, text)
 
 
 
 
+@enforce_types
+def dedupe(options: List[str]) -> List[str]:
+    """
+    Deduplicates the given options. Options that come later clobber earlier
+    conflicting options.
+    """
+    deduped = {}
+
+    for option in options:
+        deduped[option.split('=')[0]] = option
+
+    return list(deduped.values())
+
+
 class AttributeDict(dict):
 class AttributeDict(dict):
     """Helper to allow accessing dict values via Example.key or Example['key']"""
     """Helper to allow accessing dict values via Example.key or Example['key']"""
 
 

+ 6 - 0
archivebox/vendor/requirements.txt

@@ -0,0 +1,6 @@
+# this folder contains vendored versions of these packages
+
+atomicwrites==1.4.0
+pocket==0.3.7
+django-taggit==1.3.0
+base32-crockford==0.3.0

+ 2 - 2
bin/build_brew.sh

@@ -10,7 +10,7 @@ set -o nounset
 set -o pipefail
 set -o pipefail
 IFS=$'\n'
 IFS=$'\n'
 
 
-REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null 2>&1 && cd .. && pwd )"
 
 
 
 
 CURRENT_PLAFORM="$(uname)"
 CURRENT_PLAFORM="$(uname)"
@@ -45,7 +45,7 @@ pip3 uninstall yt-dlp || true
 # brew untap archivebox/archivebox || true
 # brew untap archivebox/archivebox || true
 
 
 echo
 echo
-echo "[+] Installing and building hombrew bottle from https://Github.com/ArchiveBox/homebrew-archivebox#main"
+echo "[+] Installing and building hombrew bottle from https://github.com/ArchiveBox/homebrew-archivebox#main"
 brew tap archivebox/archivebox
 brew tap archivebox/archivebox
 brew install --build-bottle archivebox
 brew install --build-bottle archivebox
 brew bottle archivebox
 brew bottle archivebox

+ 14 - 0
bin/build_deb.sh

@@ -31,6 +31,20 @@ else
     echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
     echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
 fi
 fi
 
 
+
+# Build python package lists
+# https://pdm-project.org/latest/usage/lockfile/
+echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..."
+pdm lock --group=':all' --production --lockfile pdm.lock --strategy="cross_platform"
+pdm sync --group=':all' --production --lockfile pdm.lock --clean || pdm sync --group=':all' --production --lockfile pdm.lock --clean
+pdm export --group=':all' --production --lockfile pdm.lock --without-hashes -o requirements.txt
+
+pdm lock --group=':all' --dev --lockfile pdm.dev.lock --strategy="cross_platform" 
+pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean || pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean
+pdm export --group=':all' --dev --lockfile pdm.dev.lock --without-hashes -o requirements-dev.txt
+
+
+
 # cleanup build artifacts
 # cleanup build artifacts
 rm -Rf build deb_dist dist archivebox-*.tar.gz
 rm -Rf build deb_dist dist archivebox-*.tar.gz
 
 

+ 15 - 1
bin/build_dev.sh

@@ -21,6 +21,20 @@ VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
 SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
 SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
 REQUIRED_PLATFORMS="${2:-"linux/arm64,linux/amd64,linux/arm/v7"}"
 REQUIRED_PLATFORMS="${2:-"linux/arm64,linux/amd64,linux/arm/v7"}"
 
 
+
+# Build python package lists
+# https://pdm-project.org/latest/usage/lockfile/
+echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..."
+pdm lock --group=':all' --production --lockfile pdm.lock --strategy="cross_platform"
+pdm sync --group=':all' --production --lockfile pdm.lock --clean || pdm sync --group=':all' --production --lockfile pdm.lock --clean
+pdm export --group=':all' --production --lockfile pdm.lock --without-hashes -o requirements.txt
+
+pdm lock --group=':all' --dev --lockfile pdm.dev.lock --strategy="cross_platform" 
+pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean || pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean
+pdm export --group=':all' --dev --lockfile pdm.dev.lock --without-hashes -o requirements-dev.txt
+
+
+
 echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$REQUIRED_PLATFORMS"
 echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$REQUIRED_PLATFORMS"
 
 
 
 
@@ -32,4 +46,4 @@ docker build . --no-cache -t archivebox-dev --load
 #                -t archivebox \
 #                -t archivebox \
 #                -t archivebox:$TAG_NAME \
 #                -t archivebox:$TAG_NAME \
 #                -t archivebox:$VERSION \
 #                -t archivebox:$VERSION \
-#                -t archivebox:$SHORT_VERSION
+#                -t archivebox:$SHORT_VERSION

+ 2 - 4
bin/build_docker.sh

@@ -71,10 +71,8 @@ docker buildx use xbuilder 2>&1 >/dev/null || create_builder
 check_platforms || (recreate_builder && check_platforms) || exit 1
 check_platforms || (recreate_builder && check_platforms) || exit 1
 
 
 
 
-# Build python package lists
-echo "[+] Generating requirements.txt and pdm.lock from pyproject.toml..."
-pdm lock --group=':all' --strategy="cross_platform" --production
-pdm export --group=':all' --production --without-hashes -o requirements.txt
+# Make sure pyproject.toml, pdm{.dev}.lock, requirements{-dev}.txt, package{-lock}.json are all up-to-date
+bash ./bin/lock_pkgs.sh
 
 
 
 
 echo "[+] Building archivebox:$VERSION docker image..."
 echo "[+] Building archivebox:$VERSION docker image..."

+ 4 - 11
bin/build_pip.sh

@@ -20,20 +20,13 @@ else
 fi
 fi
 cd "$REPO_DIR"
 cd "$REPO_DIR"
 
 
-echo "[*] Cleaning up build dirs"
-cd "$REPO_DIR"
-rm -Rf build dist
+# Generate pdm.lock, requirements.txt, and package-lock.json
+bash ./bin/lock_pkgs.sh
 
 
 echo "[+] Building sdist, bdist_wheel, and egg_info"
 echo "[+] Building sdist, bdist_wheel, and egg_info"
-rm -f archivebox/package.json
-cp package.json archivebox/package.json
-
-pdm self update
-pdm install
+rm -Rf build dist
 pdm build
 pdm build
-pdm export --without-hashes -o ./pip_dist/requirements.txt
-
 cp dist/* ./pip_dist/
 cp dist/* ./pip_dist/
 
 
 echo
 echo
-echo "[√] Finished. Don't forget to commit the new sdist and wheel files in ./pip_dist/"
+echo "[√] Finished. Don't forget to commit the new sdist and wheel files in ./pip_dist/"

+ 61 - 14
bin/docker_entrypoint.sh

@@ -35,7 +35,7 @@ export DEFAULT_PGID=911
 if [[ "$PUID" == "0" ]]; then
 if [[ "$PUID" == "0" ]]; then
     echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
     echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
     echo -e "    Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr
     echo -e "    Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr
-        echo -e "          leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr
+        echo -e "          leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr
         echo -e "    https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
         echo -e "    https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
     exit 3
     exit 3
 fi
 fi
@@ -46,6 +46,7 @@ export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null ||
 
 
 # If data directory exists but is owned by root, use defaults instead of root because root is not allowed
 # If data directory exists but is owned by root, use defaults instead of root because root is not allowed
 [[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID"
 [[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID"
+# (GUID / DETECTED_GUID is allowed to be 0 though)
 
 
 # Set archivebox user and group ids to desired PUID/PGID
 # Set archivebox user and group ids to desired PUID/PGID
 usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
 usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
@@ -64,32 +65,42 @@ if [[ -d "$DATA_DIR/archive" ]]; then
         # echo "[√] Permissions are correct"
         # echo "[√] Permissions are correct"
     else
     else
      # the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive dissapeared, etc.)
      # the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive dissapeared, etc.)
-        echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir (currently owned by $(stat -c '%u' "$DATA_DIR"):$(stat -c '%g' "$DATA_DIR")." >&2
+        echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data/archive dir (currently owned by $(stat -c '%u' "$DATA_DIR/archive"):$(stat -c '%g' "$DATA_DIR/archive")." > /dev/stderr
         echo -e "    Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
         echo -e "    Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
         echo -e "       \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
         echo -e "       \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
         echo -e "    Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr
         echo -e "    Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr
         echo -e "       https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr
         echo -e "       https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr
         echo -e "    Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr
         echo -e "    Hint: some NFS/SMB/FUSE/etc. filesystems force-remap/ignore all permissions," > /dev/stderr
-        echo -e "          leave PUID/PGID unset, or use values the filesystem prefers (defaults to $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr
+        echo -e "          leave PUID/PGID unset, disable root_squash, or use values the drive prefers (default is $DEFAULT_PUID:$DEFAULT_PGID)" > /dev/stderr
         echo -e "    https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
         echo -e "    https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
         exit 3
         exit 3
     fi
     fi
 else
 else
-    # create data directory
+    # create data directory (and logs, since its the first dir ArchiveBox needs to write to)
     mkdir -p "$DATA_DIR/logs"
     mkdir -p "$DATA_DIR/logs"
 fi
 fi
 
 
 # force set the ownership of the data dir contents to the archivebox user and group
 # force set the ownership of the data dir contents to the archivebox user and group
 # this is needed because Docker Desktop often does not map user permissions from the host properly
 # this is needed because Docker Desktop often does not map user permissions from the host properly
 chown $PUID:$PGID "$DATA_DIR"
 chown $PUID:$PGID "$DATA_DIR"
-chown $PUID:$PGID "$DATA_DIR"/*
+if ! chown $PUID:$PGID "$DATA_DIR"/* > /dev/null 2>&1; then
+    # users may store the ./data/archive folder on a network mount that prevents chmod/chown
+    # fallback to chowning everything else in ./data and leaving ./data/archive alone
+    find "$DATA_DIR" -type d -not -path "$DATA_DIR/archive*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1
+    find "$DATA_DIR" -type f -not -path "$DATA_DIR/archive/*" -exec chown $PUID:$PGID {} \; > /dev/null 2>&1
+fi
+    
 
 
-# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome at runtime
+# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to 'playwright install chromium' at runtime
 export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
 export PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
 mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
 mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
-chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
-chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
 rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
 rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
+chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
+if [[ -d "$PLAYWRIGHT_BROWSERS_PATH/.links" ]]; then
+    chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
+    chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.*
+    chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/*
+fi
 
 
 
 
 # (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
 # (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
@@ -100,7 +111,7 @@ if [[ "$IN_QEMU" == "True" ]]; then
     echo -e "    See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
     echo -e "    See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
 fi
 fi
 
 
-# check disk space free on / and /data, warn on <500Mb free, error on <100Mb free
+# check disk space free on /, /data, and /data/archive, warn on <500Mb free, error on <100Mb free
 export ROOT_USAGE="$(df --output=pcent,avail / | tail -n 1 | xargs)"
 export ROOT_USAGE="$(df --output=pcent,avail / | tail -n 1 | xargs)"
 export ROOT_USED_PCT="${ROOT_USAGE%%%*}"
 export ROOT_USED_PCT="${ROOT_USAGE%%%*}"
 export ROOT_AVAIL_KB="$(echo "$ROOT_USAGE" | awk '{print $2}')"
 export ROOT_AVAIL_KB="$(echo "$ROOT_USAGE" | awk '{print $2}')"
@@ -117,22 +128,58 @@ elif [[ "$ROOT_USED_PCT" -ge 99 ]] || [[ "$ROOT_AVAIL_KB" -lt 500000 ]]; then
     df -kh / > /dev/stderr
     df -kh / > /dev/stderr
 fi
 fi
 
 
-export DATA_USAGE="$(df --output=pcent,avail /data | tail -n 1 | xargs)"
+export DATA_USAGE="$(df --output=pcent,avail "$DATA_DIR" | tail -n 1 | xargs)"
 export DATA_USED_PCT="${DATA_USAGE%%%*}"
 export DATA_USED_PCT="${DATA_USAGE%%%*}"
 export DATA_AVAIL_KB="$(echo "$DATA_USAGE" | awk '{print $2}')"
 export DATA_AVAIL_KB="$(echo "$DATA_USAGE" | awk '{print $2}')"
 if [[ "$DATA_AVAIL_KB" -lt 100000 ]]; then
 if [[ "$DATA_AVAIL_KB" -lt 100000 ]]; then
-    echo -e "\n[!] Warning: Docker data volume is completely out of space! (${DATA_USED_PCT}% used on /data)" > /dev/stderr
+    echo -e "\n[!] Warning: Docker data volume is completely out of space! (${DATA_USED_PCT}% used on $DATA_DIR)" > /dev/stderr
     echo -e "    you need to free up at least 100Mb on the drive holding your data directory" > /dev/stderr
     echo -e "    you need to free up at least 100Mb on the drive holding your data directory" > /dev/stderr
     echo -e "    \$ ncdu -x data\n" > /dev/stderr
     echo -e "    \$ ncdu -x data\n" > /dev/stderr
-    df -kh /data > /dev/stderr
+    df -kh "$DATA_DIR" > /dev/stderr
     sleep 5
     sleep 5
 elif [[ "$DATA_USED_PCT" -ge 99 ]] || [[ "$ROOT_AVAIL_KB" -lt 500000 ]]; then
 elif [[ "$DATA_USED_PCT" -ge 99 ]] || [[ "$ROOT_AVAIL_KB" -lt 500000 ]]; then
-    echo -e "\n[!] Warning: Docker data volume is running out of space! (${DATA_USED_PCT}% used on /data)" > /dev/stderr
+    echo -e "\n[!] Warning: Docker data volume is running out of space! (${DATA_USED_PCT}% used on $DATA_DIR)" > /dev/stderr
     echo -e "    you may need to free up space on the drive holding your data directory soon" > /dev/stderr
     echo -e "    you may need to free up space on the drive holding your data directory soon" > /dev/stderr
     echo -e "    \$ ncdu -x data\n" > /dev/stderr
     echo -e "    \$ ncdu -x data\n" > /dev/stderr
-    df -kh /data > /dev/stderr
+    df -kh "$DATA_DIR" > /dev/stderr
+else
+    # data/ has space available, but check data/archive separately, because it might be on a network mount or external drive
+    if [[ -d "$DATA_DIR/archive" ]]; then
+        export ARCHIVE_USAGE="$(df --output=pcent,avail "$DATA_DIR/archive" | tail -n 1 | xargs)"
+        export ARCHIVE_USED_PCT="${ARCHIVE_USAGE%%%*}"
+        export ARCHIVE_AVAIL_KB="$(echo "$ARCHIVE_USAGE" | awk '{print $2}')"
+        if [[ "$ARCHIVE_AVAIL_KB" -lt 100000 ]]; then
+            echo -e "\n[!] Warning: data/archive folder is completely out of space! (${ARCHIVE_USED_PCT}% used on $DATA_DIR/archive)" > /dev/stderr
+            echo -e "    you need to free up at least 100Mb on the drive holding your data/archive directory" > /dev/stderr
+            echo -e "    \$ ncdu -x data/archive\n" > /dev/stderr
+            df -kh "$DATA_DIR/archive" > /dev/stderr
+            sleep 5
+        elif [[ "$ARCHIVE_USED_PCT" -ge 99 ]] || [[ "$ROOT_AVAIL_KB" -lt 500000 ]]; then
+            echo -e "\n[!] Warning: data/archive folder is running out of space! (${ARCHIVE_USED_PCT}% used on $DATA_DIR/archive)" > /dev/stderr
+            echo -e "    you may need to free up space on the drive holding your data/archive directory soon" > /dev/stderr
+            echo -e "    \$ ncdu -x data/archive\n" > /dev/stderr
+            df -kh "$DATA_DIR/archive" > /dev/stderr
+        fi
+    fi
+fi
+
+# symlink etc crontabs into place
+mkdir -p "$DATA_DIR/crontabs"
+if ! test -L /var/spool/cron/crontabs; then
+    # copy files from old location into new data dir location
+    for file in $(ls /var/spool/cron/crontabs); do
+        cp /var/spool/cron/crontabs/"$file" "$DATA_DIR/crontabs"
+    done
+    # replace old system path with symlink to data dir location
+    rm -Rf /var/spool/cron/crontabs
+    ln -s "$DATA_DIR/crontabs" /var/spool/cron/crontabs
 fi
 fi
 
 
+# set DBUS_SYSTEM_BUS_ADDRESS & DBUS_SESSION_BUS_ADDRESS
+# (dbus is not actually needed, it makes chrome log fewer warnings but isn't worth making our docker images bigger)
+# service dbus start >/dev/null 2>&1 &
+# export $(dbus-launch --close-stderr)
+
 
 
 export ARCHIVEBOX_BIN_PATH="$(which archivebox)"
 export ARCHIVEBOX_BIN_PATH="$(which archivebox)"
 
 

+ 100 - 0
bin/lock_pkgs.sh

@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+
+### Bash Environment Setup
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/
+# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
+# set -o xtrace
+set -o errexit
+set -o errtrace
+set -o nounset
+set -o pipefail
+IFS=$'\n'
+
+REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
+
+cd "$REPO_DIR"
+
+py_version="$(grep 'version = ' pyproject.toml | awk '{print $3}' | jq -r)"
+js_version="$(jq -r '.version' package.json)"
+
+if [[ "$py_version" != "$js_version" ]]; then
+    echo "[❌] Version in pyproject.toml ($py_version) does not match version in package.json ($js_version)!"
+    exit 1
+fi
+
+echo "[🔒] Locking all ArchiveBox dependencies (pip, npm)"
+echo
+echo "pyproject.toml:              archivebox $py_version"
+echo "package.json:                archivebox $js_version"
+echo
+echo
+
+echo "[*] Cleaning up old lockfiles and build files"
+rm -Rf build dist
+rm -f pdm.lock
+rm -f pdm.dev.lock
+rm -f requirements.txt
+rm -f requirements-dev.txt
+rm -f package-lock.json
+rm -f archivebox/package.json
+rm -f archivebox/package-lock.json
+rm -Rf ./.venv
+rm -Rf ./node_modules
+rm -Rf ./archivebox/node_modules
+
+echo
+echo
+
+echo "[+] Generating dev & prod requirements.txt & pdm.lock from pyproject.toml..."
+pip install --upgrade pip setuptools
+pdm self update
+pdm venv create 3.12
+echo
+echo "pyproject.toml:    archivebox $(grep 'version = ' pyproject.toml | awk '{print $3}' | jq -r)"
+echo "$(which python):   $(python --version | head -n 1)"
+echo "$(which pdm):      $(pdm --version | head -n 1)"
+pdm info --env
+pdm info
+
+echo
+# https://pdm-project.org/latest/usage/lockfile/
+# prod
+pdm lock --group=':all' --production --lockfile pdm.lock --strategy="cross_platform"
+pdm sync --group=':all' --production --lockfile pdm.lock --clean
+pdm export --group=':all' --production --lockfile pdm.lock --without-hashes -o requirements.txt
+cp ./pdm.lock ./pip_dist/
+cp ./requirements.txt ./pip_dist/
+# dev
+pdm lock --group=':all' --dev --lockfile pdm.dev.lock --strategy="cross_platform" 
+pdm sync --group=':all' --dev --lockfile pdm.dev.lock --clean
+pdm export --group=':all' --dev --lockfile pdm.dev.lock --without-hashes -o requirements-dev.txt
+cp ./pdm.dev.lock ./pip_dist/
+cp ./requirements-dev.txt ./pip_dist/
+
+echo
+echo "[+]] Generating package-lock.json from package.json..."
+npm install -g npm
+echo
+echo "package.json:    archivebox $(jq -r '.version' package.json)"
+echo
+echo "$(which node):   $(node --version | head -n 1)"
+echo "$(which npm):    $(npm --version | head -n 1)"
+
+echo
+npm install --package-lock-only
+cp package.json archivebox/package.json
+cp package-lock.json archivebox/package-lock.json
+
+echo
+echo "[√] Finished. Don't forget to commit the new lockfiles:"
+echo
+ls "pyproject.toml" | cat
+ls "pdm.lock" | cat
+ls "pdm.dev.lock" | cat
+ls "requirements.txt" | cat
+ls "requirements-dev.txt" | cat
+echo
+ls "package.json" | cat
+ls "package-lock.json" | cat
+ls "archivebox/package.json" | cat
+ls "archivebox/package-lock.json" | cat

+ 1 - 1
bin/setup.sh

@@ -165,7 +165,7 @@ if ! (python3 --version && python3 -m pip --version && python3 -m django --versi
     exit 1
     exit 1
 fi
 fi
 
 
-if ! (python3 -m django --version && which -a archivebox); then
+if ! (python3 -m django --version && python3 -m pip show archivebox && which -a archivebox); then
     echo "[X] Django and ArchiveBox were not found after installing!"
     echo "[X] Django and ArchiveBox were not found after installing!"
     echo "    Check to see if a previous step failed."
     echo "    Check to see if a previous step failed."
     echo
     echo

+ 1 - 1
bin/test.sh

@@ -14,4 +14,4 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
 
 
 source "$DIR/.venv/bin/activate"
 source "$DIR/.venv/bin/activate"
 
 
-pytest -s --basetemp=tests/out --ignore=archivebox/vendor --ignore=deb_dist --ignore=pip_dist --ignore=brew_dist
+pytest -s --basetemp=tests/out "$@"

+ 107 - 75
docker-compose.yml

@@ -1,39 +1,31 @@
 # Usage:
 # Usage:
-#     docker compose run archivebox init --setup
 #     docker compose up
 #     docker compose up
-#     echo "https://example.com" | docker compose run archivebox archivebox add
-#     docker compose run archivebox add --depth=1 https://example.com/some/feed.rss
-#     docker compose run archivebox config --set MEDIA_MAX_SIZE=750m
+#     echo 'https://example.com' | docker compose run -T archivebox add
+#     docker compose run archivebox add --depth=1 'https://news.ycombinator.com'
+#     docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
 #     docker compose run archivebox help
 #     docker compose run archivebox help
 # Documentation:
 # Documentation:
 #     https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
 #     https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
 
 
-version: '3.9'
-
 services:
 services:
     archivebox:
     archivebox:
-        #image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
-        image: archivebox/archivebox:dev
-        command: server --quick-init 0.0.0.0:8000
+        image: archivebox/archivebox:latest
         ports:
         ports:
             - 8000:8000
             - 8000:8000
         volumes:
         volumes:
             - ./data:/data
             - ./data:/data
-            # - ./etc/crontabs:/var/spool/cron/crontabs  # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
-            # - ./archivebox:/app/archivebox             # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
-        # build: .                                       # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
         environment:
         environment:
-            - ALLOWED_HOSTS=*                   # restrict this to only accept incoming traffic via specific domain name
-            # - PUBLIC_INDEX=True               # set to False to prevent anonymous users from viewing snapshot list
-            # - PUBLIC_SNAPSHOTS=True           # set to False to prevent anonymous users from viewing snapshot content
-            # - PUBLIC_ADD_VIEW=False           # set to True to allow anonymous users to submit new URLs to archive
             # - ADMIN_USERNAME=admin            # create an admin user on first run with the given user/pass combo
             # - ADMIN_USERNAME=admin            # create an admin user on first run with the given user/pass combo
             # - ADMIN_PASSWORD=SomeSecretPassword
             # - ADMIN_PASSWORD=SomeSecretPassword
+            - ALLOWED_HOSTS=*                   # restrict this to only accept incoming traffic via specific domain name
+            - PUBLIC_INDEX=True                 # set to False to prevent anonymous users from viewing snapshot list
+            - PUBLIC_SNAPSHOTS=True             # set to False to prevent anonymous users from viewing snapshot content
+            - PUBLIC_ADD_VIEW=False             # set to True to allow anonymous users to submit new URLs to archive
+            - SEARCH_BACKEND_ENGINE=sonic       # tells ArchiveBox to use sonic container below for fast full-text search
+            - SEARCH_BACKEND_HOST_NAME=sonic
+            - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
             # - PUID=911                        # set to your host user's UID & GID if you encounter permissions issues
             # - PUID=911                        # set to your host user's UID & GID if you encounter permissions issues
-            # - PGID=911
-            # - SEARCH_BACKEND_ENGINE=sonic     # uncomment these and sonic container below for better full-text search
-            # - SEARCH_BACKEND_HOST_NAME=sonic
-            # - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
+            # - PGID=911                        # UID/GIDs <500 may clash with existing users and are not recommended
             # - MEDIA_MAX_SIZE=750m             # increase this filesize limit to allow archiving larger audio/video files
             # - MEDIA_MAX_SIZE=750m             # increase this filesize limit to allow archiving larger audio/video files
             # - TIMEOUT=60                      # increase this number to 120+ seconds if you see many slow downloads timing out
             # - TIMEOUT=60                      # increase this number to 120+ seconds if you see many slow downloads timing out
             # - CHECK_SSL_VALIDITY=True         # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
             # - CHECK_SSL_VALIDITY=True         # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
@@ -41,8 +33,7 @@ services:
             # ...
             # ...
             # add further configuration options from archivebox/config.py as needed (to apply them only to this container)
             # add further configuration options from archivebox/config.py as needed (to apply them only to this container)
             # or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers)
             # or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers)
-        
-        # For ad-blocking during archiving, uncomment this section and pihole service section below 
+        # For ad-blocking during archiving, uncomment this section and pihole service section below
         # networks:
         # networks:
         #   - dns
         #   - dns
         # dns:
         # dns:
@@ -51,29 +42,85 @@ services:
 
 
     ######## Optional Addons: tweak examples below as needed for your specific use case ########
     ######## Optional Addons: tweak examples below as needed for your specific use case ########
 
 
-    ### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
-    #   $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
-    # After starting, backfill any existing Snapshots into the full-text index:
+    ### This optional container runs any scheduled tasks in the background, add new tasks like so:
+    #   $ docker compose run archivebox schedule --add --every=day --depth=1 'https://example.com/some/rss/feed.xml'
+    # then restart the scheduler container to apply any changes to the scheduled task list:
+    #   $ docker compose restart archivebox_scheduler
+
+    archivebox_scheduler:
+       image: archivebox/archivebox:latest
+       command: schedule --foreground --update --every=day
+       environment:
+           - TIMEOUT=120                       # use a higher timeout than the main container to give slow tasks more time when retrying
+           # - PUID=502                        # set to your host user's UID & GID if you encounter permissions issues
+           # - PGID=20
+       volumes:
+           - ./data:/data
+       # cpus: 2                               # uncomment / edit these values to limit scheduler container resource consumption
+       # mem_limit: 2048m
+       # restart: always
+
+
+    ### This runs the optional Sonic full-text search backend (much faster than default rg backend).
+    # If Sonic is ever started after not running for a while, update its full-text index by running:
     #   $ docker-compose run archivebox update --index-only
     #   $ docker-compose run archivebox update --index-only
 
 
-    # sonic:
-    #    image: valeriansaliou/sonic:latest
-    #    expose:
-    #        - 1491
-    #    environment:
-    #        - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
-    #    volumes:
-    #        - ./sonic.cfg:/etc/sonic.cfg:ro
-    #        - ./data/sonic:/var/lib/sonic/store
-    
+    sonic:
+        image: valeriansaliou/sonic:latest
+        build:
+            # custom build just auto-downloads archivebox's default sonic.cfg as a convenience
+            # not needed after first run / if you have already have ./etc/sonic.cfg present
+            dockerfile_inline: |
+                FROM quay.io/curl/curl:latest AS config_downloader
+                RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg
+                FROM valeriansaliou/sonic:latest
+                COPY --from=config_downloader /tmp/sonic.cfg /etc/sonic.cfg
+        expose:
+            - 1491
+        environment:
+            - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
+        volumes:
+            - ./sonic.cfg:/etc/sonic.cfg
+            - ./data/sonic:/var/lib/sonic/store
+
+
+    ### This container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things,
+    # or remote control it to set up a chrome profile w/ login credentials for sites you want to archive.
+    # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile
+
+    novnc:
+        image: theasp/novnc:latest
+        environment:
+            - DISPLAY_WIDTH=1920
+            - DISPLAY_HEIGHT=1080
+            - RUN_XTERM=no
+        ports:
+            # to view/control ArchiveBox's browser, visit: http://127.0.0.1:8080/vnc.html
+            # restricted to access from localhost by default because it has no authentication
+            - 127.0.0.1:8080:8080
+
     
     
+    ### Example: Put Nginx in front of the ArchiveBox server for SSL termination and static file serving.
+    # You can also any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc.
+
+    # nginx:
+    #     image: nginx:alpine
+    #     ports:
+    #         - 443:443
+    #         - 80:80
+    #     volumes:
+    #         - ./etc/nginx.conf:/etc/nginx/nginx.conf
+    #         - ./data:/var/www
+
+
     ### Example: To run pihole in order to block ad/tracker requests during archiving,
     ### Example: To run pihole in order to block ad/tracker requests during archiving,
     # uncomment this block and set up pihole using its admin interface
     # uncomment this block and set up pihole using its admin interface
 
 
     # pihole:
     # pihole:
     #   image: pihole/pihole:latest
     #   image: pihole/pihole:latest
     #   ports:
     #   ports:
-    #     - 127.0.0.1:8090:80       # uncomment to access the admin HTTP interface on http://localhost:8090
+    #     # access the admin HTTP interface on http://localhost:8090
+    #     - 127.0.0.1:8090:80
     #   environment:
     #   environment:
     #     - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
     #     - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
     #     - DNSMASQ_LISTENING=all
     #     - DNSMASQ_LISTENING=all
@@ -88,43 +135,8 @@ services:
     #     - ./etc/dnsmasq:/etc/dnsmasq.d
     #     - ./etc/dnsmasq:/etc/dnsmasq.d
 
 
 
 
-    ### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container
-    #   $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml'
-    # then restart the scheduler container to apply the changes to the schedule
-    #   $ docker compose restart archivebox_scheduler
-
-    # archivebox_scheduler:
-    #    image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
-    #    command: schedule --foreground
-    #    environment:
-    #        - MEDIA_MAX_SIZE=750m               # increase this number to allow archiving larger audio/video files
-    #        # - TIMEOUT=60                      # increase if you see timeouts often during archiving / on slow networks
-    #        # - ONLY_NEW=True                   # set to False to retry previously failed URLs when re-adding instead of skipping them
-    #        # - CHECK_SSL_VALIDITY=True         # set to False to allow saving URLs w/ broken SSL certs
-    #        # - SAVE_ARCHIVE_DOT_ORG=True       # set to False to disable submitting URLs to Archive.org when archiving
-    #        # - PUID=502                        # set to your host user's UID & GID if you encounter permissions issues
-    #        # - PGID=20
-    #    volumes:
-    #        - ./data:/data
-    #        - ./etc/crontabs:/var/spool/cron/crontabs
-    #    # cpus: 2                               # uncomment / edit these values to limit container resource consumption
-    #    # mem_limit: 2048m
-    #    # shm_size: 1024m
-
-
-    ### Example: Put Nginx in front of the ArchiveBox server for SSL termination
-
-    # nginx:
-    #     image: nginx:alpine
-    #     ports:
-    #         - 443:443
-    #         - 80:80
-    #     volumes:
-    #         - ./etc/nginx.conf:/etc/nginx/nginx.conf
-    #         - ./data:/var/www
-
-
-    ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
+    ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel to avoid IP blocks.
+    # You can also use any other VPN that works at the docker IP level, e.g. Tailscale, OpenVPN, etc.
 
 
     # wireguard:
     # wireguard:
     #   image: linuxserver/wireguard:latest
     #   image: linuxserver/wireguard:latest
@@ -155,10 +167,30 @@ services:
 
 
 
 
 networks:
 networks:
-
-    # network needed for pihole container to offer :53 dns resolving on fixed ip for archivebox container
+    # network just used for pihole container to offer :53 dns resolving on fixed ip for archivebox container
     dns:
     dns:
         ipam:
         ipam:
             driver: default
             driver: default
             config:
             config:
                 - subnet: 172.20.0.0/24
                 - subnet: 172.20.0.0/24
+
+
+# To use remote storage for your ./data/archive (e.g. Amazon S3, Backblaze B2, Google Drive, OneDrive, SFTP, etc.)
+#   Follow the steps here to set up the Docker RClone Plugin https://rclone.org/docker/
+#     $ docker plugin install rclone/docker-volume-rclone:amd64 --grant-all-permissions --alias rclone
+#     $ nano /var/lib/docker-plugins/rclone/config/rclone.conf
+#     [examplegdrive]
+#     type = drive
+#     scope = drive
+#     drive_id = 1234567...
+#     root_folder_id = 0Abcd...
+#     token = {"access_token":...}
+
+# volumes:
+#     archive:
+#         driver: rclone
+#         driver_opts:
+#             remote: 'examplegdrive:archivebox'
+#             allow_other: 'true'
+#             vfs_cache_mode: full
+#             poll_interval: 0

+ 1 - 0
etc/sonic.cfg

@@ -6,6 +6,7 @@
 
 
 [server]
 [server]
 
 
+# log_level = "debug"
 log_level = "warn"
 log_level = "warn"
 
 
 
 

+ 231 - 253
package-lock.json

@@ -1,23 +1,33 @@
 {
 {
   "name": "archivebox",
   "name": "archivebox",
-  "version": "0.7.2",
+  "version": "0.8.0",
   "lockfileVersion": 3,
   "lockfileVersion": 3,
   "requires": true,
   "requires": true,
   "packages": {
   "packages": {
     "": {
     "": {
       "name": "archivebox",
       "name": "archivebox",
-      "version": "0.7.2",
+      "version": "0.8.0",
       "license": "MIT",
       "license": "MIT",
       "dependencies": {
       "dependencies": {
         "@postlight/parser": "^2.2.3",
         "@postlight/parser": "^2.2.3",
         "readability-extractor": "github:ArchiveBox/readability-extractor",
         "readability-extractor": "github:ArchiveBox/readability-extractor",
-        "single-file-cli": "^1.1.46"
+        "single-file-cli": "^1.1.54"
+      }
+    },
+    "node_modules/@asamuzakjp/dom-selector": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-2.0.2.tgz",
+      "integrity": "sha512-x1KXOatwofR6ZAYzXRBL5wrdV0vwNxlTCK9NCuLqAzQYARqGcvFwiJA6A1ERuh+dgeA4Dxm3JBYictIes+SqUQ==",
+      "dependencies": {
+        "bidi-js": "^1.0.3",
+        "css-tree": "^2.3.1",
+        "is-potential-custom-element-name": "^1.0.1"
       }
       }
     },
     },
     "node_modules/@babel/runtime-corejs2": {
     "node_modules/@babel/runtime-corejs2": {
-      "version": "7.23.7",
-      "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.23.7.tgz",
-      "integrity": "sha512-JmMk2t1zGDNkvsY2MsLLksocjY+ufGzSk8UlcNcxzfrzAPu4nMx0HRFakzIg2bhcqQq6xBI2nUaW/sHoaYIHdQ==",
+      "version": "7.24.4",
+      "resolved": "https://registry.npmjs.org/@babel/runtime-corejs2/-/runtime-corejs2-7.24.4.tgz",
+      "integrity": "sha512-ZCKqyUKt/Coimg+3Kafu43yNetgYnTXzNbEGAgxc81J5sI0qFNbQ613w7PNny+SmijAmGVroL0GDvx5rG/JI5Q==",
       "dependencies": {
       "dependencies": {
         "core-js": "^2.6.12",
         "core-js": "^2.6.12",
         "regenerator-runtime": "^0.14.0"
         "regenerator-runtime": "^0.14.0"
@@ -168,9 +178,9 @@
       }
       }
     },
     },
     "node_modules/@puppeteer/browsers": {
     "node_modules/@puppeteer/browsers": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-1.8.0.tgz",
-      "integrity": "sha512-TkRHIV6k2D8OlUe8RtG+5jgOF/H98Myx0M6AOafC8DdNVOFiBSFa5cpRDtpm8LXOa9sVwe0+e6Q3FC56X/DZfg==",
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.0.0.tgz",
+      "integrity": "sha512-3PS82/5+tnpEaUWonjAFFvlf35QHF15xqyGd34GBa5oP5EPVfFXRsbSxIGYf1M+vZlqBZ3oxT1kRg9OYhtt8ng==",
       "dependencies": {
       "dependencies": {
         "debug": "4.3.4",
         "debug": "4.3.4",
         "extract-zip": "2.0.1",
         "extract-zip": "2.0.1",
@@ -184,7 +194,7 @@
         "browsers": "lib/cjs/main-cli.js"
         "browsers": "lib/cjs/main-cli.js"
       },
       },
       "engines": {
       "engines": {
-        "node": ">=16.3.0"
+        "node": ">=18"
       }
       }
     },
     },
     "node_modules/@tootallnate/quickjs-emscripten": {
     "node_modules/@tootallnate/quickjs-emscripten": {
@@ -193,9 +203,9 @@
       "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA=="
       "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA=="
     },
     },
     "node_modules/@types/node": {
     "node_modules/@types/node": {
-      "version": "20.10.6",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.10.6.tgz",
-      "integrity": "sha512-Vac8H+NlRNNlAmDfGUP7b5h/KA+AtWIzuXy0E6OyP8f1tCLYAtPvKRRDJjAPqhpCb0t6U2j7/xqAuLEebW2kiw==",
+      "version": "20.12.7",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
+      "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
       "optional": true,
       "optional": true,
       "dependencies": {
       "dependencies": {
         "undici-types": "~5.26.4"
         "undici-types": "~5.26.4"
@@ -211,9 +221,9 @@
       }
       }
     },
     },
     "node_modules/agent-base": {
     "node_modules/agent-base": {
-      "version": "7.1.0",
-      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.0.tgz",
-      "integrity": "sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg==",
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz",
+      "integrity": "sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==",
       "dependencies": {
       "dependencies": {
         "debug": "^4.3.4"
         "debug": "^4.3.4"
       },
       },
@@ -304,14 +314,15 @@
       "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg=="
       "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg=="
     },
     },
     "node_modules/b4a": {
     "node_modules/b4a": {
-      "version": "1.6.4",
-      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.4.tgz",
-      "integrity": "sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw=="
-    },
-    "node_modules/balanced-match": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
+      "version": "1.6.6",
+      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz",
+      "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg=="
+    },
+    "node_modules/bare-events": {
+      "version": "2.2.2",
+      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.2.tgz",
+      "integrity": "sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==",
+      "optional": true
     },
     },
     "node_modules/base64-js": {
     "node_modules/base64-js": {
       "version": "1.5.1",
       "version": "1.5.1",
@@ -333,9 +344,9 @@
       ]
       ]
     },
     },
     "node_modules/basic-ftp": {
     "node_modules/basic-ftp": {
-      "version": "5.0.4",
-      "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.4.tgz",
-      "integrity": "sha512-8PzkB0arJFV4jJWSGOYR+OEic6aeKMu/osRhBULN6RY0ykby6LKhbmuQ5ublvaas5BOwboah5D87nrHyuh8PPA==",
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
+      "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==",
       "engines": {
       "engines": {
         "node": ">=10.0.0"
         "node": ">=10.0.0"
       }
       }
@@ -348,6 +359,14 @@
         "tweetnacl": "^0.14.3"
         "tweetnacl": "^0.14.3"
       }
       }
     },
     },
+    "node_modules/bidi-js": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz",
+      "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==",
+      "dependencies": {
+        "require-from-string": "^2.0.2"
+      }
+    },
     "node_modules/bluebird": {
     "node_modules/bluebird": {
       "version": "2.11.0",
       "version": "2.11.0",
       "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-2.11.0.tgz",
       "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-2.11.0.tgz",
@@ -358,15 +377,6 @@
       "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
       "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
       "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
       "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
     },
     },
-    "node_modules/brace-expansion": {
-      "version": "1.1.11",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
-      "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/brotli": {
     "node_modules/brotli": {
       "version": "1.3.3",
       "version": "1.3.3",
       "resolved": "https://registry.npmjs.org/brotli/-/brotli-1.3.3.tgz",
       "resolved": "https://registry.npmjs.org/brotli/-/brotli-1.3.3.tgz",
@@ -446,12 +456,12 @@
       }
       }
     },
     },
     "node_modules/chromium-bidi": {
     "node_modules/chromium-bidi": {
-      "version": "0.4.33",
-      "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.4.33.tgz",
-      "integrity": "sha512-IxoFM5WGQOIAd95qrSXzJUv4eXIrh+RvU3rwwqIiwYuvfE7U/Llj4fejbsJnjJMUYCuGtVQsY2gv7oGl4aTNSQ==",
+      "version": "0.5.8",
+      "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.5.8.tgz",
+      "integrity": "sha512-blqh+1cEQbHBKmok3rVJkBlBxt9beKBgOsxbFgs7UJcoVbbeZ+K7+6liAsjgpc8l1Xd55cQUy14fXZdGSb4zIw==",
       "dependencies": {
       "dependencies": {
         "mitt": "3.0.1",
         "mitt": "3.0.1",
-        "urlpattern-polyfill": "9.0.0"
+        "urlpattern-polyfill": "10.0.0"
       },
       },
       "peerDependencies": {
       "peerDependencies": {
         "devtools-protocol": "*"
         "devtools-protocol": "*"
@@ -497,11 +507,6 @@
         "node": ">= 0.8"
         "node": ">= 0.8"
       }
       }
     },
     },
-    "node_modules/concat-map": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg=="
-    },
     "node_modules/core-js": {
     "node_modules/core-js": {
       "version": "2.6.12",
       "version": "2.6.12",
       "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.12.tgz",
       "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.12.tgz",
@@ -533,6 +538,18 @@
         "nth-check": "~1.0.1"
         "nth-check": "~1.0.1"
       }
       }
     },
     },
+    "node_modules/css-tree": {
+      "version": "2.3.1",
+      "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-2.3.1.tgz",
+      "integrity": "sha512-6Fv1DV/TYw//QF5IzQdqsNDjx/wc8TrMBZsqjL9eW01tWb7R7k/mq+/VXfJCl7SoD5emsJop9cOByJZfs8hYIw==",
+      "dependencies": {
+        "mdn-data": "2.0.30",
+        "source-map-js": "^1.0.1"
+      },
+      "engines": {
+        "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0"
+      }
+    },
     "node_modules/css-what": {
     "node_modules/css-what": {
       "version": "2.1.3",
       "version": "2.1.3",
       "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
       "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
@@ -542,14 +559,14 @@
       }
       }
     },
     },
     "node_modules/cssstyle": {
     "node_modules/cssstyle": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-3.0.0.tgz",
-      "integrity": "sha512-N4u2ABATi3Qplzf0hWbVCdjenim8F3ojEXpBDF5hBpjzW182MjNGLqfmQ0SkSPeQ+V86ZXgeH8aXj6kayd4jgg==",
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-4.0.1.tgz",
+      "integrity": "sha512-8ZYiJ3A/3OkDd093CBT/0UKDWry7ak4BdPTFP2+QEP7cmhouyq/Up709ASSj2cK02BbZiMgk7kYjZNS4QP5qrQ==",
       "dependencies": {
       "dependencies": {
         "rrweb-cssom": "^0.6.0"
         "rrweb-cssom": "^0.6.0"
       },
       },
       "engines": {
       "engines": {
-        "node": ">=14"
+        "node": ">=18"
       }
       }
     },
     },
     "node_modules/dashdash": {
     "node_modules/dashdash": {
@@ -564,9 +581,9 @@
       }
       }
     },
     },
     "node_modules/data-uri-to-buffer": {
     "node_modules/data-uri-to-buffer": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.1.tgz",
-      "integrity": "sha512-MZd3VlchQkp8rdend6vrx7MmVDJzSNTBvghvKjirLkD+WTChA3KUf0jkE68Q4UyctNqI11zZO9/x2Yx+ub5Cvg==",
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
+      "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==",
       "engines": {
       "engines": {
         "node": ">= 14"
         "node": ">= 14"
       }
       }
@@ -657,9 +674,9 @@
       }
       }
     },
     },
     "node_modules/devtools-protocol": {
     "node_modules/devtools-protocol": {
-      "version": "0.0.1203626",
-      "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1203626.tgz",
-      "integrity": "sha512-nEzHZteIUZfGCZtTiS1fRpC8UZmsfD1SiyPvaUNvS13dvKf666OAm8YTi0+Ca3n1nLEyu49Cy4+dPWpaHFJk9g=="
+      "version": "0.0.1232444",
+      "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz",
+      "integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg=="
     },
     },
     "node_modules/difflib": {
     "node_modules/difflib": {
       "version": "0.2.6",
       "version": "0.2.6",
@@ -696,9 +713,9 @@
       "integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ=="
       "integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ=="
     },
     },
     "node_modules/dompurify": {
     "node_modules/dompurify": {
-      "version": "3.0.7",
-      "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.0.7.tgz",
-      "integrity": "sha512-BViYTZoqP3ak/ULKOc101y+CtHDUvBsVgSxIF1ku0HmK6BRf+C03MC+tArMvOPtVtZp83DDh5puywKDu4sbVjQ=="
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.0.tgz",
+      "integrity": "sha512-yoU4rhgPKCo+p5UrWWWNKiIq+ToGqmVVhk0PmMYBK4kRsR3/qhemNFL8f6CFmBd4gMwm3F4T7HBoydP5uY07fA=="
     },
     },
     "node_modules/domutils": {
     "node_modules/domutils": {
       "version": "1.5.1",
       "version": "1.5.1",
@@ -726,6 +743,11 @@
         "safer-buffer": "^2.1.0"
         "safer-buffer": "^2.1.0"
       }
       }
     },
     },
+    "node_modules/ecc-jsbn/node_modules/jsbn": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
+      "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg=="
+    },
     "node_modules/ellipsize": {
     "node_modules/ellipsize": {
       "version": "0.1.0",
       "version": "0.1.0",
       "resolved": "https://registry.npmjs.org/ellipsize/-/ellipsize-0.1.0.tgz",
       "resolved": "https://registry.npmjs.org/ellipsize/-/ellipsize-0.1.0.tgz",
@@ -750,9 +772,9 @@
       "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
       "integrity": "sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w=="
     },
     },
     "node_modules/escalade": {
     "node_modules/escalade": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz",
-      "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==",
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz",
+      "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==",
       "engines": {
       "engines": {
         "node": ">=6"
         "node": ">=6"
       }
       }
@@ -890,31 +912,26 @@
       }
       }
     },
     },
     "node_modules/fs-extra": {
     "node_modules/fs-extra": {
-      "version": "8.1.0",
-      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz",
-      "integrity": "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==",
+      "version": "11.2.0",
+      "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz",
+      "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==",
       "dependencies": {
       "dependencies": {
         "graceful-fs": "^4.2.0",
         "graceful-fs": "^4.2.0",
-        "jsonfile": "^4.0.0",
-        "universalify": "^0.1.0"
+        "jsonfile": "^6.0.1",
+        "universalify": "^2.0.0"
       },
       },
       "engines": {
       "engines": {
-        "node": ">=6 <7 || >=8"
+        "node": ">=14.14"
       }
       }
     },
     },
     "node_modules/fs-extra/node_modules/universalify": {
     "node_modules/fs-extra/node_modules/universalify": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz",
-      "integrity": "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==",
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
+      "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
       "engines": {
       "engines": {
-        "node": ">= 4.0.0"
+        "node": ">= 10.0.0"
       }
       }
     },
     },
-    "node_modules/fs.realpath": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw=="
-    },
     "node_modules/get-caller-file": {
     "node_modules/get-caller-file": {
       "version": "2.0.5",
       "version": "2.0.5",
       "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
       "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",
@@ -938,14 +955,14 @@
       }
       }
     },
     },
     "node_modules/get-uri": {
     "node_modules/get-uri": {
-      "version": "6.0.2",
-      "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.2.tgz",
-      "integrity": "sha512-5KLucCJobh8vBY1K07EFV4+cPZH3mrV9YeAruUseCQKHB58SGjjT2l9/eA9LD082IiuMjSlFJEcdJ27TXvbZNw==",
+      "version": "6.0.3",
+      "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz",
+      "integrity": "sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==",
       "dependencies": {
       "dependencies": {
         "basic-ftp": "^5.0.2",
         "basic-ftp": "^5.0.2",
-        "data-uri-to-buffer": "^6.0.0",
+        "data-uri-to-buffer": "^6.0.2",
         "debug": "^4.3.4",
         "debug": "^4.3.4",
-        "fs-extra": "^8.1.0"
+        "fs-extra": "^11.2.0"
       },
       },
       "engines": {
       "engines": {
         "node": ">= 14"
         "node": ">= 14"
@@ -959,25 +976,6 @@
         "assert-plus": "^1.0.0"
         "assert-plus": "^1.0.0"
       }
       }
     },
     },
-    "node_modules/glob": {
-      "version": "7.2.3",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
-      "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
-      "dependencies": {
-        "fs.realpath": "^1.0.0",
-        "inflight": "^1.0.4",
-        "inherits": "2",
-        "minimatch": "^3.1.1",
-        "once": "^1.3.0",
-        "path-is-absolute": "^1.0.0"
-      },
-      "engines": {
-        "node": "*"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
     "node_modules/graceful-fs": {
     "node_modules/graceful-fs": {
       "version": "4.2.11",
       "version": "4.2.11",
       "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
       "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
@@ -1034,9 +1032,9 @@
       }
       }
     },
     },
     "node_modules/http-proxy-agent": {
     "node_modules/http-proxy-agent": {
-      "version": "7.0.0",
-      "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz",
-      "integrity": "sha512-+ZT+iBxVUQ1asugqnD6oWoRiS25AkjNfG085dKJGtGxkdwLQrMKU5wJr2bOOFAXzKcTuqq+7fZlTMgG3SRfIYQ==",
+      "version": "7.0.2",
+      "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
+      "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
       "dependencies": {
       "dependencies": {
         "agent-base": "^7.1.0",
         "agent-base": "^7.1.0",
         "debug": "^4.3.4"
         "debug": "^4.3.4"
@@ -1059,9 +1057,9 @@
       }
       }
     },
     },
     "node_modules/https-proxy-agent": {
     "node_modules/https-proxy-agent": {
-      "version": "7.0.2",
-      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.2.tgz",
-      "integrity": "sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==",
+      "version": "7.0.4",
+      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz",
+      "integrity": "sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==",
       "dependencies": {
       "dependencies": {
         "agent-base": "^7.0.2",
         "agent-base": "^7.0.2",
         "debug": "4"
         "debug": "4"
@@ -1105,24 +1103,22 @@
       "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
       "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
       "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ=="
       "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ=="
     },
     },
-    "node_modules/inflight": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
-      "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-      "dependencies": {
-        "once": "^1.3.0",
-        "wrappy": "1"
-      }
-    },
     "node_modules/inherits": {
     "node_modules/inherits": {
       "version": "2.0.4",
       "version": "2.0.4",
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
       "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
       "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
     },
     },
-    "node_modules/ip": {
-      "version": "1.1.8",
-      "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.8.tgz",
-      "integrity": "sha512-PuExPYUiu6qMBQb4l06ecm6T6ujzhmh+MeJcW9wa89PoAz5pvd4zPgN5WJV104mb6S2T1AwNIAaB70JNrLQWhg=="
+    "node_modules/ip-address": {
+      "version": "9.0.5",
+      "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz",
+      "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==",
+      "dependencies": {
+        "jsbn": "1.1.0",
+        "sprintf-js": "^1.1.3"
+      },
+      "engines": {
+        "node": ">= 12"
+      }
     },
     },
     "node_modules/is-fullwidth-code-point": {
     "node_modules/is-fullwidth-code-point": {
       "version": "3.0.0",
       "version": "3.0.0",
@@ -1153,16 +1149,17 @@
       "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
       "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g=="
     },
     },
     "node_modules/jsbn": {
     "node_modules/jsbn": {
-      "version": "0.1.1",
-      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
-      "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg=="
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz",
+      "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A=="
     },
     },
     "node_modules/jsdom": {
     "node_modules/jsdom": {
-      "version": "23.0.1",
-      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.0.1.tgz",
-      "integrity": "sha512-2i27vgvlUsGEBO9+/kJQRbtqtm+191b5zAZrU/UezVmnC2dlDAFLgDYJvAEi94T4kjsRKkezEtLQTgsNEsW2lQ==",
+      "version": "23.2.0",
+      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.2.0.tgz",
+      "integrity": "sha512-L88oL7D/8ufIES+Zjz7v0aes+oBMh2Xnh3ygWvL0OaICOomKEPKuPnIfBJekiXr+BHbbMjrWn/xqrDQuxFTeyA==",
       "dependencies": {
       "dependencies": {
-        "cssstyle": "^3.0.0",
+        "@asamuzakjp/dom-selector": "^2.0.1",
+        "cssstyle": "^4.0.1",
         "data-urls": "^5.0.0",
         "data-urls": "^5.0.0",
         "decimal.js": "^10.4.3",
         "decimal.js": "^10.4.3",
         "form-data": "^4.0.0",
         "form-data": "^4.0.0",
@@ -1170,7 +1167,6 @@
         "http-proxy-agent": "^7.0.0",
         "http-proxy-agent": "^7.0.0",
         "https-proxy-agent": "^7.0.2",
         "https-proxy-agent": "^7.0.2",
         "is-potential-custom-element-name": "^1.0.1",
         "is-potential-custom-element-name": "^1.0.1",
-        "nwsapi": "^2.2.7",
         "parse5": "^7.1.2",
         "parse5": "^7.1.2",
         "rrweb-cssom": "^0.6.0",
         "rrweb-cssom": "^0.6.0",
         "saxes": "^6.0.0",
         "saxes": "^6.0.0",
@@ -1181,7 +1177,7 @@
         "whatwg-encoding": "^3.1.1",
         "whatwg-encoding": "^3.1.1",
         "whatwg-mimetype": "^4.0.0",
         "whatwg-mimetype": "^4.0.0",
         "whatwg-url": "^14.0.0",
         "whatwg-url": "^14.0.0",
-        "ws": "^8.14.2",
+        "ws": "^8.16.0",
         "xml-name-validator": "^5.0.0"
         "xml-name-validator": "^5.0.0"
       },
       },
       "engines": {
       "engines": {
@@ -1235,13 +1231,24 @@
       "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="
       "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="
     },
     },
     "node_modules/jsonfile": {
     "node_modules/jsonfile": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz",
-      "integrity": "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==",
+      "version": "6.1.0",
+      "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz",
+      "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==",
+      "dependencies": {
+        "universalify": "^2.0.0"
+      },
       "optionalDependencies": {
       "optionalDependencies": {
         "graceful-fs": "^4.1.6"
         "graceful-fs": "^4.1.6"
       }
       }
     },
     },
+    "node_modules/jsonfile/node_modules/universalify": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/universalify/-/universalify-2.0.1.tgz",
+      "integrity": "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==",
+      "engines": {
+        "node": ">= 10.0.0"
+      }
+    },
     "node_modules/jsprim": {
     "node_modules/jsprim": {
       "version": "2.0.2",
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-2.0.2.tgz",
       "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-2.0.2.tgz",
@@ -1375,6 +1382,11 @@
         "node": ">=12"
         "node": ">=12"
       }
       }
     },
     },
+    "node_modules/mdn-data": {
+      "version": "2.0.30",
+      "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
+      "integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA=="
+    },
     "node_modules/mime-db": {
     "node_modules/mime-db": {
       "version": "1.52.0",
       "version": "1.52.0",
       "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
       "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
@@ -1394,17 +1406,6 @@
         "node": ">= 0.6"
         "node": ">= 0.6"
       }
       }
     },
     },
-    "node_modules/minimatch": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
-      "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
-      "dependencies": {
-        "brace-expansion": "^1.1.7"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
     "node_modules/mitt": {
     "node_modules/mitt": {
       "version": "3.0.1",
       "version": "3.0.1",
       "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
       "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz",
@@ -1461,9 +1462,9 @@
       }
       }
     },
     },
     "node_modules/nwsapi": {
     "node_modules/nwsapi": {
-      "version": "2.2.7",
-      "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.7.tgz",
-      "integrity": "sha512-ub5E4+FBPKwAZx0UwIQOjYWGHTEq5sPqHQNRN8Z9e4A7u3Tj1weLJsL59yH9vmvqEtBHaOmT6cYQKIZOxp35FQ=="
+      "version": "2.2.9",
+      "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.2.9.tgz",
+      "integrity": "sha512-2f3F0SEEer8bBu0dsNCFF50N0cTThV1nWFYcEYFZttdW0lDAoybv9cQoK7X7/68Z89S7FoRrVjP1LPX4XRf9vg=="
     },
     },
     "node_modules/oauth-sign": {
     "node_modules/oauth-sign": {
       "version": "0.9.0",
       "version": "0.9.0",
@@ -1500,12 +1501,11 @@
       }
       }
     },
     },
     "node_modules/pac-resolver": {
     "node_modules/pac-resolver": {
-      "version": "7.0.0",
-      "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.0.tgz",
-      "integrity": "sha512-Fd9lT9vJbHYRACT8OhCbZBbxr6KRSawSovFpy8nDGshaK99S/EBhVIHp9+crhxrsZOuvLpgL1n23iyPg6Rl2hg==",
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz",
+      "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==",
       "dependencies": {
       "dependencies": {
         "degenerator": "^5.0.0",
         "degenerator": "^5.0.0",
-        "ip": "^1.1.8",
         "netmask": "^2.0.2"
         "netmask": "^2.0.2"
       },
       },
       "engines": {
       "engines": {
@@ -1539,14 +1539,6 @@
         "url": "https://github.com/fb55/entities?sponsor=1"
         "url": "https://github.com/fb55/entities?sponsor=1"
       }
       }
     },
     },
-    "node_modules/path-is-absolute": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
-      "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/pend": {
     "node_modules/pend": {
       "version": "1.2.0",
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
       "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
@@ -1648,39 +1640,19 @@
       }
       }
     },
     },
     "node_modules/puppeteer-core": {
     "node_modules/puppeteer-core": {
-      "version": "21.5.2",
-      "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-21.5.2.tgz",
-      "integrity": "sha512-v4T0cWnujSKs+iEfmb8ccd7u4/x8oblEyKqplqKnJ582Kw8PewYAWvkH4qUWhitN3O2q9RF7dzkvjyK5HbzjLA==",
+      "version": "22.0.0",
+      "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.0.0.tgz",
+      "integrity": "sha512-S3s91rLde0A86PWVeNY82h+P0fdS7CTiNWAicCVH/bIspRP4nS2PnO5j+VTFqCah0ZJizGzpVPAmxVYbLxTc9w==",
       "dependencies": {
       "dependencies": {
-        "@puppeteer/browsers": "1.8.0",
-        "chromium-bidi": "0.4.33",
+        "@puppeteer/browsers": "2.0.0",
+        "chromium-bidi": "0.5.8",
         "cross-fetch": "4.0.0",
         "cross-fetch": "4.0.0",
         "debug": "4.3.4",
         "debug": "4.3.4",
-        "devtools-protocol": "0.0.1203626",
-        "ws": "8.14.2"
+        "devtools-protocol": "0.0.1232444",
+        "ws": "8.16.0"
       },
       },
       "engines": {
       "engines": {
-        "node": ">=16.13.2"
-      }
-    },
-    "node_modules/puppeteer-core/node_modules/ws": {
-      "version": "8.14.2",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
-      "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
-      "engines": {
-        "node": ">=10.0.0"
-      },
-      "peerDependencies": {
-        "bufferutil": "^4.0.1",
-        "utf-8-validate": ">=5.0.2"
-      },
-      "peerDependenciesMeta": {
-        "bufferutil": {
-          "optional": true
-        },
-        "utf-8-validate": {
-          "optional": true
-        }
+        "node": ">=18"
       }
       }
     },
     },
     "node_modules/qs": {
     "node_modules/qs": {
@@ -1703,8 +1675,7 @@
     },
     },
     "node_modules/readability-extractor": {
     "node_modules/readability-extractor": {
       "version": "0.0.11",
       "version": "0.0.11",
-      "resolved": "git+ssh://[email protected]/ArchiveBox/readability-extractor.git#2fb4689a65c6433036453dcbee7a268840604eb9",
-      "license": "MIT",
+      "resolved": "git+ssh://[email protected]/ArchiveBox/readability-extractor.git#057f2046f9535cfc6df7b8d551aaad32a9e6226c",
       "dependencies": {
       "dependencies": {
         "@mozilla/readability": "^0.5.0",
         "@mozilla/readability": "^0.5.0",
         "dompurify": "^3.0.6",
         "dompurify": "^3.0.6",
@@ -1740,25 +1711,19 @@
         "node": ">=0.10.0"
         "node": ">=0.10.0"
       }
       }
     },
     },
+    "node_modules/require-from-string": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
+      "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
     "node_modules/requires-port": {
     "node_modules/requires-port": {
       "version": "1.0.0",
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
       "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz",
       "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ=="
       "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ=="
     },
     },
-    "node_modules/rimraf": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
-      "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
-      "dependencies": {
-        "glob": "^7.1.3"
-      },
-      "bin": {
-        "rimraf": "bin.js"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
     "node_modules/rrweb-cssom": {
     "node_modules/rrweb-cssom": {
       "version": "0.6.0",
       "version": "0.6.0",
       "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz",
       "resolved": "https://registry.npmjs.org/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz",
@@ -1800,9 +1765,9 @@
       }
       }
     },
     },
     "node_modules/selenium-webdriver": {
     "node_modules/selenium-webdriver": {
-      "version": "4.15.0",
-      "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.15.0.tgz",
-      "integrity": "sha512-BNG1bq+KWiBGHcJ/wULi0eKY0yaDqFIbEmtbsYJmfaEghdCkXBsx1akgOorhNwjBipOr0uwpvNXqT6/nzl+zjg==",
+      "version": "4.17.0",
+      "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.17.0.tgz",
+      "integrity": "sha512-e2E+2XBlGepzwgFbyQfSwo9Cbj6G5fFfs9MzAS00nC99EewmcS2rwn2MwtgfP7I5p1e7DYv4HQJXtWedsu6DvA==",
       "dependencies": {
       "dependencies": {
         "jszip": "^3.10.1",
         "jszip": "^3.10.1",
         "tmp": "^0.2.1",
         "tmp": "^0.2.1",
@@ -1818,16 +1783,16 @@
       "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA=="
       "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA=="
     },
     },
     "node_modules/single-file-cli": {
     "node_modules/single-file-cli": {
-      "version": "1.1.46",
-      "resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.1.46.tgz",
-      "integrity": "sha512-+vFj0a5Y4ESqpMwH0T6738pg8ZA9KVhhl6OlIOsicamGNU9DnMa+q9dL1S2KnLWHoauKjU0BThhR/YKUleJSxw==",
+      "version": "1.1.54",
+      "resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.1.54.tgz",
+      "integrity": "sha512-wnVPg7BklhswwFVrtuFXbmluI4piHxg2dC0xATxYTeXAld6PnRPlnp7ufallRKArjFBZdP2u+ihMkOIp7A38XA==",
       "dependencies": {
       "dependencies": {
         "file-url": "3.0.0",
         "file-url": "3.0.0",
         "iconv-lite": "0.6.3",
         "iconv-lite": "0.6.3",
-        "jsdom": "23.0.0",
-        "puppeteer-core": "21.5.2",
-        "selenium-webdriver": "4.15.0",
-        "single-file-core": "1.3.15",
+        "jsdom": "24.0.0",
+        "puppeteer-core": "22.0.0",
+        "selenium-webdriver": "4.17.0",
+        "single-file-core": "1.3.24",
         "strong-data-uri": "1.0.6",
         "strong-data-uri": "1.0.6",
         "yargs": "17.7.2"
         "yargs": "17.7.2"
       },
       },
@@ -1847,11 +1812,11 @@
       }
       }
     },
     },
     "node_modules/single-file-cli/node_modules/jsdom": {
     "node_modules/single-file-cli/node_modules/jsdom": {
-      "version": "23.0.0",
-      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-23.0.0.tgz",
-      "integrity": "sha512-cbL/UCtohJguhFC7c2/hgW6BeZCNvP7URQGnx9tSJRYKCdnfbfWOrtuLTMfiB2VxKsx5wPHVsh/J0aBy9lIIhQ==",
+      "version": "24.0.0",
+      "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-24.0.0.tgz",
+      "integrity": "sha512-UDS2NayCvmXSXVP6mpTj+73JnNQadZlr9N68189xib2tx5Mls7swlTNao26IoHv46BZJFvXygyRtyXd1feAk1A==",
       "dependencies": {
       "dependencies": {
-        "cssstyle": "^3.0.0",
+        "cssstyle": "^4.0.1",
         "data-urls": "^5.0.0",
         "data-urls": "^5.0.0",
         "decimal.js": "^10.4.3",
         "decimal.js": "^10.4.3",
         "form-data": "^4.0.0",
         "form-data": "^4.0.0",
@@ -1870,14 +1835,14 @@
         "whatwg-encoding": "^3.1.1",
         "whatwg-encoding": "^3.1.1",
         "whatwg-mimetype": "^4.0.0",
         "whatwg-mimetype": "^4.0.0",
         "whatwg-url": "^14.0.0",
         "whatwg-url": "^14.0.0",
-        "ws": "^8.14.2",
+        "ws": "^8.16.0",
         "xml-name-validator": "^5.0.0"
         "xml-name-validator": "^5.0.0"
       },
       },
       "engines": {
       "engines": {
         "node": ">=18"
         "node": ">=18"
       },
       },
       "peerDependencies": {
       "peerDependencies": {
-        "canvas": "^3.0.0"
+        "canvas": "^2.11.2"
       },
       },
       "peerDependenciesMeta": {
       "peerDependenciesMeta": {
         "canvas": {
         "canvas": {
@@ -1909,9 +1874,9 @@
       }
       }
     },
     },
     "node_modules/single-file-core": {
     "node_modules/single-file-core": {
-      "version": "1.3.15",
-      "resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.3.15.tgz",
-      "integrity": "sha512-/YNpHBwASWNxmSmZXz0xRolmXf0+PGAbwpVrwn6A8tYeuAdezxxde5RYTTQ7V4Zv68+H4JMhE2DwCRV0sVUGNA=="
+      "version": "1.3.24",
+      "resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.3.24.tgz",
+      "integrity": "sha512-1B256mKBbNV8jXAV+hRyEv0aMa7tn0C0Ci+zx7Ya4ZXZB3b9/1MgKsB/fxVwDiL28WJSU0pxzh8ftIYubCNn9w=="
     },
     },
     "node_modules/smart-buffer": {
     "node_modules/smart-buffer": {
       "version": "4.2.0",
       "version": "4.2.0",
@@ -1923,24 +1888,24 @@
       }
       }
     },
     },
     "node_modules/socks": {
     "node_modules/socks": {
-      "version": "2.7.1",
-      "resolved": "https://registry.npmjs.org/socks/-/socks-2.7.1.tgz",
-      "integrity": "sha512-7maUZy1N7uo6+WVEX6psASxtNlKaNVMlGQKkG/63nEDdLOWNbiUMoLK7X4uYoLhQstau72mLgfEWcXcwsaHbYQ==",
+      "version": "2.8.3",
+      "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.3.tgz",
+      "integrity": "sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==",
       "dependencies": {
       "dependencies": {
-        "ip": "^2.0.0",
+        "ip-address": "^9.0.5",
         "smart-buffer": "^4.2.0"
         "smart-buffer": "^4.2.0"
       },
       },
       "engines": {
       "engines": {
-        "node": ">= 10.13.0",
+        "node": ">= 10.0.0",
         "npm": ">= 3.0.0"
         "npm": ">= 3.0.0"
       }
       }
     },
     },
     "node_modules/socks-proxy-agent": {
     "node_modules/socks-proxy-agent": {
-      "version": "8.0.2",
-      "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.2.tgz",
-      "integrity": "sha512-8zuqoLv1aP/66PHF5TqwJ7Czm3Yv32urJQHrVyhD7mmA6d61Zv8cIXQYPTWwmg6qlupnPvs/QKDmfa4P/qct2g==",
+      "version": "8.0.3",
+      "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz",
+      "integrity": "sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==",
       "dependencies": {
       "dependencies": {
-        "agent-base": "^7.0.2",
+        "agent-base": "^7.1.1",
         "debug": "^4.3.4",
         "debug": "^4.3.4",
         "socks": "^2.7.1"
         "socks": "^2.7.1"
       },
       },
@@ -1948,11 +1913,6 @@
         "node": ">= 14"
         "node": ">= 14"
       }
       }
     },
     },
-    "node_modules/socks/node_modules/ip": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/ip/-/ip-2.0.0.tgz",
-      "integrity": "sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ=="
-    },
     "node_modules/source-map": {
     "node_modules/source-map": {
       "version": "0.6.1",
       "version": "0.6.1",
       "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
       "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
@@ -1962,6 +1922,19 @@
         "node": ">=0.10.0"
         "node": ">=0.10.0"
       }
       }
     },
     },
+    "node_modules/source-map-js": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz",
+      "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/sprintf-js": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
+      "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="
+    },
     "node_modules/sshpk": {
     "node_modules/sshpk": {
       "version": "1.18.0",
       "version": "1.18.0",
       "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz",
       "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.18.0.tgz",
@@ -1986,6 +1959,11 @@
         "node": ">=0.10.0"
         "node": ">=0.10.0"
       }
       }
     },
     },
+    "node_modules/sshpk/node_modules/jsbn": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
+      "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg=="
+    },
     "node_modules/stream-length": {
     "node_modules/stream-length": {
       "version": "1.0.2",
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/stream-length/-/stream-length-1.0.2.tgz",
       "resolved": "https://registry.npmjs.org/stream-length/-/stream-length-1.0.2.tgz",
@@ -1995,12 +1973,15 @@
       }
       }
     },
     },
     "node_modules/streamx": {
     "node_modules/streamx": {
-      "version": "2.15.6",
-      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.6.tgz",
-      "integrity": "sha512-q+vQL4AAz+FdfT137VF69Cc/APqUbxy+MDOImRrMvchJpigHj9GksgDU2LYbO9rx7RX6osWgxJB2WxhYv4SZAw==",
+      "version": "2.16.1",
+      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz",
+      "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==",
       "dependencies": {
       "dependencies": {
         "fast-fifo": "^1.1.0",
         "fast-fifo": "^1.1.0",
         "queue-tick": "^1.0.1"
         "queue-tick": "^1.0.1"
+      },
+      "optionalDependencies": {
+        "bare-events": "^2.2.0"
       }
       }
     },
     },
     "node_modules/string_decoder": {
     "node_modules/string_decoder": {
@@ -2067,9 +2048,9 @@
       }
       }
     },
     },
     "node_modules/tar-stream": {
     "node_modules/tar-stream": {
-      "version": "3.1.6",
-      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.6.tgz",
-      "integrity": "sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==",
+      "version": "3.1.7",
+      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz",
+      "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==",
       "dependencies": {
       "dependencies": {
         "b4a": "^1.6.4",
         "b4a": "^1.6.4",
         "fast-fifo": "^1.2.0",
         "fast-fifo": "^1.2.0",
@@ -2082,14 +2063,11 @@
       "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg=="
       "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg=="
     },
     },
     "node_modules/tmp": {
     "node_modules/tmp": {
-      "version": "0.2.1",
-      "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.1.tgz",
-      "integrity": "sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ==",
-      "dependencies": {
-        "rimraf": "^3.0.0"
-      },
+      "version": "0.2.3",
+      "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz",
+      "integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==",
       "engines": {
       "engines": {
-        "node": ">=8.17.0"
+        "node": ">=14.14"
       }
       }
     },
     },
     "node_modules/tough-cookie": {
     "node_modules/tough-cookie": {
@@ -2125,9 +2103,9 @@
       "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
       "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
     },
     },
     "node_modules/turndown": {
     "node_modules/turndown": {
-      "version": "7.1.2",
-      "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.2.tgz",
-      "integrity": "sha512-ntI9R7fcUKjqBP6QU8rBK2Ehyt8LAzt3UBT9JR9tgo6GtuKvyUzpayWmeMKJw1DPdXzktvtIT8m2mVXz+bL/Qg==",
+      "version": "7.1.3",
+      "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.3.tgz",
+      "integrity": "sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==",
       "dependencies": {
       "dependencies": {
         "domino": "^2.1.6"
         "domino": "^2.1.6"
       }
       }
@@ -2178,9 +2156,9 @@
       }
       }
     },
     },
     "node_modules/urlpattern-polyfill": {
     "node_modules/urlpattern-polyfill": {
-      "version": "9.0.0",
-      "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-9.0.0.tgz",
-      "integrity": "sha512-WHN8KDQblxd32odxeIgo83rdVDE2bvdkb86it7bMhYZwWKJz0+O0RK/eZiHYnM+zgt/U7hAHOlCQGfjjvSkw2g=="
+      "version": "10.0.0",
+      "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
+      "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
     },
     },
     "node_modules/util-deprecate": {
     "node_modules/util-deprecate": {
       "version": "1.0.2",
       "version": "1.0.2",

+ 2 - 2
package.json

@@ -1,6 +1,6 @@
 {
 {
   "name": "archivebox",
   "name": "archivebox",
-  "version": "0.7.2",
+  "version": "0.8.0",
   "description": "ArchiveBox: The self-hosted internet archive",
   "description": "ArchiveBox: The self-hosted internet archive",
   "author": "Nick Sweeting <[email protected]>",
   "author": "Nick Sweeting <[email protected]>",
   "repository": "github:ArchiveBox/ArchiveBox",
   "repository": "github:ArchiveBox/ArchiveBox",
@@ -8,6 +8,6 @@
   "dependencies": {
   "dependencies": {
     "@postlight/parser": "^2.2.3",
     "@postlight/parser": "^2.2.3",
     "readability-extractor": "github:ArchiveBox/readability-extractor",
     "readability-extractor": "github:ArchiveBox/readability-extractor",
-    "single-file-cli": "^1.1.46"
+    "single-file-cli": "^1.1.54"
   }
   }
 }
 }

File diff suppressed because it is too large
+ 178 - 560
pdm.lock


+ 1 - 1
pip_dist

@@ -1 +1 @@
-Subproject commit 5323fc773d33ef3f219c35c946f3b353b1251d37
+Subproject commit 1380be7e4ef156d85957dfef8c6d154ef9880578

+ 92 - 45
pyproject.toml

@@ -1,26 +1,48 @@
 [project]
 [project]
 name = "archivebox"
 name = "archivebox"
-version = "0.7.2"
+version = "0.8.0"
+package-dir = "archivebox"
+requires-python = ">=3.10,<3.13"
+platform = "py3-none-any"
 description = "Self-hosted internet archiving solution."
 description = "Self-hosted internet archiving solution."
-authors = [
-    {name = "Nick Sweeting", email = "[email protected]"},
-]
+authors = [{name = "Nick Sweeting", email = "[email protected]"}]
+license = {text = "MIT"}
+readme = "README.md"
+
+# pdm install
+# pdm update --unconstrained
 dependencies = [
 dependencies = [
-    "croniter>=0.3.34",
+    # Base Framework and Language Dependencies
+    "setuptools>=69.5.1",
+    "django>=4.2.0,<5.0",
+    "django-extensions>=3.2.3",
+    "mypy-extensions>=1.0.0",
+
+    # Python Helper Libraries
+    "requests>=2.31.0",
     "dateparser>=1.0.0",
     "dateparser>=1.0.0",
-    "django-extensions>=3.0.3",
-    "django>=3.1.3,<3.2",
-    "ipython>5.0.0",
-    "mypy-extensions>=0.4.3",
-    "python-crontab>=2.5.1",
-    "requests>=2.24.0",
+    "feedparser>=6.0.11",
     "w3lib>=1.22.0",
     "w3lib>=1.22.0",
-    "yt-dlp>=2023.10.13",
-    # "playwright>=1.39.0; platform_machine != 'armv7l'",
+
+    # Feature-Specific Dependencies
+    "python-crontab>=2.5.1",          # for: archivebox schedule
+    "croniter>=0.3.34",               # for: archivebox schedule
+    "ipython>5.0.0",                  # for: archivebox shell
+
+    # Extractor Dependencies
+    "yt-dlp>=2024.4.9",               # for: media
+    "playwright>=1.43.0; platform_machine != 'armv7l'",  # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages
+    
+    # TODO: add more extractors
+    #  - gallery-dl
+    #  - scihubdl
+    #  - See Github issues for more...
 ]
 ]
-requires-python = ">=3.9,<3.12"
-readme = "README.md"
-license = {text = "MIT"}
+
+homepage = "https://github.com/ArchiveBox/ArchiveBox"
+repository = "https://github.com/ArchiveBox/ArchiveBox"
+documentation = "https://github.com/ArchiveBox/ArchiveBox/wiki"
+keywords = ["internet archiving", "web archiving", "digipres", "warc", "preservation", "backups", "archiving", "web", "bookmarks", "puppeteer", "browser", "download"]
 classifiers = [
 classifiers = [
     "Development Status :: 4 - Beta",
     "Development Status :: 4 - Beta",
     "Environment :: Console",
     "Environment :: Console",
@@ -53,59 +75,84 @@ classifiers = [
     "Topic :: Utilities",
     "Topic :: Utilities",
     "Typing :: Typed",
     "Typing :: Typed",
 ]
 ]
+# dynamic = ["version"]  # TODO: programatticaly fetch version from package.json at build time
 
 
-# pdm lock -G:all
+# pdm lock --group=':all' 
 # pdm install -G:all
 # pdm install -G:all
+# pdm update --group=':all' --unconstrained
+[project.optional-dependencies]
+sonic = [
+    # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
+    # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
+    # apt install sonic
+    "sonic-client>=1.0.0",
+]
+ldap = [
+    # apt install libldap2-dev libsasl2-dev python3-ldap
+    "python-ldap>=3.4.3",
+    "django-auth-ldap>=4.1.0",
+]
+
+
+# pdm lock --group=':all' --dev
+# pdm install -G:all --dev
+# pdm update --dev --unconstrained
 [tool.pdm.dev-dependencies]
 [tool.pdm.dev-dependencies]
-dev = [
-    # build
-    "setuptools>=69.0.3",
+build = [
+    "setuptools>=69.5.1",
+    "pip",
     "wheel",
     "wheel",
     "pdm",
     "pdm",
-    "homebrew-pypi-poet>=0.10.0",
-    # docs
+    "homebrew-pypi-poet>=0.10.0",      # for: generating archivebox.rb brewfile list of python packages
+]
+docs = [
     "recommonmark",
     "recommonmark",
     "sphinx",
     "sphinx",
     "sphinx-rtd-theme",
     "sphinx-rtd-theme",
-    # debug
+]
+debug = [
     "django-debug-toolbar",
     "django-debug-toolbar",
     "djdt_flamegraph",
     "djdt_flamegraph",
     "ipdb",
     "ipdb",
-    # test
+]
+test = [
+    "pdm[pytest]",
     "pytest",
     "pytest",
-    # lint
+]
+lint = [
     "flake8",
     "flake8",
     "mypy",
     "mypy",
     "django-stubs",
     "django-stubs",
 ]
 ]
 
 
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+[project.scripts]
+archivebox = "archivebox.cli:main"
+
+
 [tool.pdm.scripts]
 [tool.pdm.scripts]
 lint = "./bin/lint.sh"
 lint = "./bin/lint.sh"
 test = "./bin/test.sh"
 test = "./bin/test.sh"
 # all = {composite = ["lint mypackage/", "test -v tests/"]}
 # all = {composite = ["lint mypackage/", "test -v tests/"]}
 
 
-[project.optional-dependencies]
-sonic = [
-    # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
-    # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
-    "sonic-client>=0.0.5",
-]
-ldap = [
-    # apt install libldap2-dev libsasl2-dev
-    "python-ldap>=3.4.3",
-    "django-auth-ldap>=4.1.0",
-]
-# playwright = [
-#     platform_machine isnt respected by pdm export -o requirements.txt, this breaks arm/v7
-#     "playwright>=1.39.0; platform_machine != 'armv7l'",
-# ]
+[tool.pytest.ini_options]
+testpaths = [ "tests" ]
 
 
-[project.scripts]
-archivebox = "archivebox.cli:main"
+[tool.mypy]
+mypy_path = "archivebox"
+namespace_packages = true
+explicit_package_bases = true
+# follow_imports = "silent"
+# ignore_missing_imports = true
+# disallow_incomplete_defs = true
+# disallow_untyped_defs = true
+# disallow_untyped_decorators = true
+# exclude = "pdm/(pep582/|models/in_process/.+\\.py)"
+plugins = ["mypy_django_plugin.main"]
 
 
-[build-system]
-requires = ["pdm-backend"]
-build-backend = "pdm.backend"
 
 
 
 
 [project.urls]
 [project.urls]

+ 33 - 27
requirements.txt

@@ -1,54 +1,60 @@
 # This file is @generated by PDM.
 # This file is @generated by PDM.
 # Please do not edit it manually.
 # Please do not edit it manually.
 
 
-asgiref==3.7.2
+asgiref==3.8.1
 asttokens==2.4.1
 asttokens==2.4.1
 brotli==1.1.0; implementation_name == "cpython"
 brotli==1.1.0; implementation_name == "cpython"
 brotlicffi==1.1.0.0; implementation_name != "cpython"
 brotlicffi==1.1.0.0; implementation_name != "cpython"
-certifi==2023.11.17
+certifi==2024.2.2
 cffi==1.16.0; implementation_name != "cpython"
 cffi==1.16.0; implementation_name != "cpython"
 charset-normalizer==3.3.2
 charset-normalizer==3.3.2
 colorama==0.4.6; sys_platform == "win32"
 colorama==0.4.6; sys_platform == "win32"
-croniter==2.0.1
+croniter==2.0.5
 dateparser==1.2.0
 dateparser==1.2.0
 decorator==5.1.1
 decorator==5.1.1
-django==3.1.14
-django-auth-ldap==4.1.0
-django-extensions==3.1.5
-exceptiongroup==1.2.0; python_version < "3.11"
+django==4.2.11
+django-auth-ldap==4.8.0
+django-extensions==3.2.3
+exceptiongroup==1.2.1; python_version < "3.11"
 executing==2.0.1
 executing==2.0.1
-idna==3.6
-ipython==8.18.1
+feedparser==6.0.11
+greenlet==3.0.3; platform_machine != "armv7l"
+idna==3.7
+ipython==8.23.0
 jedi==0.19.1
 jedi==0.19.1
-matplotlib-inline==0.1.6
+matplotlib-inline==0.1.7
 mutagen==1.47.0
 mutagen==1.47.0
 mypy-extensions==1.0.0
 mypy-extensions==1.0.0
-parso==0.8.3
-pexpect==4.9.0; sys_platform != "win32"
+parso==0.8.4
+pexpect==4.9.0; sys_platform != "win32" and sys_platform != "emscripten"
+playwright==1.43.0; platform_machine != "armv7l"
 prompt-toolkit==3.0.43
 prompt-toolkit==3.0.43
-ptyprocess==0.7.0; sys_platform != "win32"
+ptyprocess==0.7.0; sys_platform != "win32" and sys_platform != "emscripten"
 pure-eval==0.2.2
 pure-eval==0.2.2
-pyasn1==0.5.1
-pyasn1-modules==0.3.0
-pycparser==2.21; implementation_name != "cpython"
-pycryptodomex==3.19.1
+pyasn1==0.6.0
+pyasn1-modules==0.4.0
+pycparser==2.22; implementation_name != "cpython"
+pycryptodomex==3.20.0
+pyee==11.1.0; platform_machine != "armv7l"
 pygments==2.17.2
 pygments==2.17.2
 python-crontab==3.0.0
 python-crontab==3.0.0
-python-dateutil==2.8.2
+python-dateutil==2.9.0.post0
 python-ldap==3.4.4
 python-ldap==3.4.4
-pytz==2023.3.post1
-regex==2023.12.25
+pytz==2024.1
+regex==2024.4.16
 requests==2.31.0
 requests==2.31.0
+setuptools==69.5.1
+sgmllib3k==1.0.0
 six==1.16.0
 six==1.16.0
 sonic-client==1.0.0
 sonic-client==1.0.0
-sqlparse==0.4.4
+sqlparse==0.5.0
 stack-data==0.6.3
 stack-data==0.6.3
-traitlets==5.14.1
-typing-extensions==4.9.0; python_version < "3.11"
-tzdata==2023.4; platform_system == "Windows"
+traitlets==5.14.3
+typing-extensions==4.11.0; python_version < "3.12" or platform_machine != "armv7l"
+tzdata==2024.1; sys_platform == "win32" or platform_system == "Windows"
 tzlocal==5.2
 tzlocal==5.2
-urllib3==2.1.0
+urllib3==2.2.1
 w3lib==2.1.2
 w3lib==2.1.2
-wcwidth==0.2.12
+wcwidth==0.2.13
 websockets==12.0
 websockets==12.0
-yt-dlp==2023.12.30
+yt-dlp==2024.4.9

+ 1 - 1
tests/mock_server/server.py

@@ -50,4 +50,4 @@ def redirect_to_static(filename):
 
 
 
 
 def start():
 def start():
-    run(host='localhost', port=8080)
+    run(host='localhost', port=8080, quiet=True)

+ 1 - 0
tests/mock_server/templates/example-single.jsonl

@@ -0,0 +1 @@
+{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}

+ 24 - 0
tests/mock_server/templates/example.atom

@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed
+ xml:lang="en"
+ xmlns="http://www.w3.org/2005/Atom"
+>
+ <id>http://www.example.com/</id>
+ <title>Example of an Atom feed</title>
+ <link rel="self" type="application/atom+xml" href="http://www.example.com/index.atom" />
+ <link rel="alternate" type="text/html" href="http://www.example.com/" />
+ <author>
+  <name>Jim Winstead</name>
+ </author>
+ <updated>2024-02-26T03:18:26Z</updated>
+ <entry>
+  <title>Example</title>
+  <link rel="alternate" type="text/html" href="http://127.0.0.1:8080/static/example.com.html" />
+  <id>tag:example.com,2024-02-25:3319</id>
+  <updated>2024-02-26T03:18:26Z</updated>
+  <published>2024-02-25T19:18:25-08:00</published>
+  <category term="Tag1" scheme="http://example.com/archive" />
+  <category term="Tag2" scheme="http://example.com/archive" />
+  <content type="html">This is some &lt;b&gt;content&lt;/b&gt;</content>
+ </entry>
+</feed>

+ 6 - 0
tests/mock_server/templates/example.json

@@ -0,0 +1,6 @@
+[
+{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"},
+{"href":"http://127.0.0.1:8080/static/iana.org.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:43Z","shared":"no","toread":"no","tags":"Tag3,Tag4 with Space"},
+{"href":"http://127.0.0.1:8080/static/shift_jis.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:44Z","shared":"no","toread":"no","tags":["Tag5","Tag6 with Space"]},
+{"href":"http://127.0.0.1:8080/static/title_og_with_html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:45Z","shared":"no","toread":"no"}
+]

+ 2 - 0
tests/mock_server/templates/example.json.bad

@@ -0,0 +1,2 @@
+this line would cause problems but --parser=json will actually skip it
+[{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}]

+ 4 - 0
tests/mock_server/templates/example.jsonl

@@ -0,0 +1,4 @@
+{"href":"http://127.0.0.1:8080/static/example.com.html","description":"Example","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"Tag1 Tag2","trap":"http://www.example.com/should-not-exist"}
+{"href":"http://127.0.0.1:8080/static/iana.org.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:43Z","shared":"no","toread":"no","tags":"Tag3,Tag4 with Space"}
+{"href":"http://127.0.0.1:8080/static/shift_jis.html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:44Z","shared":"no","toread":"no","tags":["Tag5","Tag6 with Space"]}
+{"href":"http://127.0.0.1:8080/static/title_og_with_html","description":"Example 2","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:45Z","shared":"no","toread":"no"}

+ 32 - 0
tests/mock_server/templates/example.rss

@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="utf-8"?>
+<rss version="2.0"
+     xmlns:dc="http://purl.org/dc/elements/1.1/"
+     xmlns:admin="http://webns.net/mvcb/"
+     xmlns:content="http://purl.org/rss/1.0/modules/content/"
+     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+<channel>
+  <title>Sample Feed</title>
+  <link>http://example.org/</link>
+  <description>For documentation only</description>
+  <dc:language>en-us</dc:language>
+  <dc:creator>Nobody ([email protected])</dc:creator>
+  <dc:rights>Public domain</dc:rights>
+  <dc:date>2024-02-26T17:28:12-08:00</dc:date>
+  <admin:generatorAgent rdf:resource="http://www.example.org/"/>
+  <admin:errorReportsTo rdf:resource="mailto:[email protected]"/>
+
+  <item>
+    <title>First!</title>
+    <link>http://127.0.0.1:8080/static/example.com.html</link>
+    <guid isPermaLink="false">[email protected]</guid>
+    <description>
+      This has a description.
+    </description>
+    <dc:subject>Tag1 Tag2</dc:subject>
+    <dc:date>2024-02-26T17:28:12-08:00</dc:date>
+    <content:encoded><![CDATA[
+      This has a <b>description</b>.]]>
+    </content:encoded>
+  </item>
+</channel>
+</rss>

+ 195 - 0
tests/test_add.py

@@ -91,3 +91,198 @@ def test_extract_input_uses_only_passed_extractors(tmp_path, process):
 
 
     assert (archived_item_path / "warc").exists()
     assert (archived_item_path / "warc").exists()
     assert not (archived_item_path / "singlefile.html").exists()
     assert not (archived_item_path / "singlefile.html").exists()
+
+def test_json(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example.json', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=json"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    urls = c.execute("SELECT url from core_snapshot").fetchall()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    urls = list(map(lambda x: x[0], urls))
+    assert "http://127.0.0.1:8080/static/example.com.html" in urls
+    assert "http://127.0.0.1:8080/static/iana.org.html" in urls
+    assert "http://127.0.0.1:8080/static/shift_jis.html" in urls
+    assert "http://127.0.0.1:8080/static/title_og_with_html" in urls
+    # if the following URL appears, we must have fallen back to another parser
+    assert not "http://www.example.com/should-not-exist" in urls
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1" in tags
+    assert "Tag2" in tags
+    assert "Tag3" in tags
+    assert "Tag4 with Space" in tags
+    assert "Tag5" in tags
+    assert "Tag6 with Space" in tags
+
+def test_json_with_leading_garbage(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example.json.bad', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=json"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    urls = c.execute("SELECT url from core_snapshot").fetchall()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    urls = list(map(lambda x: x[0], urls))
+    assert "http://127.0.0.1:8080/static/example.com.html" in urls
+    # if the following URL appears, we must have fallen back to another parser
+    assert not "http://www.example.com/should-not-exist" in urls
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1" in tags
+    assert "Tag2" in tags
+
+def test_generic_rss(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example.rss', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=rss"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    urls = c.execute("SELECT url from core_snapshot").fetchall()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    urls = list(map(lambda x: x[0], urls))
+    assert "http://127.0.0.1:8080/static/example.com.html" in urls
+    # if the following URL appears, we must have fallen back to another parser
+    assert not "http://purl.org/dc/elements/1.1/" in urls
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1 Tag2" in tags
+
+def test_pinboard_rss(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example.rss', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=pinboard_rss"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1" in tags
+    assert "Tag2" in tags
+
+def test_atom(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example.atom', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=rss"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    urls = c.execute("SELECT url from core_snapshot").fetchall()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    urls = list(map(lambda x: x[0], urls))
+    assert "http://127.0.0.1:8080/static/example.com.html" in urls
+    # if the following URL appears, we must have fallen back to another parser
+    assert not "http://www.w3.org/2005/Atom" in urls
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1" in tags
+    assert "Tag2" in tags
+
+def test_jsonl(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example.jsonl', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=jsonl"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    urls = c.execute("SELECT url from core_snapshot").fetchall()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    urls = list(map(lambda x: x[0], urls))
+    assert "http://127.0.0.1:8080/static/example.com.html" in urls
+    assert "http://127.0.0.1:8080/static/iana.org.html" in urls
+    assert "http://127.0.0.1:8080/static/shift_jis.html" in urls
+    assert "http://127.0.0.1:8080/static/title_og_with_html" in urls
+    # if the following URL appears, we must have fallen back to another parser
+    assert not "http://www.example.com/should-not-exist" in urls
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1" in tags
+    assert "Tag2" in tags
+    assert "Tag3" in tags
+    assert "Tag4 with Space" in tags
+    assert "Tag5" in tags
+    assert "Tag6 with Space" in tags
+
+def test_jsonl_single(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example-single.jsonl', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=jsonl"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    urls = c.execute("SELECT url from core_snapshot").fetchall()
+    tags = c.execute("SELECT name from core_tag").fetchall()
+    conn.commit()
+    conn.close()
+
+    urls = list(map(lambda x: x[0], urls))
+    assert "http://127.0.0.1:8080/static/example.com.html" in urls
+    # if the following URL appears, we must have fallen back to another parser
+    assert not "http://www.example.com/should-not-exist" in urls
+
+    tags = list(map(lambda x: x[0], tags))
+    assert "Tag1" in tags
+    assert "Tag2" in tags
+
+# make sure that JSON parser rejects a single line of JSONL which is valid
+# JSON but not our expected format
+def test_json_single(tmp_path, process, disable_extractors_dict):
+    with open('../../mock_server/templates/example-single.jsonl', 'r', encoding='utf-8') as f:
+        arg_process = subprocess.run(
+            ["archivebox", "add", "--index-only", "--parser=json"],
+            stdin=f,
+            capture_output=True,
+            env=disable_extractors_dict,
+        )
+
+    assert 'expects list of objects' in arg_process.stderr.decode("utf-8")

Some files were not shown because too many files changed in this diff