Browse Source

Merge branch 'dev' into feature/kludge-984-UTF8-bug

Joseph Turian 3 years ago
parent
commit
07de4a79a1
3 changed files with 7 additions and 2 deletions
  1. 5 0
      Dockerfile
  2. 1 1
      archivebox/extractors/__init__.py
  3. 1 1
      archivebox/parsers/__init__.py

+ 5 - 0
Dockerfile

@@ -1,6 +1,8 @@
 # This is the Dockerfile for ArchiveBox, it bundles the following dependencies:
 # This is the Dockerfile for ArchiveBox, it bundles the following dependencies:
 #     python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
 #     python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
 # Usage:
 # Usage:
+#     git submodule update --init --recursive
+#     git pull --recurse-submodules
 #     docker build . -t archivebox --no-cache
 #     docker build . -t archivebox --no-cache
 #     docker run -v "$PWD/data":/data archivebox init
 #     docker run -v "$PWD/data":/data archivebox init
 #     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
 #     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
@@ -9,6 +11,9 @@
 # Multi-arch build:
 # Multi-arch build:
 #     docker buildx create --use
 #     docker buildx create --use
 #     docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
 #     docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
+#
+# Read more about [developing
+# ArchiveBox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
 
 
 
 
 FROM python:3.10-slim-bullseye
 FROM python:3.10-slim-bullseye

+ 1 - 1
archivebox/extractors/__init__.py

@@ -128,7 +128,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s
                 else:
                 else:
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     # print('{black}      X {}{reset}'.format(method_name, **ANSI))
                     stats['skipped'] += 1
                     stats['skipped'] += 1
-            except Exception:
+            except Exception as e:
                 # Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
                 # Disabled until https://github.com/ArchiveBox/ArchiveBox/issues/984
                 # and https://github.com/ArchiveBox/ArchiveBox/issues/1014
                 # and https://github.com/ArchiveBox/ArchiveBox/issues/1014
                 # are fixed.
                 # are fixed.

+ 1 - 1
archivebox/parsers/__init__.py

@@ -154,7 +154,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
 
 
     for entry in raw_text.split():
     for entry in raw_text.split():
         try:
         try:
-            if Path(entry).exists:
+            if Path(entry).exists():
                 referenced_texts += Path(entry).read_text()
                 referenced_texts += Path(entry).read_text()
         except Exception as err:
         except Exception as err:
             print(err)
             print(err)