Browse Source

add extra information to headers extractor output

Nick Sweeting 1 year ago
parent
commit
c6faa9ab76
2 changed files with 5 additions and 1 deletion
  1. 1 1
      Dockerfile
  2. 4 0
      archivebox/util.py

+ 1 - 1
Dockerfile

@@ -15,8 +15,8 @@
 # Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
 
 
-# Use Debian 12 w/ faster package updates: https://packages.debian.org/bookworm-backports/
 FROM python:3.11-slim-bookworm
+# Uses Debian 12 w/ faster-updating apt-lists added below: https://packages.debian.org/bookworm-backports/
 
 LABEL name="archivebox" \
     maintainer="Nick Sweeting <[email protected]>" \

+ 4 - 0
archivebox/util.py

@@ -210,7 +210,11 @@ def get_headers(url: str, timeout: int=None) -> str:
     
     return pyjson.dumps(
         {
+            'URL': url,
             'Status-Code': response.status_code,
+            'Elapsed': response.elapsed,
+            'Encoding': response.encoding,
+            'Apparent-Encoding': response.apparent_encoding,
             **dict(response.headers),
         },
         indent=4,