Browse Source

new version handling and absolute imports

Nick Sweeting 6 years ago
parent
commit
93216a3c3e

+ 12 - 9
archivebox/archive.py

@@ -13,34 +13,37 @@ __package__ = 'archivebox'
 import os
 import sys
 
+
 from typing import List, Optional
 
-from schema import Link
-from links import links_after_timestamp
-from index import write_links_index, load_links_index
-from archive_methods import archive_link
-from config import (
+from .schema import Link
+from .links import links_after_timestamp
+from .index import write_links_index, load_links_index
+from .archive_methods import archive_link
+from .config import (
     ONLY_NEW,
     OUTPUT_DIR,
-    GIT_SHA,
+    PYTHON_DIR,
+    VERSION,
 )
-from util import (
+from .util import (
     enforce_types,
     save_remote_source,
     save_stdin_source,
 )
-from logs import (
+from .logs import (
     log_archiving_started,
     log_archiving_paused,
     log_archiving_finished,
 )
 
 __AUTHOR__ = 'Nick Sweeting <[email protected]>'
-__VERSION__ = GIT_SHA[:9]
+__VERSION__ = VERSION
 __DESCRIPTION__ = 'ArchiveBox: The self-hosted internet archive.'
 __DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox/wiki'
 
 
+
 def print_help():
     print('ArchiveBox: The self-hosted internet archive.\n')
     print("Documentation:")

+ 8 - 7
archivebox/archive_methods.py

@@ -4,13 +4,13 @@ from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime
 
-from schema import Link, ArchiveResult, ArchiveError
-from index import (
+from .schema import Link, ArchiveResult, ArchiveError
+from .index import (
     write_link_index,
     patch_links_index,
     load_json_link_index,
 )
-from config import (
+from .config import (
     CURL_BINARY,
     GIT_BINARY,
     WGET_BINARY,
@@ -31,7 +31,7 @@ from config import (
     ANSI,
     OUTPUT_DIR,
     GIT_DOMAINS,
-    GIT_SHA,
+    VERSION,
     WGET_USER_AGENT,
     CHECK_SSL_VALIDITY,
     COOKIES_FILE,
@@ -43,7 +43,7 @@ from config import (
     ONLY_NEW,
     WGET_AUTO_COMPRESSION,
 )
-from util import (
+from .util import (
     enforce_types,
     domain,
     extension,
@@ -58,7 +58,7 @@ from util import (
     run, PIPE, DEVNULL,
     Link,
 )
-from logs import (
+from .logs import (
     log_link_archiving_started,
     log_link_archiving_finished,
     log_archive_method_started,
@@ -123,6 +123,7 @@ def archive_link(link: Link, page=None) -> Link:
         if was_changed:
             patch_links_index(link)
 
+
         log_link_archiving_finished(link.link_dir, link, is_new, stats)
 
     except KeyboardInterrupt:
@@ -606,7 +607,7 @@ def archive_dot_org(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveR
         CURL_BINARY,
         '--location',
         '--head',
-        '--user-agent', 'ArchiveBox/{} (+https://github.com/pirate/ArchiveBox/)'.format(GIT_SHA),  # be nice to the Archive.org people and show them where all this ArchiveBox traffic is coming from
+        '--user-agent', 'ArchiveBox/{} (+https://github.com/pirate/ArchiveBox/)'.format(VERSION),  # be nice to the Archive.org people and show them where all this ArchiveBox traffic is coming from
         '--max-time', str(timeout),
         *(() if CHECK_SSL_VALIDITY else ('--insecure',)),
         submit_url,

+ 4 - 12
archivebox/config.py

@@ -40,7 +40,7 @@ SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True'
 CHECK_SSL_VALIDITY =     os.getenv('CHECK_SSL_VALIDITY',     'True'             ).lower() == 'true'
 RESOLUTION =             os.getenv('RESOLUTION',             '1440,2000'        )
 GIT_DOMAINS =            os.getenv('GIT_DOMAINS',            'github.com,bitbucket.org,gitlab.com').split(',')
-WGET_USER_AGENT =        os.getenv('WGET_USER_AGENT',        'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
+WGET_USER_AGENT =        os.getenv('WGET_USER_AGENT',        'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
 COOKIES_FILE =           os.getenv('COOKIES_FILE',           None)
 CHROME_USER_DATA_DIR =   os.getenv('CHROME_USER_DATA_DIR',   None)
 CHROME_HEADLESS =        os.getenv('CHROME_HEADLESS',        'True'             ).lower() == 'true'
@@ -163,21 +163,13 @@ def find_chrome_data_dir() -> Optional[str]:
     return None
 
 
-def get_git_version() -> str:
-    """get the git commit hash of the python code folder (aka code version)"""
-    try:
-        return run([GIT_BINARY, 'rev-list', '-1', 'HEAD', './'], stdout=PIPE, cwd=REPO_DIR).stdout.strip().decode()
-    except Exception:
-        print('[!] Warning: unable to determine git version, is git installed and in your $PATH?')
-    return 'unknown'
-
-
 # ******************************************************************************
 # ************************ Environment & Dependencies **************************
 # ******************************************************************************
 
 try:
-    GIT_SHA = get_git_version()
+    VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip()
+    GIT_SHA = VERSION.split('+')[1]
 
     ### Terminal Configuration
     TERM_WIDTH = lambda: shutil.get_terminal_size((100, 10)).columns
@@ -234,7 +226,7 @@ try:
         WGET_AUTO_COMPRESSION = not run([WGET_BINARY, "--compression=auto", "--help"], stdout=DEVNULL).returncode
         
     WGET_USER_AGENT = WGET_USER_AGENT.format(
-        GIT_SHA=GIT_SHA[:9],
+        VERSION=VERSION,
         WGET_VERSION=WGET_VERSION or '',
     )
 

+ 8 - 7
archivebox/index.py

@@ -6,15 +6,16 @@ from string import Template
 from typing import List, Tuple, Iterator, Optional
 from dataclasses import fields
 
-from schema import Link, ArchiveIndex, ArchiveResult
-from config import (
+from .schema import Link, ArchiveResult
+from .config import (
     OUTPUT_DIR,
     TEMPLATES_DIR,
+    VERSION,
     GIT_SHA,
     FOOTER_INFO,
     TIMEOUT,
 )
-from util import (
+from .util import (
     merge_links,
     chmod_file,
     urlencode,
@@ -25,9 +26,9 @@ from util import (
     TimedProgress,
     copy_and_overwrite,
 )
-from parse import parse_links
-from links import validate_links
-from logs import (
+from .parse import parse_links
+from .links import validate_links
+from .logs import (
     log_indexing_process_started,
     log_indexing_started,
     log_indexing_finished,
@@ -178,8 +179,8 @@ def write_html_links_index(out_dir: str, links: List[Link], finished: bool=False
         'date_updated': datetime.now().strftime('%Y-%m-%d'),
         'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
         'footer_info': FOOTER_INFO,
+        'version': VERSION,
         'git_sha': GIT_SHA,
-        'short_git_sha': GIT_SHA[:8],
         'rows': link_rows,
         'status': 'finished' if finished else 'running',
     }

+ 2 - 2
archivebox/links.py

@@ -22,8 +22,8 @@ Link {
 from typing import Iterable
 from collections import OrderedDict
 
-from schema import Link
-from util import (
+from .schema import Link
+from .util import (
     scheme,
     fuzzy_url,
     merge_links,

+ 2 - 2
archivebox/parse.py

@@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable
 from datetime import datetime
 import xml.etree.ElementTree as etree
 
-from config import TIMEOUT
-from util import (
+from .config import TIMEOUT
+from .util import (
     htmldecode,
     str_between,
     URL_REGEX,

+ 16 - 16
archivebox/schema.py

@@ -108,60 +108,60 @@ class Link:
 
     @property
     def link_dir(self) -> str:
-        from config import ARCHIVE_DIR
+        from .config import ARCHIVE_DIR
         return os.path.join(ARCHIVE_DIR, self.timestamp)
 
     @property
     def archive_path(self) -> str:
-        from config import ARCHIVE_DIR_NAME
+        from .config import ARCHIVE_DIR_NAME
         return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)
     
     ### URL Helpers
     @property
     def urlhash(self):
-        from util import hashurl
+        from .util import hashurl
 
         return hashurl(self.url)
 
     @property
     def extension(self) -> str:
-        from util import extension
+        from .util import extension
         return extension(self.url)
 
     @property
     def domain(self) -> str:
-        from util import domain
+        from .util import domain
         return domain(self.url)
 
     @property
     def path(self) -> str:
-        from util import path
+        from .util import path
         return path(self.url)
 
     @property
     def basename(self) -> str:
-        from util import basename
+        from .util import basename
         return basename(self.url)
 
     @property
     def base_url(self) -> str:
-        from util import base_url
+        from .util import base_url
         return base_url(self.url)
 
     ### Pretty Printing Helpers
     @property
     def bookmarked_date(self) -> Optional[str]:
-        from util import ts_to_date
+        from .util import ts_to_date
         return ts_to_date(self.timestamp) if self.timestamp else None
 
     @property
     def updated_date(self) -> Optional[str]:
-        from util import ts_to_date
+        from .util import ts_to_date
         return ts_to_date(self.updated) if self.updated else None
 
     @property
     def oldest_archive_date(self) -> Optional[datetime]:
-        from util import ts_to_date
+        from .util import ts_to_date
 
         most_recent = min(
             (ts_to_date(result.start_ts)
@@ -173,7 +173,7 @@ class Link:
 
     @property
     def newest_archive_date(self) -> Optional[datetime]:
-        from util import ts_to_date
+        from .util import ts_to_date
 
         most_recent = max(
             (ts_to_date(result.start_ts)
@@ -197,13 +197,13 @@ class Link:
 
     @property
     def is_static(self) -> bool:
-        from util import is_static_file
+        from .util import is_static_file
         return is_static_file(self.url)
 
     @property
     def is_archived(self) -> bool:
-        from config import ARCHIVE_DIR
-        from util import domain
+        from .config import ARCHIVE_DIR
+        from .util import domain
 
         return os.path.exists(os.path.join(
             ARCHIVE_DIR,
@@ -240,7 +240,7 @@ class Link:
         return latest
 
     def canonical_outputs(self) -> Dict[str, Optional[str]]:
-        from util import wget_output_path
+        from .util import wget_output_path
         canonical = {
             'index_url': 'index.html',
             'favicon_url': 'favicon.ico',

+ 1 - 1
archivebox/templates/index.html

@@ -209,7 +209,7 @@
             <center>
                 <small>
                     Archive created using <a href="https://github.com/pirate/ArchiveBox" title="Github">ArchiveBox</a>
-                    version <a href="https://github.com/pirate/ArchiveBox/commit/$git_sha" title="Git commit">$short_git_sha</a> &nbsp; | &nbsp; 
+                    version <a href="https://github.com/pirate/ArchiveBox/commit/$git_sha" title="Git commit">$version</a> &nbsp; | &nbsp; 
                     Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
                     <br/><br/>
                     $footer_info

+ 5 - 5
archivebox/util.py

@@ -25,8 +25,8 @@ from subprocess import (
 
 from base32_crockford import encode as base32_encode
 
-from schema import Link
-from config import (
+from .schema import Link
+from .config import (
     ANSI,
     TERM_WIDTH,
     SOURCES_DIR,
@@ -37,9 +37,9 @@ from config import (
     CHECK_SSL_VALIDITY,
     WGET_USER_AGENT,
     CHROME_OPTIONS,
-    PYTHON_PATH,
+    PYTHON_DIR,
 )
-from logs import pretty_path
+from .logs import pretty_path
 
 ### Parsing Helpers
 
@@ -334,7 +334,7 @@ def wget_output_path(link: Link) -> Optional[str]:
 
 @enforce_types
 def read_js_script(script_name: str) -> str:
-    script_path = os.path.join(PYTHON_PATH, 'scripts', script_name)
+    script_path = os.path.join(PYTHON_DIR, 'scripts', script_name)
 
     with open(script_path, 'r') as f:
         return f.read().split('// INFO BELOW HERE')[0].strip()