#
# Copyright (c) Contributors to the Open 3D Engine Project.
# For complete copyright and license terms please see the LICENSE at the root of this distribution.
#
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
#
import argparse
import hashlib
import os
import pathlib
import platform
import subprocess
import sys
import tarfile
import urllib.parse
import urllib.request
import zipfile

  17. SUPPORTED_HASH_ALGORITHMS = {
  18. 'md5': lambda: hashlib.md5(),
  19. 'sha1': lambda: hashlib.sha1(),
  20. 'sha224': lambda: hashlib.sha224(),
  21. 'sha256': lambda: hashlib.sha256()
  22. }
  23. ARCHIVE_EXTS_ZIP = { '.zip' }
  24. ARCHIVE_EXTS_TAR = { '.tgz', 'gz', '.xz', '.tar.xz' }
  25. ARCHIVE_EXTS_7ZIP = { '.7z' }
  26. INDENT=' '*4
  27. def hash_file(file_path:str, hash_algorithm:str='md5')->str:
  28. """
  29. Calculate a hash based on the input file path and selected hash algorithm and return a hex-string representation of it
  30. (Refer to SUPPORTED_HASH_ALGORITHMS for the supported hash algorithms)
  31. :param file_path: The path to the file to calculate the hash
  32. :param hash_algorithm: The desired hash algorith. See 'SUPPORTED_HASH_ALGORITHMS' for the list of algorithms
  33. """
  34. if not os.path.isfile(file_path):
  35. raise FileNotFoundError(f"File to hash {file_path} does not exist.")
  36. hasher_create = SUPPORTED_HASH_ALGORITHMS.get(hash_algorithm)
  37. if not hasher_create:
  38. raise KeyError("Invalid hash algorithm selected for hash calculation: '{hash_algorithm}'")
  39. hasher = hasher_create()
  40. # we don't follow symlinks here, this is strictly to check actual packages.
  41. with open(file_path, 'rb') as afile:
  42. buf = afile.read()
  43. hasher.update(buf)
  44. hash_result = hasher.hexdigest()
  45. return hash_result
  46. def download_and_verify(src_url: str, src_zip_hash:str, src_zip_hash_algorithm:str,target_folder:str)->str:
  47. """
  48. Calculate a hash based on the input file path and selected hash algorithm and return a hex-string representation of it
  49. Supported hash algorithms are: md5, sha1, sha224, sha256
  50. :param src_url: The full url of the archive file to download
  51. :param src_zip_hash: The expected hash to use to verify the integrity of the downloaded file. If 'None', then
  52. verification is replaced by the calculation and reporting of the result hash of the downloaded package.
  53. :param src_zip_hash_algorithm: The desired hash algorith. See 'SUPPORTED_HASH_ALGORITHMS' for the list of algorithms
  54. :param target_folder: The target folder to download the archive file to
  55. """
  56. target_folder_path = pathlib.Path(target_folder)
  57. src_filename = os.path.basename(src_url)
  58. tgt_filename = target_folder_path / src_filename
  59. # If the file has been downloaded, check its hash
  60. current_hash = None
  61. if tgt_filename.is_file():
  62. current_hash = hash_file(file_path=str(tgt_filename),
  63. hash_algorithm=src_zip_hash_algorithm)
  64. print(f"Current hash of {tgt_filename}:{current_hash}")
  65. if current_hash and current_hash == src_zip_hash:
  66. print(f"{INDENT}File '{src_filename}' already downloaded to {tgt_filename}, skipping.")
  67. return str(tgt_filename)
  68. print(f"{INDENT}Downloading {src_url}")
  69. if tgt_filename.exists():
  70. tgt_filename.unlink()
  71. urllib.request.urlretrieve(src_url, tgt_filename)
  72. # Calculate the downloaded file hash
  73. downloaded_hash = hash_file(file_path=str(tgt_filename),
  74. hash_algorithm=src_zip_hash_algorithm)
  75. if src_zip_hash and src_zip_hash != downloaded_hash:
  76. raise RuntimeError(f"Hash {src_zip_hash_algorithm} verification failed for {tgt_filename} (downloaded hash: {downloaded_hash}")
  77. print(f"{INDENT}Package hash : ({src_zip_hash_algorithm}) {downloaded_hash}")
  78. return str(tgt_filename)
  79. def extract_package(src_package_file: str, target_folder:str):
  80. src_package_file_path = pathlib.Path(src_package_file)
  81. target_folder_path = pathlib.Path(target_folder)
  82. if not src_package_file_path.is_file():
  83. raise FileNotFoundError(f"Package to extract '{src_package_file_path}' does not exist.")
  84. package_name, package_ext = os.path.splitext(str(src_package_file_path.name))
  85. destination_path = target_folder_path / package_name
  86. print(f"{INDENT}Extracting {src_package_file_path} to {destination_path}")
  87. if package_ext in ARCHIVE_EXTS_ZIP:
  88. import zipfile
  89. with zipfile.ZipFile(str(src_package_file_path.resolve()), 'r') as dep_zip:
  90. dep_zip.extractall(destination_path)
  91. elif package_ext in ARCHIVE_EXTS_TAR:
  92. import tarfile
  93. with tarfile.open(str(src_package_file_path.resolve())) as tar_file:
  94. tarfile.extractall(destination_path)
  95. elif package_ext in ARCHIVE_EXTS_7ZIP:
  96. try:
  97. os.makedirs(destination_path, exist_ok=True)
  98. subprocess.call(['7z', 'x', '-y', str(src_package_file_path.resolve())], cwd=destination_path)
  99. except Exception:
  100. raise RuntimeError(f"Archive file {src_package_file_path} requires 7Zip to be installed and on the command path. ")
  101. else:
  102. print(f"Extracted to {destination_path}")
  103. else:
  104. raise RuntimeError(f"Unsupported package extension: {package_ext}")
  105. return destination_path
  106. if __name__ == "__main__":
  107. parser = argparse.ArgumentParser(description="Download, verify hash, and unpack remote zip file")
  108. parser.add_argument('src-url',
  109. help='The download url of the zip package to download',
  110. nargs=1)
  111. parser.add_argument('--hash-algorithm',
  112. help=f'The hash algorithm to use to calculate the fingerprint ({" ".join(SUPPORTED_HASH_ALGORITHMS.keys())})',
  113. default='sha256',
  114. required=False)
  115. parser.add_argument('--hash',
  116. help='The hash fingerprint to validate against',
  117. default='',
  118. required=False)
  119. parser.add_argument('--target-folder',
  120. help='The target location for the download',
  121. required=True)
  122. parsed_args = parser.parse_args()
  123. downloaded_package_file = download_and_verify(src_url=parsed_args.src_url[0],
  124. src_zip_hash=parsed_args.hash,
  125. src_zip_hash_algorithm=parsed_args.hash_algorithm,
  126. target_folder=parsed_args.target_folder)
  127. extracted_package_path = extract_package(src_package_file=downloaded_package_file,
  128. target_folder=parsed_args.target_folder)
  129. sys.exit(1)