ArchiveReader.cpp 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include "ArchiveReader.h"
  9. #include <AzCore/IO/ByteContainerStream.h>
  10. #include <AzCore/IO/GenericStreams.h>
  11. #include <AzCore/IO/OpenMode.h>
  12. #include <AzCore/std/parallel/scoped_lock.h>
  13. #include <AzCore/Task/TaskGraph.h>
  14. #include <Archive/ArchiveTypeIds.h>
  15. #include <Compression/DecompressionInterfaceAPI.h>
  16. namespace Archive
  17. {
  18. // Implement TypeInfo, Rtti and Allocator support for the ArchiveReader class
  // NOTE(review): these *_IMPL macros presumably pair with declaration macros in ArchiveReader.h - confirm against the header
  19. AZ_TYPE_INFO_WITH_NAME_IMPL(ArchiveReader, "ArchiveReader", ArchiveReaderTypeId);
  // IArchiveReader is passed alongside ArchiveReader; presumably it is the RTTI base class - confirm
  20. AZ_RTTI_NO_TYPE_INFO_IMPL(ArchiveReader, IArchiveReader);
  // AZ::SystemAllocator is the allocator handed to the class allocator macro
  21. AZ_CLASS_ALLOCATOR_IMPL(ArchiveReader, AZ::SystemAllocator);
  // Default constructor: compiler generated, it does not attempt to mount an archive
  22. ArchiveReader::ArchiveReader() = default;
  // Destructor: unmounts the archive so the reader state and the archive stream are released
  23. ArchiveReader::~ArchiveReader()
  24. {
  // UnmountArchive clears the path map, TOC and header when a stream is open
  // and always resets the archive stream pointer
  25. UnmountArchive();
  26. }
  // Constructs a reader which stores a copy of the supplied settings
  // No archive is mounted by this constructor
  // NOTE(review): the parameter is named writerSettings although its type is ArchiveReaderSettings;
  // looks like a copy/paste leftover - consider renaming it together with the header declaration
  27. ArchiveReader::ArchiveReader(const ArchiveReaderSettings& writerSettings)
  28. : m_settings(writerSettings)
  29. {}
  30. ArchiveReader::ArchiveReader(AZ::IO::PathView archivePath, const ArchiveReaderSettings& writerSettings)
  31. : m_settings(writerSettings)
  32. {
  33. MountArchive(archivePath);
  34. }
  35. ArchiveReader::ArchiveReader(ArchiveStreamPtr archiveStream, const ArchiveReaderSettings& writerSettings)
  36. : m_settings(writerSettings)
  37. {
  38. MountArchive(AZStd::move(archiveStream));
  39. }
  40. bool ArchiveReader::ReadArchiveHeader(ArchiveHeader& archiveHeader, AZ::IO::GenericStream& archiveStream)
  41. {
  42. // Read the Archive header into memory
  43. AZStd::scoped_lock archiveLock(m_archiveStreamMutex);
  44. archiveStream.Seek(0, AZ::IO::GenericStream::SeekMode::ST_SEEK_BEGIN);
  45. AZ::IO::SizeType bytesRead = archiveStream.Read(sizeof(archiveHeader), &archiveHeader);
  46. archiveStream.Seek(0, AZ::IO::GenericStream::SeekMode::ST_SEEK_BEGIN);
  47. if (bytesRead != sizeof(archiveHeader))
  48. {
  49. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingHeader, ArchiveReaderErrorString::format(
  50. "Archive header should have size %zu, but only %llu bytes were read from the beginning of the archive",
  51. sizeof(archiveHeader), bytesRead) });
  52. }
  53. return true;
  54. }
  55. // Define the Archive TableOfContentReader constructors
  // The default constructor is compiler generated; UnmountArchive assigns a
  // value-initialized instance to m_archiveToc to reset the Table of Contents state
  56. ArchiveReader::ArchiveTableOfContentsReader::ArchiveTableOfContentsReader() = default;
  57. // Stores the buffer containing the Table of Contents raw data
  58. // and an ArchiveTableOfContentsView instance that points into that raw data
  // Both parameters are taken by value and moved into the members
  // NOTE(review): the view was created against the buffer before it was moved here, so this
  // presumably relies on the vector move keeping the same underlying storage - confirm for AZStd::vector
  59. ArchiveReader::ArchiveTableOfContentsReader::ArchiveTableOfContentsReader(AZStd::vector<AZStd::byte> tocBuffer,
  60. ArchiveTableOfContentsView tocView)
  61. : m_tocBuffer(AZStd::move(tocBuffer))
  62. , m_tocView(AZStd::move(tocView))
  63. {}
  64. bool ArchiveReader::ReadArchiveTOC(ArchiveTableOfContentsReader& archiveToc, AZ::IO::GenericStream& archiveStream,
  65. const ArchiveHeader& archiveHeader)
  66. {
  67. // RAII structure which resets the archive stream to offset 0
  68. // when it goes out of scope
  69. struct SeekStreamToBeginRAII
  70. {
  71. ~SeekStreamToBeginRAII()
  72. {
  73. m_stream.Seek(0, AZ::IO::GenericStream::SeekMode::ST_SEEK_BEGIN);
  74. }
  75. AZ::IO::GenericStream& m_stream;
  76. };
  77. if (archiveHeader.m_tocOffset > archiveStream.GetLength())
  78. {
  79. // The TOC offset is invalid since it is after the end of the stream
  80. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingTableOfContents,
  81. ArchiveReaderErrorString::format("TOC offset is invalid. It is pass the end of the stream."
  82. " Offset value %llu, archive stream size %llu",
  83. static_cast<AZ::u64>(archiveHeader.m_tocOffset), archiveStream.GetLength()) });
  84. return false;
  85. }
  86. // Buffer which stores the raw table of contents data from the archive file
  87. AZStd::vector<AZStd::byte> tocBuffer;
  88. // Seek to the location of the Table of Contents
  89. {
  90. AZStd::scoped_lock archiveLock(m_archiveStreamMutex);
  91. // Make sure the archive offset is reset to 0 on return
  92. SeekStreamToBeginRAII seekToBeginScope{ archiveStream };
  93. archiveStream.Seek(archiveHeader.m_tocOffset, AZ::IO::GenericStream::SeekMode::ST_SEEK_BEGIN);
  94. tocBuffer.resize_no_construct(archiveHeader.GetTocStoredSize());
  95. AZ::IO::SizeType bytesRead = archiveStream.Read(tocBuffer.size(), tocBuffer.data());
  96. if (bytesRead != tocBuffer.size())
  97. {
  98. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingTableOfContents,
  99. ArchiveReaderErrorString::format("Unable to read all TOC bytes from the archive."
  100. " The TOC size is %zu, but only %llu bytes were read",
  101. tocBuffer.size(), bytesRead) });
  102. return false;
  103. }
  104. }
  105. // Check if the archive table of contents is compressed
  106. if (archiveHeader.m_tocCompressionAlgoIndex < UncompressedAlgorithmIndex)
  107. {
  108. auto decompressionRegistrar = Compression::DecompressionRegistrar::Get();
  109. if (decompressionRegistrar == nullptr)
  110. {
  111. // The decompression registrar does not exist
  112. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingTableOfContents,
  113. ArchiveReaderErrorString("The Decompression Registry is not available"
  114. " Is the Compression gem active?") });
  115. return false;
  116. }
  117. Compression::CompressionAlgorithmId tocCompressionAlgorithmId =
  118. archiveHeader.m_compressionAlgorithmsIds[archiveHeader.m_tocCompressionAlgoIndex];
  119. Compression::IDecompressionInterface* decompressionInterface =
  120. decompressionRegistrar->FindDecompressionInterface(tocCompressionAlgorithmId);
  121. if (decompressionInterface == nullptr)
  122. {
  123. // Compression algorithm isn't registered with the decompression registrar
  124. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingTableOfContents,
  125. ArchiveReaderErrorString::format("Compression Algorithm %u used by TOC"
  126. " isn't registered with decompression registrar",
  127. AZStd::to_underlying(tocCompressionAlgorithmId)) });
  128. return false;
  129. }
  130. // Resize the uncompressed TOC buffer to be the size of the uncompressed Table of Contents
  131. AZStd::vector<AZStd::byte> uncompressedTocBuffer;
  132. uncompressedTocBuffer.resize_no_construct(archiveHeader.GetUncompressedTocSize());
  133. // Run the compressed toc data through the decompressor
  134. if (Compression::DecompressionResultData decompressionResultData =
  135. decompressionInterface->DecompressBlock(uncompressedTocBuffer, tocBuffer, Compression::DecompressionOptions{});
  136. decompressionResultData)
  137. {
  138. // If decompression succeed, move the uncompressed buffer to the tocBuffer variable
  139. tocBuffer = AZStd::move(uncompressedTocBuffer);
  140. if (decompressionResultData.GetUncompressedByteCount() != tocBuffer.size())
  141. {
  142. // The size of uncompressed size of the data does not match the total uncompressed
  143. // TOC size read from the ArchiveHeader::GetUncompressedTocSize() function
  144. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingTableOfContents,
  145. ArchiveReaderErrorString::format("The uncompressed TOC size %llu does not match the total uncompressed size %zu"
  146. " read from the archive header",
  147. decompressionResultData.GetUncompressedByteCount(), tocBuffer.size()) });
  148. return false;
  149. }
  150. }
  151. }
  152. // Wrap the table of contents in an reader structure that encapsulates the raw tocBuffer data on disk
  153. // and a view into the Table of Contents memory
  154. if (auto tocView = ArchiveTableOfContentsView::CreateFromArchiveHeaderAndBuffer(archiveHeader, tocBuffer);
  155. tocView)
  156. {
  157. archiveToc = ArchiveTableOfContentsReader{ AZStd::move(tocBuffer), AZStd::move(tocView).value() };
  158. }
  159. else
  160. {
  161. // Invoke the error callback indicating an error reading the table of contents
  162. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorReadingTableOfContents, tocView.error().m_errorMessage });
  163. return false;
  164. }
  165. return true;
  166. }
  167. bool ArchiveReader::BuildFilePathMap(const ArchiveTableOfContentsView& tocView)
  168. {
  169. m_pathMap.clear();
  170. // Build a map of file path view to within the FilePathIndex array of the TOC View
  171. auto BuildViewOfFilePaths = [this, filePathBlobTable = &tocView.m_filePathBlob, filePathIndex = 0]
  172. (AZ::u64 filePathBlobOffset, AZ::u16 filePathSize) mutable
  173. {
  174. AZ::IO::PathView contentPathView(filePathBlobTable->substr(filePathBlobOffset, filePathSize));
  175. m_pathMap.emplace(contentPathView, filePathIndex++);
  176. };
  177. EnumerateFilePathIndexOffsets(BuildViewOfFilePaths, tocView);
  178. return true;
  179. }
  180. bool ArchiveReader::MountArchive(AZ::IO::PathView archivePath)
  181. {
  182. UnmountArchive();
  183. AZ::IO::FixedMaxPath mountPath{ archivePath };
  184. constexpr AZ::IO::OpenMode openMode =
  185. AZ::IO::OpenMode::ModeRead
  186. | AZ::IO::OpenMode::ModeBinary;
  187. m_archiveStream.reset(aznew AZ::IO::SystemFileStream(mountPath.c_str(), openMode));
  188. // Early return if the archive is not open
  189. if (!m_archiveStream->IsOpen())
  190. {
  191. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorOpeningArchive, ArchiveReaderErrorString::format(
  192. "Archive with filename %s could not be open",
  193. mountPath.c_str()) });
  194. return false;
  195. }
  196. // If the Archive header and TOC could not be read
  197. // then unmount the archive and return false
  198. if (!ReadArchiveHeaderAndToc())
  199. {
  200. // UnmountArchive is invoked to reset
  201. // the Archive Header, TOC and the path map structures
  202. UnmountArchive();
  203. return false;
  204. }
  205. return true;
  206. }
  207. bool ArchiveReader::MountArchive(ArchiveStreamPtr archiveStream)
  208. {
  209. UnmountArchive();
  210. m_archiveStream = AZStd::move(archiveStream);
  211. if (m_archiveStream == nullptr || !m_archiveStream->IsOpen())
  212. {
  213. m_settings.m_errorCallback({ ArchiveReaderErrorCode::ErrorOpeningArchive,
  214. ArchiveReaderErrorString("Archive stream pointer is nullptr or not open") });
  215. return false;
  216. }
  217. if (!ReadArchiveHeaderAndToc())
  218. {
  219. // UnmountArchive is invoked to reset
  220. // the Archive Header, TOC and the path map structures
  221. UnmountArchive();
  222. return false;
  223. }
  224. return true;
  225. }
  226. bool ArchiveReader::ReadArchiveHeaderAndToc()
  227. {
  228. if (m_archiveStream == nullptr)
  229. {
  230. return false;
  231. }
  232. const bool mountResult = ReadArchiveHeader(m_archiveHeader, *m_archiveStream)
  233. && ReadArchiveTOC(m_archiveToc, *m_archiveStream, m_archiveHeader)
  234. && BuildFilePathMap(m_archiveToc.m_tocView);
  235. return mountResult;
  236. }
  237. void ArchiveReader::UnmountArchive()
  238. {
  239. if (m_archiveStream != nullptr && m_archiveStream->IsOpen())
  240. {
  241. // Clear the path mount on unmount as it has pointers
  242. // into the table of contents reader
  243. m_pathMap.clear();
  244. // Now clear the table of contents reader
  245. m_archiveToc = {};
  246. // Finally clear the archive header
  247. m_archiveHeader = {};
  248. }
  249. m_archiveStream.reset();
  250. }
  251. bool ArchiveReader::IsMounted() const
  252. {
  253. return m_archiveStream != nullptr && m_archiveStream->IsOpen();
  254. }
  255. ArchiveExtractFileResult ArchiveReader::ExtractFileFromArchive(AZStd::span<AZStd::byte> outputSpan,
  256. const ArchiveReaderFileSettings& fileSettings)
  257. {
  258. ArchiveListFileResult listResult;
  259. if (auto filePathString = AZStd::get_if<AZ::IO::PathView>(&fileSettings.m_filePathIdentifier);
  260. filePathString != nullptr)
  261. {
  262. listResult = ListFileInArchive(*filePathString);
  263. }
  264. else
  265. {
  266. // The only remaining alternative is the ArchiveFileToken
  267. // so use AZStd::get is used on a reference to the variant
  268. // Make sure the filePathToken points to file within the TOC
  269. const ArchiveFileToken archiveFileToken = AZStd::get<ArchiveFileToken>(fileSettings.m_filePathIdentifier);
  270. listResult = ListFileInArchive(archiveFileToken);
  271. }
  272. // Copy the result of listing the file in the archive to the extract result structure
  273. ArchiveExtractFileResult extractResult;
  274. extractResult.m_relativeFilePath = listResult.m_relativeFilePath;
  275. extractResult.m_filePathToken = listResult.m_filePathToken;
  276. extractResult.m_compressionAlgorithm = listResult.m_compressionAlgorithm;
  277. extractResult.m_uncompressedSize = listResult.m_uncompressedSize;
  278. extractResult.m_compressedSize = listResult.m_compressedSize;
  279. extractResult.m_offset = listResult.m_offset;
  280. extractResult.m_crc32 = listResult.m_crc32;
  281. extractResult.m_resultOutcome = listResult.m_resultOutcome;
  282. // If querying of the file within the archive failed,
  283. // then return the extract file result which copied the error
  284. // state from the list file result
  285. if (!extractResult)
  286. {
  287. return extractResult;
  288. }
  289. // Determine if the file is compressed
  290. const bool isFileCompressed = extractResult.m_compressionAlgorithm != Compression::Uncompressed
  291. && extractResult.m_compressionAlgorithm != Compression::Invalid;
  292. // Check if the file should be decompressed
  293. const bool shouldDecompressFile = fileSettings.m_decompressFile
  294. && isFileCompressed;
  295. // if the should be decompressed, decompress it
  296. if (shouldDecompressFile)
  297. {
  298. // If the decompressFile option is true, then decompress the file into the output buffer
  299. if (fileSettings.m_decompressFile)
  300. {
  301. // Decompress the data into the output span
  302. if (ReadCompressedFileOutcome readFileOutcome = ReadCompressedFileIntoBuffer(outputSpan,
  303. fileSettings, extractResult);
  304. readFileOutcome)
  305. {
  306. // On success populate a span with the exact size of the file data read
  307. // from the archive
  308. extractResult.m_fileSpan = readFileOutcome.value();
  309. }
  310. else
  311. {
  312. extractResult.m_resultOutcome = AZStd::unexpected(AZStd::move(readFileOutcome.error()));
  313. }
  314. }
  315. }
  316. else
  317. {
  318. // When performing a raw read, use the knowledge of the file being compressed
  319. // to decide the file size to read
  320. AZ::u64 fileSize = isFileCompressed ? extractResult.m_compressedSize : extractResult.m_uncompressedSize;
  321. // Read the raw file data directly into the output span if possible
  322. if (ReadRawFileOutcome readFileOutcome = ReadRawFileIntoBuffer(outputSpan, extractResult.m_offset,
  323. fileSize, fileSettings);
  324. readFileOutcome)
  325. {
  326. // On success populate a span with the exact size of the file data read
  327. // from the archive
  328. extractResult.m_fileSpan = readFileOutcome.value();
  329. }
  330. else
  331. {
  332. extractResult.m_resultOutcome = AZStd::unexpected(AZStd::move(readFileOutcome.error()));
  333. }
  334. }
  335. return extractResult;
  336. }
  337. auto ArchiveReader::ReadRawFileIntoBuffer(AZStd::span<AZStd::byte> fileBuffer, AZ::u64 offset,
  338. AZ::u64 fileSize,
  339. const ArchiveReaderFileSettings& fileSettings)
  340. -> ReadRawFileOutcome
  341. {
  342. // Calculate the start offset where to read the file content from
  343. // It must be within the the range of [offset, offset + size)
  344. AZ::IO::OffsetType readOffset = AZStd::clamp(offset, offset + fileSettings.m_startOffset, offset + fileSize);
  345. // Next clamp the bytesToRead to not read pass the end of the file
  346. AZ::IO::SizeType bytesAvailableForRead = (offset + fileSize) - readOffset;
  347. // Set the amount of bytes to read to be the minimum of the file size and the amount of bytes to read
  348. AZ::IO::SizeType bytesToRead = AZStd::min(bytesAvailableForRead, fileSettings.m_bytesToRead);
  349. if (fileBuffer.size() < bytesToRead)
  350. {
  351. return AZStd::unexpected(ResultString::format("Buffer size is not large enough to read the raw file data at"
  352. " archive file offset %lld."
  353. " Buffer size is %zu, while %llu is required.", readOffset, fileBuffer.size(), bytesToRead));
  354. }
  355. AZStd::scoped_lock archiveReadLock(m_archiveStreamMutex);
  356. if (AZ::IO::SizeType bytesRead = m_archiveStream->ReadAtOffset(bytesToRead, fileBuffer.data(), readOffset);
  357. bytesRead < bytesToRead)
  358. {
  359. return AZStd::unexpected(ResultString::format("Attempted to read %llu bytes from the archive at offset %lld."
  360. " But only %llu bytes were able to be read.", bytesToRead, readOffset, bytesRead));
  361. }
  362. // Make a span with the exact amount of data read
  363. return fileBuffer.first(bytesToRead);
  364. }
  365. auto ArchiveReader::ReadCompressedFileIntoBuffer(AZStd::span<AZStd::byte> decompressionResultSpan,
  366. const ArchiveReaderFileSettings& fileSettings,
  367. const ArchiveExtractFileResult& extractFileResult) -> ReadCompressedFileOutcome
  368. {
  369. // If the file is empty, there is nothing to decompress
  370. if (extractFileResult.m_uncompressedSize == 0)
  371. {
  372. // Return a successful expectation with an empty span
  373. return {};
  374. }
  375. auto decompressionRegistrar = Compression::DecompressionRegistrar::Get();
  376. if (decompressionRegistrar == nullptr)
  377. {
  378. return AZStd::unexpected(ResultString("Decompression Registrar is not available. File cannot be decompressed"));
  379. }
  380. Compression::IDecompressionInterface* decompressionInterface =
  381. decompressionRegistrar->FindDecompressionInterface(extractFileResult.m_compressionAlgorithm);
  382. if (decompressionInterface == nullptr)
  383. {
  384. return AZStd::unexpected(ResultString::format("Compression Algorithm with ID %x is not registered"
  385. " with the decompression registrar.",
  386. extractFileResult.m_compressionAlgorithm));
  387. }
  388. // Retrieve a subspan of the block lines for the file being extracted
  389. // The file path token doubles as the index into the table of contents FileMetadataTable and FilePathIndexTable vector
  390. auto fileMetadataTableIndex = static_cast<AZ::u64>(extractFileResult.m_filePathToken);
  391. auto blockLineSpanOutcome = GetBlockLineSpanForFile(m_archiveToc.m_tocView, fileMetadataTableIndex);
  392. if (!blockLineSpanOutcome)
  393. {
  394. // Return the error for retrieving the block line for the file
  395. return AZStd::unexpected(AZStd::move(blockLineSpanOutcome.error()));
  396. }
  397. AZStd::span<const ArchiveBlockLineUnion> fileBlockLineSpan = blockLineSpanOutcome.value();
  398. // Determine the range of compressed blocks within the file to read
  399. // The cap is uncompressed size of the file
  400. if (fileSettings.m_startOffset > extractFileResult.m_uncompressedSize)
  401. {
  402. return AZStd::unexpected(ResultString::format("Start offset %llu to read file data from is larger."
  403. " than the size of the file %llu for file %s", fileSettings.m_startOffset,
  404. extractFileResult.m_compressedSize,
  405. extractFileResult.m_relativeFilePath.c_str()));
  406. }
  407. // Clamp the bytes that can read for the file to be at
  408. // most the difference in uncompressed size and the start offset
  409. AZ::u64 maxBytesToReadForFile = AZStd::min(fileSettings.m_bytesToRead,
  410. extractFileResult.m_uncompressedSize - fileSettings.m_startOffset);
  411. // Set the amount of bytes to read to be the minimum of the file size and the amount of bytes to read
  412. auto blockRange = GetBlockRangeToRead(fileSettings.m_startOffset, maxBytesToReadForFile);
  413. // Get the number of 2-MiB blocks for the file
  414. AZ::u32 blockCount = GetBlockCountIfCompressed(extractFileResult.m_uncompressedSize);
  415. // First calculate if the first block line is a jump entry
  416. // If there are more than 3 blocks lines, then the file contains at least a jump from
  417. // block line[0] -> block line[3] and the file contains at least 10 blocks of data
  418. // If the file only contains 3 block lines, then there would not be a jump entry
  419. // and the file would contain at most 9 blocks
  420. // See the ArchiveInterfaceStructs.h header for more information
  421. // The aligned seek offset where to read to start reading the compressed
  422. // data will calculated by adding up the 512-byte aligned sizes of each compressed block
  423. AZ::IO::SizeType alignedFirstSeekOffset{};
  424. for (AZ::u64 blockIndex{}; blockIndex < blockRange.first;)
  425. {
  426. // The internal archive code will never trigger the error case of (blockIndex >= blockCount),
  427. // so checking it will be skipped
  428. size_t blockLineIndex = GetBlockLineIndexFromBlockIndex(blockCount, blockIndex).m_blockLineIndex;
  429. // Block line indices which are multiples of 3 all have jump entries unless they are part of the final 3 block
  430. // lines of a file
  431. const bool blockLineContainsJump = (blockLineIndex % BlockLinesToSkipWithJumpEntry == 0)
  432. && (fileBlockLineSpan.size() - blockLineIndex) > BlockLinesToSkipWithJumpEntry;
  433. if (blockLineContainsJump)
  434. {
  435. const ArchiveBlockLineJump& blockLineWithJump = fileBlockLineSpan[blockLineIndex].m_blockLineWithJump;
  436. // there is a jump entry for the file, so the logic gets a bit more trickier
  437. // First check if the blockIndex + BlocksToSkipWithJumpEntry is less than blockRange.first
  438. if (blockIndex + BlocksToSkipWithJumpEntry < blockRange.first)
  439. {
  440. // In this case the jump entry can be used to skip the next 8 blocks(3 block lines)
  441. // The jump entry contains the number of 512-byte sectors the next 8 blocks
  442. // take in the raw file section of the archive
  443. // The value is multiplied by ArchiveDefaultBlockAlignment to get the correct value
  444. alignedFirstSeekOffset += blockLineWithJump.m_blockJump * ArchiveDefaultBlockAlignment;
  445. // Increment the block index by 8, as it was the number of blocks that were skipped
  446. blockIndex += BlocksToSkipWithJumpEntry;
  447. }
  448. else
  449. {
  450. // Otherwise process up to the remaining two block entries in this block line if possible
  451. alignedFirstSeekOffset += AZ_SIZE_ALIGN_UP(blockLineWithJump.m_block0, ArchiveDefaultBlockAlignment);
  452. ++blockIndex;
  453. // If the blockIndex is still less than the beginning of the block range to read
  454. // then grab the second and final block from the block line
  455. if ((blockIndex + 1) < blockRange.first)
  456. {
  457. alignedFirstSeekOffset += AZ_SIZE_ALIGN_UP(blockLineWithJump.m_block1, ArchiveDefaultBlockAlignment);
  458. ++blockIndex;
  459. }
  460. }
  461. }
  462. else
  463. {
  464. // There aren't anymore jump entry for the file so accumulate the aligned compressed block offsets
  465. const ArchiveBlockLine& blockLine = fileBlockLineSpan[blockLineIndex].m_blockLine;
  466. // Try to process up to 3 block lines per for loop iteration
  467. // This allows skipping the blockIndex / BlocksPerBlockLine division twice
  468. // If the blockIndex is within 3 of the blockRange.first value then up to that amount of blocks are processed
  469. const AZ::u64 blocksToProcess = AZStd::min(BlocksPerBlockLine, blockRange.first - blockIndex);
  470. // Align all the compressed sizes up to 512-byte alignment to get the correct
  471. // seek offset for the file
  472. // blocksToProcess is >=1 due to the for loop condition
  473. alignedFirstSeekOffset += AZ_SIZE_ALIGN_UP(blockLine.m_block0, ArchiveDefaultBlockAlignment);
  474. alignedFirstSeekOffset += blocksToProcess >= BlocksPerBlockLine - 1
  475. ? AZ_SIZE_ALIGN_UP(blockLine.m_block1, ArchiveDefaultBlockAlignment)
  476. : 0;
  477. alignedFirstSeekOffset += blocksToProcess >= BlocksPerBlockLine
  478. ? AZ_SIZE_ALIGN_UP(blockLine.m_block2, ArchiveDefaultBlockAlignment)
  479. : 0;
  480. // Increment the block index by the blocks that were processed
  481. blockIndex += blocksToProcess;
  482. }
  483. }
  484. // Stores the list of compressed blocks to decompress
  485. AZStd::vector<AZStd::byte> compressedBlocks;
  486. compressedBlocks.resize_no_construct((blockRange.second - blockRange.first) * ArchiveBlockSizeForCompression);
  487. AZStd::span<AZStd::byte> compressedBlockRemainingSpan = compressedBlocks;
  488. AZ::IO::SizeType fileRelativeSeekOffset = alignedFirstSeekOffset;
  489. for (AZ::u64 blockIndex = blockRange.first; blockIndex != blockRange.second; ++blockIndex)
  490. {
  491. const AZ::u64 blockCompressedSize = GetCompressedSizeForBlock(fileBlockLineSpan, blockCount, blockIndex);
  492. // Get the next 2 MiB block (or less if in the final block) of memory to store the compressed block data
  493. const auto availableBytesInCompressedBlock = AZStd::min<size_t>(compressedBlockRemainingSpan.size(),
  494. ArchiveBlockSizeForCompression);
  495. const AZStd::span<AZStd::byte> compressedBlockToReadInto = compressedBlockRemainingSpan.first(
  496. availableBytesInCompressedBlock);
  497. // Slide the compressed block remaining span view ahead by the 2 MiB that is being used for the read span
  498. compressedBlockRemainingSpan = compressedBlockRemainingSpan.subspan(availableBytesInCompressedBlock);
  499. const AZ::u64 absoluteSeekOffset = extractFileResult.m_offset + fileRelativeSeekOffset;
  500. if (AZ::IO::SizeType bytesRead = m_archiveStream->ReadAtOffset(blockCompressedSize,
  501. compressedBlockToReadInto.data(), absoluteSeekOffset);
  502. bytesRead != blockCompressedSize)
  503. {
  504. return AZStd::unexpected(ResultString::format("Cannot read all of compressed block for"
  505. " block %llu. The compressed block size is %llu, but only %llu was able to be read",
  506. blockIndex, blockCompressedSize, bytesRead));
  507. }
  508. // As the read was successful add the aligned compressed size to the fileRelativeSeekOffset
  509. // The value is the read offset where the next block data starts
  510. fileRelativeSeekOffset += AZ_SIZE_ALIGN_UP(blockCompressedSize, ArchiveDefaultBlockAlignment);
  511. }
  512. // Reset the compressed block remaining to the start of the compressedBlocks vector
  513. compressedBlockRemainingSpan = compressedBlocks;
  514. // The span below is used to slide a 2 MiB window for storing decompressed file contents
  515. AZStd::span<AZStd::byte> decompressionRemainingSpan = decompressionResultSpan;
  516. // Get a reference to the the caller supplied decompression options if available
  517. const auto& decompressionOptions = fileSettings.m_decompressionOptions != nullptr
  518. ? *fileSettings.m_decompressionOptions
  519. : Compression::DecompressionOptions{};
  520. // m_maxDecompressTasks has a minimum value of 1
  521. // This makes sure there is never a scenario where the there are blocks to decompress
  522. // but the decompress task count is 0
  523. const AZ::u32 maxDecompressTasks = AZStd::min(
  524. AZStd::max(1U, m_settings.m_maxDecompressTasks),
  525. static_cast<AZ::u32>(blockRange.second - blockRange.first));
  526. AZStd::vector<Compression::DecompressionResultData> decompressedBlockResults(maxDecompressTasks);
  527. for (AZ::u64 blockIndex = blockRange.first; blockIndex < blockRange.second;)
  528. {
  529. // Determine the number of decompression task that can be run in parallel
  530. const AZ::u32 decompressTaskCount = AZStd::min(static_cast<AZ::u32>(blockRange.second - blockIndex),
  531. maxDecompressTasks);
  532. // Task graph event used to block decompressing blocks in parallel
  533. auto taskDecompressGraphEvent = AZStd::make_unique<AZ::TaskGraphEvent>("Content File Decompress Sync");
  534. AZ::TaskGraph taskGraph{ "Archive Decompress Tasks" };
  535. AZ::TaskDescriptor decompressTaskDescriptor{ "Decompress Block", "Archive Content File Decompression" };
  536. // Increment the block index as part of the inner loop that creates the decompression task
  537. for (AZ::u32 decompressTaskSlot = 0; decompressTaskSlot < decompressTaskCount; ++decompressTaskSlot,
  538. ++blockIndex)
  539. {
  540. const AZ::u64 blockCompressedSize = GetCompressedSizeForBlock(fileBlockLineSpan, blockCount, blockIndex);
  541. // Downsize the 2 MiB span that was used to read the compressed data to the exact compressed size
  542. const auto availableBytesInCompressedBlock = AZStd::min<size_t>(compressedBlockRemainingSpan.size(),
  543. ArchiveBlockSizeForCompression);
  544. AZStd::span<const AZStd::byte> compressedDataForBlock = compressedBlockRemainingSpan
  545. .first(availableBytesInCompressedBlock)
  546. .first(blockCompressedSize);
  547. // Slide the compressed block remaining span by 2 MiB
  548. compressedBlockRemainingSpan = compressedBlockRemainingSpan.subspan(availableBytesInCompressedBlock);
  549. // Get the block span for storing the decompressed block
  550. // As the uncompressed size is 2 MiB for all blocks except the last
  551. // the entire contiguous file sequence will be available in the decompressedResultSpan after the loop
  552. const auto remainingBytesInBlockSpan = AZStd::min<size_t>(decompressionRemainingSpan.size(),
  553. ArchiveBlockSizeForCompression);
  554. AZStd::span<AZStd::byte> decompressionBlockSpan = decompressionRemainingSpan
  555. .first(remainingBytesInBlockSpan);
  556. // Slide the remaining decompressed span by 2 MiB as well
  557. decompressionRemainingSpan = decompressionRemainingSpan.subspan(remainingBytesInBlockSpan);
  558. //! Decompress Task to execute in task executor
  559. auto decompressTask = [decompressionInterface, &decompressionOptions, decompressionBlockSpan, compressedDataForBlock,
  560. &decompressedBlockResult = decompressedBlockResults[decompressTaskSlot]]()
  561. {
  562. // Decompressed the compressed block
  563. decompressedBlockResult = decompressionInterface->DecompressBlock(
  564. decompressionBlockSpan, compressedDataForBlock, decompressionOptions);
  565. };
  566. taskGraph.AddTask(decompressTaskDescriptor, AZStd::move(decompressTask));
  567. }
  568. taskGraph.SubmitOnExecutor(m_taskExecutor, taskDecompressGraphEvent.get());
  569. // Sync on the task completion
  570. taskDecompressGraphEvent->Wait();
  571. // Validate the decompression for all blocks
  572. for (AZ::u32 decompressTaskSlot = 0; decompressTaskSlot < decompressTaskCount; ++decompressTaskSlot)
  573. {
  574. const auto& decompressedBlockResult = decompressedBlockResults[decompressTaskSlot];
  575. if (!decompressedBlockResult)
  576. {
  577. // If one of the decompression task fails, early return with the error message
  578. return AZStd::unexpected(AZStd::move(decompressedBlockResult.m_decompressionOutcome.m_resultString));
  579. }
  580. }
  581. }
  582. // Return a subspan that accounts for the start offset within the compressed file to start
  583. // reading from, up to the bytes read amount
  584. // Due to the logic in the function only reading the set of 2 MiB blocks that are needed
  585. // the start offset for reading is calculated by a modulo operation
  586. // with the ArchiveBlockSizeForCompression (2 MiB).
  587. // The start offset will always be in the first read block
  588. size_t startOffset = fileSettings.m_startOffset % ArchiveBlockSizeForCompression;
  589. size_t endOffset = AZStd::min<size_t>(decompressionResultSpan.size() - startOffset, maxBytesToReadForFile);
  590. return decompressionResultSpan.subspan(startOffset, endOffset);
  591. }
  592. ArchiveListFileResult ArchiveReader::ListFileInArchive(ArchiveFileToken archiveFileToken) const
  593. {
  594. if (static_cast<AZ::u64>(archiveFileToken) > m_archiveToc.m_tocView.m_filePathIndexTable.size())
  595. {
  596. ArchiveListFileResult errorResult;
  597. errorResult.m_filePathToken = archiveFileToken;
  598. errorResult.m_resultOutcome = AZStd::unexpected(
  599. ResultString::format(R"(A file token "%llu" is being used to extract the file and that token does not point)"
  600. " to a file within the archive TOC.", static_cast<AZ::u64>(archiveFileToken)));
  601. return errorResult;
  602. }
  603. // Populate the path view from the Table of Contents View
  604. ArchiveTocFilePathIndex filePathOffsetSize = m_archiveToc.m_tocView.m_filePathIndexTable[
  605. static_cast<AZ::u64>(archiveFileToken)];
  606. if (filePathOffsetSize.m_size == 0)
  607. {
  608. ArchiveListFileResult errorResult;
  609. errorResult.m_filePathToken = archiveFileToken;
  610. errorResult.m_resultOutcome = AZStd::unexpected(
  611. ResultString::format(R"(A file token "%llu" is being used to extract the file,)"
  612. " but the file path stored in the TOC is empty."
  613. "This indicates that the token is referring to a deleted file.",
  614. static_cast<AZ::u64>(archiveFileToken)));
  615. return errorResult;
  616. }
  617. // The file has been found and has a non-empty path
  618. // Populate the ArchiveListFileResult structure
  619. ArchiveListFileResult listResult;
  620. // Extract the path stored in the file path blob into the extract result
  621. listResult.m_relativeFilePath = AZ::IO::PathView(
  622. m_archiveToc.m_tocView.m_filePathBlob.substr(filePathOffsetSize.m_offset, filePathOffsetSize.m_size));
  623. listResult.m_filePathToken = archiveFileToken;
  624. // Gather the file metadata
  625. const ArchiveTocFileMetadata& fileMetadata = m_archiveToc.m_tocView.m_fileMetadataTable[
  626. static_cast<AZ::u64>(archiveFileToken)];
  627. // Use the compression algorithm index to lookup the compression algorithm ID
  628. // if the file the value is less than the size of the compression AlgorithmIds array
  629. if (fileMetadata.m_compressionAlgoIndex < m_archiveHeader.m_compressionAlgorithmsIds.size())
  630. {
  631. listResult.m_compressionAlgorithm = m_archiveHeader.m_compressionAlgorithmsIds[
  632. fileMetadata.m_compressionAlgoIndex];
  633. }
  634. listResult.m_uncompressedSize = fileMetadata.m_uncompressedSize;
  635. if (auto rawFileSizeResult = GetRawFileSize(fileMetadata, m_archiveToc.m_tocView.m_blockOffsetTable);
  636. !rawFileSizeResult)
  637. {
  638. ArchiveListFileResult errorResult;
  639. errorResult.m_filePathToken = archiveFileToken;
  640. // Take the error from GetRawFileSize call and return that
  641. errorResult.m_resultOutcome = AZStd::unexpected(AZStd::move(rawFileSizeResult.error()));
  642. return errorResult;
  643. }
  644. else
  645. {
  646. listResult.m_compressedSize = rawFileSizeResult.value();
  647. }
  648. listResult.m_offset = fileMetadata.m_offset;
  649. listResult.m_crc32 = fileMetadata.m_crc32;
  650. return listResult;
  651. }
  652. ArchiveListFileResult ArchiveReader::ListFileInArchive(AZ::IO::PathView relativePath) const
  653. {
  654. if (relativePath.empty())
  655. {
  656. ArchiveListFileResult errorResult;
  657. errorResult.m_resultOutcome = AZStd::unexpected(
  658. ResultString("An empty file path has been supplied and cannot be found in the archive."));
  659. return errorResult;
  660. }
  661. auto foundIt = m_pathMap.find(relativePath);
  662. if (foundIt == m_pathMap.end())
  663. {
  664. ArchiveListFileResult errorResult;
  665. errorResult.m_relativeFilePath = AZStd::move(relativePath);
  666. errorResult.m_resultOutcome = AZStd::unexpected(
  667. ResultString::format(R"(The file path "%.*s")"
  668. " does not exist in the archive.", AZ_PATH_ARG(errorResult.m_relativeFilePath)));
  669. return errorResult;
  670. }
  671. // Now that the file has been found, pass in the ArchiveFileToken to the
  672. // the other overload
  673. return ListFileInArchive(static_cast<ArchiveFileToken>(foundIt->second));
  674. }
  675. bool ArchiveReader::ContainsFile(AZ::IO::PathView relativePath) const
  676. {
  677. return static_cast<bool>(ListFileInArchive(relativePath));
  678. }
  679. EnumerateArchiveResult ArchiveReader::EnumerateFilesInArchive(ListFileCallback listFileCallback) const
  680. {
  681. ResultOutcome fileResultOutcome;
  682. // Lambda is marked mutable to allow the filePathIndex variable to be incremented each call
  683. auto EnumerateAllFiles = [&listFileCallback, &fileResultOutcome, this, filePathIndex = 0]
  684. (AZ::u64 filePathBlobOffset, AZ::u16 filePathSize) mutable
  685. {
  686. // Invoke callback on each file with a non-empty path
  687. if (AZ::IO::PathView contentPathView(m_archiveToc.m_tocView.m_filePathBlob.substr(filePathBlobOffset, filePathSize));
  688. !contentPathView.empty())
  689. {
  690. ArchiveListFileResult listResult;
  691. listResult.m_relativeFilePath = contentPathView;
  692. listResult.m_filePathToken = static_cast<ArchiveFileToken>(filePathIndex);
  693. // Gather the file metadata
  694. const ArchiveTocFileMetadata& fileMetadata = m_archiveToc.m_tocView.m_fileMetadataTable[filePathIndex];
  695. // Use the compression algorithm index to lookup the compression algorithm ID
  696. if (fileMetadata.m_compressionAlgoIndex < m_archiveHeader.m_compressionAlgorithmsIds.size())
  697. {
  698. listResult.m_compressionAlgorithm = m_archiveHeader.m_compressionAlgorithmsIds[
  699. fileMetadata.m_compressionAlgoIndex];
  700. }
  701. listResult.m_uncompressedSize = fileMetadata.m_uncompressedSize;
  702. if (auto rawFileSizeResult = GetRawFileSize(fileMetadata, m_archiveToc.m_tocView.m_blockOffsetTable);
  703. !rawFileSizeResult)
  704. {
  705. fileResultOutcome = AZStd::unexpected(AZStd::move(rawFileSizeResult.error()));
  706. return;
  707. }
  708. else
  709. {
  710. listResult.m_compressedSize = rawFileSizeResult.value();
  711. }
  712. listResult.m_offset = fileMetadata.m_offset;
  713. listResult.m_crc32 = fileMetadata.m_crc32;
  714. listFileCallback(AZStd::move(listResult));
  715. }
  716. ++filePathIndex;
  717. };
  718. EnumerateFilePathIndexOffsets(EnumerateAllFiles, m_archiveToc.m_tocView);
  719. // There are currently no error messages that enumerate file path sets
  720. // So a default constructed instance which converts to boolean true is returned
  721. return {};
  722. }
  723. bool ArchiveReader::DumpArchiveMetadata(AZ::IO::GenericStream& metadataStream,
  724. const ArchiveMetadataSettings& metadataSettings) const
  725. {
  726. using MetadataString = AZStd::fixed_string<256>;
  727. if (metadataSettings.m_writeFileCount)
  728. {
  729. auto fileCountString = MetadataString::format("Total File Count: %u\n", m_archiveHeader.m_fileCount);
  730. metadataStream.Write(fileCountString.size(), fileCountString.data());
  731. }
  732. if (metadataSettings.m_writeFilePaths)
  733. {
  734. // Validate the file path and file metadata tables are in sync
  735. if (m_archiveToc.m_tocView.m_filePathIndexTable.size() != m_archiveToc.m_tocView.m_fileMetadataTable.size())
  736. {
  737. auto errorString = MetadataString::format("Error: The Archive TOC of contents has a mismatched size between"
  738. " the file path index vector (size=%zu) and the file metadata vector (size=%zu).\n"
  739. "This indicates a code error in the ArchiveReader.",
  740. m_archiveToc.m_tocView.m_filePathIndexTable.size(), m_archiveToc.m_tocView.m_fileMetadataTable.size());
  741. metadataStream.Write(errorString.size(), errorString.data());
  742. return false;
  743. }
  744. // Trackes the index of the file being output
  745. size_t activeFileOffset{};
  746. for (size_t filePathIndexTableIndex{}; filePathIndexTableIndex < m_archiveToc.m_tocView.m_filePathIndexTable.size();
  747. ++filePathIndexTableIndex)
  748. {
  749. // Use the FilePathIndex entry to lookup the offset and size of the file path within the FilePath blob
  750. const auto& contentFilePathIndex = m_archiveToc.m_tocView.m_filePathIndexTable[filePathIndexTableIndex];
  751. AZ::IO::PathView contentFilePath(m_archiveToc.m_tocView.m_filePathBlob.substr(contentFilePathIndex.m_offset,
  752. contentFilePathIndex.m_size));
  753. // An empty file path is used to track removed files from the archive,
  754. // therefore only non-empty paths are iterated
  755. if (!contentFilePath.empty())
  756. {
  757. const ArchiveTocFileMetadata& contentFileMetadata = m_archiveToc.m_tocView.m_fileMetadataTable[filePathIndexTableIndex];
  758. auto fileMetadataString = MetadataString::format(R"(File %zu: path="%.*s")", activeFileOffset, AZ_PATH_ARG(contentFilePath));
  759. if (metadataSettings.m_writeFileOffsets)
  760. {
  761. fileMetadataString += MetadataString::format(R"(, offset=%llu)", contentFileMetadata.m_offset);
  762. }
  763. if (metadataSettings.m_writeFileSizesAndCompression)
  764. {
  765. fileMetadataString += MetadataString::format(R"(, uncompressed_size=%llu)", contentFileMetadata.m_uncompressedSize);
  766. // Only output compressed size if an compression that compresses data is being used
  767. if (contentFileMetadata.m_compressionAlgoIndex < UncompressedAlgorithmIndex)
  768. {
  769. if (auto rawFileSizeResult = GetRawFileSize(contentFileMetadata, m_archiveToc.m_tocView.m_blockOffsetTable);
  770. rawFileSizeResult)
  771. {
  772. AZ::u64 compressedSize = rawFileSizeResult.value();
  773. fileMetadataString += MetadataString::format(R"(, compressed_size=%llu)",
  774. compressedSize);
  775. }
  776. fileMetadataString += MetadataString::format(R"(, compression_algorithm_id=%u)",
  777. AZStd::to_underlying(m_archiveHeader.m_compressionAlgorithmsIds[contentFileMetadata.m_compressionAlgoIndex]));
  778. }
  779. }
  780. // Append a newline before writing to the stream
  781. fileMetadataString.push_back('\n');
  782. metadataStream.Write(fileMetadataString.size(), fileMetadataString.data());
  783. // Increment the active file offset for non-removed files
  784. ++activeFileOffset;
  785. }
  786. }
  787. }
  788. return true;
  789. }
  790. } // namespace Archive