BitstreamReader.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This header defines the BitstreamReader class. This class can be used to
  11. // read an arbitrary bitstream, regardless of its contents.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #ifndef LLVM_BITCODE_BITSTREAMREADER_H
  15. #define LLVM_BITCODE_BITSTREAMREADER_H
  16. #include "llvm/Bitcode/BitCodes.h"
  17. #include "llvm/Support/Endian.h"
  18. #include "llvm/Support/StreamingMemoryObject.h"
  19. #include <climits>
  20. #include <string>
  21. #include <vector>
  22. namespace llvm {
  23. // HLSL Change Starts
  24. class BitstreamCursor;
  25. class BitstreamUseTracker {
  26. private:
  27. typedef std::pair<uint64_t, uint64_t> UseRange;
  28. SmallVector<UseRange, 8> Ranges;
  29. enum ExtendResult {
  30. Exclusive,
  31. ExtendedBegin,
  32. ExtendedEnd,
  33. ExtendedBoth,
  34. Included
  35. };
  36. static ExtendResult extendRange(UseRange &R, UseRange &NewRange);
  37. static bool contains(const UseRange &UR);
  38. bool considerMergeRight(size_t idx);
  39. public:
  40. struct AutoTrack {
  41. BitstreamUseTracker *BT;
  42. uint64_t begin;
  43. void end(uint64_t end) {
  44. BitstreamUseTracker::track(BT, begin, end);
  45. }
  46. };
  47. static AutoTrack start(BitstreamUseTracker *BT, uint64_t begin) {
  48. AutoTrack AT; AT.BT = BT; AT.begin = begin; return AT;
  49. }
  50. struct ScopeTrack {
  51. BitstreamCursor *BC;
  52. uint64_t begin;
  53. ~ScopeTrack();
  54. };
  55. static ScopeTrack scope_track(BitstreamCursor *BC);
  56. static void track(BitstreamUseTracker *BT, uint64_t begin, uint64_t end);
  57. void insert(uint64_t begin, uint64_t end);
  58. bool isDense(uint64_t endBitoffset) const;
  59. };
  60. // HLSL Change Ends
  61. /// This class is used to read from an LLVM bitcode stream, maintaining
  62. /// information that is global to decoding the entire file. While a file is
  63. /// being read, multiple cursors can be independently advanced or skipped around
  64. /// within the file. These are represented by the BitstreamCursor class.
  65. class BitstreamReader {
  66. public:
  67. /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
  68. /// describe abbreviations that all blocks of the specified ID inherit.
  69. struct BlockInfo {
  70. unsigned BlockID;
  71. std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
  72. std::string Name;
  73. std::vector<std::pair<unsigned, std::string> > RecordNames;
  74. };
  75. private:
  76. std::unique_ptr<MemoryObject> BitcodeBytes;
  77. std::vector<BlockInfo> BlockInfoRecords;
  78. /// This is set to true if we don't care about the block/record name
  79. /// information in the BlockInfo block. Only llvm-bcanalyzer uses this.
  80. bool IgnoreBlockInfoNames;
  81. BitstreamReader(const BitstreamReader&) = delete;
  82. void operator=(const BitstreamReader&) = delete;
  83. public:
  84. BitstreamReader() : IgnoreBlockInfoNames(true) {
  85. }
  86. BitstreamReader(const unsigned char *Start, const unsigned char *End)
  87. : IgnoreBlockInfoNames(true) {
  88. init(Start, End);
  89. }
  90. BitstreamUseTracker *Tracker = nullptr; // HLSL Change
  91. BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)
  92. : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {}
  93. BitstreamReader(BitstreamReader &&Other) {
  94. *this = std::move(Other);
  95. }
  96. BitstreamReader &operator=(BitstreamReader &&Other) {
  97. BitcodeBytes = std::move(Other.BitcodeBytes);
  98. // Explicitly swap block info, so that nothing gets destroyed twice.
  99. std::swap(BlockInfoRecords, Other.BlockInfoRecords);
  100. IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
  101. return *this;
  102. }
  103. void init(const unsigned char *Start, const unsigned char *End) {
  104. assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
  105. BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
  106. }
  107. MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
  108. /// This is called by clients that want block/record name information.
  109. void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
  110. bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
  111. //===--------------------------------------------------------------------===//
  112. // Block Manipulation
  113. //===--------------------------------------------------------------------===//
  114. /// Return true if we've already read and processed the block info block for
  115. /// this Bitstream. We only process it for the first cursor that walks over
  116. /// it.
  117. bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
  118. /// If there is block info for the specified ID, return it, otherwise return
  119. /// null.
  120. const BlockInfo *getBlockInfo(unsigned BlockID) const {
  121. // Common case, the most recent entry matches BlockID.
  122. if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
  123. return &BlockInfoRecords.back();
  124. for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
  125. i != e; ++i)
  126. if (BlockInfoRecords[i].BlockID == BlockID)
  127. return &BlockInfoRecords[i];
  128. return nullptr;
  129. }
  130. BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
  131. if (const BlockInfo *BI = getBlockInfo(BlockID))
  132. return *const_cast<BlockInfo*>(BI);
  133. // Otherwise, add a new record.
  134. BlockInfoRecords.emplace_back();
  135. BlockInfoRecords.back().BlockID = BlockID;
  136. return BlockInfoRecords.back();
  137. }
  138. /// Takes block info from the other bitstream reader.
  139. ///
  140. /// This is a "take" operation because BlockInfo records are non-trivial, and
  141. /// indeed rather expensive.
  142. void takeBlockInfo(BitstreamReader &&Other) {
  143. assert(!hasBlockInfoRecords());
  144. BlockInfoRecords = std::move(Other.BlockInfoRecords);
  145. }
  146. };
  147. /// When advancing through a bitstream cursor, each advance can discover a few
  148. /// different kinds of entries:
  149. struct BitstreamEntry {
  150. enum {
  151. Error, // Malformed bitcode was found.
  152. EndBlock, // We've reached the end of the current block, (or the end of the
  153. // file, which is treated like a series of EndBlock records.
  154. SubBlock, // This is the start of a new subblock of a specific ID.
  155. Record // This is a record with a specific AbbrevID.
  156. } Kind;
  157. unsigned ID;
  158. static BitstreamEntry getError() {
  159. BitstreamEntry E; E.Kind = Error; return E;
  160. }
  161. static BitstreamEntry getEndBlock() {
  162. BitstreamEntry E; E.Kind = EndBlock; return E;
  163. }
  164. static BitstreamEntry getSubBlock(unsigned ID) {
  165. BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
  166. }
  167. static BitstreamEntry getRecord(unsigned AbbrevID) {
  168. BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
  169. }
  170. };
  171. /// This represents a position within a bitcode file. There may be multiple
  172. /// independent cursors reading within one bitstream, each maintaining their own
  173. /// local state.
  174. ///
  175. /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
  176. /// be passed by value.
  177. class BitstreamCursor {
  178. BitstreamReader *BitStream;
  179. size_t NextChar;
  180. // The size of the bicode. 0 if we don't know it yet.
  181. size_t Size;
  182. /// This is the current data we have pulled from the stream but have not
  183. /// returned to the client. This is specifically and intentionally defined to
  184. /// follow the word size of the host machine for efficiency. We use word_t in
  185. /// places that are aware of this to make it perfectly explicit what is going
  186. /// on.
  187. typedef size_t word_t;
  188. word_t CurWord;
  189. /// This is the number of bits in CurWord that are valid. This is always from
  190. /// [0...bits_of(size_t)-1] inclusive.
  191. unsigned BitsInCurWord;
  192. // This is the declared size of code values used for the current block, in
  193. // bits.
  194. unsigned CurCodeSize;
  195. /// Abbrevs installed at in this block.
  196. std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
  197. struct Block {
  198. unsigned PrevCodeSize;
  199. std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
  200. explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
  201. };
  202. /// This tracks the codesize of parent blocks.
  203. SmallVector<Block, 8> BlockScope;
  204. public:
  205. static const size_t MaxChunkSize = sizeof(word_t) * 8;
  206. BitstreamCursor() { init(nullptr); }
  207. explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
  208. void init(BitstreamReader *R) {
  209. freeState();
  210. BitStream = R;
  211. NextChar = 0;
  212. Size = 0;
  213. BitsInCurWord = 0;
  214. CurCodeSize = 2;
  215. }
  216. void freeState();
  217. bool canSkipToPos(size_t pos) const {
  218. // pos can be skipped to if it is a valid address or one byte past the end.
  219. return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
  220. static_cast<uint64_t>(pos - 1));
  221. }
  222. bool AtEndOfStream() {
  223. if (BitsInCurWord != 0)
  224. return false;
  225. if (Size != 0)
  226. return Size == NextChar;
  227. fillCurWord();
  228. return BitsInCurWord == 0;
  229. }
  230. /// Return the number of bits used to encode an abbrev #.
  231. unsigned getAbbrevIDWidth() const { return CurCodeSize; }
  232. /// Return the bit # of the bit we are reading.
  233. uint64_t GetCurrentBitNo() const {
  234. return NextChar*CHAR_BIT - BitsInCurWord;
  235. }
  236. BitstreamReader *getBitStreamReader() {
  237. return BitStream;
  238. }
  239. const BitstreamReader *getBitStreamReader() const {
  240. return BitStream;
  241. }
  242. /// Flags that modify the behavior of advance().
  243. enum {
  244. /// If this flag is used, the advance() method does not automatically pop
  245. /// the block scope when the end of a block is reached.
  246. AF_DontPopBlockAtEnd = 1,
  247. /// If this flag is used, abbrev entries are returned just like normal
  248. /// records.
  249. AF_DontAutoprocessAbbrevs = 2
  250. };
  251. /// Advance the current bitstream, returning the next entry in the stream.
  252. BitstreamEntry advance(unsigned Flags = 0) {
  253. while (1) {
  254. unsigned Code = ReadCode();
  255. if (Code == bitc::END_BLOCK) {
  256. // Pop the end of the block unless Flags tells us not to.
  257. if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
  258. return BitstreamEntry::getError();
  259. return BitstreamEntry::getEndBlock();
  260. }
  261. if (Code == bitc::ENTER_SUBBLOCK)
  262. return BitstreamEntry::getSubBlock(ReadSubBlockID());
  263. if (Code == bitc::DEFINE_ABBREV &&
  264. !(Flags & AF_DontAutoprocessAbbrevs)) {
  265. // We read and accumulate abbrev's, the client can't do anything with
  266. // them anyway.
  267. ReadAbbrevRecord();
  268. continue;
  269. }
  270. return BitstreamEntry::getRecord(Code);
  271. }
  272. }
  273. /// This is a convenience function for clients that don't expect any
  274. /// subblocks. This just skips over them automatically.
  275. BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0, unsigned *pCount = nullptr) {
  276. while (1) {
  277. // If we found a normal entry, return it.
  278. BitstreamEntry Entry = advance(Flags);
  279. if (Entry.Kind != BitstreamEntry::SubBlock)
  280. return Entry;
  281. // If we found a sub-block, just skip over it and check the next entry.
  282. if (pCount) *pCount += 1; // HLSL Change - count skipped blocks
  283. if (SkipBlock())
  284. return BitstreamEntry::getError();
  285. }
  286. }
  287. /// Reset the stream to the specified bit number.
  288. void JumpToBit(uint64_t BitNo) {
  289. size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
  290. unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
  291. assert(canSkipToPos(ByteNo) && "Invalid location");
  292. // Move the cursor to the right word.
  293. NextChar = ByteNo;
  294. BitsInCurWord = 0;
  295. // Skip over any bits that are already consumed.
  296. if (WordBitNo)
  297. Read(WordBitNo);
  298. }
  299. void fillCurWord() {
  300. if (Size != 0 && NextChar >= Size)
  301. report_fatal_error("Unexpected end of file");
  302. // Read the next word from the stream.
  303. uint8_t Array[sizeof(word_t)] = {0};
  304. uint64_t BytesRead =
  305. BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
  306. // If we run out of data, stop at the end of the stream.
  307. if (BytesRead == 0) {
  308. Size = NextChar;
  309. return;
  310. }
  311. CurWord =
  312. support::endian::read<word_t, support::little, support::unaligned>(
  313. Array);
  314. NextChar += BytesRead;
  315. BitsInCurWord = BytesRead * 8;
  316. }
  317. word_t Read(unsigned NumBits) {
  318. static const unsigned BitsInWord = MaxChunkSize;
  319. assert(NumBits && NumBits <= BitsInWord &&
  320. "Cannot return zero or more than BitsInWord bits!");
  321. static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
  322. // If the field is fully contained by CurWord, return it quickly.
  323. auto T = BitstreamUseTracker::scope_track(this); // HLSL Change
  324. if (BitsInCurWord >= NumBits) {
  325. word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
  326. // Use a mask to avoid undefined behavior.
  327. CurWord >>= (NumBits & Mask);
  328. BitsInCurWord -= NumBits;
  329. return R;
  330. }
  331. word_t R = BitsInCurWord ? CurWord : 0;
  332. unsigned BitsLeft = NumBits - BitsInCurWord;
  333. fillCurWord();
  334. // If we run out of data, stop at the end of the stream.
  335. if (BitsLeft > BitsInCurWord)
  336. return 0;
  337. word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
  338. // Use a mask to avoid undefined behavior.
  339. CurWord >>= (BitsLeft & Mask);
  340. BitsInCurWord -= BitsLeft;
  341. R |= R2 << (NumBits - BitsLeft);
  342. return R;
  343. }
  344. uint32_t ReadVBR(unsigned NumBits) {
  345. uint32_t Piece = Read(NumBits);
  346. if ((Piece & (1U << (NumBits-1))) == 0)
  347. return Piece;
  348. uint32_t Result = 0;
  349. unsigned NextBit = 0;
  350. while (1) {
  351. Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
  352. if ((Piece & (1U << (NumBits-1))) == 0)
  353. return Result;
  354. NextBit += NumBits-1;
  355. Piece = Read(NumBits);
  356. }
  357. }
  358. // Read a VBR that may have a value up to 64-bits in size. The chunk size of
  359. // the VBR must still be <= 32 bits though.
  360. uint64_t ReadVBR64(unsigned NumBits) {
  361. uint32_t Piece = Read(NumBits);
  362. if ((Piece & (1U << (NumBits-1))) == 0)
  363. return uint64_t(Piece);
  364. uint64_t Result = 0;
  365. unsigned NextBit = 0;
  366. while (1) {
  367. Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
  368. if ((Piece & (1U << (NumBits-1))) == 0)
  369. return Result;
  370. NextBit += NumBits-1;
  371. Piece = Read(NumBits);
  372. }
  373. }
  374. private:
  375. void SkipToFourByteBoundary() {
  376. // If word_t is 64-bits and if we've read less than 32 bits, just dump
  377. // the bits we have up to the next 32-bit boundary.
  378. auto T = BitstreamUseTracker::scope_track(this); // HLSL Change
  379. if (sizeof(word_t) > 4 &&
  380. BitsInCurWord >= 32) {
  381. CurWord >>= BitsInCurWord-32;
  382. BitsInCurWord = 32;
  383. return;
  384. }
  385. BitsInCurWord = 0;
  386. }
  387. public:
  388. unsigned ReadCode() {
  389. return Read(CurCodeSize);
  390. }
  391. // Block header:
  392. // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
  393. /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
  394. unsigned ReadSubBlockID() {
  395. return ReadVBR(bitc::BlockIDWidth);
  396. }
  397. /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
  398. /// of this block. If the block record is malformed, return true.
  399. bool SkipBlock() {
  400. // Read and ignore the codelen value. Since we are skipping this block, we
  401. // don't care what code widths are used inside of it.
  402. ReadVBR(bitc::CodeLenWidth);
  403. SkipToFourByteBoundary();
  404. unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
  405. // Check that the block wasn't partially defined, and that the offset isn't
  406. // bogus.
  407. size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
  408. if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
  409. return true;
  410. JumpToBit(SkipTo);
  411. return false;
  412. }
  413. /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
  414. /// if the block has an error.
  415. bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
  416. bool ReadBlockEnd() {
  417. if (BlockScope.empty()) return true;
  418. // Block tail:
  419. // [END_BLOCK, <align4bytes>]
  420. SkipToFourByteBoundary();
  421. popBlockScope();
  422. return false;
  423. }
  424. private:
  425. void popBlockScope() {
  426. CurCodeSize = BlockScope.back().PrevCodeSize;
  427. CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
  428. BlockScope.pop_back();
  429. }
  430. //===--------------------------------------------------------------------===//
  431. // Record Processing
  432. //===--------------------------------------------------------------------===//
  433. public:
  434. /// Return the abbreviation for the specified AbbrevId.
  435. const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
  436. unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
  437. if (AbbrevNo >= CurAbbrevs.size())
  438. report_fatal_error("Invalid abbrev number");
  439. return CurAbbrevs[AbbrevNo].get();
  440. }
  441. /// Read the current record and discard it.
  442. void skipRecord(unsigned AbbrevID);
  443. unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
  444. StringRef *Blob = nullptr);
  445. //===--------------------------------------------------------------------===//
  446. // Abbrev Processing
  447. //===--------------------------------------------------------------------===//
  448. void ReadAbbrevRecord();
  449. bool ReadBlockInfoBlock(unsigned *pCount = nullptr);
  450. };
  451. } // End llvm namespace
  452. #endif