BinaryReader.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. /*============================================================
  5. **
  6. **
  7. **
  8. **
  9. **
  10. ** Purpose: Wraps a stream and provides convenient read functionality
  11. ** for strings and primitive types.
  12. **
  13. **
  14. ============================================================*/
  15. using System.Buffers.Binary;
  16. using System.Diagnostics;
  17. using System.Runtime.CompilerServices;
  18. using System.Runtime.InteropServices;
  19. using System.Text;
  20. namespace System.IO
  21. {
  22. public class BinaryReader : IDisposable
  23. {
  // Size, in bytes, of the lazily allocated scratch buffer (_charBytes) used when decoding characters.
  private const int MaxCharBytesSize = 128;

  // The wrapped stream; every read goes through it.
  private readonly Stream _stream;

  // Scratch buffer for fixed-width primitive reads; sized in the constructor (at least 16 bytes).
  private readonly byte[] _buffer;

  // Decoder obtained from the encoding passed to the constructor.
  private readonly Decoder _decoder;

  // Byte and char scratch buffers for character/string decoding; allocated on first use.
  private byte[]? _charBytes;
  private char[]? _charBuffer;

  // The fields below are assigned only in the constructor, so they are readonly.
  private readonly int _maxCharsSize; // From MaxCharBytesSize & Encoding

  // Performance optimization for Read() w/ Unicode. Speeds us up by ~40%
  private readonly bool _2BytesPerChar;
  private readonly bool _isMemoryStream; // "do we sit on MemoryStream?" for Read/ReadInt32 perf
  private readonly bool _leaveOpen;

  // Set by Dispose(bool); checked by ThrowIfDisposed().
  private bool _disposed;
  /// <summary>
  /// Creates a reader over <paramref name="input"/> using UTF-8; the stream is closed when the reader is disposed.
  /// </summary>
  public BinaryReader(Stream input) : this(input, Encoding.UTF8, false)
  {
  }

  /// <summary>
  /// Creates a reader over <paramref name="input"/> using <paramref name="encoding"/>; the stream is closed
  /// when the reader is disposed.
  /// </summary>
  public BinaryReader(Stream input, Encoding encoding) : this(input, encoding, false)
  {
  }

  /// <summary>
  /// Creates a reader over <paramref name="input"/> using <paramref name="encoding"/>.
  /// </summary>
  /// <param name="input">The stream to read from; must be readable.</param>
  /// <param name="encoding">The encoding whose decoder is used for character and string reads.</param>
  /// <param name="leaveOpen">When true, disposing the reader does not close <paramref name="input"/>.</param>
  /// <exception cref="ArgumentNullException"><paramref name="input"/> or <paramref name="encoding"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="input"/> does not support reading.</exception>
  public BinaryReader(Stream input, Encoding encoding, bool leaveOpen)
  {
      if (input is null)
      {
          throw new ArgumentNullException(nameof(input));
      }

      if (encoding is null)
      {
          throw new ArgumentNullException(nameof(encoding));
      }

      if (!input.CanRead)
      {
          throw new ArgumentException(SR.Argument_StreamNotReadable);
      }

      _stream = input;
      _decoder = encoding.GetDecoder();
      _maxCharsSize = encoding.GetMaxCharCount(MaxCharBytesSize);

      // The primitive-read buffer holds at least 16 bytes, or the encoding's
      // maximum byte count for a single char when that is larger.
      _buffer = new byte[Math.Max(encoding.GetMaxByteCount(1), 16)];

      // _charBuffer and _charBytes stay null until a character read needs them.

      // Encodings that always use 2 bytes per char (or more) are special-cased
      // to make Read() & Peek() faster.
      _2BytesPerChar = encoding is UnicodeEncoding;

      // Remember for the reader's lifetime whether it sits directly on a MemoryStream.
      // An exact type comparison is used instead of "as"/"is" because derived classes are not allowed.
      _isMemoryStream = input.GetType() == typeof(MemoryStream);
      _leaveOpen = leaveOpen;

      Debug.Assert(_decoder != null, "[BinaryReader.ctor]_decoder!=null");
  }
  /// <summary>Gets the stream this reader was constructed over.</summary>
  public virtual Stream BaseStream => _stream;
  /// <summary>
  /// Marks the reader as disposed. On the first call with <paramref name="disposing"/> set, the
  /// underlying stream is closed unless the reader was created with leaveOpen.
  /// </summary>
  /// <param name="disposing">True when called from <see cref="Dispose()"/> or <see cref="Close"/>.</param>
  protected virtual void Dispose(bool disposing)
  {
      // Repeated calls are no-ops.
      if (_disposed)
      {
          return;
      }

      if (disposing && !_leaveOpen)
      {
          _stream.Close();
      }

      _disposed = true;
  }
  /// <summary>Disposes the reader by calling <c>Dispose(true)</c>.</summary>
  public void Dispose() => Dispose(true);
  /// <summary>Closes the reader by calling <c>Dispose(true)</c>.</summary>
  /// <remarks>
  /// Override Dispose(bool) instead of Close(). This API exists for compatibility purposes.
  /// </remarks>
  public virtual void Close() => Dispose(true);
  /// <summary>
  /// Throws the exception produced by <c>Error.GetFileNotOpen()</c> once the reader has been disposed;
  /// otherwise does nothing.
  /// </summary>
  private void ThrowIfDisposed()
  {
      if (!_disposed)
      {
          return;
      }

      throw Error.GetFileNotOpen();
  }
  /// <summary>
  /// Reads the next character and then restores the stream position, so the character is not consumed.
  /// </summary>
  /// <returns>
  /// The value returned by <see cref="Read()"/>, or -1 when the stream cannot seek.
  /// </returns>
  public virtual int PeekChar()
  {
      ThrowIfDisposed();

      if (_stream.CanSeek)
      {
          long savedPosition = _stream.Position;
          int next = Read();
          _stream.Position = savedPosition;
          return next;
      }

      // Without seeking there is no way to put the bytes back.
      return -1;
  }
  /// <summary>
  /// Reads bytes from the stream until the decoder produces one character.
  /// </summary>
  /// <returns>The decoded character, or -1 if the stream ends before another byte can be read.</returns>
  public virtual int Read()
  {
      ThrowIfDisposed();

      // Remember where we started so a decoder failure can rewind a seekable stream.
      long startPosition = _stream.CanSeek ? _stream.Position : 0;

      _charBytes ??= new byte[MaxCharBytesSize]; //REVIEW: We need at most 2 bytes/char here?
      byte[] scratch = _charBytes;

      Span<char> singleChar = stackalloc char[1];
      int charsRead = 0;

      do
      {
          // We really want to know what the minimum number of bytes per char
          // is for our encoding. Otherwise for UnicodeEncoding we'd have to
          // do ~1+log(n) reads to read n characters.
          // Assume 1 byte can be 1 char unless _2BytesPerChar is true.
          int first = _stream.ReadByte();
          scratch[0] = (byte)first;
          if (first == -1)
          {
              // End of stream before any byte of this char.
              return -1;
          }

          int numBytes = 1;
          if (_2BytesPerChar)
          {
              int second = _stream.ReadByte();
              scratch[1] = (byte)second;
              if (second != -1)
              {
                  numBytes = 2;
              }
          }

          Debug.Assert(numBytes == 1 || numBytes == 2, "BinaryReader::ReadOneChar assumes it's reading one or 2 bytes only.");

          try
          {
              charsRead = _decoder.GetChars(new ReadOnlySpan<byte>(scratch, 0, numBytes), singleChar, flush: false);
          }
          catch
          {
              // Handle surrogate char: put the stream back where this call started.
              if (_stream.CanSeek)
              {
                  _stream.Seek(startPosition - _stream.Position, SeekOrigin.Current);
              }

              // else - we can't do much here
              throw;
          }

          Debug.Assert(charsRead < 2, "BinaryReader::ReadOneChar - assuming we only got 0 or 1 char, not 2!");
      }
      while (charsRead == 0);

      Debug.Assert(charsRead > 0);
      return singleChar[0];
  }
  /// <summary>Reads one byte from the stream.</summary>
  /// <returns>The byte read.</returns>
  public virtual byte ReadByte()
  {
      return InternalReadByte();
  }

  /// <summary>
  /// Shared single-byte read: checks the disposed flag, reads one byte, and throws the
  /// end-of-file error from <c>Error.GetEndOfFile()</c> when the stream reports -1.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)] // Inlined to avoid some method call overhead with InternalRead.
  private byte InternalReadByte()
  {
      ThrowIfDisposed();

      int value = _stream.ReadByte();
      return value != -1 ? (byte)value : throw Error.GetEndOfFile();
  }
  /// <summary>Reads one byte and reinterprets it as a signed byte.</summary>
  [CLSCompliant(false)]
  public virtual sbyte ReadSByte()
  {
      byte raw = InternalReadByte();
      return (sbyte)raw;
  }

  /// <summary>Reads one byte and returns true when it is non-zero.</summary>
  public virtual bool ReadBoolean()
  {
      byte raw = InternalReadByte();
      return raw != 0;
  }
  /// <summary>
  /// Reads the next character via <see cref="Read()"/>, treating end of stream as an error
  /// (the exception comes from <c>Error.GetEndOfFile()</c>).
  /// </summary>
  public virtual char ReadChar()
  {
      int value = Read();
      return value == -1 ? throw Error.GetEndOfFile() : (char)value;
  }
  /// <summary>Reads 2 bytes and interprets them as a little-endian signed 16-bit integer.</summary>
  public virtual short ReadInt16()
  {
      ReadOnlySpan<byte> bytes = InternalRead(2);
      return BinaryPrimitives.ReadInt16LittleEndian(bytes);
  }

  /// <summary>Reads 2 bytes and interprets them as a little-endian unsigned 16-bit integer.</summary>
  [CLSCompliant(false)]
  public virtual ushort ReadUInt16()
  {
      ReadOnlySpan<byte> bytes = InternalRead(2);
      return BinaryPrimitives.ReadUInt16LittleEndian(bytes);
  }

  /// <summary>Reads 4 bytes and interprets them as a little-endian signed 32-bit integer.</summary>
  public virtual int ReadInt32()
  {
      ReadOnlySpan<byte> bytes = InternalRead(4);
      return BinaryPrimitives.ReadInt32LittleEndian(bytes);
  }

  /// <summary>Reads 4 bytes and interprets them as a little-endian unsigned 32-bit integer.</summary>
  [CLSCompliant(false)]
  public virtual uint ReadUInt32()
  {
      ReadOnlySpan<byte> bytes = InternalRead(4);
      return BinaryPrimitives.ReadUInt32LittleEndian(bytes);
  }

  /// <summary>Reads 8 bytes and interprets them as a little-endian signed 64-bit integer.</summary>
  public virtual long ReadInt64()
  {
      ReadOnlySpan<byte> bytes = InternalRead(8);
      return BinaryPrimitives.ReadInt64LittleEndian(bytes);
  }

  /// <summary>Reads 8 bytes and interprets them as a little-endian unsigned 64-bit integer.</summary>
  [CLSCompliant(false)]
  public virtual ulong ReadUInt64()
  {
      ReadOnlySpan<byte> bytes = InternalRead(8);
      return BinaryPrimitives.ReadUInt64LittleEndian(bytes);
  }

  /// <summary>Reads 4 bytes as a little-endian 32-bit pattern and reinterprets the bits as a float.</summary>
  public virtual unsafe float ReadSingle()
  {
      int bits = BinaryPrimitives.ReadInt32LittleEndian(InternalRead(4));
      return BitConverter.Int32BitsToSingle(bits);
  }

  /// <summary>Reads 8 bytes as a little-endian 64-bit pattern and reinterprets the bits as a double.</summary>
  public virtual unsafe double ReadDouble()
  {
      long bits = BinaryPrimitives.ReadInt64LittleEndian(InternalRead(8));
      return BitConverter.Int64BitsToDouble(bits);
  }
  /// <summary>Reads 16 bytes and converts them to a <see cref="decimal"/>.</summary>
  /// <exception cref="IOException">
  /// The conversion rejected the bytes with an <see cref="ArgumentException"/>, which is wrapped.
  /// </exception>
  public virtual decimal ReadDecimal()
  {
      ReadOnlySpan<byte> raw = InternalRead(16);

      try
      {
          return decimal.ToDecimal(raw);
      }
      catch (ArgumentException inner)
      {
          // ReadDecimal cannot leak out ArgumentException
          throw new IOException(SR.Arg_DecBitCtor, inner);
      }
  }
  /// <summary>
  /// Reads a string stored as a 7-bit-encoded byte length followed by that many encoded bytes.
  /// </summary>
  /// <returns>The decoded string; <see cref="string.Empty"/> when the length prefix is zero.</returns>
  /// <exception cref="IOException">The length prefix is negative.</exception>
  public virtual string ReadString()
  {
      ThrowIfDisposed();

      // Length of the string in bytes, not chars
      int stringLength = Read7BitEncodedInt();
      if (stringLength < 0)
      {
          throw new IOException(SR.Format(SR.IO_InvalidStringLen_Len, stringLength));
      }

      if (stringLength == 0)
      {
          return string.Empty;
      }

      _charBytes ??= new byte[MaxCharBytesSize];
      _charBuffer ??= new char[_maxCharsSize];

      StringBuilder? builder = null;
      int bytesConsumed = 0;

      do
      {
          // Pull at most one scratch buffer's worth of the remaining payload.
          int wanted = Math.Min(stringLength - bytesConsumed, MaxCharBytesSize);
          int got = _stream.Read(_charBytes, 0, wanted);
          if (got == 0)
          {
              throw Error.GetEndOfFile();
          }

          int decoded = _decoder.GetChars(_charBytes, 0, got, _charBuffer, 0);

          // Whole payload arrived in the first read: no builder needed.
          if (bytesConsumed == 0 && got == stringLength)
          {
              return new string(_charBuffer, 0, decoded);
          }

          builder ??= StringBuilderCache.Acquire(stringLength); // Actual string length in chars may be smaller.
          builder.Append(_charBuffer, 0, decoded);
          bytesConsumed += got;
      }
      while (bytesConsumed < stringLength);

      return StringBuilderCache.GetStringAndRelease(builder);
  }
  /// <summary>
  /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/> starting at
  /// <paramref name="index"/>.
  /// </summary>
  /// <returns>The number of characters written, as reported by the internal character reader.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
  /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
  public virtual int Read(char[] buffer, int index, int count)
  {
      if (buffer is null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > buffer.Length - index)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      ThrowIfDisposed();

      // index and count have already been verified to be a valid range for the buffer
      Span<char> destination = new Span<char>(buffer, index, count);
      return InternalReadChars(destination);
  }
  /// <summary>Reads characters into <paramref name="buffer"/>, up to its length.</summary>
  /// <returns>The number of characters written.</returns>
  public virtual int Read(Span<char> buffer)
  {
      ThrowIfDisposed();

      int charsWritten = InternalReadChars(buffer);
      return charsWritten;
  }
  /// <summary>
  /// Fills <paramref name="buffer"/> with decoded characters, reading bytes in chunks of at most
  /// MaxCharBytesSize. Callers are expected to have checked the disposed flag already.
  /// </summary>
  /// <returns>
  /// The number of characters written; smaller than the buffer length when a read returns zero bytes.
  /// </returns>
  private int InternalReadChars(Span<char> buffer)
  {
      Debug.Assert(!_disposed);

      _charBytes ??= new byte[MaxCharBytesSize];

      int writeOffset = 0;
      int charsRemaining = buffer.Length;

      while (charsRemaining > 0)
      {
          // We really want to know what the minimum number of bytes per char
          // is for our encoding. Otherwise for UnicodeEncoding we'd have to
          // do ~1+log(n) reads to read n characters.
          int numBytes = _2BytesPerChar ? charsRemaining << 1 : charsRemaining;
          numBytes = Math.Min(numBytes, MaxCharBytesSize);

          int byteOffset = 0;
          byte[]? byteBuffer;

          if (_isMemoryStream)
          {
              // Decode straight out of the MemoryStream's own buffer instead of copying.
              Debug.Assert(_stream is MemoryStream);
              MemoryStream memory = (MemoryStream)_stream;

              byteOffset = memory.InternalGetPosition();
              numBytes = memory.InternalEmulateRead(numBytes);
              byteBuffer = memory.InternalGetBuffer();
          }
          else
          {
              numBytes = _stream.Read(_charBytes, 0, numBytes);
              byteBuffer = _charBytes;
          }

          if (numBytes == 0)
          {
              // No more bytes: report what was decoded so far.
              return buffer.Length - charsRemaining;
          }

          Debug.Assert(byteBuffer != null, "expected byteBuffer to be non-null");

          int decoded;
          checked
          {
              // Validate both ranges before handing raw pointers to the decoder.
              if (byteOffset < 0 || numBytes < 0 || byteOffset > byteBuffer.Length - numBytes)
              {
                  throw new ArgumentOutOfRangeException(nameof(numBytes));
              }

              if (writeOffset < 0 || charsRemaining < 0 || writeOffset > buffer.Length - charsRemaining)
              {
                  throw new ArgumentOutOfRangeException(nameof(charsRemaining));
              }

              unsafe
              {
                  fixed (byte* pBytes = byteBuffer)
                  fixed (char* pChars = &MemoryMarshal.GetReference(buffer))
                  {
                      decoded = _decoder.GetChars(pBytes + byteOffset, numBytes, pChars + writeOffset, charsRemaining, flush: false);
                  }
              }
          }

          charsRemaining -= decoded;
          writeOffset += decoded;
      }

      // this should never fail
      Debug.Assert(charsRemaining >= 0, "We read too many characters.");

      // we may have read fewer than the number of characters requested if end of stream reached
      // or if the encoding makes the char count too big for the buffer (e.g. fallback sequence)
      return buffer.Length - charsRemaining;
  }
  /// <summary>Reads up to <paramref name="count"/> characters into a new array.</summary>
  /// <returns>
  /// An array holding exactly the characters read; shorter than <paramref name="count"/> when fewer
  /// were available, and the shared empty array when <paramref name="count"/> is zero.
  /// </returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
  public virtual char[] ReadChars(int count)
  {
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      ThrowIfDisposed();

      if (count == 0)
      {
          return Array.Empty<char>();
      }

      // We own this array, so the span over it is always a valid range.
      char[] result = new char[count];
      int filled = InternalReadChars(new Span<char>(result));
      if (filled == count)
      {
          return result;
      }

      // Fewer characters than requested: hand back a right-sized copy.
      char[] trimmed = new char[filled];
      Buffer.BlockCopy(result, 0, trimmed, 0, sizeof(char) * filled);
      return trimmed;
  }
  /// <summary>
  /// Reads up to <paramref name="count"/> bytes from the stream into <paramref name="buffer"/> starting
  /// at <paramref name="index"/>.
  /// </summary>
  /// <returns>The value returned by the underlying stream's Read call.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
  /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
  public virtual int Read(byte[] buffer, int index, int count)
  {
      if (buffer is null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > buffer.Length - index)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      ThrowIfDisposed();

      int bytesRead = _stream.Read(buffer, index, count);
      return bytesRead;
  }
  /// <summary>Reads bytes from the stream into <paramref name="buffer"/>.</summary>
  /// <returns>The value returned by the underlying stream's Read call.</returns>
  public virtual int Read(Span<byte> buffer)
  {
      ThrowIfDisposed();

      int bytesRead = _stream.Read(buffer);
      return bytesRead;
  }
  /// <summary>
  /// Reads up to <paramref name="count"/> bytes into a new array, calling the stream repeatedly until
  /// the array is full or a read returns zero bytes.
  /// </summary>
  /// <returns>
  /// An array holding exactly the bytes read; the shared empty array when <paramref name="count"/> is zero.
  /// </returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
  public virtual byte[] ReadBytes(int count)
  {
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      ThrowIfDisposed();

      if (count == 0)
      {
          return Array.Empty<byte>();
      }

      byte[] result = new byte[count];
      int filled = 0;

      while (filled < result.Length)
      {
          int chunk = _stream.Read(result, filled, result.Length - filled);
          if (chunk == 0)
          {
              break;
          }

          filled += chunk;
      }

      if (filled == result.Length)
      {
          return result;
      }

      // Trim array. This should happen on EOF & possibly net streams.
      byte[] trimmed = new byte[filled];
      Buffer.BlockCopy(result, 0, trimmed, 0, filled);
      return trimmed;
  }
  /// <summary>
  /// Reads exactly <paramref name="numBytes"/> bytes for the fixed-width primitive readers.
  /// </summary>
  /// <param name="numBytes">Number of bytes to read; expected to be between 2 and 16.</param>
  /// <returns>
  /// A span of exactly <paramref name="numBytes"/> bytes. For a MemoryStream it comes from
  /// <c>InternalReadSpan</c>; otherwise it is a view over the reader's scratch buffer and is
  /// overwritten by the next call.
  /// </returns>
  private ReadOnlySpan<byte> InternalRead(int numBytes)
  {
      Debug.Assert(numBytes >= 2 && numBytes <= 16, "value of 1 should use ReadByte. value > 16 requires to change the minimal _buffer size");

      // Check the disposed flag before choosing a path. Previously only the generic path checked it,
      // so a disposed reader created with leaveOpen: true kept reading from a MemoryStream while
      // ReadByte() and every other stream type already failed.
      ThrowIfDisposed();

      if (_isMemoryStream)
      {
          // read directly from MemoryStream buffer
          Debug.Assert(_stream is MemoryStream);
          return ((MemoryStream)_stream).InternalReadSpan(numBytes);
      }

      // Generic path: keep reading until the requested number of bytes has arrived.
      int bytesRead = 0;
      do
      {
          int n = _stream.Read(_buffer, bytesRead, numBytes - bytesRead);
          if (n == 0)
          {
              throw Error.GetEndOfFile();
          }

          bytesRead += n;
      }
      while (bytesRead < numBytes);

      // Expose only the bytes just read, matching the length returned on the MemoryStream path,
      // so stale bytes from an earlier, wider read are never visible to callers.
      return new ReadOnlySpan<byte>(_buffer, 0, numBytes);
  }
  // FillBuffer is not performing well when reading from MemoryStreams as it is using the public Stream interface.
  // We introduced new function InternalRead which can work directly on the MemoryStream internal buffer or using the public Stream
  // interface when working with all other streams. This function is not needed anymore but we decided not to delete it for compatibility
  // reasons. More about the subject in: https://github.com/dotnet/coreclr/pull/22102
  /// <summary>
  /// Fills the start of the internal buffer with exactly <paramref name="numBytes"/> bytes from the stream.
  /// </summary>
  /// <param name="numBytes">Number of bytes to read; must be between 0 and the internal buffer length.</param>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="numBytes"/> is negative or larger than the internal buffer.
  /// </exception>
  protected virtual void FillBuffer(int numBytes)
  {
      if (numBytes < 0 || numBytes > _buffer.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(numBytes), SR.ArgumentOutOfRange_BinaryReaderFillBuffer);
      }

      ThrowIfDisposed();

      // Need to find a good threshold for calling ReadByte() repeatedly
      // vs. calling Read(byte[], int, int) for both buffered & unbuffered
      // streams.
      if (numBytes == 1)
      {
          int single = _stream.ReadByte();
          if (single == -1)
          {
              throw Error.GetEndOfFile();
          }

          _buffer[0] = (byte)single;
          return;
      }

      // The body always runs at least once, so the end-of-file check below also applies
      // when numBytes is 0 and the stream answers the zero-length read with 0.
      int filled = 0;
      do
      {
          int chunk = _stream.Read(_buffer, filled, numBytes - filled);
          if (chunk == 0)
          {
              throw Error.GetEndOfFile();
          }

          filled += chunk;
      }
      while (filled < numBytes);
  }
  /// <summary>
  /// Reads an Int32 stored 7 bits per byte, least-significant group first. The high bit of each
  /// byte, when set, means another byte follows.
  /// </summary>
  /// <returns>The decoded 32-bit value.</returns>
  /// <exception cref="FormatException">
  /// The encoding runs past five bytes, or the fifth byte carries bits that do not fit in 32 bits.
  /// </exception>
  protected internal int Read7BitEncodedInt()
  {
      // The first four bytes contribute 28 bits and can never overflow an Int32.
      const int MaxBytesWithoutOverflow = 4;

      int count = 0;
      byte b;

      for (int shift = 0; shift < MaxBytesWithoutOverflow * 7; shift += 7)
      {
          // ReadByte handles end of stream cases for us.
          b = ReadByte();
          count |= (b & 0x7F) << shift;

          if ((b & 0x80) == 0)
          {
              return count;
          }
      }

      // Fifth and final byte: only 32 - 28 = 4 payload bits remain, and the continuation bit
      // must be clear. Anything else is a corrupted stream. Previously the extra payload bits
      // were silently shifted out and a wrong value was returned.
      b = ReadByte();
      if (b > 0x0F)
      {
          throw new FormatException(SR.Format_Bad7BitInt32);
      }

      return count | (b << (MaxBytesWithoutOverflow * 7));
  }
  545. }
  546. }