BinaryReader.cs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. /*============================================================
  5. **
  6. **
  7. **
  8. **
  9. **
  10. ** Purpose: Wraps a stream and provides convenient read functionality
  11. ** for strings and primitive types.
  12. **
  13. **
  14. ============================================================*/
  15. using System.Buffers.Binary;
  16. using System.Diagnostics;
  17. using System.Runtime.CompilerServices;
  18. using System.Text;
  19. namespace System.IO
  20. {
  /// <summary>
  /// Wraps a <see cref="Stream"/> and provides convenient read functionality for strings and
  /// primitive types. Multi-byte integers and floating-point values are read in little-endian
  /// byte order; characters are decoded with the encoding supplied at construction.
  /// </summary>
  public class BinaryReader : IDisposable
  {
      // Size, in bytes, of the scratch buffer used to stage encoded character data before decoding.
      private const int MaxCharBytesSize = 128;

      // Upper bound on the *initial* StringBuilder capacity requested by ReadString. The length
      // prefix comes from the stream and may be corrupt or hostile, so it must not drive a large
      // up-front allocation; the builder grows on demand as real data arrives.
      // NOTE(review): 360 is believed to match StringBuilderCache's cached-builder limit - confirm.
      private const int MaxInitialStringBuilderCapacity = 360;

      private readonly Stream _stream;
      private readonly byte[] _buffer;
      private readonly Decoder _decoder;
      private byte[]? _charBytes;
      private char[]? _charBuffer;
      private readonly int _maxCharsSize; // From MaxCharBytesSize & Encoding

      // Performance optimization for Read() w/ Unicode. Speeds us up by ~40%
      private readonly bool _2BytesPerChar;
      private readonly bool _isMemoryStream; // "do we sit on MemoryStream?" for Read/ReadInt32 perf
      private readonly bool _leaveOpen;
      private bool _disposed;

      /// <summary>
      /// Initializes a reader over <paramref name="input"/> using UTF-8 and closing the stream on dispose.
      /// </summary>
      /// <param name="input">The stream to read from.</param>
      public BinaryReader(Stream input) : this(input, Encoding.UTF8, false)
      {
      }

      /// <summary>
      /// Initializes a reader over <paramref name="input"/> using <paramref name="encoding"/> and
      /// closing the stream on dispose.
      /// </summary>
      /// <param name="input">The stream to read from.</param>
      /// <param name="encoding">The character encoding used to decode chars and strings.</param>
      public BinaryReader(Stream input, Encoding encoding) : this(input, encoding, false)
      {
      }

      /// <summary>
      /// Initializes a reader over <paramref name="input"/> using <paramref name="encoding"/>.
      /// </summary>
      /// <param name="input">The stream to read from.</param>
      /// <param name="encoding">The character encoding used to decode chars and strings.</param>
      /// <param name="leaveOpen">
      /// <see langword="true"/> to leave <paramref name="input"/> open when this reader is disposed;
      /// <see langword="false"/> to close it.
      /// </param>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="input"/> or <paramref name="encoding"/> is <see langword="null"/>.
      /// </exception>
      /// <exception cref="ArgumentException"><paramref name="input"/> does not support reading.</exception>
      public BinaryReader(Stream input, Encoding encoding, bool leaveOpen)
      {
          if (input == null)
          {
              throw new ArgumentNullException(nameof(input));
          }
          if (encoding == null)
          {
              throw new ArgumentNullException(nameof(encoding));
          }
          if (!input.CanRead)
          {
              throw new ArgumentException(SR.Argument_StreamNotReadable);
          }

          _stream = input;
          _decoder = encoding.GetDecoder();
          _maxCharsSize = encoding.GetMaxCharCount(MaxCharBytesSize);

          // The primitive buffer must hold at least 16 bytes (the largest primitive read, a decimal)
          // and at least the maximum number of bytes a single char can occupy in this encoding.
          int minBufferSize = encoding.GetMaxByteCount(1); // max bytes per one char
          if (minBufferSize < 16)
          {
              minBufferSize = 16;
          }
          _buffer = new byte[minBufferSize];
          // _charBuffer and _charBytes will be left null.

          // For Encodings that always use 2 bytes per char (or more),
          // special case them here to make Read() & Peek() faster.
          _2BytesPerChar = encoding is UnicodeEncoding;

          // Check if BinaryReader is based on MemoryStream, and keep this for its lifetime.
          // We cannot use the "as" operator, since derived classes are not allowed.
          _isMemoryStream = (_stream.GetType() == typeof(MemoryStream));
          _leaveOpen = leaveOpen;

          Debug.Assert(_decoder != null, "[BinaryReader.ctor]_decoder!=null");
      }

      /// <summary>Gets the underlying stream this reader was constructed over.</summary>
      public virtual Stream BaseStream => _stream;

      /// <summary>
      /// Marks the reader as disposed and, when <paramref name="disposing"/> is
      /// <see langword="true"/> and the reader was not created with <c>leaveOpen</c>, closes the
      /// underlying stream. Subsequent calls are no-ops.
      /// </summary>
      /// <param name="disposing"><see langword="true"/> when called from <see cref="Dispose()"/> or <see cref="Close"/>.</param>
      protected virtual void Dispose(bool disposing)
      {
          if (!_disposed)
          {
              if (disposing && !_leaveOpen)
              {
                  _stream.Close();
              }
              _disposed = true;
          }
      }

      /// <summary>Disposes the reader by calling <see cref="Dispose(bool)"/> with <see langword="true"/>.</summary>
      public void Dispose()
      {
          Dispose(true);
      }

      /// <remarks>
      /// Override Dispose(bool) instead of Close(). This API exists for compatibility purposes.
      /// </remarks>
      public virtual void Close()
      {
          Dispose(true);
      }

      // Throws the "file not open" error produced by Error.GetFileNotOpen() once the reader is disposed.
      private void ThrowIfDisposed()
      {
          if (_disposed)
          {
              throw Error.GetFileNotOpen();
          }
      }

      /// <summary>
      /// Returns the next available character without advancing the stream position.
      /// </summary>
      /// <returns>
      /// The next character, or -1 when no more characters are available or the stream does not
      /// support seeking.
      /// </returns>
      public virtual int PeekChar()
      {
          ThrowIfDisposed();

          if (!_stream.CanSeek)
          {
              return -1;
          }

          // Read one char, then rewind to where we started.
          long origPos = _stream.Position;
          int ch = Read();
          _stream.Position = origPos;
          return ch;
      }

      /// <summary>
      /// Reads one character from the stream, advancing the position by the number of bytes the
      /// character occupies in the reader's encoding.
      /// </summary>
      /// <returns>The character read, or -1 when the end of the stream is reached before any byte is read.</returns>
      public virtual int Read()
      {
          ThrowIfDisposed();

          int charsRead = 0;
          int numBytes;
          long posSav = 0;

          // Remember the starting position so a failed decode can be undone on seekable streams.
          if (_stream.CanSeek)
          {
              posSav = _stream.Position;
          }

          _charBytes ??= new byte[MaxCharBytesSize];
          Span<char> singleChar = stackalloc char[1];

          while (charsRead == 0)
          {
              // We really want to know what the minimum number of bytes per char
              // is for our encoding. Otherwise for UnicodeEncoding we'd have to
              // do ~1+log(n) reads to read n characters.
              // Assume 1 byte can be 1 char unless _2BytesPerChar is true.
              numBytes = _2BytesPerChar ? 2 : 1;

              int r = _stream.ReadByte();
              if (r == -1)
              {
                  // End of stream before any byte of this iteration was read.
                  return -1;
              }
              _charBytes[0] = (byte)r;

              if (numBytes == 2)
              {
                  r = _stream.ReadByte();
                  _charBytes[1] = (byte)r;
                  if (r == -1)
                  {
                      // Only the first byte was available; hand the decoder what we have.
                      numBytes = 1;
                  }
              }

              Debug.Assert(numBytes == 1 || numBytes == 2, "BinaryReader::ReadOneChar assumes it's reading one or 2 bytes only.");

              try
              {
                  charsRead = _decoder.GetChars(new ReadOnlySpan<byte>(_charBytes, 0, numBytes), singleChar, flush: false);
              }
              catch
              {
                  // Handle surrogate char
                  if (_stream.CanSeek)
                  {
                      _stream.Seek(posSav - _stream.Position, SeekOrigin.Current);
                  }
                  // else - we can't do much here

                  throw;
              }

              Debug.Assert(charsRead < 2, "BinaryReader::ReadOneChar - assuming we only got 0 or 1 char, not 2!");
          }

          Debug.Assert(charsRead > 0);
          return singleChar[0];
      }

      /// <summary>Reads the next byte from the stream; throws the end-of-file error if none is available.</summary>
      public virtual byte ReadByte() => InternalReadByte();

      [MethodImpl(MethodImplOptions.AggressiveInlining)] // Inlined to avoid some method call overhead with InternalRead.
      private byte InternalReadByte()
      {
          ThrowIfDisposed();

          int b = _stream.ReadByte();
          if (b == -1)
          {
              throw Error.GetEndOfFile();
          }

          return (byte)b;
      }

      /// <summary>Reads one byte and reinterprets it as a signed byte.</summary>
      [CLSCompliant(false)]
      public virtual sbyte ReadSByte() => (sbyte)InternalReadByte();

      /// <summary>Reads one byte and returns <see langword="true"/> if it is non-zero.</summary>
      public virtual bool ReadBoolean() => InternalReadByte() != 0;

      /// <summary>
      /// Reads the next character via <see cref="Read()"/>; throws the end-of-file error when
      /// <see cref="Read()"/> reports -1.
      /// </summary>
      public virtual char ReadChar()
      {
          int value = Read();
          if (value == -1)
          {
              throw Error.GetEndOfFile();
          }
          return (char)value;
      }

      /// <summary>Reads a 2-byte little-endian signed integer.</summary>
      public virtual short ReadInt16() => BinaryPrimitives.ReadInt16LittleEndian(InternalRead(2));

      /// <summary>Reads a 2-byte little-endian unsigned integer.</summary>
      [CLSCompliant(false)]
      public virtual ushort ReadUInt16() => BinaryPrimitives.ReadUInt16LittleEndian(InternalRead(2));

      /// <summary>Reads a 4-byte little-endian signed integer.</summary>
      public virtual int ReadInt32() => BinaryPrimitives.ReadInt32LittleEndian(InternalRead(4));

      /// <summary>Reads a 4-byte little-endian unsigned integer.</summary>
      [CLSCompliant(false)]
      public virtual uint ReadUInt32() => BinaryPrimitives.ReadUInt32LittleEndian(InternalRead(4));

      /// <summary>Reads an 8-byte little-endian signed integer.</summary>
      public virtual long ReadInt64() => BinaryPrimitives.ReadInt64LittleEndian(InternalRead(8));

      /// <summary>Reads an 8-byte little-endian unsigned integer.</summary>
      [CLSCompliant(false)]
      public virtual ulong ReadUInt64() => BinaryPrimitives.ReadUInt64LittleEndian(InternalRead(8));

      /// <summary>Reads 4 little-endian bytes and reinterprets the bits as a <see cref="float"/>.</summary>
      public virtual float ReadSingle() => BitConverter.Int32BitsToSingle(BinaryPrimitives.ReadInt32LittleEndian(InternalRead(4)));

      /// <summary>Reads 8 little-endian bytes and reinterprets the bits as a <see cref="double"/>.</summary>
      public virtual double ReadDouble() => BitConverter.Int64BitsToDouble(BinaryPrimitives.ReadInt64LittleEndian(InternalRead(8)));

      /// <summary>Reads 16 bytes and converts them to a <see cref="decimal"/>.</summary>
      /// <exception cref="IOException">The 16 bytes do not form a valid decimal representation.</exception>
      public virtual decimal ReadDecimal()
      {
          ReadOnlySpan<byte> span = InternalRead(16);
          try
          {
              return decimal.ToDecimal(span);
          }
          catch (ArgumentException e)
          {
              // ReadDecimal cannot leak out ArgumentException
              throw new IOException(SR.Arg_DecBitCtor, e);
          }
      }

      /// <summary>
      /// Reads a string that is prefixed with its encoded byte length, stored as a 7-bit encoded
      /// integer (see <see cref="Read7BitEncodedInt"/>).
      /// </summary>
      /// <returns>The decoded string; <see cref="string.Empty"/> when the length prefix is zero.</returns>
      /// <exception cref="IOException">The length prefix is negative.</exception>
      public virtual string ReadString()
      {
          ThrowIfDisposed();

          // Length of the string in bytes, not chars
          int stringLength = Read7BitEncodedInt();
          if (stringLength < 0)
          {
              throw new IOException(SR.Format(SR.IO_InvalidStringLen_Len, stringLength));
          }

          if (stringLength == 0)
          {
              return string.Empty;
          }

          _charBytes ??= new byte[MaxCharBytesSize];
          _charBuffer ??= new char[_maxCharsSize];

          int currPos = 0;
          StringBuilder? sb = null;
          do
          {
              // Never request more than the scratch buffer holds, nor more than the string has left.
              int readLength = Math.Min(stringLength - currPos, MaxCharBytesSize);

              int n = _stream.Read(_charBytes, 0, readLength);
              if (n == 0)
              {
                  throw Error.GetEndOfFile();
              }

              int charsRead = _decoder.GetChars(_charBytes, 0, n, _charBuffer, 0);

              // Fast path: the whole string arrived in the first read, so no builder is needed.
              if (currPos == 0 && n == stringLength)
              {
                  return new string(_charBuffer, 0, charsRead);
              }

              // The length prefix is untrusted input: cap the initial capacity so a bogus prefix
              // cannot force a huge allocation before any data has been validated. The builder
              // expands automatically. Actual string length in chars may be smaller anyway.
              sb ??= StringBuilderCache.Acquire(Math.Min(stringLength, MaxInitialStringBuilderCapacity));
              sb.Append(_charBuffer, 0, charsRead);
              currPos += n;
          } while (currPos < stringLength);

          return StringBuilderCache.GetStringAndRelease(sb);
      }

      /// <summary>
      /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/> starting at
      /// <paramref name="index"/>.
      /// </summary>
      /// <returns>The number of characters read; may be less than <paramref name="count"/>.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <see langword="null"/>.</exception>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
      /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
      public virtual int Read(char[] buffer, int index, int count)
      {
          if (buffer == null)
          {
              throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
          }
          if (index < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
          }
          if (count < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
          }
          if (buffer.Length - index < count)
          {
              throw new ArgumentException(SR.Argument_InvalidOffLen);
          }

          ThrowIfDisposed();
          return InternalReadChars(new Span<char>(buffer, index, count));
      }

      /// <summary>Reads characters into <paramref name="buffer"/> and returns how many were read.</summary>
      public virtual int Read(Span<char> buffer)
      {
          ThrowIfDisposed();
          return InternalReadChars(buffer);
      }

      // Fills as much of `buffer` as possible with decoded characters, taking care never to pull
      // more bytes from the stream than the requested characters could need.
      private int InternalReadChars(Span<char> buffer)
      {
          Debug.Assert(!_disposed);

          int totalCharsRead = 0;

          while (!buffer.IsEmpty)
          {
              int numBytes = buffer.Length;

              // We really want to know what the minimum number of bytes per char
              // is for our encoding. Otherwise for UnicodeEncoding we'd have to
              // do ~1+log(n) reads to read n characters.
              if (_2BytesPerChar)
              {
                  // Doubling overflows Int32 for spans longer than int.MaxValue / 2 and would yield a
                  // negative byte count; clamp instead. The loop simply continues for the remainder.
                  numBytes = numBytes > int.MaxValue / 2 ? int.MaxValue : numBytes << 1;
              }

              // We do not want to read even a single byte more than necessary.
              //
              // Subtract pending bytes that the decoder may be holding onto. This assumes that each
              // decoded char corresponds to one or more bytes. Note that custom encodings or encodings with
              // a custom replacement sequence may violate this assumption.
              if (numBytes > 1)
              {
                  DecoderNLS? decoder = _decoder as DecoderNLS;
                  // For internal decoders, we can check whether the decoder has any pending state.
                  // For custom decoders, assume that the decoder has pending state.
                  if (decoder == null || decoder.HasState)
                  {
                      numBytes--;

                      // The worst case is charsRemaining = 2 and UTF32Decoder holding onto 3 pending bytes. We need to read just
                      // one byte in this case.
                      if (_2BytesPerChar && numBytes > 2)
                      {
                          numBytes -= 2;
                      }
                  }
              }

              ReadOnlySpan<byte> byteBuffer;
              if (_isMemoryStream)
              {
                  // Decode straight out of the MemoryStream's own buffer, avoiding a copy.
                  Debug.Assert(_stream is MemoryStream);
                  MemoryStream mStream = (MemoryStream)_stream;

                  int position = mStream.InternalGetPosition();
                  numBytes = mStream.InternalEmulateRead(numBytes);
                  byteBuffer = new ReadOnlySpan<byte>(mStream.InternalGetBuffer(), position, numBytes);
              }
              else
              {
                  _charBytes ??= new byte[MaxCharBytesSize];

                  if (numBytes > MaxCharBytesSize)
                  {
                      numBytes = MaxCharBytesSize;
                  }

                  numBytes = _stream.Read(_charBytes, 0, numBytes);
                  byteBuffer = new ReadOnlySpan<byte>(_charBytes, 0, numBytes);
              }

              if (byteBuffer.IsEmpty)
              {
                  // No more bytes available from the stream.
                  break;
              }

              int charsRead = _decoder.GetChars(byteBuffer, buffer, flush: false);
              buffer = buffer.Slice(charsRead);

              totalCharsRead += charsRead;
          }

          // we may have read fewer than the number of characters requested if end of stream reached
          // or if the encoding makes the char count too big for the buffer (e.g. fallback sequence)
          return totalCharsRead;
      }

      /// <summary>
      /// Reads up to <paramref name="count"/> characters and returns them in a new array.
      /// </summary>
      /// <returns>An array sized to the number of characters actually read, which may be less than <paramref name="count"/>.</returns>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
      public virtual char[] ReadChars(int count)
      {
          if (count < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
          }
          ThrowIfDisposed();

          if (count == 0)
          {
              return Array.Empty<char>();
          }

          char[] chars = new char[count];
          int n = InternalReadChars(new Span<char>(chars));
          if (n != count)
          {
              // Trim the array to the characters actually read.
              char[] copy = new char[n];
              Buffer.BlockCopy(chars, 0, copy, 0, 2 * n); // sizeof(char)
              chars = copy;
          }

          return chars;
      }

      /// <summary>
      /// Reads up to <paramref name="count"/> bytes into <paramref name="buffer"/> starting at
      /// <paramref name="index"/> by forwarding to the underlying stream.
      /// </summary>
      /// <returns>The number of bytes read, as reported by the underlying stream.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <see langword="null"/>.</exception>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
      /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
      public virtual int Read(byte[] buffer, int index, int count)
      {
          if (buffer == null)
          {
              throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
          }
          if (index < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
          }
          if (count < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
          }
          if (buffer.Length - index < count)
          {
              throw new ArgumentException(SR.Argument_InvalidOffLen);
          }

          ThrowIfDisposed();
          return _stream.Read(buffer, index, count);
      }

      /// <summary>Reads bytes into <paramref name="buffer"/> by forwarding to the underlying stream.</summary>
      public virtual int Read(Span<byte> buffer)
      {
          ThrowIfDisposed();
          return _stream.Read(buffer);
      }

      /// <summary>
      /// Reads up to <paramref name="count"/> bytes and returns them in a new array, looping until
      /// the count is satisfied or the stream reports no more data.
      /// </summary>
      /// <returns>An array sized to the number of bytes actually read, which may be less than <paramref name="count"/>.</returns>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
      public virtual byte[] ReadBytes(int count)
      {
          if (count < 0)
          {
              throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
          }
          ThrowIfDisposed();

          if (count == 0)
          {
              return Array.Empty<byte>();
          }

          byte[] result = new byte[count];
          int numRead = 0;
          do
          {
              int n = _stream.Read(result, numRead, count);
              if (n == 0)
              {
                  break;
              }

              numRead += n;
              count -= n;
          } while (count > 0);

          if (numRead != result.Length)
          {
              // Trim array. This should happen on EOF & possibly net streams.
              byte[] copy = new byte[numRead];
              Buffer.BlockCopy(result, 0, copy, 0, numRead);
              result = copy;
          }

          return result;
      }

      // Reads exactly numBytes bytes and returns a span over exactly those bytes. For a MemoryStream
      // the span comes from the stream itself; otherwise the bytes are staged in _buffer. Throws the
      // end-of-file error if the stream ends first.
      private ReadOnlySpan<byte> InternalRead(int numBytes)
      {
          Debug.Assert(numBytes >= 2 && numBytes <= 16, "value of 1 should use ReadByte. value > 16 requires to change the minimal _buffer size");

          if (_isMemoryStream)
          {
              // read directly from MemoryStream buffer
              Debug.Assert(_stream is MemoryStream);
              return ((MemoryStream)_stream).InternalReadSpan(numBytes);
          }
          else
          {
              ThrowIfDisposed();

              int bytesRead = 0;
              do
              {
                  int n = _stream.Read(_buffer, bytesRead, numBytes - bytesRead);
                  if (n == 0)
                  {
                      throw Error.GetEndOfFile();
                  }
                  bytesRead += n;
              } while (bytesRead < numBytes);

              // Expose only the bytes just read (not the stale tail of _buffer), matching the
              // length of the span returned by the MemoryStream path.
              return new ReadOnlySpan<byte>(_buffer, 0, numBytes);
          }
      }

      // FillBuffer is not performing well when reading from MemoryStreams as it is using the public Stream interface.
      // We introduced new function InternalRead which can work directly on the MemoryStream internal buffer or using the public Stream
      // interface when working with all other streams. This function is not needed anymore but we decided not to delete it for compatibility
      // reasons. More about the subject in: https://github.com/dotnet/coreclr/pull/22102
      /// <summary>
      /// Reads exactly <paramref name="numBytes"/> bytes from the stream into the internal buffer.
      /// </summary>
      /// <param name="numBytes">Number of bytes to read; must be between 0 and the internal buffer length.</param>
      /// <exception cref="ArgumentOutOfRangeException">
      /// <paramref name="numBytes"/> is negative or larger than the internal buffer.
      /// </exception>
      protected virtual void FillBuffer(int numBytes)
      {
          if (numBytes < 0 || numBytes > _buffer.Length)
          {
              throw new ArgumentOutOfRangeException(nameof(numBytes), SR.ArgumentOutOfRange_BinaryReaderFillBuffer);
          }

          int bytesRead = 0;
          int n = 0;
          ThrowIfDisposed();

          // Need to find a good threshold for calling ReadByte() repeatedly
          // vs. calling Read(byte[], int, int) for both buffered & unbuffered
          // streams.
          if (numBytes == 1)
          {
              n = _stream.ReadByte();
              if (n == -1)
              {
                  throw Error.GetEndOfFile();
              }

              _buffer[0] = (byte)n;
              return;
          }

          // Note: for numBytes == 0 this still issues one zero-length Read and throws if it
          // returns 0; that long-standing behavior is intentionally left unchanged.
          do
          {
              n = _stream.Read(_buffer, bytesRead, numBytes - bytesRead);
              if (n == 0)
              {
                  throw Error.GetEndOfFile();
              }
              bytesRead += n;
          } while (bytesRead < numBytes);
      }

      /// <summary>
      /// Reads a 32-bit integer stored 7 bits per byte, least-significant group first, where a set
      /// high bit on a byte means another byte follows.
      /// </summary>
      /// <returns>The decoded 32-bit value.</returns>
      /// <exception cref="FormatException">
      /// The encoding uses more than 5 bytes, or the fifth byte carries bits that do not fit in
      /// 32 bits (a corrupted stream).
      /// </exception>
      protected internal int Read7BitEncodedInt()
      {
          // There are two failure cases: more than 5 bytes, or a fifth byte that would overflow
          // 32 bits. The first 4 bytes contribute at most 28 bits, so they can never overflow.
          const int MaxBytesWithoutOverflow = 4;

          uint result = 0;
          byte b;

          for (int shift = 0; shift < MaxBytesWithoutOverflow * 7; shift += 7)
          {
              // ReadByte handles end of stream cases for us.
              b = ReadByte();
              result |= (b & 0x7Fu) << shift;

              if (b <= 0x7Fu)
              {
                  // High bit clear: this was the final byte.
                  return (int)result;
              }
          }

          // Fifth byte: 28 bits are already consumed, so only the low 4 bits (32 - 28) may be set
          // and the continuation bit must be clear. Anything else previously wrapped silently.
          b = ReadByte();
          if (b > 0x0Fu)
          {
              throw new FormatException(SR.Format_Bad7BitInt32);
          }

          result |= (uint)b << (MaxBytesWithoutOverflow * 7);
          return (int)result;
      }
  }
  515. }