StreamReader.cs 53 KB


  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Diagnostics.CodeAnalysis;
  6. using System.Text;
  7. using System.Threading;
  8. using System.Threading.Tasks;
  9. namespace System.IO
  10. {
  11. // This class implements a TextReader for reading characters from a Stream.
  12. // This is designed for character input in a particular Encoding,
  13. // whereas the Stream class is designed for byte input and output.
  14. public class StreamReader : TextReader
  15. {
  16. // StreamReader.Null is threadsafe.
  17. public static new readonly StreamReader Null = new NullStreamReader();
  18. // Using a 1K byte buffer and a 4K FileStream buffer works out pretty well
  19. // perf-wise. On even a 40 MB text file, any perf loss by using a 4K
  20. // buffer is negated by the win of allocating a smaller byte[], which
  21. // saves construction time. This does break adaptive buffering,
  22. // but this is slightly faster.
  23. private const int DefaultBufferSize = 1024; // Byte buffer size
  24. private const int DefaultFileStreamBufferSize = 4096;
  25. private const int MinBufferSize = 128;
  26. private readonly Stream _stream;
  27. private Encoding _encoding = null!; // only null in NullStreamReader where this is never used
  28. private Decoder _decoder = null!; // only null in NullStreamReader where this is never used
  29. private readonly byte[] _byteBuffer = null!; // only null in NullStreamReader where this is never used
  30. private char[] _charBuffer = null!; // only null in NullStreamReader where this is never used
  31. private int _charPos;
  32. private int _charLen;
  33. // Record the number of valid bytes in the byteBuffer, for a few checks.
  34. private int _byteLen;
  35. // This is used only for preamble detection
  36. private int _bytePos;
  37. // This is the maximum number of chars we can get from one call to
  38. // ReadBuffer. Used so ReadBuffer can tell when to copy data into
  39. // a user's char[] directly, instead of our internal char[].
  40. private int _maxCharsPerBuffer;
  41. /// <summary>True if the reader has been disposed; otherwise, false.</summary>
  42. private bool _disposed;
  43. // We will support looking for byte order marks in the stream and trying
  44. // to decide what the encoding might be from the byte order marks, IF they
  45. // exist. But that's all we'll do.
  46. private bool _detectEncoding;
  47. // Whether we must still check for the encoding's given preamble at the
  48. // beginning of this file.
  49. private bool _checkPreamble;
  50. // Whether the stream is most likely not going to give us back as much
  51. // data as we want the next time we call it. We must do the computation
  52. // before we do any byte order mark handling and save the result. Note
  53. // that we need this to allow users to handle streams used for an
  54. // interactive protocol, where they block waiting for the remote end
  55. // to send a response, like logging in on a Unix machine.
  56. private bool _isBlocked;
  57. // The intent of this field is to leave open the underlying stream when
  58. // disposing of this StreamReader. A name like _leaveOpen is better,
  59. // but this type is serializable, and this field's name was _closable.
  60. private readonly bool _closable; // Whether to close the underlying stream.
  61. // We don't guarantee thread safety on StreamReader, but we should at
  62. // least prevent users from trying to read anything while an Async
  63. // read from the same thread is in progress.
  64. private Task _asyncReadTask = Task.CompletedTask;
  // Rejects a new read while the task stored in _asyncReadTask has not completed.
  private void CheckAsyncTaskInProgress()
  {
      // Intentionally unsynchronized: this is a deterrent against overlapping reads
      // issued from the same thread, not a thread-safety guarantee.
      if (_asyncReadTask.IsCompleted)
      {
          return;
      }

      ThrowAsyncIOInProgress();
  }
  // Throw helper for CheckAsyncTaskInProgress; always throws InvalidOperationException.
  [DoesNotReturn]
  private static void ThrowAsyncIOInProgress()
  {
      throw new InvalidOperationException(SR.InvalidOperation_AsyncIOInProgress);
  }
  77. // StreamReader by default will ignore illegal UTF8 characters. We don't want to
  78. // throw here because we want to be able to read ill-formed data without choking.
  79. // The high level goal is to be tolerant of encoding errors when we read and very strict
  80. // when we write. Hence, default StreamWriter encoding will throw on error.
  // Parameterless constructor reserved for NullStreamReader (asserted below). It binds
  // the reader to Stream.Null and leaves the encoding, decoder and buffers unset.
  private StreamReader()
  {
      Debug.Assert(this is NullStreamReader);

      _closable = true;
      _stream = Stream.Null;
  }
  // Reads from stream with byte-order-mark detection switched on.
  public StreamReader(Stream stream)
      : this(stream, detectEncodingFromByteOrderMarks: true)
  {
  }
  // Reads from stream as UTF-8 with the default buffer size; the stream is closed
  // when the reader is disposed.
  public StreamReader(Stream stream, bool detectEncodingFromByteOrderMarks)
      : this(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks, DefaultBufferSize, leaveOpen: false)
  {
  }
  // Reads from stream using the supplied encoding, with byte-order-mark detection
  // switched on and the default buffer size.
  public StreamReader(Stream stream, Encoding encoding)
      : this(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: DefaultBufferSize, leaveOpen: false)
  {
  }
  // Reads from stream using the supplied encoding and detection flag, with the
  // default buffer size.
  public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      : this(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize, leaveOpen: false)
  {
  }
  103. // Creates a new StreamReader for the given stream. The
  104. // character encoding is set by encoding and the size of the
  105. // internal byte buffer, in bytes, is set by bufferSize.
  106. //
  107. // Note that detectEncodingFromByteOrderMarks is a very
  108. // loose attempt at detecting the encoding by looking at the first
  109. // few bytes of the stream. It will recognize the UTF-8, UTF-16 (little
  110. // and big endian) and UTF-32 (little and big endian) byte order marks,
  111. // but that's it. If none of those match, it will use the Encoding you provided.
  112. //
  // Same as the five-argument constructor with leaveOpen set to false.
  public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
      : this(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize, leaveOpen: false)
  {
  }
  // Primary stream-based constructor.
  //   stream    - source of bytes; must be non-null and readable.
  //   encoding  - decoding to use; null selects Encoding.UTF8.
  //   detectEncodingFromByteOrderMarks - whether DetectEncoding may replace the encoding.
  //   bufferSize - byte buffer size; -1 selects DefaultBufferSize, values below
  //                MinBufferSize are raised to MinBufferSize.
  //   leaveOpen - when true the stream is not closed by Dispose.
  // Throws ArgumentNullException (stream is null), ArgumentException (stream is not
  // readable) or ArgumentOutOfRangeException (bufferSize is zero or negative but not -1).
  public StreamReader(Stream stream, Encoding? encoding = null, bool detectEncodingFromByteOrderMarks = true, int bufferSize = -1, bool leaveOpen = false)
  {
      if (stream is null)
      {
          throw new ArgumentNullException(nameof(stream));
      }

      encoding ??= Encoding.UTF8;

      if (!stream.CanRead)
      {
          throw new ArgumentException(SR.Argument_StreamNotReadable);
      }

      if (bufferSize == -1)
      {
          bufferSize = DefaultBufferSize;
      }
      else if (bufferSize <= 0)
      {
          throw new ArgumentOutOfRangeException(nameof(bufferSize), SR.ArgumentOutOfRange_NeedPosNum);
      }

      // Never allocate less than the minimum byte buffer.
      int byteCapacity = Math.Max(bufferSize, MinBufferSize);

      _stream = stream;
      _encoding = encoding;
      _decoder = encoding.GetDecoder();

      _byteBuffer = new byte[byteCapacity];
      _maxCharsPerBuffer = encoding.GetMaxCharCount(byteCapacity);
      _charBuffer = new char[_maxCharsPerBuffer];

      _byteLen = 0;
      _bytePos = 0;
      _isBlocked = false;

      _detectEncoding = detectEncodingFromByteOrderMarks;
      // Only encodings that define a preamble need the preamble check.
      _checkPreamble = encoding.Preamble.Length > 0;
      _closable = !leaveOpen;
  }
  // Opens the file at path with byte-order-mark detection switched on.
  public StreamReader(string path)
      : this(path, detectEncodingFromByteOrderMarks: true)
  {
  }
  // Opens the file at path as UTF-8 with the default buffer size.
  public StreamReader(string path, bool detectEncodingFromByteOrderMarks)
      : this(path, Encoding.UTF8, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize)
  {
  }
  // Opens the file at path with the supplied encoding, byte-order-mark detection
  // switched on and the default buffer size.
  public StreamReader(string path, Encoding encoding)
      : this(path, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: DefaultBufferSize)
  {
  }
  // Opens the file at path with the supplied encoding and detection flag, using the
  // default buffer size.
  public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      : this(path, encoding, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize)
  {
  }
  // Validates the arguments, opens the file through ValidateArgsAndOpenPath and then
  // delegates to the stream-based constructor. The opened stream is owned by the
  // reader (leaveOpen is false).
  public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
      : this(
          ValidateArgsAndOpenPath(path, encoding, bufferSize),
          encoding,
          detectEncodingFromByteOrderMarks,
          bufferSize,
          leaveOpen: false)
  {
  }
  // Checks the path-constructor arguments and opens the file for sequential reading.
  // Throws ArgumentNullException for a null path or encoding, ArgumentException for
  // an empty path and ArgumentOutOfRangeException for a non-positive bufferSize.
  // Returns a read-only FileStream using DefaultFileStreamBufferSize.
  private static Stream ValidateArgsAndOpenPath(string path, Encoding encoding, int bufferSize)
  {
      if (path is null)
      {
          throw new ArgumentNullException(nameof(path));
      }

      if (encoding is null)
      {
          throw new ArgumentNullException(nameof(encoding));
      }

      if (path.Length == 0)
      {
          throw new ArgumentException(SR.Argument_EmptyPath);
      }

      if (bufferSize <= 0)
      {
          throw new ArgumentOutOfRangeException(nameof(bufferSize), SR.ArgumentOutOfRange_NeedPosNum);
      }

      return new FileStream(
          path,
          FileMode.Open,
          FileAccess.Read,
          FileShare.Read,
          DefaultFileStreamBufferSize,
          FileOptions.SequentialScan);
  }
  // Closing the reader is the same as disposing it.
  public override void Close() => Dispose(true);
  // Marks the reader as disposed (idempotent). When the reader owns the stream
  // (_closable) it closes the stream on an explicit dispose, clears the character
  // buffer positions and calls the base Dispose. When it does not own the stream,
  // nothing beyond setting the disposed flag happens.
  protected override void Dispose(bool disposing)
  {
      if (_disposed)
      {
          return;
      }

      _disposed = true;

      if (!_closable)
      {
          return;
      }

      try
      {
          // Stream.Close() may throw, so the internal cleanup lives in the finally block.
          if (disposing)
          {
              _stream.Close();
          }
      }
      finally
      {
          _charPos = 0;
          _charLen = 0;
          base.Dispose(disposing);
      }
  }
  // The encoding currently in use; DetectEncoding may replace it after the first read.
  public virtual Encoding CurrentEncoding
  {
      get { return _encoding; }
  }
  // The stream this reader pulls bytes from.
  public virtual Stream BaseStream
  {
      get { return _stream; }
  }
  221. // DiscardBufferedData tells StreamReader to throw away its internal
  222. // buffer contents. This is useful if the user needs to seek on the
  223. // underlying stream to a known location then wants the StreamReader
  224. // to start reading from this new point. This method should be called
  225. // very sparingly, if ever, since it can lead to very poor performance.
  226. // However, it may be the only way of handling some scenarios where
  227. // users need to re-read the contents of a StreamReader a second time.
  // Drops all buffered bytes and characters and resets the decoder state.
  // Throws InvalidOperationException when an asynchronous read is still in progress.
  public void DiscardBufferedData()
  {
      CheckAsyncTaskInProgress();

      _charPos = 0;
      _charLen = 0;
      _byteLen = 0;

      // The encoding is normally non-null, but NullStreamReader never sets it, so the
      // decoder is recreated only when an encoding is present.
      if (_encoding != null)
      {
          _decoder = _encoding.GetDecoder();
      }

      _isBlocked = false;
  }
  // True when no buffered characters remain and a refill from the stream yields none.
  // Note that the refill reads from the underlying stream and may block (e.g. on pipes).
  public bool EndOfStream
  {
      get
      {
          ThrowIfDisposed();
          CheckAsyncTaskInProgress();

          // ReadBuffer is consulted only when the character buffer is exhausted.
          return _charPos >= _charLen && ReadBuffer() == 0;
      }
  }
  // Returns the next character without consuming it, or -1 when none is available.
  // If the buffer is empty and the last stream read came up short (_isBlocked), -1 is
  // returned without touching the stream again.
  public override int Peek()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      bool bufferDrained = _charPos == _charLen;
      if (bufferDrained && (_isBlocked || ReadBuffer() == 0))
      {
          return -1;
      }

      return _charBuffer[_charPos];
  }
  // Consumes and returns the next character, or returns -1 at end of stream.
  public override int Read()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      // Refill only when the character buffer has been fully consumed.
      if (_charPos == _charLen && ReadBuffer() == 0)
      {
          return -1;
      }

      char next = _charBuffer[_charPos];
      _charPos += 1;
      return next;
  }
  // Reads up to count characters into buffer starting at index and returns the number
  // of characters read. Throws ArgumentNullException for a null buffer,
  // ArgumentOutOfRangeException for a negative index or count, and ArgumentException
  // when index and count do not fit inside buffer.
  public override int Read(char[] buffer, int index, int count)
  {
      if (buffer == null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (buffer.Length - index < count)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      Span<char> destination = new Span<char>(buffer, index, count);
      return ReadSpan(destination);
  }
  // Span-based read. Only the exact StreamReader type uses the fast path; derived
  // types go through the base implementation so an existing Read(char[], ...)
  // override is still honored.
  public override int Read(Span<char> buffer)
  {
      if (GetType() == typeof(StreamReader))
      {
          return ReadSpan(buffer);
      }

      return base.Read(buffer);
  }
  // Shared synchronous read: copies buffered characters into buffer, refilling from
  // the stream as needed, and returns how many characters were produced. Stops at
  // end of stream, when buffer is full, or after a short stream read (_isBlocked).
  private int ReadSpan(Span<char> buffer)
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      int copied = 0;
      int remaining = buffer.Length;

      // Set by ReadBuffer(Span<char>, out bool) when it decoded straight into the
      // caller's span; in that case there is nothing to copy out of _charBuffer.
      bool decodedIntoCaller = false;

      while (remaining > 0)
      {
          int chunk = _charLen - _charPos;
          if (chunk == 0)
          {
              chunk = ReadBuffer(buffer.Slice(copied), out decodedIntoCaller);
              if (chunk == 0)
              {
                  break; // End of stream.
              }
          }

          chunk = Math.Min(chunk, remaining);

          if (!decodedIntoCaller)
          {
              new Span<char>(_charBuffer, _charPos, chunk).CopyTo(buffer.Slice(copied));
              _charPos += chunk;
          }

          copied += chunk;
          remaining -= chunk;

          // Do not wait indefinitely for more data (e.g. network streams): a short
          // read from the stream ends this call with whatever has been gathered.
          if (_isBlocked)
          {
              break;
          }
      }

      return copied;
  }
  // Returns everything from the current position to the end of the stream.
  public override string ReadToEnd()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      var text = new StringBuilder(_charLen - _charPos);
      do
      {
          // Take whatever is buffered, mark it consumed, then refill.
          int pending = _charLen - _charPos;
          text.Append(_charBuffer, _charPos, pending);
          _charPos = _charLen;
          ReadBuffer();
      }
      while (_charLen > 0);

      return text.ToString();
  }
  // Validates the arguments and state, then defers to the base ReadBlock.
  // Throws ArgumentNullException for a null buffer, ArgumentOutOfRangeException for a
  // negative index or count, and ArgumentException when the range does not fit.
  public override int ReadBlock(char[] buffer, int index, int count)
  {
      if (buffer == null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (buffer.Length - index < count)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      return base.ReadBlock(buffer, index, count);
  }
  // Keeps calling ReadSpan until buffer is full or a call yields no characters, and
  // returns the total number of characters stored.
  public override int ReadBlock(Span<char> buffer)
  {
      // Derived types go through the base implementation so an existing
      // Read(char[], ...) override is still honored.
      if (GetType() != typeof(StreamReader))
      {
          return base.ReadBlock(buffer);
      }

      int filled = 0;
      while (true)
      {
          int got = ReadSpan(buffer.Slice(filled));
          filled += got;

          if (got <= 0 || filled >= buffer.Length)
          {
              return filled;
          }
      }
  }
  392. // Trims n bytes from the front of the buffer.
  // Removes the first n bytes of the byte buffer by shifting the remainder to the front.
  private void CompressBuffer(int n)
  {
      Debug.Assert(_byteLen >= n, "CompressBuffer was called with a number of bytes greater than the current buffer length. Are two threads using this StreamReader at the same time?");

      int kept = _byteLen - n;
      Buffer.BlockCopy(_byteBuffer, n, _byteBuffer, 0, kept);
      _byteLen = kept;
  }
  // Inspects the start of the byte buffer for a byte order mark. On a match it
  // switches _encoding, strips the mark from the buffer, and rebuilds the decoder and
  // (if needed) the character buffer. Needs at least two buffered bytes to decide.
  private void DetectEncoding()
  {
      if (_byteLen < 2)
      {
          return;
      }

      _detectEncoding = false;

      byte b0 = _byteBuffer[0];
      byte b1 = _byteBuffer[1];

      Encoding? detected = null;
      int markLength = 0;

      if (b0 == 0xFE && b1 == 0xFF)
      {
          // Big endian UTF-16.
          detected = Encoding.BigEndianUnicode;
          markLength = 2;
      }
      else if (b0 == 0xFF && b1 == 0xFE)
      {
          // Little endian UTF-16, unless two zero bytes follow (little endian UTF-32).
          bool isUtf32 = _byteLen >= 4 && _byteBuffer[2] == 0 && _byteBuffer[3] == 0;
          if (isUtf32)
          {
              detected = Encoding.UTF32;
              markLength = 4;
          }
          else
          {
              detected = Encoding.Unicode;
              markLength = 2;
          }
      }
      else if (_byteLen >= 3 && b0 == 0xEF && b1 == 0xBB && _byteBuffer[2] == 0xBF)
      {
          // UTF-8.
          detected = Encoding.UTF8;
          markLength = 3;
      }
      else if (_byteLen >= 4 && b0 == 0 && b1 == 0 && _byteBuffer[2] == 0xFE && _byteBuffer[3] == 0xFF)
      {
          // Big endian UTF-32.
          detected = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
          markLength = 4;
      }
      else if (_byteLen == 2)
      {
          // Only two bytes so far and no match yet: try again once more bytes arrive.
          _detectEncoding = true;
      }

      if (detected is null)
      {
          return;
      }

      _encoding = detected;
      CompressBuffer(markLength);
      _decoder = _encoding.GetDecoder();

      // The new encoding may produce more characters per byte buffer than the old one.
      int newMaxCharsPerBuffer = _encoding.GetMaxCharCount(_byteBuffer.Length);
      if (newMaxCharsPerBuffer > _maxCharsPerBuffer)
      {
          _charBuffer = new char[newMaxCharsPerBuffer];
      }

      _maxCharsPerBuffer = newMaxCharsPerBuffer;
  }
  462. // Trims the preamble bytes from the byteBuffer. This routine can be called multiple times
  463. // and we will buffer the bytes read until the preamble is matched or we determine that
  464. // there is no match. If there is no match, every byte read previously will be available
  465. // for further consumption. If there is a match, we will compress the buffer for the
  466. // leading preamble bytes
  // Compares the buffered bytes with the current encoding's preamble, resuming at
  // _bytePos. Returns true while the check is still undecided (more bytes needed);
  // returns false once the preamble has been matched and removed, or ruled out.
  private bool IsPreamble()
  {
      if (!_checkPreamble)
      {
          return false;
      }

      ReadOnlySpan<byte> expected = _encoding.Preamble;

      Debug.Assert(_bytePos <= expected.Length, "_compressPreamble was called with the current bytePos greater than the preamble buffer length. Are two threads using this StreamReader at the same time?");

      // Compare only as far as both the buffered data and the preamble reach.
      int end = Math.Min(_byteLen, expected.Length);
      while (_bytePos < end)
      {
          if (_byteBuffer[_bytePos] != expected[_bytePos])
          {
              // Mismatch: stop checking; the buffered bytes stay available for decoding.
              _bytePos = 0;
              _checkPreamble = false;
              break;
          }

          _bytePos++;
      }

      Debug.Assert(_bytePos <= expected.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");

      if (_checkPreamble && _bytePos == expected.Length)
      {
          // Full match: drop the preamble bytes and keep the caller-supplied encoding.
          CompressBuffer(expected.Length);
          _bytePos = 0;
          _checkPreamble = false;
          _detectEncoding = false;
      }

      return _checkPreamble;
  }
  // Refills the internal character buffer from the stream and returns the number of
  // characters now available (0 at end of stream). Loops until at least one
  // character has been decoded or the stream reports end of data.
  internal virtual int ReadBuffer()
  {
      _charPos = 0;
      _charLen = 0;

      // While the preamble check is pending, previously buffered bytes must be kept.
      if (!_checkPreamble)
      {
          _byteLen = 0;
      }

      while (true)
      {
          if (_checkPreamble)
          {
              Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
              int bytesRead = _stream.Read(_byteBuffer, _bytePos, _byteBuffer.Length - _bytePos);
              Debug.Assert(bytesRead >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (bytesRead == 0)
              {
                  // End of stream, but bytes held back by an unfinished preamble
                  // check still have to be decoded.
                  if (_byteLen > 0)
                  {
                      _charLen += _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, _charLen);
                      // Zero the counters so this path is not taken again for the same bytes.
                      _bytePos = _byteLen = 0;
                  }

                  return _charLen;
              }

              _byteLen += bytesRead;
          }
          else
          {
              Debug.Assert(_bytePos == 0, "bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");
              _byteLen = _stream.Read(_byteBuffer, 0, _byteBuffer.Length);
              Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (_byteLen == 0)
              {
                  return _charLen; // End of stream.
              }
          }

          // Record a short read now; CompressBuffer and DetectEncoding change _byteLen.
          _isBlocked = _byteLen < _byteBuffer.Length;

          // The preamble check runs before detection so that a caller-supplied
          // encoding (possibly customized, e.g. throw-on-error UTF-8) is not lost
          // to an implicitly detected one.
          if (!IsPreamble())
          {
              // Detection may need more than one attempt if too few bytes have arrived.
              if (_detectEncoding && _byteLen >= 2)
              {
                  DetectEncoding();
              }

              _charLen += _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, _charLen);
          }

          if (_charLen != 0)
          {
              return _charLen;
          }
      }
  }
  559. // This version has a perf optimization to decode data DIRECTLY into the
  560. // user's buffer, bypassing StreamReader's own buffer.
  561. // This gives a > 20% perf improvement for our encodings across the board,
  562. // but only when asking for at least the number of characters that one
  563. // buffer's worth of bytes could produce.
  564. // This optimization, if run, will break SwitchEncoding, so we must not do
  565. // this on the first call to ReadBuffer.
  // Refills from the stream on behalf of ReadSpan.
  //   userBuffer       - the caller's remaining destination span.
  //   readToUserBuffer - set to true when the characters were decoded straight into
  //                      userBuffer (the internal character buffer is then empty);
  //                      false when they were decoded into _charBuffer.
  // Returns the number of characters decoded, or 0 at end of stream.
  private int ReadBuffer(Span<char> userBuffer, out bool readToUserBuffer)
  {
      _charLen = 0;
      _charPos = 0;

      // While the preamble check is pending, previously buffered bytes must be kept.
      if (!_checkPreamble)
      {
          _byteLen = 0;
      }

      int charsRead = 0;

      // Decoding directly into the caller's span is only safe when it can hold the
      // most characters a full byte buffer may produce. The cached
      // _maxCharsPerBuffer is used instead of calling GetMaxCharCount(byteLen) on
      // every read, which makes the optimization less aggressive but cheaper.
      readToUserBuffer = userBuffer.Length >= _maxCharsPerBuffer;

      do
      {
          Debug.Assert(charsRead == 0);

          if (_checkPreamble)
          {
              Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
              int len = _stream.Read(_byteBuffer, _bytePos, _byteBuffer.Length - _bytePos);
              Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (len == 0)
              {
                  // End of stream, but bytes held back by an unfinished preamble
                  // check still have to be decoded.
                  if (_byteLen > 0)
                  {
                      if (readToUserBuffer)
                      {
                          charsRead = _decoder.GetChars(new ReadOnlySpan<byte>(_byteBuffer, 0, _byteLen), userBuffer.Slice(charsRead), flush: false);
                          _charLen = 0; // StreamReader's buffer is empty.
                      }
                      else
                      {
                          charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, charsRead);
                          _charLen += charsRead; // Number of chars in StreamReader's buffer.
                      }

                      // The bytes have been consumed: zero the counters, exactly as the
                      // parameterless ReadBuffer() does. Otherwise every later call at
                      // end of stream would hand the same stale bytes to the stateful
                      // decoder again.
                      _bytePos = _byteLen = 0;
                  }

                  return charsRead;
              }

              _byteLen += len;
          }
          else
          {
              Debug.Assert(_bytePos == 0, "bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");
              _byteLen = _stream.Read(_byteBuffer, 0, _byteBuffer.Length);
              Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (_byteLen == 0) // End of stream.
              {
                  break;
              }
          }

          // Record a short read now; CompressBuffer and DetectEncoding change _byteLen.
          _isBlocked = (_byteLen < _byteBuffer.Length);

          // The preamble check runs before detection so that a caller-supplied
          // encoding (possibly customized, e.g. throw-on-error UTF-8) is not lost
          // to an implicitly detected one. IsPreamble does not change the encoding
          // or _maxCharsPerBuffer, so readToUserBuffer stays valid.
          if (IsPreamble())
          {
              continue;
          }

          // On the first call, detect the encoding if we are supposed to.
          if (_detectEncoding && _byteLen >= 2)
          {
              DetectEncoding();
              // DetectEncoding may change _maxCharsPerBuffer; recompute the decision.
              readToUserBuffer = userBuffer.Length >= _maxCharsPerBuffer;
          }

          _charPos = 0;
          if (readToUserBuffer)
          {
              charsRead += _decoder.GetChars(new ReadOnlySpan<byte>(_byteBuffer, 0, _byteLen), userBuffer.Slice(charsRead), flush: false);
              _charLen = 0; // StreamReader's buffer is empty.
          }
          else
          {
              charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, charsRead);
              _charLen += charsRead; // Number of chars in StreamReader's buffer.
          }
      } while (charsRead == 0);

      // A short read only counts as blocked if the caller's span was not filled.
      _isBlocked &= charsRead < userBuffer.Length;

      return charsRead;
  }
  661. // Reads a line. A line is defined as a sequence of characters followed by
  662. // a carriage return ('\r'), a line feed ('\n'), or a carriage return
  663. // immediately followed by a line feed. The resulting string does not
  664. // contain the terminating carriage return and/or line feed. The returned
  665. // value is null if the end of the input stream has been reached.
  666. //
  // Returns the next line without its terminator ('\r', '\n' or "\r\n"), or null
  // when the stream is already exhausted.
  public override string? ReadLine()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      if (_charPos == _charLen && ReadBuffer() == 0)
      {
          return null;
      }

      // Only allocated when a line spans more than one buffer refill.
      StringBuilder? pieces = null;

      do
      {
          int scan = _charPos;
          do
          {
              char current = _charBuffer[scan];

              // Line terminators: \n (UNIX), \r\n (DOS), \r (Mac).
              if (current == '\r' || current == '\n')
              {
                  int segment = scan - _charPos;
                  string line;
                  if (pieces is null)
                  {
                      line = new string(_charBuffer, _charPos, segment);
                  }
                  else
                  {
                      pieces.Append(_charBuffer, _charPos, segment);
                      line = pieces.ToString();
                  }

                  _charPos = scan + 1;

                  // After '\r', also swallow an immediately following '\n'; this may
                  // require a refill when '\r' was the last buffered character.
                  if (current == '\r'
                      && (_charPos < _charLen || ReadBuffer() > 0)
                      && _charBuffer[_charPos] == '\n')
                  {
                      _charPos++;
                  }

                  return line;
              }

              scan++;
          }
          while (scan < _charLen);

          // No terminator in this buffer: stash the remainder and refill.
          int tail = _charLen - _charPos;
          pieces ??= new StringBuilder(tail + 80);
          pieces.Append(_charBuffer, _charPos, tail);
      }
      while (ReadBuffer() > 0);

      return pieces.ToString();
  }
  // Asynchronous ReadLine. The returned task is also stored in _asyncReadTask so
  // that overlapping reads can be rejected.
  public override Task<string?> ReadLineAsync()
  {
      // The implementation below does not call through to Read(), which a subclass
      // might have overridden, so derived types are routed to the base class
      // (which does call Read).
      if (GetType() != typeof(StreamReader))
      {
          return base.ReadLineAsync();
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      Task<string?> pending = ReadLineAsyncInternal();
      _asyncReadTask = pending;
      return pending;
  }
  // Asynchronous counterpart of ReadLine: returns the next line without its
  // terminator, or null when the stream is already exhausted.
  private async Task<string?> ReadLineAsyncInternal()
  {
      if (_charPos == _charLen && (await ReadBufferAsync(CancellationToken.None).ConfigureAwait(false)) == 0)
      {
          return null;
      }

      // Only allocated when a line spans more than one buffer refill.
      StringBuilder? pieces = null;

      do
      {
          // Work on local copies of the buffer state for this pass.
          char[] chars = _charBuffer;
          int length = _charLen;
          int start = _charPos;
          int scan = start;

          do
          {
              char current = chars[scan];

              // Line terminators: \n (UNIX), \r\n (DOS), \r (Mac).
              if (current == '\r' || current == '\n')
              {
                  int segment = scan - start;
                  string line;
                  if (pieces is null)
                  {
                      line = new string(chars, start, segment);
                  }
                  else
                  {
                      pieces.Append(chars, start, segment);
                      line = pieces.ToString();
                  }

                  int next = scan + 1;
                  _charPos = next;

                  if (current == '\r' && (next < length || (await ReadBufferAsync(CancellationToken.None).ConfigureAwait(false)) > 0))
                  {
                      // Re-read the fields: a refill may have changed the position
                      // and the buffer instance.
                      next = _charPos;
                      if (_charBuffer[next] == '\n')
                      {
                          _charPos = next + 1;
                      }
                  }

                  return line;
              }

              scan++;
          }
          while (scan < length);

          // No terminator in this buffer: stash the remainder and refill.
          int tail = length - start;
          pieces ??= new StringBuilder(tail + 80);
          pieces.Append(chars, start, tail);
      }
      while (await ReadBufferAsync(CancellationToken.None).ConfigureAwait(false) > 0);

      return pieces.ToString();
  }
  // Asynchronous ReadToEnd. The returned task is also stored in _asyncReadTask so
  // that overlapping reads can be rejected.
  public override Task<string> ReadToEndAsync()
  {
      // The implementation below does not call through to Read(), which a subclass
      // might have overridden, so derived types are routed to the base class
      // (which does call Read).
      if (GetType() != typeof(StreamReader))
      {
          return base.ReadToEndAsync();
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      Task<string> pending = ReadToEndAsyncInternal();
      _asyncReadTask = pending;
      return pending;
  }
  // Asynchronous counterpart of ReadToEnd: drains the buffer and refills until a
  // refill leaves the character buffer empty.
  private async Task<string> ReadToEndAsyncInternal()
  {
      var text = new StringBuilder(_charLen - _charPos);

      do
      {
          int start = _charPos;
          int pending = _charLen - start;
          text.Append(_charBuffer, start, pending);
          _charPos = _charLen; // Everything buffered has been consumed.

          await ReadBufferAsync(CancellationToken.None).ConfigureAwait(false);
      }
      while (_charLen > 0);

      return text.ToString();
  }
  // Asynchronously reads up to count characters into buffer starting at index.
  // Throws ArgumentNullException for a null buffer, ArgumentOutOfRangeException for a
  // negative index or count, and ArgumentException when the range does not fit.
  // The returned task is also stored in _asyncReadTask.
  public override Task<int> ReadAsync(char[] buffer, int index, int count)
  {
      if (buffer == null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (buffer.Length - index < count)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      // The implementation below does not call through to Read(), which a subclass
      // might have overridden, so derived types are routed to the base class
      // (which does call Read).
      if (GetType() != typeof(StreamReader))
      {
          return base.ReadAsync(buffer, index, count);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      Memory<char> destination = new Memory<char>(buffer, index, count);
      Task<int> pending = ReadAsyncInternal(destination, CancellationToken.None).AsTask();
      _asyncReadTask = pending;
      return pending;
  }
  // Memory-based overload of ReadAsync. Returns an already-canceled ValueTask when the
  // token is signaled on entry; otherwise forwards to ReadAsyncInternal.
  public override ValueTask<int> ReadAsync(Memory<char> buffer, CancellationToken cancellationToken = default)
  {
      bool isExactStreamReader = GetType() == typeof(StreamReader);
      if (!isExactStreamReader)
      {
          // A derived class may already have overridden the older overloads;
          // go through the base so those overrides are still used.
          return base.ReadAsync(buffer, cancellationToken);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      return cancellationToken.IsCancellationRequested
          ? new ValueTask<int>(Task.FromCanceled<int>(cancellationToken))
          : ReadAsyncInternal(buffer, cancellationToken);
  }
  // Core of the asynchronous read path. Copies chars into 'buffer', first from whatever is
  // left in _charBuffer and then from freshly read and decoded stream bytes, and returns the
  // number of chars delivered. The loop stops early when the stream returned fewer bytes
  // than requested (_isBlocked) or when EOF is reached.
  internal override async ValueTask<int> ReadAsyncInternal(Memory<char> buffer, CancellationToken cancellationToken)
  {
      // Nothing buffered: attempt one refill up front, and report 0 if it yields no chars.
      if (_charPos == _charLen)
      {
          int refilled = await ReadBufferAsync(cancellationToken).ConfigureAwait(false);
          if (refilled == 0)
          {
              return 0;
          }
      }

      int totalRead = 0;

      // Perf optimization: when the caller still wants at least one full buffer's worth
      // of chars, decode directly into the caller's buffer instead of _charBuffer.
      bool decodeIntoCaller = false;

      byte[] bytes = _byteBuffer;
      Stream source = _stream;
      int remaining = buffer.Length;

      while (remaining > 0)
      {
          // Chars still waiting in _charBuffer.
          int available = _charLen - _charPos;

          if (available == 0)
          {
              // _charBuffer is drained, so go back to the stream.
              _charLen = 0;
              _charPos = 0;

              if (!_checkPreamble)
              {
                  _byteLen = 0;
              }

              decodeIntoCaller = remaining >= _maxCharsPerBuffer;

              // Keep reading until the bytes yield at least one char.
              // The loop is left early if the stream is blocked (EOF is reached).
              do
              {
                  Debug.Assert(available == 0);

                  if (!_checkPreamble)
                  {
                      Debug.Assert(_bytePos == 0, "_bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");
                      _byteLen = await source.ReadAsync(new Memory<byte>(bytes), cancellationToken).ConfigureAwait(false);
                      Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

                      if (_byteLen == 0) // EOF
                      {
                          _isBlocked = true;
                          break;
                      }
                  }
                  else
                  {
                      Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
                      int offset = _bytePos;
                      int bytesRead = await source.ReadAsync(new Memory<byte>(bytes, offset, bytes.Length - offset), cancellationToken).ConfigureAwait(false);
                      Debug.Assert(bytesRead >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

                      if (bytesRead == 0)
                      {
                          // EOF, but bytes buffered by earlier preamble-detection attempts
                          // may still be pending and have to be decoded now.
                          if (_byteLen > 0)
                          {
                              if (decodeIntoCaller)
                              {
                                  available = _decoder.GetChars(new ReadOnlySpan<byte>(bytes, 0, _byteLen), buffer.Span.Slice(totalRead), flush: false);
                                  _charLen = 0; // StreamReader's own buffer stays empty.
                              }
                              else
                              {
                                  available = _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, 0);
                                  _charLen += available; // Number of chars now in StreamReader's buffer.
                              }
                          }

                          // How can part of the preamble yield any chars?
                          Debug.Assert(available == 0);

                          _isBlocked = true;
                          break;
                      }

                      _byteLen += bytesRead;
                  }

                  // _isBlocked == whether we read fewer bytes than we asked for.
                  // It has to be computed here because CompressBuffer or
                  // DetectEncoding will change _byteLen.
                  _isBlocked = _byteLen < bytes.Length;

                  // Check for the preamble before detecting the encoding, so that an implicitly
                  // detected encoding does not override the user-supplied one. The user could have
                  // customized that encoding (e.g. ThrowOnError on UTF8), which we would lose.
                  // Note: decodeIntoCaller need not be recomputed here, as IsPreamble
                  // doesn't change the encoding or affect _maxCharsPerBuffer.
                  if (IsPreamble())
                  {
                      continue;
                  }

                  // On the first call to ReadBuffer, if we're supposed to detect the encoding, do it.
                  if (_detectEncoding && _byteLen >= 2)
                  {
                      DetectEncoding();

                      // DetectEncoding changes some buffer state, so recompute the decision.
                      decodeIntoCaller = remaining >= _maxCharsPerBuffer;
                  }

                  Debug.Assert(available == 0);

                  _charPos = 0;

                  if (decodeIntoCaller)
                  {
                      available += _decoder.GetChars(new ReadOnlySpan<byte>(bytes, 0, _byteLen), buffer.Span.Slice(totalRead), flush: false);

                      // Why did the bytes yield no chars?
                      Debug.Assert(available > 0);

                      _charLen = 0; // StreamReader's own buffer stays empty.
                  }
                  else
                  {
                      available = _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, 0);

                      // Why did the bytes yield no chars?
                      Debug.Assert(available > 0);

                      _charLen += available; // Number of chars now in StreamReader's buffer.
                  }
              } while (available == 0);

              if (available == 0)
              {
                  break; // We're at EOF
              }
          }

          // Never hand out more chars than the caller asked for.
          available = Math.Min(available, remaining);

          // When decoding went straight into the caller's buffer there is nothing left to copy.
          if (!decodeIntoCaller)
          {
              new Span<char>(_charBuffer, _charPos, available).CopyTo(buffer.Span.Slice(totalRead));
              _charPos += available;
          }

          totalRead += available;
          remaining -= available;

          // This function shouldn't block for an indefinite amount of time,
          // or reading from a network stream won't work right. If we got
          // fewer bytes than we requested, stop right here.
          if (_isBlocked)
          {
              break;
          }
      }

      return totalRead;
  }
  // Array-based ReadBlockAsync. Validates the arguments, then hands the work to the base
  // class implementation; for an exact StreamReader the returned task is also recorded in
  // _asyncReadTask.
  public override Task<int> ReadBlockAsync(char[] buffer, int index, int count)
  {
      if (buffer is null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0 || count < 0)
      {
          // index takes precedence when both values are negative.
          string offendingParameter = index < 0 ? nameof(index) : nameof(count);
          throw new ArgumentOutOfRangeException(offendingParameter, SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      int roomAfterIndex = buffer.Length - index;
      if (roomAfterIndex < count)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      // The tracked path below does not go through Read(), which a derived type may have
      // overridden. It is only safe for an exact StreamReader; any other type is handed
      // to the base class untracked (the base will call into Read).
      if (GetType() == typeof(StreamReader))
      {
          ThrowIfDisposed();
          CheckAsyncTaskInProgress();

          Task<int> blockTask = base.ReadBlockAsync(buffer, index, count);
          _asyncReadTask = blockTask;
          return blockTask;
      }

      return base.ReadBlockAsync(buffer, index, count);
  }
  // Memory-based ReadBlockAsync. Returns an already-canceled ValueTask when the token is
  // signaled on entry. A read that has not already completed successfully is converted to
  // a Task and recorded in _asyncReadTask before being returned.
  public override ValueTask<int> ReadBlockAsync(Memory<char> buffer, CancellationToken cancellationToken = default)
  {
      bool isExactStreamReader = GetType() == typeof(StreamReader);
      if (!isExactStreamReader)
      {
          // A derived type may have overridden ReadBlockAsync(char[], ...) before this
          // overload was introduced, so defer to it through the base class.
          return base.ReadBlockAsync(buffer, cancellationToken);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      if (cancellationToken.IsCancellationRequested)
      {
          return new ValueTask<int>(Task.FromCanceled<int>(cancellationToken));
      }

      ValueTask<int> pending = ReadBlockAsyncInternal(buffer, cancellationToken);

      if (!pending.IsCompletedSuccessfully)
      {
          // Not finished (or not successful): track it as a Task and hand back a
          // ValueTask wrapping that same Task.
          Task<int> tracked = pending.AsTask();
          _asyncReadTask = tracked;
          pending = new ValueTask<int>(tracked);
      }

      return pending;
  }
  // Resets the char buffer, then reads bytes from the stream and decodes them into
  // _charBuffer until at least one char is produced or the stream reports EOF.
  // Returns the resulting _charLen (0 when nothing could be decoded).
  private async ValueTask<int> ReadBufferAsync(CancellationToken cancellationToken)
  {
      _charLen = 0;
      _charPos = 0;

      byte[] bytes = _byteBuffer;
      Stream source = _stream;

      // While the preamble is still being checked, previously read bytes are kept.
      if (!_checkPreamble)
      {
          _byteLen = 0;
      }

      do
      {
          if (!_checkPreamble)
          {
              Debug.Assert(_bytePos == 0, "_bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");
              _byteLen = await source.ReadAsync(new Memory<byte>(bytes), cancellationToken).ConfigureAwait(false);
              Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! Bug in stream class.");

              if (_byteLen == 0) // We're at EOF
              {
                  return _charLen;
              }
          }
          else
          {
              Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
              int offset = _bytePos;
              int bytesRead = await source.ReadAsync(new Memory<byte>(bytes, offset, bytes.Length - offset), cancellationToken).ConfigureAwait(false);
              Debug.Assert(bytesRead >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (bytesRead == 0)
              {
                  // EOF, but bytes buffered by an earlier preamble-detection attempt
                  // may still be pending and have to be decoded now.
                  if (_byteLen > 0)
                  {
                      int flushed = _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, _charLen);
                      _charLen += flushed;

                      // Zero the byte counters once these bytes are consumed so this
                      // code path is not hit again and again.
                      _bytePos = 0;
                      _byteLen = 0;
                  }

                  return _charLen;
              }

              _byteLen += bytesRead;
          }

          // _isBlocked == whether we read fewer bytes than we asked for.
          // It has to be computed here because CompressBuffer or
          // DetectEncoding will change _byteLen.
          _isBlocked = _byteLen < bytes.Length;

          // Check for the preamble before detecting the encoding, so that an implicitly
          // detected encoding does not override the user-supplied one. The user could have
          // customized that encoding (e.g. ThrowOnError on UTF8), which we would lose.
          // When IsPreamble() returns true, nothing is decoded in this pass and the loop
          // condition decides whether to read again.
          if (!IsPreamble())
          {
              // If we're supposed to detect the encoding and haven't done so yet,
              // do it. Note this may need to be called more than once.
              if (_detectEncoding && _byteLen >= 2)
              {
                  DetectEncoding();
              }

              int decoded = _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, _charLen);
              _charLen += decoded;
          }
      } while (_charLen == 0);

      return _charLen;
  }
  // Throws ObjectDisposedException, named after the runtime type of this reader,
  // when _disposed is set; otherwise does nothing.
  private void ThrowIfDisposed()
  {
      if (!_disposed)
      {
          return;
      }

      ThrowObjectDisposedException();

      // The throw is kept in a local function, separate from the flag check above.
      void ThrowObjectDisposedException()
      {
          string readerTypeName = GetType().Name;
          throw new ObjectDisposedException(readerTypeName, SR.ObjectDisposed_ReaderClosed);
      }
  }
  // A reader with no data behind it: every read reports end-of-input immediately.
  // It carries no data, so the class doesn't need to be serializable.
  // Note this class is threadsafe.
  private sealed class NullStreamReader : StreamReader
  {
      public override Encoding CurrentEncoding
      {
          get { return Encoding.Unicode; }
      }

      protected override void Dispose(bool disposing)
      {
          // Intentionally empty - this reader is essentially unclosable.
      }

      // Single-char reads report "no character available".
      public override int Peek() => -1;

      public override int Read() => -1;

      // Block reads never produce any chars.
      public override int Read(char[] buffer, int index, int count) => 0;

      // There is never a line to return.
      public override string? ReadLine() => null;

      public override string ReadToEnd() => string.Empty;

      // Refilling the buffer never yields anything.
      internal override int ReadBuffer() => 0;
  }
  1150. }
  1151. }