StreamReader.cs 53 KB


  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Diagnostics.CodeAnalysis;
  6. using System.Text;
  7. using System.Threading;
  8. using System.Threading.Tasks;
  9. namespace System.IO
  10. {
  11. // This class implements a TextReader for reading characters from a Stream.
  12. // This is designed for character input in a particular Encoding,
  13. // whereas the Stream class is designed for byte input and output.
  14. public class StreamReader : TextReader
  15. {
  16. // StreamReader.Null is threadsafe.
  17. public static new readonly StreamReader Null = new NullStreamReader();
  18. // Using a 1K byte buffer and a 4K FileStream buffer works out pretty well
  19. // perf-wise. On even a 40 MB text file, any perf loss by using a 4K
  20. // buffer is negated by the win of allocating a smaller byte[], which
  21. // saves construction time. This does break adaptive buffering,
  22. // but this is slightly faster.
  23. private const int DefaultBufferSize = 1024; // Byte buffer size
  24. private const int DefaultFileStreamBufferSize = 4096;
  25. private const int MinBufferSize = 128;
  26. private readonly Stream _stream;
  27. private Encoding _encoding = null!; // only null in NullStreamReader where this is never used
  28. private Decoder _decoder = null!; // only null in NullStreamReader where this is never used
  29. private readonly byte[] _byteBuffer = null!; // only null in NullStreamReader where this is never used
  30. private char[] _charBuffer = null!; // only null in NullStreamReader where this is never used
  31. private int _charPos;
  32. private int _charLen;
  33. // Record the number of valid bytes in the byteBuffer, for a few checks.
  34. private int _byteLen;
  35. // This is used only for preamble detection
  36. private int _bytePos;
  37. // This is the maximum number of chars we can get from one call to
  38. // ReadBuffer. Used so ReadBuffer can tell when to copy data into
  39. // a user's char[] directly, instead of our internal char[].
  40. private int _maxCharsPerBuffer;
  41. /// <summary>True if the reader has been disposed; otherwise, false.</summary>
  42. private bool _disposed;
  43. // We will support looking for byte order marks in the stream and trying
  44. // to decide what the encoding might be from the byte order marks, IF they
  45. // exist. But that's all we'll do.
  46. private bool _detectEncoding;
  47. // Whether we must still check for the encoding's given preamble at the
  48. // beginning of this file.
  49. private bool _checkPreamble;
  50. // Whether the stream is most likely not going to give us back as much
  51. // data as we want the next time we call it. We must do the computation
  52. // before we do any byte order mark handling and save the result. Note
  53. // that we need this to allow users to handle streams used for an
  54. // interactive protocol, where they block waiting for the remote end
  55. // to send a response, like logging in on a Unix machine.
  56. private bool _isBlocked;
  57. // The intent of this field is to leave open the underlying stream when
  58. // disposing of this StreamReader. A name like _leaveOpen is better,
  59. // but this type is serializable, and this field's name was _closable.
  60. private bool _closable; // Whether to close the underlying stream.
  61. // We don't guarantee thread safety on StreamReader, but we should at
  62. // least prevent users from trying to read anything while an Async
  63. // read from the same thread is in progress.
  64. private Task _asyncReadTask = Task.CompletedTask;
  /// <summary>
  /// Throws if the most recently recorded asynchronous read has not completed yet.
  /// </summary>
  private void CheckAsyncTaskInProgress()
  {
      // Intentionally lock-free: this is a deterrent against starting a read while
      // an async read is still pending, not a thread-safety guarantee.
      if (_asyncReadTask.IsCompleted)
      {
          return;
      }

      ThrowAsyncIOInProgress();
  }
  /// <summary>Throw helper used when a read is attempted while an async read is pending.</summary>
  [DoesNotReturn]
  private static void ThrowAsyncIOInProgress()
  {
      throw new InvalidOperationException(SR.InvalidOperation_AsyncIOInProgress);
  }
  77. // StreamReader by default will ignore illegal UTF8 characters. We don't want to
  78. // throw here because we want to be able to read ill-formed data without choking.
  79. // The high level goal is to be tolerant of encoding errors when we read and very strict
  80. // when we write. Hence, default StreamWriter encoding will throw on error.
  /// <summary>
  /// Parameterless constructor reserved for <c>NullStreamReader</c>; it wires the
  /// reader to <see cref="Stream.Null"/> and leaves encoding and buffers unset.
  /// </summary>
  private StreamReader()
  {
      Debug.Assert(this is NullStreamReader);

      _closable = true;
      _stream = Stream.Null;
  }
  /// <summary>
  /// Creates a reader over <paramref name="stream"/> with byte order mark detection enabled.
  /// </summary>
  public StreamReader(Stream stream)
      : this(stream, detectEncodingFromByteOrderMarks: true)
  {
  }
  /// <summary>
  /// Creates a UTF-8 reader over <paramref name="stream"/> with the default buffer size;
  /// the stream is closed when the reader is disposed.
  /// </summary>
  public StreamReader(Stream stream, bool detectEncodingFromByteOrderMarks)
      : this(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize, leaveOpen: false)
  {
  }
  /// <summary>
  /// Creates a reader over <paramref name="stream"/> using <paramref name="encoding"/>,
  /// with byte order mark detection enabled and the default buffer size.
  /// </summary>
  public StreamReader(Stream stream, Encoding encoding)
      : this(stream, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: DefaultBufferSize, leaveOpen: false)
  {
  }
  /// <summary>
  /// Creates a reader over <paramref name="stream"/> using <paramref name="encoding"/>
  /// and the default buffer size; the stream is closed when the reader is disposed.
  /// </summary>
  public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      : this(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize, leaveOpen: false)
  {
  }
  103. // Creates a new StreamReader for the given stream. The
  104. // character encoding is set by encoding and the buffer size,
  105. // in bytes, is set by bufferSize.
  106. //
  107. // Note that detectEncodingFromByteOrderMarks is a very
  108. // loose attempt at detecting the encoding by looking at the first
  109. // few bytes of the stream. It will recognize the UTF-8, UTF-16
  110. // (little and big endian) and UTF-32 (little and big endian) byte
  111. // order marks, but that's it. If none of those match, it will use the Encoding you provided.
  112. //
  /// <summary>
  /// Creates a reader over <paramref name="stream"/> with an explicit buffer size;
  /// the stream is closed when the reader is disposed.
  /// </summary>
  public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
      : this(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize, leaveOpen: false)
  {
  }
  /// <summary>Creates a reader over <paramref name="stream"/>.</summary>
  /// <param name="stream">The stream to read from; must be non-null and readable.</param>
  /// <param name="encoding">The character encoding; <c>null</c> selects UTF-8.</param>
  /// <param name="detectEncodingFromByteOrderMarks">Whether to look for a byte order mark at the start of the data.</param>
  /// <param name="bufferSize">Size of the byte buffer; -1 selects the default, and small values are raised to the minimum.</param>
  /// <param name="leaveOpen">When true, disposing the reader does not close <paramref name="stream"/>.</param>
  /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="stream"/> is not readable.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferSize"/> is zero or negative (other than -1).</exception>
  public StreamReader(Stream stream, Encoding? encoding = null, bool detectEncodingFromByteOrderMarks = true, int bufferSize = -1, bool leaveOpen = false)
  {
      if (stream == null)
      {
          throw new ArgumentNullException(nameof(stream));
      }

      encoding ??= Encoding.UTF8;

      if (!stream.CanRead)
      {
          throw new ArgumentException(SR.Argument_StreamNotReadable);
      }

      // -1 is the "use the default" sentinel; any other non-positive size is rejected.
      if (bufferSize == -1)
      {
          bufferSize = DefaultBufferSize;
      }
      else if (bufferSize <= 0)
      {
          throw new ArgumentOutOfRangeException(nameof(bufferSize), SR.ArgumentOutOfRange_NeedPosNum);
      }

      _stream = stream;
      _encoding = encoding;
      _decoder = encoding.GetDecoder();

      // Never work with a byte buffer smaller than the minimum.
      bufferSize = Math.Max(bufferSize, MinBufferSize);

      _byteBuffer = new byte[bufferSize];
      _maxCharsPerBuffer = encoding.GetMaxCharCount(bufferSize);
      _charBuffer = new char[_maxCharsPerBuffer];

      _bytePos = 0;
      _byteLen = 0;
      _isBlocked = false;

      _detectEncoding = detectEncodingFromByteOrderMarks;
      // Only encodings that actually define a preamble need the preamble check.
      _checkPreamble = encoding.Preamble.Length > 0;
      _closable = !leaveOpen;
  }
  /// <summary>
  /// Opens the file at <paramref name="path"/> for reading with byte order mark detection enabled.
  /// </summary>
  public StreamReader(string path)
      : this(path, detectEncodingFromByteOrderMarks: true)
  {
  }
  /// <summary>
  /// Opens the file at <paramref name="path"/> as UTF-8 with the default buffer size.
  /// </summary>
  public StreamReader(string path, bool detectEncodingFromByteOrderMarks)
      : this(path, Encoding.UTF8, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize)
  {
  }
  /// <summary>
  /// Opens the file at <paramref name="path"/> using <paramref name="encoding"/>,
  /// with byte order mark detection enabled and the default buffer size.
  /// </summary>
  public StreamReader(string path, Encoding encoding)
      : this(path, encoding, detectEncodingFromByteOrderMarks: true, bufferSize: DefaultBufferSize)
  {
  }
  /// <summary>
  /// Opens the file at <paramref name="path"/> using <paramref name="encoding"/>
  /// and the default buffer size.
  /// </summary>
  public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
      : this(path, encoding, detectEncodingFromByteOrderMarks, bufferSize: DefaultBufferSize)
  {
  }
  /// <summary>
  /// Validates the arguments, opens the file at <paramref name="path"/> for reading,
  /// and creates a reader that owns (and will close) the resulting stream.
  /// </summary>
  public StreamReader(string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
      : this(
          ValidateArgsAndOpenPath(path, encoding, bufferSize),
          encoding,
          detectEncodingFromByteOrderMarks,
          bufferSize,
          false)
  {
  }
  /// <summary>
  /// Checks the arguments of the path-based constructor and opens the file for
  /// shared, sequential reading.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="path"/> or <paramref name="encoding"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="path"/> is empty.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferSize"/> is not positive.</exception>
  private static Stream ValidateArgsAndOpenPath(string path, Encoding encoding, int bufferSize)
  {
      if (path == null)
      {
          throw new ArgumentNullException(nameof(path));
      }

      if (encoding == null)
      {
          throw new ArgumentNullException(nameof(encoding));
      }

      if (path.Length == 0)
      {
          throw new ArgumentException(SR.Argument_EmptyPath);
      }

      if (bufferSize <= 0)
      {
          throw new ArgumentOutOfRangeException(nameof(bufferSize), SR.ArgumentOutOfRange_NeedPosNum);
      }

      Stream opened = new FileStream(
          path,
          FileMode.Open,
          FileAccess.Read,
          FileShare.Read,
          DefaultFileStreamBufferSize,
          FileOptions.SequentialScan);
      return opened;
  }
  /// <summary>Closes the reader by running the disposing path of <see cref="Dispose(bool)"/>.</summary>
  public override void Close() => Dispose(true);
  /// <summary>
  /// Marks the reader as disposed and, when the reader owns the stream, closes it
  /// and clears the buffered character state.
  /// </summary>
  /// <param name="disposing">True when called from Dispose/Close; only then is the stream closed.</param>
  protected override void Dispose(bool disposing)
  {
      if (_disposed)
      {
          return;
      }

      _disposed = true;

      // Nothing further happens when the underlying stream is to be left open.
      if (!_closable)
      {
          return;
      }

      try
      {
          // Stream.Close() may throw, so the internal cleanup lives in the finally block.
          if (disposing)
          {
              _stream.Close();
          }
      }
      finally
      {
          _charLen = 0;
          _charPos = 0;
          base.Dispose(disposing);
      }
  }
  /// <summary>Gets the encoding currently used by this reader.</summary>
  public virtual Encoding CurrentEncoding => _encoding;
  /// <summary>Gets the underlying stream this reader reads from.</summary>
  public virtual Stream BaseStream => _stream;
  227. // DiscardBufferedData tells StreamReader to throw away its internal
  228. // buffer contents. This is useful if the user needs to seek on the
  229. // underlying stream to a known location then wants the StreamReader
  230. // to start reading from this new point. This method should be called
  231. // very sparingly, if ever, since it can lead to very poor performance.
  232. // However, it may be the only way of handling some scenarios where
  233. // users need to re-read the contents of a StreamReader a second time.
  /// <summary>
  /// Drops all buffered bytes and characters and resets the decoder so that the
  /// next read starts fresh from the stream's current position.
  /// </summary>
  /// <exception cref="InvalidOperationException">An asynchronous read is still in progress.</exception>
  public void DiscardBufferedData()
  {
      CheckAsyncTaskInProgress();

      _byteLen = _charLen = _charPos = 0;

      // The encoding is normally non-null, but NullStreamReader leaves it unset,
      // so only rebuild the decoder when there is an encoding to ask.
      if (_encoding != null)
      {
          _decoder = _encoding.GetDecoder();
      }

      _isBlocked = false;
  }
  /// <summary>
  /// Gets whether no more characters can be read. May read from the underlying
  /// stream (and therefore block) when the character buffer is empty.
  /// </summary>
  public bool EndOfStream
  {
      get
      {
          ThrowIfDisposed();
          CheckAsyncTaskInProgress();

          // Buffered characters mean we are not at the end; otherwise try to refill.
          // Note that the refill may block on pipes.
          return _charPos >= _charLen && ReadBuffer() == 0;
      }
  }
  /// <summary>
  /// Returns the next character without consuming it, or -1 when no character is
  /// available (end of stream, or the previous read came back short and the buffer is empty).
  /// </summary>
  public override int Peek()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      if (_charPos != _charLen)
      {
          return _charBuffer[_charPos];
      }

      // Do not attempt another stream read when the last one was short.
      if (_isBlocked || ReadBuffer() == 0)
      {
          return -1;
      }

      return _charBuffer[_charPos];
  }
  /// <summary>Reads and consumes the next character, or returns -1 at end of stream.</summary>
  public override int Read()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      // Refill only when the character buffer has been fully consumed.
      if (_charPos == _charLen && ReadBuffer() == 0)
      {
          return -1;
      }

      return _charBuffer[_charPos++];
  }
  /// <summary>
  /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/>
  /// starting at <paramref name="index"/> and returns the number actually read.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
  /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
  public override int Read(char[] buffer, int index, int count)
  {
      if (buffer == null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > buffer.Length - index)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      Span<char> destination = new Span<char>(buffer, index, count);
      return ReadSpan(destination);
  }
  /// <summary>
  /// Reads characters into <paramref name="buffer"/> and returns the number read.
  /// </summary>
  public override int Read(Span<char> buffer)
  {
      if (GetType() == typeof(StreamReader))
      {
          return ReadSpan(buffer);
      }

      // A derived type may have overridden Read(char[], ...); defer to the base so it is honored.
      return base.Read(buffer);
  }
  /// <summary>
  /// Core synchronous read: fills <paramref name="buffer"/> from the internal
  /// character buffer, refilling from the stream as needed, and returns the number
  /// of characters produced.
  /// </summary>
  private int ReadSpan(Span<char> buffer)
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      int total = 0;
      int remaining = buffer.Length;

      // Set by ReadBuffer when it decoded straight into the caller's span,
      // in which case there is nothing to copy out of _charBuffer.
      bool decodedIntoCaller = false;

      while (remaining > 0)
      {
          int available = _charLen - _charPos;
          if (available == 0)
          {
              available = ReadBuffer(buffer.Slice(total), out decodedIntoCaller);
              if (available == 0)
              {
                  // End of stream.
                  break;
              }
          }

          int take = Math.Min(available, remaining);

          if (!decodedIntoCaller)
          {
              new Span<char>(_charBuffer, _charPos, take).CopyTo(buffer.Slice(total));
              _charPos += take;
          }

          total += take;
          remaining -= take;

          // A short read from the stream means more data may not be immediately
          // available (e.g. a network stream); return what we have instead of waiting.
          if (_isBlocked)
          {
              break;
          }
      }

      return total;
  }
  /// <summary>Reads all remaining characters and returns them as a single string.</summary>
  public override string ReadToEnd()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      // Start with whatever is already decoded, then keep refilling until a refill yields nothing.
      StringBuilder text = new StringBuilder(_charLen - _charPos);
      while (true)
      {
          text.Append(_charBuffer, _charPos, _charLen - _charPos);
          _charPos = _charLen; // everything buffered has now been consumed
          ReadBuffer();

          if (_charLen <= 0)
          {
              break;
          }
      }

      return text.ToString();
  }
  /// <summary>
  /// Validates the arguments and state, then delegates to the base implementation
  /// of <c>ReadBlock</c>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
  /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
  public override int ReadBlock(char[] buffer, int index, int count)
  {
      if (buffer == null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > buffer.Length - index)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      return base.ReadBlock(buffer, index, count);
  }
  /// <summary>
  /// Keeps reading until <paramref name="buffer"/> is full or a read returns no
  /// characters, and returns the total number of characters read.
  /// </summary>
  public override int ReadBlock(Span<char> buffer)
  {
      // A derived type may have overridden Read(char[], ...); defer to the base so it is honored.
      if (GetType() != typeof(StreamReader))
      {
          return base.ReadBlock(buffer);
      }

      int total = 0;
      while (true)
      {
          int got = ReadSpan(buffer.Slice(total));
          total += got;

          if (got <= 0 || total >= buffer.Length)
          {
              break;
          }
      }

      return total;
  }
  398. // Trims n bytes from the front of the buffer.
  /// <summary>
  /// Removes the first <paramref name="n"/> bytes of the byte buffer by shifting
  /// the remaining valid bytes to the front.
  /// </summary>
  private void CompressBuffer(int n)
  {
      Debug.Assert(_byteLen >= n, "CompressBuffer was called with a number of bytes greater than the current buffer length. Are two threads using this StreamReader at the same time?");

      int remaining = _byteLen - n;
      Buffer.BlockCopy(_byteBuffer, n, _byteBuffer, 0, remaining);
      _byteLen = remaining;
  }
  /// <summary>
  /// Inspects the start of the byte buffer for a UTF-16, UTF-8 or UTF-32 byte order
  /// mark. On a match it switches the encoding and decoder, strips the mark from
  /// the buffer and resizes the character buffer if the new encoding needs more room.
  /// </summary>
  private void DetectEncoding()
  {
      if (_byteLen < 2)
      {
          return;
      }

      _detectEncoding = false;

      byte first = _byteBuffer[0];
      byte second = _byteBuffer[1];

      Encoding? detected = null;
      int markLength = 0;

      if (first == 0xFE && second == 0xFF)
      {
          // Big endian UTF-16.
          detected = Encoding.BigEndianUnicode;
          markLength = 2;
      }
      else if (first == 0xFF && second == 0xFE)
      {
          // Little endian UTF-16, unless two zero bytes follow, which makes it little endian UTF-32.
          bool isUtf32 = _byteLen >= 4 && _byteBuffer[2] == 0 && _byteBuffer[3] == 0;
          if (isUtf32)
          {
              detected = Encoding.UTF32;
              markLength = 4;
          }
          else
          {
              detected = Encoding.Unicode;
              markLength = 2;
          }
      }
      else if (_byteLen >= 3 && first == 0xEF && second == 0xBB && _byteBuffer[2] == 0xBF)
      {
          // UTF-8.
          detected = Encoding.UTF8;
          markLength = 3;
      }
      else if (_byteLen >= 4 && first == 0 && second == 0 &&
               _byteBuffer[2] == 0xFE && _byteBuffer[3] == 0xFF)
      {
          // Big endian UTF-32.
          detected = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
          markLength = 4;
      }
      else if (_byteLen == 2)
      {
          // Only two bytes so far: too little to rule out the longer marks, so try again later.
          _detectEncoding = true;
      }

      if (detected == null)
      {
          return;
      }

      _encoding = detected;
      CompressBuffer(markLength);

      _decoder = _encoding.GetDecoder();
      int newMaxCharsPerBuffer = _encoding.GetMaxCharCount(_byteBuffer.Length);
      if (newMaxCharsPerBuffer > _maxCharsPerBuffer)
      {
          _charBuffer = new char[newMaxCharsPerBuffer];
      }

      _maxCharsPerBuffer = newMaxCharsPerBuffer;
  }
  468. // Trims the preamble bytes from the byteBuffer. This routine can be called multiple times
  469. // and we will buffer the bytes read until the preamble is matched or we determine that
  470. // there is no match. If there is no match, every byte read previously will be available
  471. // for further consumption. If there is a match, we will compress the buffer for the
  472. // leading preamble bytes
  /// <summary>
  /// Incrementally compares the buffered bytes with the encoding's preamble.
  /// </summary>
  /// <returns>
  /// True while the bytes seen so far match but the preamble is not complete yet
  /// (more data is needed); false once the check is finished, either because the
  /// preamble matched and was stripped or because a mismatch was found.
  /// </returns>
  private bool IsPreamble()
  {
      if (!_checkPreamble)
      {
          return false;
      }

      ReadOnlySpan<byte> preamble = _encoding.Preamble;

      Debug.Assert(_bytePos <= preamble.Length, "_compressPreamble was called with the current bytePos greater than the preamble buffer length. Are two threads using this StreamReader at the same time?");

      // Compare only as far as both the buffered data and the preamble reach.
      int limit = Math.Min(_byteLen, preamble.Length);
      while (_bytePos < limit)
      {
          if (_byteBuffer[_bytePos] != preamble[_bytePos])
          {
              // Mismatch: stop checking and leave every buffered byte available for decoding.
              _bytePos = 0;
              _checkPreamble = false;
              break;
          }

          _bytePos++;
      }

      Debug.Assert(_bytePos <= preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");

      if (_checkPreamble && _bytePos == preamble.Length)
      {
          // Full match: drop the preamble bytes and stop all further detection.
          CompressBuffer(preamble.Length);
          _bytePos = 0;
          _checkPreamble = false;
          _detectEncoding = false;
      }

      return _checkPreamble;
  }
  /// <summary>
  /// Refills the internal character buffer from the stream, handling preamble
  /// matching and byte order mark detection along the way.
  /// </summary>
  /// <returns>The number of characters now in the buffer; 0 means end of stream.</returns>
  internal virtual int ReadBuffer()
  {
      _charPos = 0;
      _charLen = 0;

      // While a preamble match is still in progress the buffered bytes must be kept.
      if (!_checkPreamble)
      {
          _byteLen = 0;
      }

      while (_charLen == 0)
      {
          if (!_checkPreamble)
          {
              Debug.Assert(_bytePos == 0, "bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");

              _byteLen = _stream.Read(_byteBuffer, 0, _byteBuffer.Length);
              Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (_byteLen == 0)
              {
                  // End of stream.
                  return _charLen;
              }
          }
          else
          {
              Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");

              int bytesRead = _stream.Read(_byteBuffer, _bytePos, _byteBuffer.Length - _bytePos);
              Debug.Assert(bytesRead >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (bytesRead == 0)
              {
                  // End of stream, but bytes held back during preamble matching
                  // still need to be decoded.
                  if (_byteLen > 0)
                  {
                      _charLen += _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, _charLen);

                      // Those bytes are consumed now; clear the counters so this path is not hit again and again.
                      _bytePos = _byteLen = 0;
                  }

                  return _charLen;
              }

              _byteLen += bytesRead;
          }

          // Record whether the stream returned less than a full buffer. This has to
          // happen before CompressBuffer/DetectEncoding get a chance to change _byteLen.
          _isBlocked = _byteLen < _byteBuffer.Length;

          // The caller-supplied encoding's preamble is checked before any implicit
          // detection so that a customized encoding is not lost to a detected one.
          if (IsPreamble())
          {
              continue;
          }

          // Detection may need more than one attempt before it can decide.
          if (_detectEncoding && _byteLen >= 2)
          {
              DetectEncoding();
          }

          _charLen += _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, _charLen);
      }

      return _charLen;
  }
  566. // This version has a perf optimization to decode data DIRECTLY into the
  567. // user's buffer, bypassing StreamReader's own buffer.
  568. // This gives a > 20% perf improvement for our encodings across the board,
  569. // but only when asking for at least the number of characters that one
  570. // buffer's worth of bytes could produce.
  571. // This optimization, if run, will break SwitchEncoding, so we must not do
  572. // this on the first call to ReadBuffer.
  /// <summary>
  /// Refills from the stream, decoding directly into <paramref name="userBuffer"/>
  /// when it is large enough to hold a full buffer's worth of characters, and into
  /// the internal character buffer otherwise.
  /// </summary>
  /// <param name="userBuffer">The caller's destination span.</param>
  /// <param name="readToUserBuffer">
  /// Set to true when the characters were decoded straight into
  /// <paramref name="userBuffer"/> (the internal buffer is then empty); false when
  /// they were decoded into the internal character buffer.
  /// </param>
  /// <returns>The number of characters decoded; 0 means end of stream.</returns>
  private int ReadBuffer(Span<char> userBuffer, out bool readToUserBuffer)
  {
      _charLen = 0;
      _charPos = 0;

      // While a preamble match is still in progress the buffered bytes must be kept.
      if (!_checkPreamble)
      {
          _byteLen = 0;
      }

      int charsRead = 0;

      // We must never write more characters into the caller's span than it can
      // hold. Computing GetMaxCharCount(_byteLen) on every call is potentially
      // expensive, so the cached maximum for a full byte buffer is used instead.
      // That makes the optimization less aggressive: requests for fewer than
      // _maxCharsPerBuffer characters always go through the internal buffer.
      readToUserBuffer = userBuffer.Length >= _maxCharsPerBuffer;

      do
      {
          Debug.Assert(charsRead == 0);

          if (_checkPreamble)
          {
              Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");
              int len = _stream.Read(_byteBuffer, _bytePos, _byteBuffer.Length - _bytePos);
              Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (len == 0)
              {
                  // End of stream, but bytes held back during preamble matching
                  // still need to be decoded.
                  if (_byteLen > 0)
                  {
                      if (readToUserBuffer)
                      {
                          charsRead = _decoder.GetChars(new ReadOnlySpan<byte>(_byteBuffer, 0, _byteLen), userBuffer.Slice(charsRead), flush: false);
                          _charLen = 0; // StreamReader's buffer is empty.
                      }
                      else
                      {
                          charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, charsRead);
                          _charLen += charsRead; // Number of chars in StreamReader's buffer.
                      }

                      // The held-back bytes have been handed to the decoder. Clear the
                      // counters so that a later call at end of stream does not feed the
                      // same bytes to the decoder again (the parameterless ReadBuffer
                      // does the same).
                      _bytePos = _byteLen = 0;
                  }

                  return charsRead;
              }

              _byteLen += len;
          }
          else
          {
              Debug.Assert(_bytePos == 0, "bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");
              _byteLen = _stream.Read(_byteBuffer, 0, _byteBuffer.Length);
              Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (_byteLen == 0) // EOF
              {
                  break;
              }
          }

          // Record whether the stream returned less than a full buffer. This has to
          // happen before CompressBuffer/DetectEncoding get a chance to change _byteLen.
          _isBlocked = (_byteLen < _byteBuffer.Length);

          // The caller-supplied encoding's preamble is checked before any implicit
          // detection so that a customized encoding (for example one that throws on
          // invalid bytes) is not lost to a detected one. IsPreamble does not change
          // the encoding or _maxCharsPerBuffer, so readToUserBuffer stays valid.
          if (IsPreamble())
          {
              continue;
          }

          // On the first call, if we are supposed to detect the encoding, do it.
          if (_detectEncoding && _byteLen >= 2)
          {
              DetectEncoding();

              // DetectEncoding may have changed _maxCharsPerBuffer; recompute.
              readToUserBuffer = userBuffer.Length >= _maxCharsPerBuffer;
          }

          _charPos = 0;
          if (readToUserBuffer)
          {
              charsRead += _decoder.GetChars(new ReadOnlySpan<byte>(_byteBuffer, 0, _byteLen), userBuffer.Slice(charsRead), flush: false);
              _charLen = 0; // StreamReader's buffer is empty.
          }
          else
          {
              charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, charsRead);
              _charLen += charsRead; // Number of chars in StreamReader's buffer.
          }
      } while (charsRead == 0);

      // Once the caller's span has been filled there is no reason to report "blocked".
      _isBlocked &= charsRead < userBuffer.Length;

      return charsRead;
  }
  669. // Reads a line. A line is defined as a sequence of characters followed by
  670. // a carriage return ('\r'), a line feed ('\n'), or a carriage return
  671. // immediately followed by a line feed. The resulting string does not
  672. // contain the terminating carriage return and/or line feed. The returned
  673. // value is null if the end of the input stream has been reached.
  674. //
  /// <summary>
  /// Reads the next line, terminated by '\r', '\n' or "\r\n"; the terminator is not
  /// included in the result.
  /// </summary>
  /// <returns>The line, or null when the end of the stream has already been reached.</returns>
  public override string? ReadLine()
  {
      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      if (_charPos == _charLen && ReadBuffer() == 0)
      {
          return null;
      }

      // Only allocated when a line spans more than one buffer refill.
      StringBuilder? line = null;

      do
      {
          int scan = _charPos;
          do
          {
              char current = _charBuffer[scan];

              if (current == '\r' || current == '\n')
              {
                  int segmentLength = scan - _charPos;
                  string result;
                  if (line == null)
                  {
                      result = new string(_charBuffer, _charPos, segmentLength);
                  }
                  else
                  {
                      line.Append(_charBuffer, _charPos, segmentLength);
                      result = line.ToString();
                  }

                  _charPos = scan + 1;

                  // Treat "\r\n" as one terminator; the '\n' may only arrive with the next refill.
                  if (current == '\r'
                      && (_charPos < _charLen || ReadBuffer() > 0)
                      && _charBuffer[_charPos] == '\n')
                  {
                      _charPos++;
                  }

                  return result;
              }

              scan++;
          } while (scan < _charLen);

          // No terminator in this buffer: stash what we have and refill.
          int tail = _charLen - _charPos;
          line ??= new StringBuilder(tail + 80);
          line.Append(_charBuffer, _charPos, tail);
      } while (ReadBuffer() > 0);

      return line.ToString();
  }
  /// <summary>Asynchronously reads the next line.</summary>
  /// <returns>A task producing the line, or null at end of stream.</returns>
  public override Task<string?> ReadLineAsync()
  {
      // The fast implementation below does not route through Read(), which a subclass
      // might have overridden, so it is only used for StreamReader itself.
      if (GetType() == typeof(StreamReader))
      {
          ThrowIfDisposed();
          CheckAsyncTaskInProgress();

          Task<string?> pending = ReadLineAsyncInternal();
          _asyncReadTask = pending;
          return pending;
      }

      return base.ReadLineAsync();
  }
  /// <summary>
  /// Asynchronous counterpart of the line-reading loop: scans the character buffer
  /// for '\r', '\n' or "\r\n", refilling asynchronously as needed.
  /// </summary>
  private async Task<string?> ReadLineAsyncInternal()
  {
      if (_charPos == _charLen && (await ReadBufferAsync().ConfigureAwait(false)) == 0)
      {
          return null;
      }

      // Only allocated when a line spans more than one buffer refill.
      StringBuilder? line = null;

      do
      {
          // Work on local copies of the buffer state while scanning.
          char[] chars = _charBuffer;
          int length = _charLen;
          int start = _charPos;
          int scan = start;

          do
          {
              char current = chars[scan];

              if (current == '\r' || current == '\n')
              {
                  int segmentLength = scan - start;
                  string result;
                  if (line == null)
                  {
                      result = new string(chars, start, segmentLength);
                  }
                  else
                  {
                      line.Append(chars, start, segmentLength);
                      result = line.ToString();
                  }

                  start = scan + 1;
                  _charPos = start;

                  // Treat "\r\n" as one terminator; the '\n' may only arrive with the next refill.
                  if (current == '\r' && (start < length || (await ReadBufferAsync().ConfigureAwait(false)) > 0))
                  {
                      // A refill may have reset the position, so re-read it from the field.
                      int next = _charPos;
                      if (_charBuffer[next] == '\n')
                      {
                          _charPos = next + 1;
                      }
                  }

                  return result;
              }

              scan++;
          } while (scan < length);

          // No terminator in this buffer: stash what we have and refill.
          int tail = length - start;
          line ??= new StringBuilder(tail + 80);
          line.Append(chars, start, tail);
      } while (await ReadBufferAsync().ConfigureAwait(false) > 0);

      return line.ToString();
  }
  /// <summary>Asynchronously reads all remaining characters.</summary>
  /// <returns>A task producing the remaining text.</returns>
  public override Task<string> ReadToEndAsync()
  {
      // The fast implementation below does not route through Read(), which a subclass
      // might have overridden, so it is only used for StreamReader itself.
      if (GetType() == typeof(StreamReader))
      {
          ThrowIfDisposed();
          CheckAsyncTaskInProgress();

          Task<string> pending = ReadToEndAsyncInternal();
          _asyncReadTask = pending;
          return pending;
      }

      return base.ReadToEndAsync();
  }
  /// <summary>
  /// Drains the character buffer into a string, refilling asynchronously until a
  /// refill yields no characters.
  /// </summary>
  private async Task<string> ReadToEndAsyncInternal()
  {
      StringBuilder text = new StringBuilder(_charLen - _charPos);

      while (true)
      {
          int start = _charPos;
          text.Append(_charBuffer, start, _charLen - start);
          _charPos = _charLen; // everything buffered has now been consumed

          await ReadBufferAsync().ConfigureAwait(false);

          if (_charLen <= 0)
          {
              break;
          }
      }

      return text.ToString();
  }
  /// <summary>
  /// Asynchronously reads up to <paramref name="count"/> characters into
  /// <paramref name="buffer"/> starting at <paramref name="index"/>.
  /// </summary>
  /// <returns>A task producing the number of characters read.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="count"/> is negative.</exception>
  /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
  public override Task<int> ReadAsync(char[] buffer, int index, int count)
  {
      if (buffer == null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > buffer.Length - index)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      // The fast implementation below does not route through Read(), which a subclass
      // might have overridden, so it is only used for StreamReader itself.
      if (GetType() != typeof(StreamReader))
      {
          return base.ReadAsync(buffer, index, count);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      Memory<char> destination = new Memory<char>(buffer, index, count);
      Task<int> pending = ReadAsyncInternal(destination, CancellationToken.None).AsTask();
      _asyncReadTask = pending;
      return pending;
  }
  // Asynchronously reads characters into 'buffer' and returns the number of characters read.
  //
  // Derived types are routed to the base implementation so that overrides of the older
  // overloads keep being used. If 'cancellationToken' is already canceled, a canceled
  // ValueTask is returned without touching the stream.
  //
  // A read that does not complete synchronously is recorded in _asyncReadTask, exactly as
  // ReadBlockAsync(Memory<char>, CancellationToken) does. Without this, a pending read started
  // through this overload was invisible to the CheckAsyncTaskInProgress() call made by every
  // other asynchronous entry point, so overlapping reads on the same reader went unnoticed.
  public override ValueTask<int> ReadAsync(Memory<char> buffer, CancellationToken cancellationToken = default)
  {
      if (GetType() != typeof(StreamReader))
      {
          // Ensure we use existing overrides if a class already overrode existing overloads.
          return base.ReadAsync(buffer, cancellationToken);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      if (cancellationToken.IsCancellationRequested)
      {
          return new ValueTask<int>(Task.FromCanceled<int>(cancellationToken));
      }

      ValueTask<int> vt = ReadAsyncInternal(buffer, cancellationToken);
      if (vt.IsCompletedSuccessfully)
      {
          // Already finished: nothing is pending, so there is nothing to track.
          return vt;
      }

      // Convert to a Task exactly once, remember it, and hand the same Task back to the caller
      // (a ValueTask must not be consumed more than once).
      Task<int> t = vt.AsTask();
      _asyncReadTask = t;
      return new ValueTask<int>(t);
  }
  // Shared worker behind the asynchronous Read overloads. Copies up to buffer.Length decoded
  // characters into 'buffer' and returns how many were written.
  //
  // Characters already sitting in _charBuffer are handed out first. Once that buffer is empty,
  // more bytes are read from the stream and decoded either into _charBuffer or, when the caller
  // still has room for at least _maxCharsPerBuffer characters, straight into 'buffer'.
  // The loop stops when the request is satisfied, when a stream read returns 0 bytes, or when
  // the last stream read filled the byte buffer only partially (_isBlocked).
  internal override async ValueTask<int> ReadAsyncInternal(Memory<char> buffer, CancellationToken cancellationToken)
  {
      // Nothing buffered: refill once up front and report 0 if that produced no characters.
      if (_charPos == _charLen)
      {
          int refilled = await ReadBufferAsync().ConfigureAwait(false);
          if (refilled == 0)
          {
              return 0;
          }
      }

      Stream source = _stream;
      byte[] bytes = _byteBuffer;
      int remaining = buffer.Length;
      int charsRead = 0;

      // Perf optimization: when the caller has room for a whole buffer's worth of characters,
      // decode directly into the caller's memory instead of going through _charBuffer.
      bool decodeIntoCaller = false;

      while (remaining > 0)
      {
          // Number of characters still waiting in _charBuffer.
          int available = _charLen - _charPos;

          if (available == 0)
          {
              // _charBuffer is drained; go back to the stream.
              _charPos = 0;
              _charLen = 0;

              if (!_checkPreamble)
              {
                  _byteLen = 0;
              }

              decodeIntoCaller = remaining >= _maxCharsPerBuffer;

              // Keep reading until the bytes yield at least one character, or until the stream
              // returns 0 bytes (end of stream).
              do
              {
                  Debug.Assert(available == 0);

                  if (_checkPreamble)
                  {
                      Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");

                      // Append after the bytes already collected while matching the preamble.
                      int offset = _bytePos;
                      int bytesRead = await source.ReadAsync(new Memory<byte>(bytes, offset, bytes.Length - offset), cancellationToken).ConfigureAwait(false);
                      Debug.Assert(bytesRead >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

                      if (bytesRead == 0)
                      {
                          // End of stream, but bytes buffered by earlier preamble-detection
                          // attempts may still be pending and have to be decoded now.
                          if (_byteLen > 0)
                          {
                              if (decodeIntoCaller)
                              {
                                  available = _decoder.GetChars(new ReadOnlySpan<byte>(bytes, 0, _byteLen), buffer.Span.Slice(charsRead), flush: false);
                                  _charLen = 0; // Nothing was placed in the reader's own buffer.
                              }
                              else
                              {
                                  available = _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, 0);
                                  _charLen += available; // Characters now held in the reader's buffer.
                              }
                          }

                          // A partial preamble is not expected to decode to any characters.
                          Debug.Assert(available == 0);

                          _isBlocked = true;
                          break;
                      }

                      _byteLen += bytesRead;
                  }
                  else
                  {
                      Debug.Assert(_bytePos == 0, "_bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");

                      _byteLen = await source.ReadAsync(new Memory<byte>(bytes), cancellationToken).ConfigureAwait(false);
                      Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

                      if (_byteLen == 0)
                      {
                          // End of stream.
                          _isBlocked = true;
                          break;
                      }
                  }

                  // _isBlocked records whether fewer bytes arrived than were asked for. It has to
                  // be computed here because the original notes say the preamble/encoding helpers
                  // below may change _byteLen.
                  _isBlocked = _byteLen < bytes.Length;

                  // The preamble is checked before encoding detection so that a user-supplied
                  // Encoding (and any customization on it, such as throw-on-error) is not lost to
                  // an implicitly detected one. When IsPreamble() returns true, nothing is decoded
                  // in this pass and the loop simply reads again. Per the original note, the
                  // preamble check does not affect _maxCharsPerBuffer, so decodeIntoCaller is
                  // still valid afterwards.
                  if (!IsPreamble())
                  {
                      if (_detectEncoding && _byteLen >= 2)
                      {
                          DetectEncoding();

                          // Detection may change buffer state (per the original note), so the
                          // direct-decode decision is re-evaluated.
                          decodeIntoCaller = remaining >= _maxCharsPerBuffer;
                      }

                      Debug.Assert(available == 0);
                      _charPos = 0;

                      if (decodeIntoCaller)
                      {
                          available += _decoder.GetChars(new ReadOnlySpan<byte>(bytes, 0, _byteLen), buffer.Span.Slice(charsRead), flush: false);

                          // The bytes are expected to have produced at least one character.
                          Debug.Assert(available > 0);

                          _charLen = 0; // Nothing was placed in the reader's own buffer.
                      }
                      else
                      {
                          available = _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, 0);

                          // The bytes are expected to have produced at least one character.
                          Debug.Assert(available > 0);

                          _charLen += available; // Characters now held in the reader's buffer.
                      }
                  }
              }
              while (available == 0);

              if (available == 0)
              {
                  // The stream ended without yielding any further characters.
                  break;
              }
          }

          // Never hand out more than the caller asked for.
          if (available > remaining)
          {
              available = remaining;
          }

          // When decoding went straight into the caller's memory there is nothing left to copy.
          if (!decodeIntoCaller)
          {
              new Span<char>(_charBuffer, _charPos, available).CopyTo(buffer.Span.Slice(charsRead));
              _charPos += available;
          }

          charsRead += available;
          remaining -= available;

          // Do not wait indefinitely for more data (this matters for network streams): if the
          // last stream read came back short, return what has been gathered so far.
          if (_isBlocked)
          {
              break;
          }
      }

      return charsRead;
  }
  // Asynchronously reads up to 'count' characters into 'buffer', starting at 'index', by way of
  // the base class's ReadBlockAsync.
  //
  // Throws ArgumentNullException when 'buffer' is null, ArgumentOutOfRangeException when
  // 'index' or 'count' is negative, and ArgumentException when the range [index, index + count)
  // does not fit inside 'buffer'. Arguments are validated before anything else happens.
  public override Task<int> ReadBlockAsync(char[] buffer, int index, int count)
  {
      if (buffer is null)
      {
          throw new ArgumentNullException(nameof(buffer), SR.ArgumentNull_Buffer);
      }

      // 'index' is reported first when both values are negative.
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > buffer.Length - index)
      {
          throw new ArgumentException(SR.Argument_InvalidOffLen);
      }

      // Only when the runtime type is exactly StreamReader do we perform the disposed and
      // in-progress checks and record the operation in _asyncReadTask. A derived type may have
      // overridden Read(), so for it the base implementation is returned untouched.
      if (GetType() == typeof(StreamReader))
      {
          ThrowIfDisposed();
          CheckAsyncTaskInProgress();

          Task<int> pending = base.ReadBlockAsync(buffer, index, count);
          _asyncReadTask = pending;
          return pending;
      }

      return base.ReadBlockAsync(buffer, index, count);
  }
  // Memory-based ReadBlockAsync overload. Returns the result of ReadBlockAsyncInternal; an
  // operation that has not already completed successfully is converted to a Task and recorded
  // in _asyncReadTask before being returned. If 'cancellationToken' is already canceled, a
  // canceled ValueTask is returned instead.
  public override ValueTask<int> ReadBlockAsync(Memory<char> buffer, CancellationToken cancellationToken = default)
  {
      if (GetType() != typeof(StreamReader))
      {
          // A derived type may have overridden ReadBlockAsync(char[], ...) before this
          // overload existed, so defer to the base implementation.
          return base.ReadBlockAsync(buffer, cancellationToken);
      }

      ThrowIfDisposed();
      CheckAsyncTaskInProgress();

      if (cancellationToken.IsCancellationRequested)
      {
          return new ValueTask<int>(Task.FromCanceled<int>(cancellationToken));
      }

      ValueTask<int> result = ReadBlockAsyncInternal(buffer, cancellationToken);

      if (!result.IsCompletedSuccessfully)
      {
          // Still pending (or not successful): track it as a Task and return a ValueTask that
          // wraps that same Task.
          Task<int> tracked = result.AsTask();
          _asyncReadTask = tracked;
          result = new ValueTask<int>(tracked);
      }

      return result;
  }
  // Refills _charBuffer from the underlying stream and returns the number of characters it now
  // holds (_charLen). A return value of 0 means a stream read returned 0 bytes and nothing
  // could be decoded.
  private async ValueTask<int> ReadBufferAsync()
  {
      // Start from an empty character buffer.
      _charPos = 0;
      _charLen = 0;

      Stream source = _stream;
      byte[] bytes = _byteBuffer;

      // While the preamble is still being matched, bytes collected so far must be kept.
      if (!_checkPreamble)
      {
          _byteLen = 0;
      }

      do
      {
          if (!_checkPreamble)
          {
              Debug.Assert(_bytePos == 0, "_bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?");

              _byteLen = await source.ReadAsync(new Memory<byte>(bytes)).ConfigureAwait(false);
              Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! Bug in stream class.");

              if (_byteLen == 0)
              {
                  // End of stream.
                  return _charLen;
              }
          }
          else
          {
              Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?");

              // Append after the bytes already collected while matching the preamble.
              int offset = _bytePos;
              int bytesRead = await source.ReadAsync(new Memory<byte>(bytes, offset, bytes.Length - offset)).ConfigureAwait(false);
              Debug.Assert(bytesRead >= 0, "Stream.Read returned a negative number! This is a bug in your stream class.");

              if (bytesRead == 0)
              {
                  // End of stream, but bytes buffered by an earlier preamble-detection attempt
                  // may still be pending and have to be decoded now.
                  if (_byteLen > 0)
                  {
                      _charLen += _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, _charLen);

                      // Clear the byte state once these bytes are consumed; otherwise this
                      // branch would be taken again on every subsequent call.
                      _bytePos = 0;
                      _byteLen = 0;
                  }

                  return _charLen;
              }

              _byteLen += bytesRead;
          }

          // _isBlocked records whether fewer bytes arrived than were asked for. It has to be
          // computed here because the original notes say the preamble/encoding helpers below
          // may change _byteLen.
          _isBlocked = _byteLen < bytes.Length;

          // The preamble is checked before encoding detection so that a user-supplied Encoding
          // (and any customization on it, such as throw-on-error) is not lost to an implicitly
          // detected one. When IsPreamble() returns true, nothing is decoded in this pass and the
          // loop reads again.
          if (!IsPreamble())
          {
              // Detect the encoding if that was requested and has not happened yet; per the
              // original note this may need to be attempted more than once.
              if (_detectEncoding && _byteLen >= 2)
              {
                  DetectEncoding();
              }

              _charLen += _decoder.GetChars(bytes, 0, _byteLen, _charBuffer, _charLen);
          }
      }
      while (_charLen == 0);

      return _charLen;
  }
  // Throws ObjectDisposedException (named after the runtime type) when _disposed is set;
  // otherwise does nothing.
  private void ThrowIfDisposed()
  {
      if (!_disposed)
      {
          return;
      }

      RaiseDisposed();

      // The throw lives in a local function, presumably to keep the common non-throwing path small.
      void RaiseDisposed()
      {
          throw new ObjectDisposedException(GetType().Name, SR.ObjectDisposed_ReaderClosed);
      }
  }
  1130. // No data, class doesn't need to be serializable.
  1131. // Note this class is threadsafe.
  // Reader with nothing to read: every read member reports "no data" (-1, 0, null or an empty
  // string), and Dispose is a no-op.
  private sealed class NullStreamReader : StreamReader
  {
      public override Encoding CurrentEncoding => Encoding.Unicode;

      protected override void Dispose(bool disposing)
      {
          // Intentionally empty: this reader is essentially unclosable.
      }

      public override int Peek() => -1;

      public override int Read() => -1;

      // Argument validation is skipped on purpose to avoid *potential* AppCompat problems.
      [SuppressMessage("Microsoft.Contracts", "CC1055")]
      public override int Read(char[] buffer, int index, int count) => 0;

      public override string? ReadLine() => null;

      public override string ReadToEnd() => string.Empty;

      internal override int ReadBuffer() => 0;
  }
  1168. }
  1169. }