XmlInputStream.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. //
  2. // System.Xml.XmlInputStream
  3. // encoding-specification-wise XML input stream and reader
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. //
  11. // Permission is hereby granted, free of charge, to any person obtaining
  12. // a copy of this software and associated documentation files (the
  13. // "Software"), to deal in the Software without restriction, including
  14. // without limitation the rights to use, copy, modify, merge, publish,
  15. // distribute, sublicense, and/or sell copies of the Software, and to
  16. // permit persons to whom the Software is furnished to do so, subject to
  17. // the following conditions:
  18. //
  19. // The above copyright notice and this permission notice shall be
  20. // included in all copies or substantial portions of the Software.
  21. //
  22. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  26. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  27. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29. //
  30. using System;
  31. using System.IO;
  32. using System.Text;
  33. using System.Runtime.InteropServices;
  34. namespace System.Xml
  35. {
  36. #region XmlStreamReader
  /// <summary>
  /// Character reader for XML byte streams. The supplied stream is wrapped in
  /// an <see cref="XmlInputStream"/>, and its <c>ActualEncoding</c> is used for
  /// decoding (strict UTF-8 when that property is null).
  /// </summary>
  internal class XmlStreamReader : NonBlockingStreamReader
  {
      // Message carried by the XmlException raised when decoding fails.
      const string InvalidDataMessage = "invalid data.";

      XmlInputStream input;

      XmlStreamReader (XmlInputStream input)
          : base (input, input.ActualEncoding != null ? input.ActualEncoding : XmlInputStream.StrictUTF8)
      {
          this.input = input;
      }

      /// <summary>Creates a reader over <paramref name="input"/>.</summary>
      /// <param name="input">Raw XML byte stream; it is wrapped in an XmlInputStream.</param>
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input))
      {
      }

      /// <summary>Closes the wrapped input stream.</summary>
      public override void Close ()
      {
          // Deliberately does not call base.Close (): that would invoke the
          // virtual Dispose (bool), which in turn calls Close () again.
          this.input.Close ();
      }

      /// <summary>
      /// Reads decoded characters into <paramref name="dest_buffer"/>.
      /// </summary>
      /// <returns>The number of characters stored; 0 when nothing more could be read.</returns>
      /// <exception cref="XmlException">
      /// The base reader reported a decoding failure. The original exception
      /// is attached as the inner exception.
      /// </exception>
      public override int Read ([In, Out] char[] dest_buffer, int index, int count)
      {
          try {
              return base.Read (dest_buffer, index, count);
          }
          // A fresh exception is created for every failure so that each one
          // carries its own stack trace and the underlying cause.
  #if NET_1_1
          catch (System.ArgumentException ex) {
              throw new XmlException (InvalidDataMessage, ex);
          }
  #else
          catch (System.Text.DecoderFallbackException ex) {
              throw new XmlException (InvalidDataMessage, ex);
          }
  #endif
      }

      /// <summary>
      /// Disposes the base reader and, when <paramref name="disposing"/> is
      /// true, also closes the wrapped input stream.
      /// </summary>
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (disposing) {
              Close ();
          }
      }
  }
  77. #endregion
  78. #region NonBlockingStreamReader
  79. // mostly copied from StreamReader, removing BOM checks, ctor
  80. // parameter checks and some extra public members.
  /// <summary>
  /// Buffered <see cref="TextReader"/> over a byte stream and an encoding.
  /// Bytes are pulled from the stream in chunks of up to 1024 bytes and decoded
  /// with a stateful <see cref="Decoder"/>. No BOM handling and no constructor
  /// argument checks are performed here.
  /// </summary>
  internal class NonBlockingStreamReader : TextReader {
      const int DefaultBufferSize = 1024;

      // FindNextEOL result meaning: the previous decoded buffer ended with a
      // CR and the current buffer does not start with LF, so the line that
      // has been accumulated so far is complete and must be returned as is.
      const int PendingLineFlush = -2;

      // Raw bytes most recently read from the stream.
      byte [] input_buffer;

      // Characters decoded from input_buffer.
      char [] decoded_buffer;

      // Number of valid characters in decoded_buffer.
      int decoded_count;

      // Index of the next unread character in decoded_buffer.
      int pos;

      // Maximum number of bytes requested from the stream per read.
      int buffer_size;

      Encoding encoding;
      Decoder decoder;
      Stream base_stream;

      // True when the last stream read returned fewer bytes than requested;
      // Peek () then refuses to trigger another read.
      bool mayBlock;

      // Reused accumulator for lines that span several decoded buffers.
      StringBuilder line_builder;

      // Set by FindNextEOL when the last character it examined was a CR.
      bool foundCR;

      /// <summary>Creates a reader over <paramref name="stream"/>.</summary>
      /// <param name="stream">Source of encoded bytes.</param>
      /// <param name="encoding">Encoding used to decode the bytes.</param>
      public NonBlockingStreamReader(Stream stream, Encoding encoding)
      {
          int buffer_size = DefaultBufferSize;
          base_stream = stream;
          input_buffer = new byte [buffer_size];
          this.buffer_size = buffer_size;
          this.encoding = encoding;
          decoder = encoding.GetDecoder ();
          decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
          decoded_count = 0;
          pos = 0;
      }

      /// <summary>The encoding passed to the constructor (null after disposal).</summary>
      public Encoding Encoding {
          get { return encoding; }
      }

      /// <summary>Closes the reader by calling <c>Dispose (true)</c>.</summary>
      public override void Close ()
      {
          Dispose (true);
      }

      /// <summary>
      /// Closes the base stream (when <paramref name="disposing"/> is true and
      /// it is still set) and drops every buffer and reference held.
      /// </summary>
      protected override void Dispose (bool disposing)
      {
          if (disposing && base_stream != null)
              base_stream.Close ();
          input_buffer = null;
          decoded_buffer = null;
          encoding = null;
          decoder = null;
          base_stream = null;
          base.Dispose (disposing);
      }

      /// <summary>Forgets all buffered characters and resets the decoder state.</summary>
      public void DiscardBufferedData ()
      {
          pos = decoded_count = 0;
          mayBlock = false;
  #if NET_2_0
          decoder.Reset ();
  #else
          decoder = encoding.GetDecoder ();
  #endif
      }

      // Refills decoded_buffer from the stream. Keeps reading until the
      // decoder yields at least one character; returns 0 as soon as the
      // stream returns no bytes, otherwise the number of decoded characters.
      private int ReadBuffer ()
      {
          pos = 0;
          decoded_count = 0;
          do {
              int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
              if (cbEncoded == 0)
                  return 0;
              mayBlock = (cbEncoded < buffer_size);
              decoded_count += decoder.GetChars (input_buffer, 0, cbEncoded, decoded_buffer, 0);
          } while (decoded_count == 0);
          return decoded_count;
      }

      /// <summary>
      /// Returns the next character without consuming it. Returns -1 when the
      /// decoded buffer is exhausted and either the previous stream read was
      /// short (a further read is not attempted) or the stream yields nothing.
      /// </summary>
      /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
      public override int Peek ()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
              return -1;
          return decoded_buffer [pos];
      }

      /// <summary>Reads one character, or returns -1 when the stream yields nothing more.</summary>
      /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
      public override int Read ()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (pos >= decoded_count && ReadBuffer () == 0)
              return -1;
          return decoded_buffer [pos++];
      }

      /// <summary>
      /// Copies up to <paramref name="count"/> characters into
      /// <paramref name="dest_buffer"/> starting at <paramref name="index"/>.
      /// At most one decoded buffer is consumed per call, so fewer than
      /// <paramref name="count"/> characters may be returned even when more
      /// data is available.
      /// </summary>
      /// <returns>Number of characters copied; 0 when the stream yields nothing.</returns>
      /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
      /// <exception cref="ArgumentNullException"><paramref name="dest_buffer"/> is null.</exception>
      /// <exception cref="ArgumentOutOfRangeException">index or count is negative.</exception>
      /// <exception cref="ArgumentException">index + count exceeds the buffer length.</exception>
      public override int Read ([In, Out] char[] dest_buffer, int index, int count)
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (dest_buffer == null)
              throw new ArgumentNullException ("dest_buffer");
          if (index < 0)
              throw new ArgumentOutOfRangeException ("index", "< 0");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count", "< 0");
          // re-ordered to avoid possible integer overflow
          if (index > dest_buffer.Length - count)
              throw new ArgumentException ("index + count > dest_buffer.Length");

          if (pos >= decoded_count && ReadBuffer () == 0)
              return 0;

          int cch = Math.Min (decoded_count - pos, count);
          Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
          pos += cch;
          return cch;
      }

      // Scans decoded_buffer from pos for the end of the current line.
      // Returns the index of the first line-terminator character (the line's
      // content ends just before it) and leaves pos after the terminator:
      //   - LF or CR LF: pos is moved past the LF;
      //   - lone CR followed by another char: pos stays on that char.
      // Returns PendingLineFlush when a CR seen at the end of the previous
      // buffer turns out not to be followed by LF (nothing is consumed).
      // Returns -1 when the buffer runs out before a terminator is confirmed;
      // foundCR is then true if the buffer's last character was a CR.
      int FindNextEOL ()
      {
          char c = '\0';
          for (; pos < decoded_count; pos++) {
              c = decoded_buffer [pos];
              if (c == '\n') {
                  pos++;
                  int res = (foundCR) ? (pos - 2) : (pos - 1);
                  if (res < 0)
                      res = 0; // if a new buffer starts with a \n and there was a \r at
                               // the end of the previous one, we get here.
                  foundCR = false;
                  return res;
              } else if (foundCR) {
                  foundCR = false;
                  if (pos == 0)
                      return PendingLineFlush; // the CR lives in the previous buffer
                  return pos - 1;
              }
              foundCR = (c == '\r');
          }
          return -1;
      }

      // Returns the accumulated line. A builder that has grown beyond 32768
      // chars of capacity is released instead of being kept for reuse.
      string TakeBuiltLine ()
      {
          StringBuilder sb = line_builder;
          if (sb.Capacity > 32768)
              line_builder = null;
          return sb.ToString (0, sb.Length);
      }

      /// <summary>
      /// Reads one line. LF, CR LF and a lone CR all terminate a line; the
      /// terminator is not included in the result.
      /// </summary>
      /// <returns>The line, or null when the stream yields nothing more.</returns>
      /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
      public override string ReadLine()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (pos >= decoded_count && ReadBuffer () == 0)
              return null;

          // The flag only has meaning within one call; it can be left set
          // when an earlier call ended on a trailing CR at end of input.
          foundCR = false;

          int begin = pos;
          int end = FindNextEOL ();
          if (end < decoded_count && end >= begin)
              return new string (decoded_buffer, begin, end - begin);

          // The line continues into the next buffer(s): accumulate.
          if (line_builder == null)
              line_builder = new StringBuilder ();
          else
              line_builder.Length = 0;

          while (true) {
              if (foundCR) // don't include the trailing CR if present
                  decoded_count--;
              line_builder.Append (decoded_buffer, begin, decoded_count - begin);
              if (ReadBuffer () == 0)
                  return TakeBuiltLine ();

              begin = pos;
              end = FindNextEOL ();
              if (end == PendingLineFlush) {
                  // The previous buffer ended with a lone CR: the line is
                  // already complete and the new buffer starts the next line.
                  return TakeBuiltLine ();
              }
              if (end < decoded_count && end >= begin) {
                  line_builder.Append (decoded_buffer, begin, end - begin);
                  return TakeBuiltLine ();
              }
          }
      }

      /// <summary>Reads everything that remains and returns it as one string.</summary>
      /// <exception cref="ObjectDisposedException">The reader has been disposed.</exception>
      public override string ReadToEnd()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          StringBuilder text = new StringBuilder ();
          int size = decoded_buffer.Length;
          char [] buffer = new char [size];
          int len;
          while ((len = Read (buffer, 0, size)) != 0)
              text.Append (buffer, 0, len);
          return text.ToString ();
      }
  }
  286. #endregion
  /// <summary>
  /// Read-only stream wrapper that inspects the first bytes of an XML document
  /// to pick an encoding: a UTF-16 or UTF-8 byte order mark, or the
  /// <c>encoding</c> pseudo-attribute of an <c>&lt;?xml ...?&gt;</c> declaration.
  /// The bytes consumed during detection are kept in an internal buffer and
  /// served again by Read/ReadByte (a recognised BOM is skipped).
  /// </summary>
  class XmlInputStream : Stream
  {
      /// <summary>UTF-8 without BOM emission that throws on invalid bytes.</summary>
      public static readonly Encoding StrictUTF8;

      static XmlInputStream ()
      {
          StrictUTF8 = new UTF8Encoding (false, true);
      }

      // Encoding chosen by Initialize ().
      Encoding enc;

      // Wrapped stream.
      Stream stream;

      // Bytes pre-read from the wrapped stream while sniffing the encoding.
      byte[] buffer;

      // Number of valid bytes in buffer.
      int bufLength;

      // Index of the next unread byte in buffer.
      int bufPos;

      /// <summary>Wraps <paramref name="stream"/> and detects its encoding immediately.</summary>
      /// <exception cref="XmlException">The XML declaration has a malformed or invalid encoding name.</exception>
      public XmlInputStream (Stream stream)
      {
          Initialize (stream);
      }

      // A new exception per failure, so each throw has its own stack trace.
      static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Converts count bytes starting at index into a string, one char per byte.
      static string GetStringFromBytes (byte [] bytes, int index, int count)
      {
  #if NET_2_1
          char [] chars = new char [count];
          // The destination is filled from 0; the source is offset by index.
          for (int i = 0; i < count; i++)
              chars [i] = (char) bytes [index + i];
          return new string (chars);
  #else
          return Encoding.ASCII.GetString (bytes, index, count);
  #endif
      }

      // Pre-reads up to 64 bytes and decides the encoding. On exit bufPos
      // points just past a recognised BOM, or at 0 so that every sniffed byte
      // is delivered to the consumer again.
      private void Initialize (Stream stream)
      {
          buffer = new byte [64];
          this.stream = stream;
          enc = StrictUTF8; // Default to UTF8 if we can't guess it
          bufLength = stream.Read (buffer, 0, buffer.Length);
          if (bufLength == -1 || bufLength == 0) {
              return;
          }

          int c = ReadByteSpecial ();
          switch (c) {
          case 0xFF:
              c = ReadByteSpecial ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // It doesn't start from "<?xml" then its encoding is utf-8
                  bufPos = 0;
              }
              break;
          case 0xFE:
              c = ReadByteSpecial ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
                  return;
              } else {
                  // It doesn't start from "<?xml" then its encoding is utf-8
                  bufPos = 0;
              }
              break;
          case 0xEF:
              c = ReadByteSpecial ();
              if (c == 0xBB) {
                  c = ReadByteSpecial ();
                  if (c != 0xBF) {
                      bufPos = 0;
                  }
              } else {
                  // Not a UTF-8 BOM: rewind so that both 0xEF and the byte
                  // that followed it are delivered unchanged.
                  bufPos = 0;
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              if (bufLength >= 5 && GetStringFromBytes (buffer, 1, 4) == "?xml") {
                  ReadXmlDeclEncoding ();
              }
  #if TARGET_JVM
              else {
                  if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
                      enc = Encoding.Unicode;
              }
  #endif
              bufPos = 0;
              break;
          default:
              bufPos = 0;
              break;
          }
      }

      // Called with bufPos just after the leading '<' of "<?xml". Skips the
      // optional version pseudo-attribute and, if an encoding pseudo-attribute
      // follows, stores the named encoding in enc.
      void ReadXmlDeclEncoding ()
      {
          bufPos += 4; // skip "?xml"
          int c = SkipWhitespace ();

          // version. It is optional here.
          if (c == 'v') {
              // Scans forward to the first '0' (the 0 of "1.0") and then
              // consumes one more byte, presumably the closing quote.
              while (c >= 0) {
                  c = ReadByteSpecial ();
                  if (c == '0') { // 0 of 1.0
                      ReadByteSpecial ();
                      break;
                  }
              }
              c = SkipWhitespace ();
          }

          if (c != 'e')
              return;
          // Only bytes already buffered are examined for the attribute name.
          if (bufLength - bufPos < 7 || GetStringFromBytes (buffer, bufPos, 7) != "ncoding")
              return;
          bufPos += 7;

          c = SkipWhitespace();
          if (c != '=')
              throw CreateEncodingException ();

          // Whatever byte follows the '=' is taken as the quote character.
          int quoteChar = SkipWhitespace ();
          StringBuilder sb = new StringBuilder ();
          while (true) {
              c = ReadByteSpecial ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              sb.Append ((char) c);
          }

          string encodingName = sb.ToString ();
          if (!XmlChar.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          enc = Encoding.GetEncoding (encodingName);
      }

      // Just like readbyte, but grows the buffer too.
      // Returns -1 when the buffer is exhausted and the stream yields no more bytes.
      int ReadByteSpecial ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];

          byte [] newbuf = new byte [buffer.Length * 2];
          Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
          int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
          if (nbytes == -1 || nbytes == 0)
              return -1;
          bufLength += nbytes;
          buffer = newbuf;
          return buffer [bufPos++];
      }

      // skips whitespace and returns misc char that was read from stream
      // (-1 when the input ends first).
      private int SkipWhitespace ()
      {
          int c;
          do {
              c = ReadByteSpecial ();
          } while (c == ' ' || c == '\t' || c == '\r' || c == '\n');
          return c;
      }

      /// <summary>Encoding selected during construction.</summary>
      public Encoding ActualEncoding {
          get { return enc; }
      }

  #region Public Overrides
      /// <summary>True while buffered bytes remain; otherwise the wrapped stream's CanRead.</summary>
      public override bool CanRead {
          get {
              if (bufLength > bufPos)
                  return true;
              else
                  return stream.CanRead;
          }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      /// <summary>Length of the wrapped stream.</summary>
      public override long Length {
          get {
              return stream.Length;
          }
      }

      /// <summary>
      /// The getter reports the wrapped stream's position minus the number of
      /// buffered bytes not yet handed out.
      /// </summary>
      public override long Position {
          get {
              return stream.Position - bufLength + bufPos;
          }
          set {
              // NOTE(review): this setter looks inconsistent with the getter
              // (the else branch subtracts bufLength and leaves bufPos
              // untouched). Behaviour is kept as is; confirm intent before
              // relying on it. CanSeek reports false.
              if(value < bufLength)
                  bufPos = (int)value;
              else
                  stream.Position = value - bufLength;
          }
      }

      /// <summary>Closes the wrapped stream.</summary>
      public override void Close ()
      {
          stream.Close ();
      }

      /// <summary>Flushes the wrapped stream.</summary>
      public override void Flush ()
      {
          stream.Flush ();
      }

      /// <summary>
      /// Reads up to <paramref name="count"/> bytes: buffered bytes first, then
      /// a single read from the wrapped stream for the remainder.
      /// </summary>
      /// <returns>Total number of bytes stored in <paramref name="buffer"/>.</returns>
      public override int Read (byte[] buffer, int offset, int count)
      {
          int bufRest = bufLength - bufPos;
          if (count <= bufRest) { // all from buffer
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }

          if (bufLength > bufPos) {
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
              bufPos += bufRest;
          }
          return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
      }

      /// <summary>Returns the next buffered byte, or reads one from the wrapped stream.</summary>
      public override int ReadByte ()
      {
          if (bufLength > bufPos) {
              return buffer [bufPos++];
          }
          return stream.ReadByte ();
      }

      /// <summary>
      /// Moves the read position. A forward seek relative to the current
      /// position that stays inside the buffered bytes only advances the
      /// buffer index; every other seek is delegated to the wrapped stream
      /// and any bytes still buffered are discarded.
      /// </summary>
      /// <returns>The new position, as reported by <see cref="Position"/> or by the wrapped stream.</returns>
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          int bufRest = bufLength - bufPos;
          long result;
          if (origin == SeekOrigin.Current) {
              if (offset >= 0 && offset < bufRest) {
                  bufPos += (int) offset;
                  return Position;
              }
              // The wrapped stream is already bufRest bytes ahead of the
              // logical position, hence the adjusted offset.
              result = stream.Seek (offset - bufRest, origin);
          } else {
              result = stream.Seek (offset, origin);
          }
          // Only after the wrapped stream has moved: buffered bytes no longer
          // correspond to the new position and must not be served.
          bufPos = bufLength;
          return result;
      }

      /// <summary>Forwards to the wrapped stream's SetLength.</summary>
      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      /// <summary>Always throws: this stream is read-only.</summary>
      /// <exception cref="NotSupportedException">Always.</exception>
      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }
  #endregion
  }
  532. }