XmlInputStream.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. //
  2. // System.Xml.XmlInputStream
  3. // encoding-specification-wise XML input stream and reader
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. //
  11. // Permission is hereby granted, free of charge, to any person obtaining
  12. // a copy of this software and associated documentation files (the
  13. // "Software"), to deal in the Software without restriction, including
  14. // without limitation the rights to use, copy, modify, merge, publish,
  15. // distribute, sublicense, and/or sell copies of the Software, and to
  16. // permit persons to whom the Software is furnished to do so, subject to
  17. // the following conditions:
  18. //
  19. // The above copyright notice and this permission notice shall be
  20. // included in all copies or substantial portions of the Software.
  21. //
  22. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  26. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  27. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29. //
  30. using System;
  31. using System.IO;
  32. using System.Text;
  33. using System.Runtime.InteropServices;
  34. namespace System.Xml
  35. {
  36. #region XmlStreamReader
  // Text reader for XML byte streams. The supplied Stream is wrapped in an
  // XmlInputStream, and the encoding that wrapper reports is used to decode
  // the bytes.
  internal class XmlStreamReader : NonBlockingStreamReader
  {
      // The encoding-detecting wrapper this reader decodes from.
      XmlInputStream source;

      // Builds the reader on top of an existing XmlInputStream.
      XmlStreamReader (XmlInputStream input)
          : base (input, PickEncoding (input))
      {
          source = input;
      }

      // Wraps the raw stream in a new XmlInputStream and reads from that.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input))
      {
      }

      // Returns the encoding reported by the wrapper, or the strict UTF-8
      // encoding when the wrapper reports none.
      static Encoding PickEncoding (XmlInputStream input)
      {
          Encoding detected = input.ActualEncoding;
          if (detected == null)
              return XmlInputStream.StrictUTF8;
          return detected;
      }

      // Closes the wrapped XmlInputStream.
      public override void Close ()
      {
          source.Close ();
      }

      // Runs the base-class disposal first and then, for an explicit
      // dispose, closes the wrapped XmlInputStream through Close().
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (!disposing)
              return;
          Close ();
      }
  }
  61. #endregion
  62. #region NonBlockingStreamReader
  63. // mostly copied from StreamReader, removing BOM checks, ctor
  64. // parameter checks and some extra public members.
  // TextReader that decodes bytes from a Stream with a caller-supplied
  // Encoding. Unlike a reader that loops until the requested number of
  // characters is available, Read(char[], int, int) hands back whatever a
  // single buffer fill produced, and Peek() reports -1 instead of reading
  // again after a short fill.
  internal class NonBlockingStreamReader : TextReader {
      const int DefaultBufferSize = 1024;
      const int DefaultFileBufferSize = 4096;
      const int MinimumBufferSize = 128;

      // Raw bytes most recently read from base_stream.
      byte [] input_buffer;

      // Characters decoded from input_buffer.
      char [] decoded_buffer;

      // Number of valid characters in decoded_buffer.
      int decoded_count;

      // Index of the next unread character in decoded_buffer.
      int pos;

      // Number of bytes requested from base_stream on every fill.
      int buffer_size;

      Encoding encoding;
      Decoder decoder;
      Stream base_stream;

      // True when the last fill returned fewer bytes than requested.
      bool mayBlock;

      // Reused by ReadLine() for lines that span more than one buffer.
      StringBuilder line_builder;

      // True when the last character examined by FindNextEOL() was '\r'.
      bool foundCR;

      // Creates a reader over stream that decodes with encoding. Neither
      // argument is validated here.
      public NonBlockingStreamReader(Stream stream, Encoding encoding)
      {
          int buffer_size = DefaultBufferSize;
          base_stream = stream;
          input_buffer = new byte [buffer_size];
          this.buffer_size = buffer_size;
          this.encoding = encoding;
          decoder = encoding.GetDecoder ();
          decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
          decoded_count = 0;
          pos = 0;
      }

      // Closes the reader and the underlying stream.
      public override void Close ()
      {
          Dispose (true);
      }

      // Closes the underlying stream (explicit dispose only) and drops all
      // buffers. Afterwards every read method throws
      // ObjectDisposedException because base_stream is null.
      protected override void Dispose (bool disposing)
      {
          if (disposing && base_stream != null)
              base_stream.Close ();
          input_buffer = null;
          decoded_buffer = null;
          encoding = null;
          decoder = null;
          base_stream = null;
          base.Dispose (disposing);
      }

      // Forgets all decoded characters and restarts the decoder.
      public void DiscardBufferedData ()
      {
          pos = decoded_count = 0;
          mayBlock = false;
  #if NET_2_0
          decoder.Reset ();
  #else
          decoder = encoding.GetDecoder ();
  #endif
      }

      // The decoded buffer is empty: refill it. Returns the number of
      // decoded characters, or 0 when the stream has ended.
      private int ReadBuffer ()
      {
          pos = 0;
          decoded_count = 0;
          // Keep reading until the decoder yields at least one character;
          // a read may deliver only part of a multi-byte sequence.
          do {
              int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
              if (cbEncoded == 0)
                  return 0;
              mayBlock = (cbEncoded < buffer_size);
              decoded_count += decoder.GetChars (input_buffer, 0, cbEncoded, decoded_buffer, 0);
          } while (decoded_count == 0);
          return decoded_count;
      }

      // Returns the next character without consuming it, or -1 when none
      // is buffered and either the last fill was short or the stream has
      // ended.
      public override int Peek ()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
              return -1;
          return decoded_buffer [pos];
      }

      // Returns and consumes the next character, or -1 at end of stream.
      public override int Read ()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (pos >= decoded_count && ReadBuffer () == 0)
              return -1;
          return decoded_buffer [pos++];
      }

      // Copies up to count characters into dest_buffer starting at index.
      // At most one buffer's worth is delivered per call, so the result may
      // be smaller than count even before end of stream. Returns 0 at end
      // of stream.
      public override int Read ([In, Out] char[] dest_buffer, int index, int count)
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
          if (dest_buffer == null)
              throw new ArgumentNullException ("dest_buffer");
          if (index < 0)
              throw new ArgumentOutOfRangeException ("index", "< 0");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count", "< 0");
          // re-ordered to avoid possible integer overflow
          if (index > dest_buffer.Length - count)
              throw new ArgumentException ("index + count > dest_buffer.Length");

          // Deliberately no "while (count > 0)" loop here: looping would
          // force further reads of the underlying stream.
          if (pos >= decoded_count && ReadBuffer () == 0)
              return 0;

          int cch = Math.Min (decoded_count - pos, count);
          Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
          pos += cch;
          return cch;
      }

      // Scans decoded_buffer from pos for a line terminator. On success
      // pos is left just past the terminator (for a lone '\r', on the
      // character after it) and the index where the line's text ends is
      // returned. Returns -1 when the buffer is exhausted first; foundCR
      // then tells whether the last character was '\r'.
      int FindNextEOL ()
      {
          char c = '\0';
          for (; pos < decoded_count; pos++) {
              c = decoded_buffer [pos];
              if (c == '\n') {
                  pos++;
                  int res = (foundCR) ? (pos - 2) : (pos - 1);
                  if (res < 0)
                      res = 0; // if a new buffer starts with a \n and there was a \r at
                               // the end of the previous one, we get here.
                  foundCR = false;
                  return res;
              } else if (foundCR) {
                  foundCR = false;
                  return pos - 1;
              }
              foundCR = (c == '\r');
          }
          return -1;
      }

      // Returns the text accumulated in line_builder. A builder that has
      // grown past 32768 characters of capacity is released instead of
      // being kept for reuse.
      string TakeLine ()
      {
          StringBuilder sb = line_builder;
          if (sb.Capacity > 32768)
              line_builder = null;
          return sb.ToString (0, sb.Length);
      }

      // Reads one line, terminated by "\n", "\r" or "\r\n" (terminator not
      // included). Returns null at end of stream.
      public override string ReadLine()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

          if (pos >= decoded_count && ReadBuffer () == 0)
              return null;

          int begin = pos;
          int end = FindNextEOL ();
          // Fast path: the whole line sits inside the current buffer.
          if (end < decoded_count && end >= begin)
              return new string (decoded_buffer, begin, end - begin);

          if (line_builder == null)
              line_builder = new StringBuilder ();
          else
              line_builder.Length = 0;

          while (true) {
              if (foundCR) // don't include the trailing CR if present
                  decoded_count--;
              line_builder.Append (decoded_buffer, begin, decoded_count - begin);

              if (ReadBuffer () == 0) {
                  // End of stream terminates the line; a pending CR has
                  // been consumed and must not affect a later call.
                  foundCR = false;
                  return TakeLine ();
              }

              // The previous buffer ended with '\r'. Unless this buffer
              // starts with the matching '\n', that CR already ended the
              // line: return it now and leave this buffer untouched for
              // the next call. (Without this check the whole new buffer
              // was appended to the current line.)
              if (foundCR && decoded_buffer [0] != '\n') {
                  foundCR = false;
                  return TakeLine ();
              }

              begin = pos;
              end = FindNextEOL ();
              if (end < decoded_count && end >= begin) {
                  line_builder.Append (decoded_buffer, begin, end - begin);
                  return TakeLine ();
              }
          }
      }

      // Reads everything up to end of stream and returns it as one string.
      public override string ReadToEnd()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");

          StringBuilder text = new StringBuilder ();
          int size = decoded_buffer.Length;
          char [] buffer = new char [size];
          int len;
          // Read() returns 0 only at end of stream.
          while ((len = Read (buffer, 0, size)) != 0)
              text.Append (buffer, 0, len);
          return text.ToString ();
      }
  }
  267. #endregion
  // Read-only Stream wrapper that inspects the first bytes of an XML byte
  // stream to choose a text encoding: a UTF-16 or UTF-8 byte order mark,
  // or the encoding pseudo-attribute of a leading "<?xml" declaration.
  // A recognized byte order mark is consumed; all other inspected bytes are
  // kept in an internal buffer and replayed by Read()/ReadByte() before the
  // underlying stream is read again.
  class XmlInputStream : Stream
  {
      // UTF-8 without BOM emission that throws on invalid byte sequences.
      public static readonly Encoding StrictUTF8;

      static XmlInputStream ()
      {
          StrictUTF8 = new UTF8Encoding (false, true);
      }

      // Encoding chosen by Initialize().
      Encoding enc;
      // The wrapped stream.
      Stream stream;
      // Bytes read ahead from the wrapped stream during detection.
      byte[] buffer;
      // Number of valid bytes in buffer.
      int bufLength;
      // Index of the next buffered byte to hand out.
      int bufPos;

      // Creates the exception thrown for a malformed or unsupported
      // encoding declaration. A new instance is created per failure so
      // that no exception object is shared between throw sites.
      static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Wraps stream and immediately reads its first bytes to detect the
      // encoding. Throws XmlException when an encoding declaration is
      // present but malformed or names an invalid encoding.
      public XmlInputStream (Stream stream)
      {
          Initialize (stream);
      }

      // Reads the head of the stream, sets enc, and positions bufPos so
      // that a recognized byte order mark is skipped and everything else
      // is replayed from the start.
      private void Initialize (Stream stream)
      {
          buffer = new byte [64];
          this.stream = stream;
          enc = StrictUTF8; // Default to UTF8 if we can't guess it
          bufLength = stream.Read (buffer, 0, buffer.Length);
          if (bufLength == -1 || bufLength == 0)
              return;

          int c = ReadByteSpecial ();
          switch (c) {
          case 0xFF:
              c = ReadByteSpecial ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // Not a BOM: replay from the start, encoding stays utf-8
                  bufPos = 0;
              }
              break;
          case 0xFE:
              c = ReadByteSpecial ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // Not a BOM: replay from the start, encoding stays utf-8
                  bufPos = 0;
              }
              break;
          case 0xEF:
              // A utf-8 BOM is EF BB BF. Anything else is ordinary data and
              // must be replayed untouched from the first byte. (The
              // previous code overwrote the second byte with 0xEF and
              // resumed at index 1, which dropped a byte.)
              if (ReadByteSpecial () != 0xBB || ReadByteSpecial () != 0xBF)
                  bufPos = 0;
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              if (SkipAscii ("?xml")) {
                  c = SkipWhitespace ();
                  // version. It is optional here.
                  if (c == 'v') {
                      SkipQuotedValue ();
                      c = SkipWhitespace ();
                  }
                  if (c == 'e' && SkipAscii ("ncoding")) {
                      c = SkipWhitespace ();
                      if (c != '=')
                          throw CreateEncodingException ();
                      c = SkipWhitespace ();
                      if (c != '"' && c != '\'')
                          throw CreateEncodingException ();
                      int quoteChar = c;
                      StringBuilder sb = new StringBuilder ();
                      while (true) {
                          c = ReadByteSpecial ();
                          if (c == quoteChar)
                              break;
                          else if (c < 0)
                              throw CreateEncodingException ();
                          sb.Append ((char) c);
                      }
                      string encodingName = sb.ToString ();
                      if (!XmlChar.IsValidIANAEncoding (encodingName))
                          throw CreateEncodingException ();
                      enc = Encoding.GetEncoding (encodingName);
                  }
              }
  #if TARGET_JVM
              else {
                  if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
                      enc = Encoding.Unicode;
              }
  #endif
              // The declaration itself is part of the document: replay it.
              bufPos = 0;
              break;
          default:
              bufPos = 0;
              break;
          }
      }

      // Just like readbyte, but grows the buffer too: when the buffered
      // bytes are used up, more are read from the stream into a buffer of
      // twice the size so that earlier bytes can still be replayed.
      // Returns -1 when the stream has no more data.
      int ReadByteSpecial ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];

          byte [] newbuf = new byte [buffer.Length * 2];
          Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
          int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
          if (nbytes == -1 || nbytes == 0)
              return -1;
          bufLength += nbytes;
          buffer = newbuf;
          return buffer [bufPos++];
      }

      // Consumes bytes as long as they spell out text and reports whether
      // all of it matched. Stops at the first mismatch, so no further
      // bytes are pulled from the stream once the answer is known. Going
      // through ReadByteSpecial() lets the check succeed even when the
      // first read of the stream returned only a few bytes.
      bool SkipAscii (string text)
      {
          for (int i = 0; i < text.Length; i++) {
              if (ReadByteSpecial () != text [i])
                  return false;
          }
          return true;
      }

      // Consumes bytes through the next quoted value: everything up to an
      // opening ' or ", then everything up to the matching closing quote.
      // Used to step over the version pseudo-attribute whatever its value
      // is (the previous code searched for the character '0' and therefore
      // ran past the declaration for version="1.1"). Stops early at end of
      // stream.
      void SkipQuotedValue ()
      {
          int c;
          do {
              c = ReadByteSpecial ();
          } while (c >= 0 && c != '"' && c != '\'');
          if (c < 0)
              return;

          int quote = c;
          do {
              c = ReadByteSpecial ();
          } while (c >= 0 && c != quote);
      }

      // skips whitespace and returns misc char that was read from stream
      // (-1 at end of stream)
      private int SkipWhitespace ()
      {
          while (true) {
              int c = ReadByteSpecial ();
              if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
                  continue;
              return c;
          }
      }

      // The encoding selected during construction (strict UTF-8 when
      // nothing more specific was detected).
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      // True while buffered bytes remain; otherwise whatever the wrapped
      // stream reports.
      public override bool CanRead {
          get {
              if (bufLength > bufPos)
                  return true;
              else
                  return stream.CanRead;
          }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      public override long Length {
          get {
              return stream.Length;
          }
      }

      // get: the wrapped stream's position minus the buffered bytes that
      // have not been handed out yet.
      // set: a value inside the buffered range only moves the replay index
      // and does not touch the wrapped stream; any other value repositions
      // the wrapped stream and marks the buffer as fully consumed so that
      // the next read comes from the new location.
      // NOTE(review): the in-buffer branch treats value as an index into
      // the buffer, which matches the getter only if the wrapped stream
      // started at position 0 and has not been read past the buffered
      // bytes - confirm against callers.
      public override long Position {
          get {
              return stream.Position - bufLength + bufPos;
          }
          set {
              if (value < 0)
                  throw new ArgumentOutOfRangeException ("value");
              if (value < bufLength) {
                  bufPos = (int) value;
              } else {
                  stream.Position = value;
                  bufPos = bufLength;
              }
          }
      }

      // Closes the wrapped stream.
      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Copies up to count bytes into buffer at offset. Buffered bytes are
      // delivered first; if they do not cover count, the remainder is
      // requested from the wrapped stream in the same call. Returns the
      // total number of bytes delivered.
      public override int Read (byte[] buffer, int offset, int count)
      {
          int bufRest = bufLength - bufPos;
          if (count <= bufRest) { // all from buffer
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }

          if (bufRest > 0) {
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
              bufPos += bufRest;
          }
          return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
      }

      // Returns the next byte, buffered bytes first, or whatever the
      // wrapped stream's ReadByte() returns once the buffer is used up.
      public override int ReadByte ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];
          return stream.ReadByte ();
      }

      // Moves the read position and returns the new position.
      // A forward move that stays within the unread buffered bytes only
      // advances the replay index. Every other move is performed on the
      // wrapped stream, after which the buffer is marked as consumed so
      // that stale bytes are not replayed. Exceptions from the wrapped
      // stream (for example when it cannot seek) propagate to the caller.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          long moved;
          if (origin == SeekOrigin.Current) {
              int bufRest = bufLength - bufPos;
              if (offset >= 0 && offset < bufRest) {
                  // Compute the result first so bufPos is left unchanged
                  // if the wrapped stream cannot report its position.
                  long target = Position + offset;
                  bufPos += (int) offset;
                  return target;
              }
              // The wrapped stream is bufRest bytes ahead of the logical
              // position; compensate for that in the relative offset.
              moved = stream.Seek (offset - bufRest, origin);
          } else {
              moved = stream.Seek (offset, origin);
          }
          bufPos = bufLength;
          return moved;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      // Writing is not supported.
      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      #endregion
  }
  503. }