XmlInputStream.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. //
  2. // System.Xml.XmlInputStream
  3. // encoding-specification-wise XML input stream and reader
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. //
  11. // Permission is hereby granted, free of charge, to any person obtaining
  12. // a copy of this software and associated documentation files (the
  13. // "Software"), to deal in the Software without restriction, including
  14. // without limitation the rights to use, copy, modify, merge, publish,
  15. // distribute, sublicense, and/or sell copies of the Software, and to
  16. // permit persons to whom the Software is furnished to do so, subject to
  17. // the following conditions:
  18. //
  19. // The above copyright notice and this permission notice shall be
  20. // included in all copies or substantial portions of the Software.
  21. //
  22. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  26. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  27. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29. //
  30. using System;
  31. using System.IO;
  32. using System.Text;
  33. using System.Runtime.InteropServices;
  34. namespace System.Xml
  35. {
  36. #region XmlStreamReader
  // Text reader over an XmlInputStream: the stream works out the document
  // encoding and this reader decodes with it.
  internal class XmlStreamReader : NonBlockingStreamReader
  {
      // The encoding-detecting stream this reader was built on.
      XmlInputStream input;

      // Decodes with the encoding reported by the stream, falling back to
      // strict UTF-8 when the stream reports none.
      XmlStreamReader (XmlInputStream input)
          : base (input, input.ActualEncoding == null ? XmlInputStream.StrictUTF8 : input.ActualEncoding)
      {
          this.input = input;
      }

      // Wraps an arbitrary stream in an XmlInputStream first.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input))
      {
      }

      // Closes the wrapped XmlInputStream.
      public override void Close ()
      {
          input.Close ();
      }

      // Runs the base class disposal first, then closes the wrapped stream
      // when called for an explicit dispose.
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (!disposing)
              return;
          Close ();
      }
  }
  61. #endregion
  62. #region NonBlockingStreamReader
  63. // mostly copied from StreamReader, removing BOM checks, ctor
  64. // parameter checks and some extra public members.
  // A TextReader over a Stream that avoids issuing a second read on the
  // underlying stream when the previous one came back short (see Peek and
  // the single-pass Read(char[], int, int)).
  internal class NonBlockingStreamReader : TextReader {

      const int DefaultBufferSize = 1024;
      const int DefaultFileBufferSize = 4096;
      const int MinimumBufferSize = 128;

      // Returned by FindNextEOL when a CR that ended the previous decoded
      // buffer turns out not to be followed by LF: the line accumulated so
      // far is complete and nothing of the current buffer was consumed.
      const int PreviousBufferEndedLine = -2;

      //
      // The input buffer
      //
      byte [] input_buffer;

      //
      // The decoded buffer from the above input buffer
      //
      char [] decoded_buffer;

      //
      // Number of decoded chars currently held in decoded_buffer.
      //
      int decoded_count;

      //
      // Current position in the decoded_buffer
      //
      int pos;

      //
      // The buffer size that we are using
      //
      int buffer_size;

      Encoding encoding;
      Decoder decoder;
      Stream base_stream;

      // True when the last read on base_stream returned fewer bytes than
      // requested; Peek then refuses to trigger another read.
      bool mayBlock;

      // Reused across ReadLine calls for lines that span several buffers.
      StringBuilder line_builder;

      // True while the last char scanned by FindNextEOL was a CR whose
      // possible LF partner has not been seen yet.
      bool foundCR;

      // No argument validation is done here: stream and encoding must be
      // non-null.
      public NonBlockingStreamReader(Stream stream, Encoding encoding)
      {
          int buffer_size = DefaultBufferSize;
          base_stream = stream;
          input_buffer = new byte [buffer_size];
          this.buffer_size = buffer_size;
          this.encoding = encoding;
          decoder = encoding.GetDecoder ();
          decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
          decoded_count = 0;
          pos = 0;
      }

      public Encoding Encoding {
          get { return encoding; }
      }

      public override void Close ()
      {
          Dispose (true);
      }

      // Closes the underlying stream (only when disposing) and drops all
      // buffers; a null base_stream is what marks the reader as closed.
      protected override void Dispose (bool disposing)
      {
          if (disposing && base_stream != null)
              base_stream.Close ();

          input_buffer = null;
          decoded_buffer = null;
          encoding = null;
          decoder = null;
          base_stream = null;
          base.Dispose (disposing);
      }

      // Forgets everything decoded so far, including decoder state and any
      // pending CR from line scanning.
      public void DiscardBufferedData ()
      {
          pos = decoded_count = 0;
          mayBlock = false;
          foundCR = false;
  #if NET_2_0
          decoder.Reset ();
  #else
          decoder = encoding.GetDecoder ();
  #endif
      }

      // Throws if the reader has been closed/disposed.
      void CheckOpen ()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
      }

      // The decoded buffer is empty: fill it again.
      // Returns the number of chars now available, or 0 at end of stream.
      private int ReadBuffer ()
      {
          pos = 0;
          decoded_count = 0;

          // keep looping until the decoder gives us some chars
          do {
              int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
              if (cbEncoded == 0)
                  return 0;

              mayBlock = (cbEncoded < buffer_size);
              decoded_count += decoder.GetChars (input_buffer, 0, cbEncoded, decoded_buffer, 0);
          } while (decoded_count == 0);

          return decoded_count;
      }

      // Returns the next char without consuming it, or -1 when nothing is
      // buffered and either the last read was short or the stream is at its end.
      public override int Peek ()
      {
          CheckOpen ();
          if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
              return -1;

          return decoded_buffer [pos];
      }

      // Returns the next char, or -1 at end of stream.
      public override int Read ()
      {
          CheckOpen ();
          if (pos >= decoded_count && ReadBuffer () == 0)
              return -1;

          return decoded_buffer [pos++];
      }

      // Copies up to count chars into dest_buffer starting at index.
      // Only one buffer's worth is ever delivered per call (no looping until
      // count is satisfied), so the result may be smaller than count.
      // Returns 0 at end of stream.
      public override int Read ([In, Out] char[] dest_buffer, int index, int count)
      {
          CheckOpen ();
          if (dest_buffer == null)
              throw new ArgumentNullException ("dest_buffer");
          if (index < 0)
              throw new ArgumentOutOfRangeException ("index", "< 0");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count", "< 0");
          // re-ordered to avoid possible integer overflow
          if (index > dest_buffer.Length - count)
              throw new ArgumentException ("index + count > dest_buffer.Length");

          if (pos >= decoded_count && ReadBuffer () == 0)
              return 0;

          int cch = Math.Min (decoded_count - pos, count);
          Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
          pos += cch;
          return cch;
      }

      // Scans decoded_buffer from pos for the end of the current line.
      // Returns the index just past the last char of the line (pos is left
      // after the terminator), -1 when the buffer ran out without finding a
      // terminator, or PreviousBufferEndedLine when a CR pending from the
      // previous buffer is not followed by LF (pos is left at 0).
      int FindNextEOL ()
      {
          for (; pos < decoded_count; pos++) {
              char c = decoded_buffer [pos];
              if (c == '\n') {
                  pos++;
                  int res = (foundCR) ? (pos - 2) : (pos - 1);
                  if (res < 0)
                      res = 0; // if a new buffer starts with a \n and there was a \r at
                               // the end of the previous one, we get here.
                  foundCR = false;
                  return res;
              } else if (foundCR) {
                  foundCR = false;
                  if (pos == 0)
                      return PreviousBufferEndedLine;
                  return pos - 1;
              }

              foundCR = (c == '\r');
          }

          return -1;
      }

      // Hands out the accumulated multi-buffer line; an oversized builder is
      // dropped rather than kept for reuse.
      string TakeLine ()
      {
          string line = line_builder.ToString (0, line_builder.Length);
          if (line_builder.Capacity > 32768)
              line_builder = null;
          return line;
      }

      // Reads one line, terminated by CR, LF or CRLF (terminator not
      // included). Returns null at end of stream.
      public override string ReadLine()
      {
          CheckOpen ();
          if (pos >= decoded_count && ReadBuffer () == 0)
              return null;

          // A CR still pending from an earlier call already terminated the
          // line that call returned; it must not influence this scan.
          foundCR = false;

          int begin = pos;
          int end = FindNextEOL ();
          if (end < decoded_count && end >= begin)
              return new string (decoded_buffer, begin, end - begin);

          // The line continues into the next buffer(s).
          if (line_builder == null)
              line_builder = new StringBuilder ();
          else
              line_builder.Length = 0;

          while (true) {
              if (foundCR) // don't include the trailing CR if present
                  decoded_count--;

              line_builder.Append (decoded_buffer, begin, decoded_count - begin);
              if (ReadBuffer () == 0)
                  return TakeLine ();

              begin = pos;
              end = FindNextEOL ();
              if (end == PreviousBufferEndedLine)
                  return TakeLine (); // lone CR at the buffer boundary ended the line
              if (end < decoded_count && end >= begin) {
                  line_builder.Append (decoded_buffer, begin, end - begin);
                  return TakeLine ();
              }
          }
      }

      // Reads until Read(char[], int, int) reports 0 chars and returns
      // everything collected.
      public override string ReadToEnd()
      {
          CheckOpen ();

          StringBuilder text = new StringBuilder ();
          int size = decoded_buffer.Length;
          char [] buffer = new char [size];
          int len;

          while ((len = Read (buffer, 0, size)) != 0)
              text.Append (buffer, 0, len);

          return text.ToString ();
      }
  }
  270. #endregion
  // Read-only stream wrapper that looks at the first bytes of an XML
  // document (byte order mark and/or the XML declaration) to decide which
  // encoding the document uses, while still delivering every inspected
  // byte (except a recognised BOM) to the consumer.
  class XmlInputStream : Stream
  {
      // UTF-8 without BOM emission that throws on invalid byte sequences.
      public static readonly Encoding StrictUTF8;

      static XmlInputStream ()
      {
          StrictUTF8 = new UTF8Encoding (false, true);
      }

      Encoding enc;
      Stream stream;

      // Look-ahead bytes already pulled from 'stream' during detection.
      byte[] buffer;
      // Number of valid bytes in 'buffer'.
      int bufLength;
      // Index of the next byte of 'buffer' to hand out.
      int bufPos;

      public XmlInputStream (Stream stream)
      {
          Initialize (stream);
      }

      // A fresh exception per failure; a shared static instance would have
      // its stack trace overwritten by every throw.
      static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Reads the first chunk of the stream and determines the encoding.
      // On return bufPos points at the first byte to deliver: just past a
      // recognised BOM, otherwise at the very beginning.
      private void Initialize (Stream stream)
      {
          buffer = new byte [64];
          this.stream = stream;
          enc = StrictUTF8; // Default to UTF8 if we can't guess it
          bufLength = stream.Read (buffer, 0, buffer.Length);
          if (bufLength <= 0)
              return;

          int c = ReadByteSpecial ();
          switch (c) {
          case 0xFF:
              if (ReadByteSpecial () == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // Not a BOM: deliver everything, keep the default encoding
                  bufPos = 0;
              }
              break;
          case 0xFE:
              if (ReadByteSpecial () == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // Not a BOM: deliver everything, keep the default encoding
                  bufPos = 0;
              }
              break;
          case 0xEF:
              // UTF-8 BOM is EF BB BF. Anything else is ordinary content,
              // so rewind without touching the buffered bytes.
              if (ReadByteSpecial () != 0xBB || ReadByteSpecial () != 0xBF)
                  bufPos = 0;
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              if (bufLength >= 5 && Encoding.ASCII.GetString (buffer, 1, 4) == "?xml") {
                  bufPos += 4;
                  ReadEncodingFromXmlDeclaration ();
              }
  #if TARGET_JVM
              else {
                  if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml")
                      enc = Encoding.Unicode;
              }
  #endif
              // The declaration itself must still be delivered to the parser.
              bufPos = 0;
              break;
          default:
              bufPos = 0;
              break;
          }
      }

      // Called with bufPos just past "<?xml". Skips an optional version
      // pseudo-attribute and, if an encoding pseudo-attribute follows, sets
      // 'enc' from it. Throws XmlException when the encoding
      // pseudo-attribute is present but malformed or names an invalid
      // encoding.
      void ReadEncodingFromXmlDeclaration ()
      {
          int c = SkipWhitespace ();

          // version. It is optional here.
          if (c == 'v') {
              // Skip up to the opening quote, then up to the matching
              // closing quote, so any version value ("1.0", "1.1") works.
              while (c >= 0 && c != '"' && c != '\'')
                  c = ReadByteSpecial ();
              int versionQuote = c;
              while (c >= 0) {
                  c = ReadByteSpecial ();
                  if (c == versionQuote)
                      break;
              }
              c = SkipWhitespace ();
          }

          if (c != 'e')
              return;

          // Match the rest of "encoding" byte by byte so that it is found
          // even when it straddles the end of the currently buffered data.
          // Bytes consumed on a mismatch do not matter: the caller rewinds.
          const string nameTail = "ncoding";
          for (int i = 0; i < nameTail.Length; i++) {
              if (ReadByteSpecial () != nameTail [i])
                  return;
          }

          if (SkipWhitespace () != '=')
              throw CreateEncodingException ();

          int quoteChar = SkipWhitespace ();
          if (quoteChar != '"' && quoteChar != '\'')
              throw CreateEncodingException ();

          StringBuilder sb = new StringBuilder ();
          while (true) {
              c = ReadByteSpecial ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              sb.Append ((char) c);
          }

          string encodingName = sb.ToString ();
          if (!XmlChar.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();

          enc = Encoding.GetEncoding (encodingName);
      }

      // Just like readbyte, but grows the buffer too: consumed bytes stay
      // in 'buffer' so that detection can rewind. Returns -1 when the
      // underlying stream has no more data.
      int ReadByteSpecial ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];

          byte [] newbuf = new byte [buffer.Length * 2];
          Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
          int nbytes = stream.Read (newbuf, bufLength, buffer.Length);
          if (nbytes <= 0)
              return -1;
          bufLength += nbytes;
          buffer = newbuf;
          return buffer [bufPos++];
      }

      // skips whitespace and returns the first other byte that was read
      // from the stream (-1 at end of stream)
      private int SkipWhitespace ()
      {
          while (true) {
              int c = ReadByteSpecial ();
              if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
                  return c;
          }
      }

      // The encoding decided on by Initialize.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      public override bool CanRead {
          get {
              if (bufLength > bufPos)
                  return true;
              else
                  return stream.CanRead;
          }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      public override long Length {
          get {
              return stream.Length;
          }
      }

      // Position in the coordinates of the underlying stream: the
      // underlying stream is ahead of the consumer by the number of
      // buffered bytes not yet handed out.
      public override long Position {
          get {
              return stream.Position - bufLength + bufPos;
          }
          set {
              if (!stream.CanSeek && value >= 0 && value < bufLength) {
                  // The underlying stream cannot move; keep the historical
                  // ability to rewind inside the look-ahead buffer.
                  bufPos = (int) value;
                  return;
              }
              // Move the underlying stream first (it may throw), then mark
              // the look-ahead buffer as consumed so that the getter and
              // subsequent reads agree with the new position.
              stream.Position = value;
              bufPos = bufLength;
          }
      }

      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Serves buffered look-ahead bytes first and takes the remainder of
      // the request from the underlying stream.
      public override int Read (byte[] buffer, int offset, int count)
      {
          int bufRest = bufLength - bufPos;
          if (count <= bufRest) { // all from buffer
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }

          if (bufRest > 0) {
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
              bufPos += bufRest;
          }
          return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
      }

      public override int ReadByte ()
      {
          if (bufLength > bufPos) {
              return buffer [bufPos++];
          }
          return stream.ReadByte ();
      }

      // Seeks on the underlying stream and returns the new position.
      // A relative seek is corrected by the number of buffered bytes not
      // yet handed out, because the underlying stream is that far ahead.
      // After a successful seek the look-ahead buffer is discarded so stale
      // bytes are never served.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          int bufRest = bufLength - bufPos;
          long target = (origin == SeekOrigin.Current) ? offset - bufRest : offset;
          long newPosition = stream.Seek (target, origin);
          bufPos = bufLength;
          return newPosition;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      #endregion
  }
  506. }