XmlInputStream.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. //
  2. // System.Xml.XmlInputStream
  3. // XML input stream and reader that detect the document encoding from its byte order mark or XML declaration
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. //
  11. // Permission is hereby granted, free of charge, to any person obtaining
  12. // a copy of this software and associated documentation files (the
  13. // "Software"), to deal in the Software without restriction, including
  14. // without limitation the rights to use, copy, modify, merge, publish,
  15. // distribute, sublicense, and/or sell copies of the Software, and to
  16. // permit persons to whom the Software is furnished to do so, subject to
  17. // the following conditions:
  18. //
  19. // The above copyright notice and this permission notice shall be
  20. // included in all copies or substantial portions of the Software.
  21. //
  22. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  26. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  27. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29. //
  30. using System;
  31. using System.IO;
  32. using System.Text;
  33. using System.Runtime.InteropServices;
  34. namespace System.Xml
  35. {
  36. #region XmlStreamReader
  // Text reader for XML input. It wraps the supplied stream in an
  // XmlInputStream and decodes it with the encoding that stream reports,
  // falling back to strict UTF-8 when none is reported.
  internal class XmlStreamReader : NonBlockingStreamReader
  {
      // The encoding-detecting stream this reader decodes from.
      XmlInputStream input;

      // Builds the reader over an already constructed XmlInputStream.
      XmlStreamReader (XmlInputStream input)
          : base (input, SelectEncoding (input))
      {
          this.input = input;
      }

      // Wraps a raw stream in an XmlInputStream and reads text from it.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input))
      {
      }

      // Returns the encoding reported by the stream, or
      // XmlInputStream.StrictUTF8 when it reports none.
      static Encoding SelectEncoding (XmlInputStream source)
      {
          Encoding detected = source.ActualEncoding;
          if (detected == null)
              return XmlInputStream.StrictUTF8;
          return detected;
      }

      // Closes the wrapped XmlInputStream.
      public override void Close ()
      {
          input.Close ();
      }

      // Runs the base class disposal first and then, on an explicit
      // dispose only, closes the wrapped stream through Close ().
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (!disposing)
              return;
          Close ();
      }
  }
  61. #endregion
  62. #region NonBlockingStreamReader
  63. // mostly copied from StreamReader.
  // TextReader over a byte stream, mostly modelled on StreamReader. It reads
  // the stream in blocks of buffer_size bytes, decodes each block into
  // decoded_buffer and serves characters from there. Peek () does not touch
  // the stream when the previous block read was short (mayBlock).
  internal class NonBlockingStreamReader : TextReader {
      const int DefaultBufferSize = 1024;

      //
      // The input buffer
      //
      byte [] input_buffer;

      //
      // The decoded buffer from the above input buffer
      //
      char [] decoded_buffer;

      //
      // Number of decoded chars in decoded_buffer.
      //
      int decoded_count;

      //
      // Current position in the decoded_buffer
      //
      int pos;

      //
      // The buffer size that we are using
      //
      int buffer_size;

      Encoding encoding;
      Decoder decoder;
      Stream base_stream;

      // True when the last block read returned fewer bytes than requested.
      bool mayBlock;

      // Creates a reader that decodes 'stream' with 'encoding'.
      // Throws ArgumentNullException when either argument is null.
      public NonBlockingStreamReader (Stream stream, Encoding encoding)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          if (encoding == null)
              throw new ArgumentNullException ("encoding");

          base_stream = stream;
          buffer_size = DefaultBufferSize;
          input_buffer = new byte [buffer_size];
          this.encoding = encoding;
          decoder = encoding.GetDecoder ();
          decoded_buffer = new char [encoding.GetMaxCharCount (buffer_size)];
          decoded_count = 0;
          pos = 0;
      }

      // Closes the underlying stream and releases the buffers.
      public override void Close ()
      {
          Dispose (true);
      }

      // Closes the underlying stream on an explicit dispose and drops all
      // references held by the reader. Reads afterwards throw
      // ObjectDisposedException.
      protected override void Dispose (bool disposing)
      {
          if (disposing && base_stream != null)
              base_stream.Close ();

          input_buffer = null;
          decoded_buffer = null;
          encoding = null;
          decoder = null;
          base_stream = null;
          base.Dispose (disposing);
      }

      // Forgets everything that was buffered. Call it after repositioning
      // the underlying stream.
      public void DiscardBufferedData ()
      {
          pos = decoded_count = 0;
          mayBlock = false;
          // A multi-byte sequence cut off at the end of the previous block
          // is still pending inside the decoder; start from a fresh decoder
          // so it cannot corrupt the first character decoded next.
          if (encoding != null)
              decoder = encoding.GetDecoder ();
      }

      // Throws when the reader has been closed or disposed.
      private void ThrowIfClosed ()
      {
          if (base_stream == null)
              throw new ObjectDisposedException ("StreamReader", "Cannot read from a closed StreamReader");
      }

      // The decoded buffer is empty, fill it again. Returns the number of
      // chars now available, or 0 at end of stream.
      private int ReadBuffer ()
      {
          pos = 0;
          decoded_count = 0;

          // keep looping until the decoder gives us some chars
          do {
              int cbEncoded = base_stream.Read (input_buffer, 0, buffer_size);
              if (cbEncoded <= 0)
                  return 0;

              mayBlock = (cbEncoded < buffer_size);
              decoded_count += decoder.GetChars (input_buffer, 0, cbEncoded, decoded_buffer, 0);
          } while (decoded_count == 0);

          return decoded_count;
      }

      // Returns the next char without consuming it, or -1 when nothing is
      // buffered and either the last block read was short or the stream
      // is at its end.
      public override int Peek ()
      {
          ThrowIfClosed ();
          if (pos >= decoded_count && (mayBlock || ReadBuffer () == 0))
              return -1;

          return decoded_buffer [pos];
      }

      // Returns the next char, or -1 at end of stream.
      public override int Read ()
      {
          ThrowIfClosed ();
          if (pos >= decoded_count && ReadBuffer () == 0)
              return -1;

          return decoded_buffer [pos++];
      }

      // Copies up to 'count' chars into dest_buffer starting at 'index' and
      // returns how many were copied (0 at end of stream). At most one
      // decoded block is consumed per call, so fewer than 'count' chars may
      // be returned even when more input exists.
      public override int Read ([In, Out] char[] dest_buffer, int index, int count)
      {
          ThrowIfClosed ();
          if (dest_buffer == null)
              throw new ArgumentNullException ("dest_buffer");
          if (index < 0)
              throw new ArgumentOutOfRangeException ("index", "< 0");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count", "< 0");
          // re-ordered to avoid possible integer overflow
          if (index > dest_buffer.Length - count)
              throw new ArgumentException ("index + count > dest_buffer.Length");

          if (pos >= decoded_count && ReadBuffer () == 0)
              return 0;

          int cch = Math.Min (decoded_count - pos, count);
          Array.Copy (decoded_buffer, pos, dest_buffer, index, cch);
          pos += cch;
          return cch;
      }

      // Reads one line terminated by "\n", "\r\n", a lone "\r" or end of
      // stream. The terminator is not part of the result. Returns null
      // when the stream is already at its end.
      public override string ReadLine ()
      {
          ThrowIfClosed ();

          bool foundCR = false;
          StringBuilder text = new StringBuilder ();

          while (true) {
              int c = Read ();

              if (c == -1) { // end of stream
                  if (text.Length == 0)
                      return null;
                  if (foundCR)
                      text.Length--;
                  break;
              }

              if (c == '\n') { // newline, possibly completing "\r\n"
                  if (text.Length > 0 && text [text.Length - 1] == '\r')
                      text.Length--;
                  break;
              }

              if (foundCR) {
                  // A lone '\r' ended the line: un-read the char just
                  // taken and drop the '\r' from the result.
                  pos--;
                  text.Length--;
                  break;
              }

              if (c == '\r')
                  foundCR = true;

              text.Append ((char) c);
          }

          return text.ToString ();
      }

      // Reads all remaining chars up to end of stream.
      public override string ReadToEnd ()
      {
          ThrowIfClosed ();

          StringBuilder text = new StringBuilder ();
          int size = decoded_buffer.Length;
          char [] buffer = new char [size];
          int len;

          while ((len = Read (buffer, 0, size)) != 0)
              text.Append (buffer, 0, len);

          return text.ToString ();
      }
  }
  230. #endregion
  // Read-only stream wrapper that determines the text encoding of an XML
  // document. On construction it pulls the first bytes of the underlying
  // stream into a look-ahead buffer, inspects the byte order mark and the
  // XML declaration, and afterwards serves the buffered bytes followed by
  // the rest of the underlying stream. A recognised byte order mark is
  // skipped; the XML declaration itself is replayed to the consumer.
  class XmlInputStream : Stream
  {
      public static readonly Encoding StrictUTF8;

      static XmlInputStream ()
      {
          // no byte order mark on output, throw on invalid byte sequences
          StrictUTF8 = new UTF8Encoding (false, true);
      }

      // Encoding chosen by Initialize ().
      Encoding enc;
      Stream stream;
      // Look-ahead bytes already pulled from the underlying stream.
      byte[] buffer;
      // Number of valid bytes in buffer.
      int bufLength;
      // Index of the next unread byte in buffer.
      int bufPos;

      // Wraps 'stream' and detects its encoding immediately.
      // Throws ArgumentNullException when 'stream' is null and XmlException
      // when the XML declaration carries a malformed encoding declaration.
      public XmlInputStream (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          Initialize (stream);
      }

      // A fresh exception per failure, so each throw gets its own stack
      // trace and no instance is shared between threads.
      static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Fills the look-ahead buffer and sets enc from the byte order mark
      // or the XML declaration. Defaults to strict UTF-8.
      private void Initialize (Stream stream)
      {
          buffer = new byte [64];
          this.stream = stream;
          enc = StrictUTF8; // Default to UTF8 if we can't guess it
          bufLength = stream.Read (buffer, 0, buffer.Length);
          if (bufLength <= 0) {
              // keep the length non-negative so later arithmetic stays sane
              bufLength = 0;
              return;
          }

          int c = ReadByteSpecial ();
          switch (c) {
          case 0xFF:
              if (ReadByteSpecial () == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // No BOM: replay everything, encoding stays utf-8
                  bufPos = 0;
              }
              break;
          case 0xFE:
              if (ReadByteSpecial () == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // No BOM: replay everything, encoding stays utf-8
                  bufPos = 0;
              }
              break;
          case 0xEF:
              // Skip the bytes only when they are the complete utf-8 BOM
              // (EF BB BF); otherwise they are content and must be replayed
              // untouched.
              if (ReadByteSpecial () != 0xBB || ReadByteSpecial () != 0xBF)
                  bufPos = 0;
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              if (EnsureBuffered (4) && Encoding.ASCII.GetString (buffer, bufPos, 4) == "?xml") {
                  bufPos += 4;
                  ReadDeclaredEncoding ();
              }
  #if TARGET_JVM
              else if (bufLength >= 10 && Encoding.Unicode.GetString (buffer, 2, 8) == "?xml") {
                  enc = Encoding.Unicode;
              }
  #endif
              // The declaration is part of the document: replay it.
              bufPos = 0;
              break;
          default:
              bufPos = 0;
              break;
          }
      }

      // Parses the pseudo-attributes that follow "<?xml". When an encoding
      // declaration is found, enc is replaced by the declared encoding.
      // Throws XmlException when the declaration is malformed or names an
      // invalid encoding.
      void ReadDeclaredEncoding ()
      {
          int c = SkipWhitespace ();

          // version. It is optional here.
          if (c == 'v') {
              // Skip through the quoted value regardless of the version
              // number it contains.
              if (!SkipQuotedValue ())
                  return;
              c = SkipWhitespace ();
          }

          if (c != 'e')
              return;
          // Pull in more input if the name straddles the buffered data.
          if (!EnsureBuffered (7) || Encoding.ASCII.GetString (buffer, bufPos, 7) != "ncoding")
              return;
          bufPos += 7;

          if (SkipWhitespace () != '=')
              throw CreateEncodingException ();

          int quoteChar = SkipWhitespace ();
          if (quoteChar != '"' && quoteChar != '\'')
              throw CreateEncodingException ();

          StringBuilder sb = new StringBuilder ();
          while (true) {
              c = ReadByteSpecial ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              sb.Append ((char) c);
          }

          string encodingName = sb.ToString ();
          if (!XmlChar.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          enc = Encoding.GetEncoding (encodingName);
      }

      // Consumes bytes up to and including the next quoted value.
      // Returns false when '>' or end of stream is reached first, so a
      // malformed declaration cannot pull the whole document into memory.
      bool SkipQuotedValue ()
      {
          int c;
          do {
              c = ReadByteSpecial ();
              if (c < 0 || c == '>')
                  return false;
          } while (c != '"' && c != '\'');

          int quote = c;
          do {
              c = ReadByteSpecial ();
              if (c < 0 || c == '>')
                  return false;
          } while (c != quote);

          return true;
      }

      // Makes sure at least 'count' unread bytes are buffered, reading
      // from the underlying stream as needed. Returns false when the
      // stream ends first.
      bool EnsureBuffered (int count)
      {
          while (bufLength - bufPos < count) {
              if (!FillBuffer ())
                  return false;
          }
          return true;
      }

      // Appends more bytes from the underlying stream to the look-ahead
      // buffer, doubling it when it is full. Returns false at end of
      // stream.
      bool FillBuffer ()
      {
          if (bufLength == buffer.Length) {
              byte [] newbuf = new byte [buffer.Length * 2];
              Buffer.BlockCopy (buffer, 0, newbuf, 0, bufLength);
              buffer = newbuf;
          }
          int nbytes = stream.Read (buffer, bufLength, buffer.Length - bufLength);
          if (nbytes <= 0)
              return false;
          bufLength += nbytes;
          return true;
      }

      // Just like ReadByte, but the byte stays in the look-ahead buffer
      // (which grows as needed) so it can be replayed later.
      // Returns -1 at end of stream.
      int ReadByteSpecial ()
      {
          if (bufLength <= bufPos && !FillBuffer ())
              return -1;
          return buffer [bufPos++];
      }

      // skips whitespace and returns the first other byte read from the
      // stream, or -1 at end of stream
      private int SkipWhitespace ()
      {
          int c;
          do {
              c = ReadByteSpecial ();
          } while (c == ' ' || c == '\t' || c == '\r' || c == '\n');
          return c;
      }

      // The encoding detected during construction.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      // True while look-ahead bytes remain, otherwise whatever the
      // underlying stream reports.
      public override bool CanRead {
          get {
              if (bufLength > bufPos)
                  return true;
              return stream.CanRead;
          }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      public override long Length {
          get { return stream.Length; }
      }

      // Logical read position: the underlying stream is ahead of the
      // consumer by the look-ahead bytes not handed out yet.
      public override long Position {
          get {
              return stream.Position - bufLength + bufPos;
          }
          set {
              // Logical and underlying positions use the same coordinates,
              // so reposition the underlying stream and drop the look-ahead.
              // The buffer is only dropped once the underlying stream has
              // accepted the new position.
              stream.Position = value;
              bufPos = bufLength;
          }
      }

      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Serves look-ahead bytes first and reads the remainder of the
      // request, if any, from the underlying stream.
      public override int Read (byte[] buffer, int offset, int count)
      {
          int bufRest = bufLength - bufPos;

          if (count <= bufRest) { // all from buffer
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }

          if (bufRest > 0) {
              Buffer.BlockCopy (this.buffer, bufPos, buffer, offset, bufRest);
              bufPos += bufRest;
          }
          return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
      }

      public override int ReadByte ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];
          return stream.ReadByte ();
      }

      // Moves the logical position and returns the new position.
      // Relies on the underlying stream's Seek.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          long newPosition;
          if (origin == SeekOrigin.Current) {
              // The underlying stream is ahead of the logical position by
              // the unread look-ahead; compensate for that.
              int bufRest = bufLength - bufPos;
              newPosition = stream.Seek (offset - bufRest, SeekOrigin.Current);
          } else {
              newPosition = stream.Seek (offset, origin);
          }
          // The look-ahead no longer matches the stream position.
          bufPos = bufLength;
          return newPosition;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      #endregion
  }
  466. }