XmlInputStream.cs 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. //
  2. // System.Xml.XmlInputStream
  3. // XML input stream and reader that honor the document's encoding specification
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. namespace System.Xml
  14. {
  15. #region XmlStreamReader
  // StreamReader over an XmlInputStream. Text is decoded with the encoding the
  // input stream reports through ActualEncoding, or with UTF-8 when that
  // property is null.
  internal class XmlStreamReader : StreamReader
  {
      public XmlStreamReader (XmlInputStream input)
          : base (input, SelectEncoding (input))
      {
      }

      // Returns input.ActualEncoding, substituting UTF-8 for null.
      private static Encoding SelectEncoding (XmlInputStream input)
      {
          Encoding detected = input.ActualEncoding;
          if (detected == null)
              return Encoding.UTF8;
          return detected;
      }
  }
  23. #endregion
  // Read-only stream wrapper that inspects the first bytes of an XML document
  // to determine its character encoding.
  //
  // Detection order:
  //   * a UTF-16 (FF FE / FE FF) or UTF-8 (EF BB BF) byte order mark, which is
  //     consumed and not handed to the reader;
  //   * otherwise, for input starting with '<', the encoding pseudo-attribute
  //     of a leading "<?xml ... ?>" declaration;
  //   * otherwise UTF-8.
  //
  // Every non-BOM byte consumed while sniffing is kept in an internal prefix
  // buffer and replayed by Read/ReadByte before the underlying stream is read
  // again, so the consumer sees the complete document.
  public class XmlInputStream : Stream
  {
      // Detected encoding; null when the input starts with '<' but carries no
      // encoding declaration.
      Encoding enc;
      // Underlying stream the document is read from.
      Stream stream;
      // Sniffed bytes that still have to be replayed to the consumer.
      byte[] buffer = new byte [0];
      // Number of valid bytes in buffer.
      int bufLength;
      // Index of the next unread byte in buffer (== bufLength once drained).
      // Invariant: while bufPos < bufLength the underlying stream has not been
      // read since Initialize finished.
      int bufPos;

      // Opens the resource at uri with System.Net.WebClient and sniffs its encoding.
      public XmlInputStream (string uri)
      {
          Initialize (new System.Net.WebClient ().OpenRead (uri));
      }

      // Wraps an existing stream and sniffs its encoding.
      // Throws ArgumentNullException when stream is null.
      public XmlInputStream (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          Initialize (stream);
      }

      // A fresh exception per failure, so each throw carries its own stack trace.
      private static XmlException InvalidEncoding ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Appends c to ms unless c is the end-of-stream marker (-1).
      private static void PushBack (MemoryStream ms, int c)
      {
          if (c >= 0)
              ms.WriteByte ((byte)c);
      }

      // Reads up to count bytes from the underlying stream, looping over
      // partial reads. Returns the number of bytes actually read, which is
      // less than count only when the stream ended.
      private int ReadFully (byte[] target, int offset, int count)
      {
          int total = 0;
          while (total < count) {
              int n = stream.Read (target, offset + total, count - total);
              if (n <= 0)
                  break;
              total += n;
          }
          return total;
      }

      // Sniffs the encoding from the start of the stream and fills the prefix
      // buffer with every consumed byte that is part of the document.
      private void Initialize (Stream stream)
      {
          this.stream = stream;
          // Collects consumed bytes that must be replayed to the consumer.
          MemoryStream ms = new MemoryStream ();
          int c = stream.ReadByte ();
          switch (c) {
          case 0xFF:
              c = stream.ReadByte ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // Not a BOM and not "<?xml": treat the input as utf-8.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte)0xFF);
                  PushBack (ms, c);
              }
              break;
          case 0xFE:
              c = stream.ReadByte ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // Not a BOM and not "<?xml": treat the input as utf-8.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte)0xFE);
                  PushBack (ms, c);
              }
              break;
          case 0xEF:
              enc = Encoding.UTF8;
              c = stream.ReadByte ();
              if (c == 0xBB) {
                  c = stream.ReadByte ();
                  if (c != 0xBF) {
                      // Incomplete utf-8 BOM: everything read is document data.
                      ms.WriteByte ((byte)0xEF);
                      ms.WriteByte ((byte)0xBB);
                      PushBack (ms, c);
                  }
              } else {
                  ms.WriteByte ((byte)0xEF);
                  PushBack (ms, c);
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              ms.WriteByte ((byte)'<');
              ReadXmlDeclaration (ms);
              break;
          default:
              // Covers the empty stream too (c == -1): nothing is pushed back.
              enc = Encoding.UTF8;
              PushBack (ms, c);
              break;
          }
          buffer = ms.ToArray ();
          bufLength = buffer.Length;
          bufPos = 0;
      }

      // Called after a leading '<' has been consumed. Reads as much of an
      // "<?xml version=... encoding=...?>" declaration as is needed to find the
      // encoding name, copying every consumed byte to ms. Sets enc only when an
      // encoding pseudo-attribute is present.
      // Throws XmlException when the declaration is malformed or the encoding
      // name is not a valid IANA encoding name.
      private void ReadXmlDeclaration (MemoryStream ms)
      {
          byte[] scratch = new byte [7];
          int size = ReadFully (scratch, 0, 4);
          ms.Write (scratch, 0, size);
          if (size != 4 || Encoding.ASCII.GetString (scratch, 0, 4) != "?xml")
              return;

          long afterTarget = ms.Length;
          int c = SkipWhitespace (ms);
          if (ms.Length == afterTarget) {
              // No whitespace after "<?xml": this is a processing instruction
              // whose target merely starts with "xml" (e.g. xml-stylesheet),
              // or the stream ended. Either way it is not an XML declaration.
              PushBack (ms, c);
              return;
          }

          // version
          if (c != 'v' || stream.ReadByte () != 'e')
              throw new XmlException ("invalid xml declaration.");
          ms.WriteByte ((byte)'v');
          ms.WriteByte ((byte)'e');

          // Copy the rest of the version pseudo-attribute, up to and including
          // the quote that closes its value.
          int versionQuote = -1;
          while (true) {
              c = stream.ReadByte ();
              if (c < 0)
                  throw new XmlException ("invalid xml declaration.");
              ms.WriteByte ((byte)c);
              if (versionQuote < 0) {
                  if (c == '"' || c == '\'')
                      versionQuote = c;
                  else if (c == '>')
                      return; // tag ended without a value; leave it to the parser
              } else if (c == versionQuote) {
                  break;
              }
          }

          c = SkipWhitespace (ms);
          if (c != 'e') {
              PushBack (ms, c);
              return;
          }
          ms.WriteByte ((byte)'e');
          size = ReadFully (scratch, 0, 7);
          ms.Write (scratch, 0, size);
          if (size != 7 || Encoding.ASCII.GetString (scratch, 0, 7) != "ncoding")
              return;

          c = SkipWhitespace (ms);
          if (c != '=')
              throw InvalidEncoding ();
          ms.WriteByte ((byte)'=');
          c = SkipWhitespace (ms);
          if (c != '"' && c != '\'')
              throw InvalidEncoding ();
          int quoteChar = c;
          ms.WriteByte ((byte)c);

          int start = (int)ms.Position;
          while (true) {
              c = stream.ReadByte ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw InvalidEncoding ();
              ms.WriteByte ((byte)c);
          }
          string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int)ms.Position - start);
          if (!XmlConstructs.IsValidIANAEncoding (encodingName))
              throw InvalidEncoding ();
          ms.WriteByte ((byte)quoteChar);
          enc = Encoding.GetEncoding (encodingName);
      }

      // Consumes space, tab, CR and LF bytes from the underlying stream,
      // copying them to ms when ms is not null. Returns the first other value
      // read, which is -1 at end of stream; that value is NOT written to ms.
      private int SkipWhitespace (MemoryStream ms) // ms may be null
      {
          while (true) {
              int c = stream.ReadByte ();
              switch (c) {
              case ' ':
              case '\t':
              case '\r':
              case '\n':
                  if (ms != null)
                      ms.WriteByte ((byte)c);
                  break;
              default:
                  return c;
              }
          }
      }

      // Encoding detected from the BOM or XML declaration; null when the input
      // starts with '<' and declares no encoding.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      public override bool CanRead {
          get { return stream.CanRead; }
      }

      // Always reported as false, as before; Position and Seek still work when
      // the underlying stream supports them.
      public override bool CanSeek {
          get { return false; } //stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      // Length of the underlying stream.
      public override long Length {
          get {
              return stream.Length;
          }
      }

      // Byte offset, within the underlying stream, of the next byte this
      // stream will return.
      public override long Position {
          get {
              // The underlying stream sits just past the buffered prefix until
              // the prefix is drained, so subtract what is still unread.
              return stream.Position - (bufLength - bufPos);
          }
          set {
              // Reposition the underlying stream first: if that throws, the
              // prefix buffer stays intact. After it succeeds the prefix is
              // obsolete, because the bytes can be re-read from the stream.
              stream.Position = value;
              bufPos = bufLength;
          }
      }

      public override void Flush()
      {
          stream.Flush ();
      }

      // Reads up to count bytes into buffer at offset: first from the buffered
      // prefix, then from the underlying stream. Returns the number of bytes
      // read, 0 at end of stream.
      public override int Read (byte[] buffer, int offset, int count)
      {
          if (buffer == null)
              throw new ArgumentNullException ("buffer");
          if (offset < 0)
              throw new ArgumentOutOfRangeException ("offset");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count");
          if (buffer.Length - offset < count)
              throw new ArgumentException ("offset and count exceed the buffer length.");

          int fromPrefix = Math.Min (count, bufLength - bufPos);
          if (fromPrefix > 0) {
              Array.Copy (this.buffer, bufPos, buffer, offset, fromPrefix);
              bufPos += fromPrefix;
          } else {
              fromPrefix = 0;
          }
          if (fromPrefix == count)
              return fromPrefix;
          return fromPrefix + stream.Read (buffer, offset + fromPrefix, count - fromPrefix);
      }

      // Returns the next byte, or -1 at end of stream.
      public override int ReadByte ()
      {
          if (bufLength > bufPos) {
              return buffer [bufPos++];
          }
          return stream.ReadByte ();
      }

      // Moves to the position described by offset and origin and returns the
      // new position. Requires a seekable underlying stream.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          long target;
          switch (origin) {
          case SeekOrigin.Begin:
              target = offset;
              break;
          case SeekOrigin.Current:
              target = Position + offset;
              break;
          case SeekOrigin.End:
              target = stream.Length + offset;
              break;
          default:
              throw new ArgumentException ("Invalid seek origin.", "origin");
          }
          Position = target;
          return target;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      // This stream is read-only.
      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      // Closes the underlying stream as well, so a stream opened by the
      // uri constructor is not leaked.
      public override void Close ()
      {
          if (stream != null)
              stream.Close ();
          base.Close ();
      }

      #endregion
  }
  252. }