XmlInputStream.cs 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. //
  2. // System.Xml.XmlInputStream
  // XML input stream and reader that honor the document's encoding specification
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. using System.Xml;
  14. namespace Mono.Xml.Native
  15. {
  16. #region XmlStreamReader
  // StreamReader over an XML source. The bytes are read through an
  // XmlInputStream, and the reader decodes them with the encoding that stream
  // reports, or with UTF-8 when it reports none.
  public class XmlStreamReader : StreamReader
  {
      // Creates a reader over an already opened byte stream.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input))
      {
      }

      // Creates a reader over the resource named by url; how the name is
      // resolved is decided by XmlInputStream (string).
      public XmlStreamReader (string url)
          : this (new XmlInputStream (url))
      {
      }

      // Common target of the public constructors.
      private XmlStreamReader (XmlInputStream input)
          : base (input, EncodingFor (input))
      {
      }

      // Picks the encoding handed to StreamReader: the one reported by the
      // input stream, falling back to UTF-8 when it is null.
      private static Encoding EncodingFor (XmlInputStream input)
      {
          Encoding detected = input.ActualEncoding;
          if (detected == null)
              return Encoding.UTF8;
          return detected;
      }
  }
  32. #endregion
  // Read-only Stream wrapper that inspects the beginning of an XML source to
  // find out its character encoding (byte order mark, or the "encoding"
  // pseudo-attribute of an XML/text declaration).
  //
  // A recognized byte order mark is consumed. Every other byte that had to be
  // read during detection is kept in a look-ahead buffer and handed out again
  // by Read/ReadByte before any further data is taken from the source, so the
  // consumer still sees the document from its first character.
  class XmlInputStream : Stream
  {
      // Encoding found by Initialize. Stays null when the source starts with
      // '<' but declares no encoding.
      Encoding enc;
      // The wrapped source.
      Stream stream;
      // Look-ahead bytes read during detection that still have to be replayed.
      byte[] buffer;
      // Number of valid bytes in buffer.
      int bufLength;
      // Index of the next look-ahead byte to hand out.
      int bufPos;

      // Opens the resource named by url and detects its encoding.
      // Without the NetworkEnabled symbol the name is always a file path.
      // With it, a name that parses as a URI is downloaded, anything else is
      // opened as a file.
      public XmlInputStream (string url)
      {
          Stream source = OpenSource (url);
          try {
              Initialize (source);
          } catch {
              // We opened the source ourselves, so nobody else can release it.
              source.Close ();
              throw;
          }
      }

      // Wraps an already opened stream and detects its encoding.
      // Closing this instance also closes the given stream.
      public XmlInputStream (Stream stream)
      {
          Initialize (stream);
      }

      // Resolves url to a readable stream (see the url constructor).
      private static Stream OpenSource (string url)
      {
  #if NetworkEnabled
          try {
              new Uri (url); // throws UriFormatException for plain file paths
              System.Net.WebClient client = new System.Net.WebClient ();
              try {
                  return new MemoryStream (client.DownloadData (url));
              } finally {
                  client.Dispose ();
              }
          } catch (UriFormatException) {
              return new FileStream (url, FileMode.Open, FileAccess.Read);
          }
  #else
          return new FileStream (url, FileMode.Open, FileAccess.Read);
  #endif
      }

      // A fresh exception per failure: a shared instance would carry a stale
      // stack trace and be shared between threads.
      private static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Reads the first bytes of the source, sets enc accordingly and fills the
      // look-ahead buffer with every byte that must be replayed to the consumer.
      // Throws ArgumentNullException when stream is null, and XmlException when
      // a declaration is present but malformed.
      private void Initialize (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          this.stream = stream;

          MemoryStream ms = new MemoryStream ();
          int c = stream.ReadByte ();
          switch (c) {
          case 0xFF:
              c = stream.ReadByte ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // Not a BOM and not "<?xml": treated as utf-8.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFF);
                  Stash (ms, c);
              }
              break;
          case 0xFE:
              c = stream.ReadByte ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // Not a BOM and not "<?xml": treated as utf-8.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFE);
                  Stash (ms, c);
              }
              break;
          case 0xEF:
              enc = Encoding.UTF8;
              c = stream.ReadByte ();
              if (c == 0xBB) {
                  c = stream.ReadByte ();
                  if (c != 0xBF) {
                      // Incomplete utf-8 BOM: give all three bytes back.
                      ms.WriteByte ((byte) 0xEF);
                      ms.WriteByte ((byte) 0xBB);
                      Stash (ms, c);
                  }
              } else {
                  ms.WriteByte ((byte) 0xEF);
                  Stash (ms, c);
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              ms.WriteByte ((byte) '<');
              DetectDeclaredEncoding (ms);
              break;
          default:
              enc = Encoding.UTF8;
              Stash (ms, c); // nothing is stashed for an empty source
              break;
          }

          buffer = ms.ToArray ();
          bufLength = buffer.Length;
          bufPos = 0;
      }

      // Parses what follows an initial '<'. When it is an XML or text
      // declaration carrying an "encoding" pseudo-attribute, enc is set from
      // it; otherwise enc is left untouched. All bytes read are appended to ms.
      private void DetectDeclaredEncoding (MemoryStream ms)
      {
          if (!MatchLiteral (ms, "?xml"))
              return;
          // A declaration needs whitespace after "<?xml"; anything else is
          // ordinary content such as "<?xml-stylesheet ...?>".
          if (!IsWhitespace (ReadAndStash (ms)))
              return;

          int c = SkipWhitespace (ms);
          if (c == 'v') {
              if (!MatchLiteral (ms, "ersion") || ReadQuotedValue (ms) == null)
                  throw new XmlException ("invalid xml declaration.");
              c = SkipWhitespace (ms);
          } else if (c != 'e') {
              // "version" may only be omitted when "encoding" follows directly
              // (text declaration of an external entity).
              throw new XmlException ("invalid xml declaration.");
          }

          if (c != 'e' || !MatchLiteral (ms, "ncoding"))
              return;

          string encodingName = ReadQuotedValue (ms);
          if (encodingName == null || !XmlConstructs.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          enc = Encoding.GetEncoding (encodingName);
      }

      // Reads an optional-whitespace '=' optional-whitespace quoted-value
      // sequence and returns the text between the quotes. Returns null when
      // the '=' or an opening quote (" or ') is missing, or when the source
      // ends before the closing quote. All bytes read are appended to ms.
      private string ReadQuotedValue (MemoryStream ms)
      {
          if (SkipWhitespace (ms) != '=')
              return null;
          int quoteChar = SkipWhitespace (ms);
          if (quoteChar != '"' && quoteChar != '\'')
              return null;

          int start = (int) ms.Position;
          for (;;) {
              int c = ReadAndStash (ms);
              if (c < 0)
                  return null;
              if (c == quoteChar)
                  break;
          }
          // The closing quote was stashed as well, hence the "- 1".
          int length = (int) ms.Position - start - 1;
          return Encoding.UTF8.GetString (ms.GetBuffer (), start, length);
      }

      // Reads bytes until one is not XML whitespace and returns that byte
      // (-1 at end of source). Every byte read, including the returned one,
      // is appended to ms.
      private int SkipWhitespace (MemoryStream ms)
      {
          int c;
          do {
              c = ReadAndStash (ms);
          } while (IsWhitespace (c));
          return c;
      }

      // Reads as many bytes as text has characters, stopping at the first
      // mismatch. Returns true when all of them matched. Every byte read,
      // including a mismatching one, is appended to ms.
      private bool MatchLiteral (MemoryStream ms, string text)
      {
          for (int i = 0; i < text.Length; i++) {
              if (ReadAndStash (ms) != text [i])
                  return false;
          }
          return true;
      }

      // Reads one byte from the source and appends it to ms.
      // Returns -1, appending nothing, at end of source.
      private int ReadAndStash (MemoryStream ms)
      {
          int c = stream.ReadByte ();
          Stash (ms, c);
          return c;
      }

      // Appends c to ms unless it is the end-of-source marker (-1).
      private static void Stash (MemoryStream ms, int c)
      {
          if (c >= 0)
              ms.WriteByte ((byte) c);
      }

      private static bool IsWhitespace (int c)
      {
          return c == ' ' || c == '\t' || c == '\r' || c == '\n';
      }

      // Encoding detected while opening the source, or null when the source
      // starts with '<' and declares none.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      public override bool CanRead {
          get { return stream.CanRead; }
      }

      // Always reported as false. Position and Seek still forward to the
      // wrapped stream for callers that use them regardless.
      public override bool CanSeek {
          get { return false; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      public override long Length {
          get { return stream.Length; }
      }

      // Position of the next byte to be returned, expressed as a position of
      // the wrapped stream. Setting it repositions the wrapped stream and drops
      // any look-ahead bytes; the encoding is not detected again.
      public override long Position {
          get {
              // The wrapped stream is ahead by the look-ahead not yet replayed.
              return stream.Position - (bufLength - bufPos);
          }
          set {
              stream.Position = value;
              bufPos = bufLength;
          }
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Copies up to count bytes into buffer at offset: pending look-ahead
      // bytes first, then data read from the wrapped stream. Returns the number
      // of bytes stored, 0 at end of source.
      public override int Read (byte[] buffer, int offset, int count)
      {
          if (buffer == null)
              throw new ArgumentNullException ("buffer");
          if (offset < 0)
              throw new ArgumentOutOfRangeException ("offset");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count");
          if (buffer.Length - offset < count)
              throw new ArgumentException ("offset and count exceed the length of buffer.");

          int replayed = Math.Min (count, bufLength - bufPos);
          if (replayed > 0) {
              Array.Copy (this.buffer, bufPos, buffer, offset, replayed);
              bufPos += replayed;
          }
          if (replayed == count)
              return replayed;
          return replayed + stream.Read (buffer, offset + replayed, count - replayed);
      }

      // Returns the next byte, or -1 at end of source.
      public override int ReadByte ()
      {
          if (bufLength > bufPos) {
              return buffer [bufPos++];
          }
          return stream.ReadByte ();
      }

      // Moves the wrapped stream to the requested place, drops any look-ahead
      // bytes and returns the new position (a position of the wrapped stream).
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          if (origin == SeekOrigin.Current) {
              // "Current" is relative to the next byte this stream would
              // return, which lies before the wrapped stream's own position
              // while look-ahead bytes are pending.
              offset -= bufLength - bufPos;
          }
          long position = stream.Seek (offset, origin);
          bufPos = bufLength;
          return position;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      // Closes the wrapped stream together with this one, so that a file
      // opened by the url constructor is released.
      public override void Close ()
      {
          stream.Close ();
          base.Close ();
      }

      #endregion
  }
  270. }