XmlInputStream.cs 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. //
  2. // System.Xml.XmlInputStream
  3. // encoding-specification-wise XML input stream and reader
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. using System.Xml;
  14. namespace Mono.Xml.Native
  15. {
  16. #region XmlStreamReader
  // A StreamReader over an XmlInputStream. Characters are decoded with the
  // encoding the XmlInputStream reports through ActualEncoding, or with UTF-8
  // when it reports none.
  public class XmlStreamReader : StreamReader
  {
      // The encoding-sniffing stream handed to the base reader; kept so that
      // it can be closed explicitly when the reader is disposed.
      XmlInputStream input;

      // Shared tail of all public constructors.
      XmlStreamReader (XmlInputStream input)
          : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
      {
          this.input = input;
      }

      // Reads the stream as a document entity.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input, true))
      {
      }

      // docent: true when the stream is a document entity, false otherwise
      // (passed through to XmlInputStream).
      public XmlStreamReader (Stream input, bool docent)
          : this (new XmlInputStream (input, docent))
      {
      }

      // Opens the given url as a document entity.
      public XmlStreamReader (string url)
          : this (url, true)
      {
      }

      // docent: true when the url names a document entity, false otherwise
      // (passed through to XmlInputStream).
      public XmlStreamReader (string url, bool docent)
          : this (new XmlInputStream (url, docent))
      {
      }

      // Closes the reader. Goes through the base implementation so that the
      // StreamReader itself is disposed as well; the input stream is then
      // closed by Dispose (bool) below.
      public override void Close ()
      {
          base.Close ();
      }

      // Releases the base reader and, when disposing, closes the input stream.
      // The stream is closed directly rather than through the virtual Close (),
      // because Close () itself ends up in this method.
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (disposing && input != null) {
              input.Close ();
          }
      }
  }
  53. #endregion
  54. class XmlInputStream : Stream
  55. {
  // Encoding chosen while the head of the stream was examined. Stays null
  // when neither a byte order mark nor an encoding declaration settled it.
  private Encoding enc;

  // The wrapped stream that all data ultimately comes from.
  private Stream stream;

  // Look-ahead storage: bytes held here are handed out by Read/ReadByte
  // before the wrapped stream is read again.
  private byte[] buffer = new byte[256];

  // Number of valid bytes in buffer.
  private int bufLength;

  // Index of the next unread byte in buffer.
  private int bufPos;

  // allow omitting "version" or not: when true, an XML declaration that does
  // not start with "version" is rejected.
  private bool isDocumentEntity;

  // Single shared exception instance thrown for every encoding declaration error.
  private static XmlException encodingException = new XmlException ("invalid encoding specification.");
  // Opens the resource named by url and treats it as a document entity
  // (same as passing docent = true to the two-argument overload).
  public XmlInputStream (string url) : this (url, true)
  {
  }
  // Opens the resource named by url and examines its head for an encoding.
  //
  // url:    with NetworkEnabled defined, anything System.Uri accepts is
  //         downloaded into memory and everything else is opened as a local
  //         file; without it, url is always opened as a local file.
  // docent: true when the resource is a document entity.
  //
  // The stream opened here is closed again if the examination fails, since
  // the caller never receives a reference to it.
  public XmlInputStream (string url, bool docent)
  {
      this.isDocumentEntity = docent;
      Stream source;
  #if NetworkEnabled
      try {
          // Only used to find out whether url parses as a URI.
          new Uri (url);
          System.Net.WebClient client = new System.Net.WebClient ();
          try {
              source = new MemoryStream (client.DownloadData (url));
          } finally {
              client.Dispose ();
          }
      } catch (UriFormatException) {
          source = new FileStream (url, FileMode.Open, FileAccess.Read);
      }
  #else
      source = new FileStream (url, FileMode.Open, FileAccess.Read);
  #endif
      try {
          Initialize (source);
      } catch {
          // Do not leak the handle when the head of the data is rejected.
          source.Close ();
          throw;
      }
  }
  // Wraps stream and treats it as a document entity
  // (same as passing docent = true to the two-argument overload).
  public XmlInputStream (Stream stream) : this (stream, true)
  {
  }
  // Wraps stream and examines its head for an encoding.
  //
  // stream: the data source; must not be null.
  // docent: true when the stream is a document entity.
  //
  // Throws ArgumentNullException when stream is null, instead of failing
  // later with a NullReferenceException on the first read.
  public XmlInputStream (Stream stream, bool docent)
  {
      if (stream == null)
          throw new ArgumentNullException ("stream");
      this.isDocumentEntity = docent;
      Initialize (stream);
  }
  // Examines the head of the stream to choose an encoding, and queues every
  // consumed byte that is still document content so that Read/ReadByte hand
  // it out before the rest of the stream.
  //
  //   FF FE      -> UTF-16 little endian (the byte order mark is dropped)
  //   FE FF      -> UTF-16 big endian    (the byte order mark is dropped)
  //   EF BB BF   -> UTF-8                (the byte order mark is dropped)
  //   FF / FE / EF followed by anything else -> UTF-8, bytes are kept
  //   '<'        -> an XML declaration, if present, is scanned for an
  //                 encoding name; enc is left unset when it names none
  //   otherwise  -> UTF-8
  //
  // Throws XmlException when a document entity's declaration does not start
  // with "version", or when the encoding pseudo-attribute is malformed or
  // names an encoding rejected by XmlConstructs.IsValidIANAEncoding.
  private void Initialize (Stream stream)
  {
      this.stream = stream;
      // Collects the look-ahead; it becomes the replay buffer at the end.
      MemoryStream ms = new MemoryStream ();
      int c = stream.ReadByte ();
      switch (c) {
      case 0xFF:
          c = stream.ReadByte ();
          if (c == 0xFE) {
              // BOM-ed little endian utf-16
              enc = Encoding.Unicode;
          } else {
              // It doesn't start from "<?xml" then its encoding is utf-8
              enc = Encoding.UTF8;
              ms.WriteByte ((byte)0xFF);
              WriteUnlessEof (ms, c);
          }
          break;
      case 0xFE:
          c = stream.ReadByte ();
          if (c == 0xFF) {
              // BOM-ed big endian utf-16
              enc = Encoding.BigEndianUnicode;
          } else {
              // It doesn't start from "<?xml" then its encoding is utf-8
              enc = Encoding.UTF8;
              ms.WriteByte ((byte)0xFE);
              WriteUnlessEof (ms, c);
          }
          break;
      case 0xEF:
          enc = Encoding.UTF8;
          c = stream.ReadByte ();
          if (c == 0xBB) {
              c = stream.ReadByte ();
              if (c != 0xBF) {
                  // Not a complete UTF-8 byte order mark: keep what was read.
                  ms.WriteByte ((byte)0xEF);
                  ms.WriteByte ((byte)0xBB);
                  WriteUnlessEof (ms, c);
              }
          } else {
              ms.WriteByte ((byte)0xEF);
              WriteUnlessEof (ms, c);
          }
          break;
      case '<':
          // try to get encoding name from XMLDecl.
          ms.WriteByte ((byte)'<');
          ReadXmlDeclaration (ms);
          break;
      default:
          enc = Encoding.UTF8;
          // c is -1 for an empty stream; nothing is queued in that case.
          WriteUnlessEof (ms, c);
          break;
      }
      buffer = ms.ToArray ();
      bufLength = buffer.Length;
      bufPos = 0;
  }

  // Scans what follows a leading '<' for "?xml ... encoding='name'", copying
  // every byte it consumes to ms. Sets enc only when an encoding name is found.
  private void ReadXmlDeclaration (MemoryStream ms)
  {
      byte[] scratch = new byte [7];

      int size = ReadBlock (scratch, 0, 4);
      ms.Write (scratch, 0, size);
      if (size < 4 || Encoding.ASCII.GetString (scratch, 0, 4) != "?xml")
          return;

      long mark = ms.Position;
      int c = SkipWhitespace (ms);
      if (ms.Position == mark) {
          // "<?xml" directly followed by something other than whitespace,
          // e.g. "<?xml-stylesheet": not an XML declaration.
          WriteUnlessEof (ms, c);
          return;
      }

      // version. It is optional here.
      if (c == 'v') {
          ms.WriteByte ((byte)'v');
          if (!CopyQuotedValue (ms))
              return;
          c = SkipWhitespace (ms);
      } else if (isDocumentEntity) {
          throw new XmlException ("invalid xml declaration.");
      }

      if (c != 'e') {
          WriteUnlessEof (ms, c);
          return;
      }
      ms.WriteByte ((byte)'e');
      size = ReadBlock (scratch, 0, 7);
      ms.Write (scratch, 0, size);
      if (size < 7 || Encoding.ASCII.GetString (scratch, 0, 7) != "ncoding")
          return;

      c = SkipWhitespace (ms);
      if (c != '=')
          throw encodingException;
      ms.WriteByte ((byte)'=');
      c = SkipWhitespace (ms);
      if (c != '"' && c != '\'')
          throw encodingException;
      int quoteChar = c;
      ms.WriteByte ((byte)c);

      int start = (int)ms.Position;
      while (true) {
          c = stream.ReadByte ();
          if (c == quoteChar)
              break;
          if (c < 0)
              throw encodingException;
          ms.WriteByte ((byte)c);
      }
      string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int)ms.Position - start);
      if (!XmlConstructs.IsValidIANAEncoding (encodingName))
          throw encodingException;
      ms.WriteByte ((byte)quoteChar);
      enc = Encoding.GetEncoding (encodingName);
  }

  // Copies the remainder of a pseudo-attribute (rest of its name, '=', and
  // the quoted value including both quotes) from the stream to ms.
  // Returns false when the stream ends, or when '>' shows up before an
  // opening quote, i.e. when no complete quoted value was found.
  private bool CopyQuotedValue (MemoryStream ms)
  {
      int quote = -1;
      while (true) {
          int c = stream.ReadByte ();
          if (c < 0)
              return false;
          ms.WriteByte ((byte)c);
          if (quote < 0) {
              if (c == '"' || c == '\'')
                  quote = c;
              else if (c == '>')
                  return false;
          } else if (c == quote) {
              return true;
          }
      }
  }

  // Reads up to count bytes from the wrapped stream into target, repeating
  // the read until count bytes arrived or the stream ended.
  // Returns the number of bytes actually stored.
  private int ReadBlock (byte[] target, int offset, int count)
  {
      int total = 0;
      while (total < count) {
          int n = stream.Read (target, offset + total, count - total);
          if (n <= 0)
              break;
          total += n;
      }
      return total;
  }

  // Appends c to ms unless c is the end-of-stream marker (-1) of ReadByte.
  private static void WriteUnlessEof (MemoryStream ms, int c)
  {
      if (c >= 0)
          ms.WriteByte ((byte)c);
  }
  // Reads bytes from the wrapped stream for as long as they are space, tab,
  // carriage return or line feed, and returns the first value that is not
  // (whatever stream.ReadByte () produced, so possibly its end marker).
  // Each whitespace byte that is skipped is also appended to ms.
  private int SkipWhitespace (MemoryStream ms) // ms may be null
  {
      // The counter turns negative once it overflows, which ends the loop
      // and leads to the exception below (guard against an endless loop).
      for (int guard = 0; guard >= 0; guard++) {
          int b = stream.ReadByte ();
          bool isSpace = b == ' ' || b == '\t' || b == '\n' || b == '\r';
          if (!isSpace)
              return b;
          if (ms != null)
              ms.WriteByte ((byte)b);
      }
      throw new InvalidOperationException ();
  }
  // The encoding stored in enc by Initialize; null when none was stored.
  public Encoding ActualEncoding {
      get {
          return this.enc;
      }
  }
  228. #region Public Overrides
  // Readable exactly when the wrapped stream is.
  public override bool CanRead {
      get {
          return this.stream.CanRead;
      }
  }
  // Always reports false; the wrapped stream's own CanSeek is deliberately
  // not consulted.
  public override bool CanSeek {
      get {
          return false;
      }
  }
  // This stream is read-only, so writing is never available.
  public override bool CanWrite {
      get {
          return false;
      }
  }
  // Reports the length of the wrapped stream unchanged.
  public override long Length {
      get {
          long wrappedLength = this.stream.Length;
          return wrappedLength;
      }
  }
  // Position of the next byte this stream will return, expressed as an
  // offset in the wrapped stream (the same coordinates Seek passes to the
  // wrapped stream for SeekOrigin.Begin and SeekOrigin.End).
  public override long Position {
      get {
          // Bytes still waiting in the look-ahead buffer have already been
          // consumed from the wrapped stream, so the next byte to be
          // returned lies that many bytes behind the wrapped position.
          return stream.Position - (bufLength - bufPos);
      }
      set {
          stream.Position = value;
          // The look-ahead no longer matches the new location; mark it as
          // used up so that reads continue from the wrapped stream.
          bufPos = bufLength;
      }
  }
  // Closes the wrapped stream.
  public override void Close ()
  {
      this.stream.Close ();
  }
  // Forwards the flush request to the wrapped stream.
  public override void Flush ()
  {
      this.stream.Flush ();
  }
  // Copies up to count bytes into buffer starting at offset. Look-ahead
  // bytes are delivered first; if they do not cover the request, the
  // remainder is read from the wrapped stream.
  // Returns the number of bytes stored (look-ahead bytes plus whatever the
  // wrapped stream's Read reported).
  public override int Read (byte[] buffer, int offset, int count)
  {
      int available = bufLength - bufPos;

      // The look-ahead alone satisfies the request.
      if (count <= available) {
          Array.Copy (this.buffer, bufPos, buffer, offset, count);
          bufPos += count;
          return count;
      }

      // Drain what is left of the look-ahead, then continue with the stream.
      if (available > 0) {
          Array.Copy (this.buffer, bufPos, buffer, offset, available);
          bufPos += available;
      }
      int fromStream = stream.Read (buffer, offset + available, count - available);
      return available + fromStream;
  }
  // Returns the next look-ahead byte while any remain, otherwise whatever
  // the wrapped stream's ReadByte returns.
  public override int ReadByte ()
  {
      if (bufPos >= bufLength)
          return stream.ReadByte ();

      int value = buffer [bufPos];
      bufPos++;
      return value;
  }
  // Moves the read position and returns the new position as an offset in
  // the wrapped stream.
  //
  // For SeekOrigin.Current the offset is relative to the next byte this
  // stream would return. A forward move that stays inside the unread
  // look-ahead only advances the look-ahead index; every other move is
  // delegated to the wrapped stream, after which the look-ahead is discarded
  // because it no longer matches the new location.
  public override long Seek (long offset, System.IO.SeekOrigin origin)
  {
      int bufRest = bufLength - bufPos;
      if (origin == SeekOrigin.Current) {
          if (offset >= 0 && offset <= bufRest) {
              bufPos += (int)offset;
              return stream.Position - (bufLength - bufPos);
          }
          // The wrapped stream is bufRest bytes ahead of the logical
          // position, so compensate before seeking relative to it.
          offset -= bufRest;
      }
      long result = stream.Seek (offset, origin);
      bufPos = bufLength;
      return result;
  }
  // Passes the requested length straight to the wrapped stream.
  public override void SetLength (long value)
  {
      this.stream.SetLength (value);
  }
  // Writing is not available on this stream: always throws
  // NotSupportedException, whatever the arguments.
  public override void Write (byte[] buffer, int offset, int count)
  {
      throw new NotSupportedException ();
  }
  306. #endregion
  307. }
  308. }