XmlInputStream.cs 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. //
  2. // System.Xml.XmlInputStream
  3. // XML input stream and reader that honor the encoding given by a byte order mark or the XML declaration
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. using System.Xml;
  14. namespace Mono.Xml.Native
  15. {
  16. #region XmlStreamReader
  /// <summary>
  /// A <see cref="StreamReader"/> layered over an <c>XmlInputStream</c>. The
  /// reader decodes with the encoding reported by the input stream and uses
  /// UTF-8 when the input stream reports none.
  /// </summary>
  public class XmlStreamReader : StreamReader
  {
      // The wrapped input stream; closed by Close ().
      XmlInputStream input;

      // Every public constructor funnels into this one.
      XmlStreamReader (XmlInputStream input)
          : base (input, SelectEncoding (input))
      {
          this.input = input;
      }

      /// <summary>Wraps <paramref name="input"/>, treating it as a document entity.</summary>
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input, true))
      {
      }

      /// <summary>Wraps <paramref name="input"/>.</summary>
      /// <param name="input">The stream to read from.</param>
      /// <param name="docent">Passed through to the underlying <c>XmlInputStream</c>.</param>
      public XmlStreamReader (Stream input, bool docent)
          : this (new XmlInputStream (input, docent))
      {
      }

      /// <summary>
      /// Opens <paramref name="url"/> through <paramref name="resolver"/>,
      /// treating it as a document entity.
      /// </summary>
      public XmlStreamReader (string url, XmlResolver resolver, string baseURI)
          : this (url, true, resolver, baseURI)
      {
      }

      /// <summary>Opens <paramref name="url"/> through <paramref name="resolver"/>.</summary>
      /// <param name="url">Location of the entity to read.</param>
      /// <param name="docent">Passed through to the underlying <c>XmlInputStream</c>.</param>
      /// <param name="resolver">Resolver handed to the underlying <c>XmlInputStream</c>.</param>
      /// <param name="baseURI">Base URI handed to the underlying <c>XmlInputStream</c>.</param>
      public XmlStreamReader (string url, bool docent, XmlResolver resolver,
          string baseURI)
          : this (new XmlInputStream (url, docent, resolver, baseURI))
      {
      }

      // Returns the encoding reported by the input stream, or UTF-8 when it reports none.
      static Encoding SelectEncoding (XmlInputStream source)
      {
          Encoding detected = source.ActualEncoding;
          if (detected == null)
              return Encoding.UTF8;
          return detected;
      }

      /// <summary>Closes the wrapped input stream.</summary>
      public override void Close ()
      {
          input.Close ();
      }

      /// <summary>
      /// Runs the base class disposal and then, for an explicit dispose,
      /// closes the wrapped input stream.
      /// </summary>
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (!disposing)
              return;
          Close ();
      }
  }
  63. #endregion
  /// <summary>
  /// Read-only stream wrapper that inspects the first bytes of an XML entity
  /// to determine its encoding (byte order mark or the <c>encoding</c>
  /// pseudo-attribute of the XML declaration). A recognised byte order mark
  /// is consumed; every other byte read during the inspection is kept in a
  /// look-ahead buffer and replayed before the rest of the underlying stream.
  /// </summary>
  class XmlInputStream : Stream
  {
      // Encoding found during inspection; stays null when the entity starts
      // with '<' but declares no encoding.
      Encoding enc;
      // The wrapped stream.
      Stream stream;
      // Look-ahead bytes consumed during inspection that readers must still see.
      byte[] buffer = new byte[0];
      // Number of valid bytes in buffer, and index of the next one to hand out.
      int bufLength;
      int bufPos;
      bool isDocumentEntity; // allow omitting "version" or not.

      /// <summary>
      /// Resolves <paramref name="url"/> against <paramref name="baseURI"/> and
      /// opens the resulting entity.
      /// </summary>
      /// <param name="url">Location of the entity.</param>
      /// <param name="docent">Whether the entity is the document entity.</param>
      /// <param name="resolver">Resolver to use; an <see cref="XmlUrlResolver"/> is created when null.</param>
      /// <param name="baseURI">Base URI for resolution; null or empty means none.</param>
      /// <exception cref="XmlException">
      /// The resolver did not return a stream, or the encoding declaration is invalid.
      /// </exception>
      public XmlInputStream (string url, bool docent, XmlResolver resolver, string baseURI)
      {
          this.isDocumentEntity = docent;
          // Use XmlResolver to resolve external entity.
          if (resolver == null)
              resolver = new XmlUrlResolver ();
          Uri baseUri = null;
          if (baseURI != null && baseURI.Length > 0)
              baseUri = new Uri (baseURI);
          Uri uri = resolver.ResolveUri (baseUri, url);
          Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
          if (s == null)
              throw new XmlException ("Could not resolve '" + url + "' to a stream.");
          try {
              Initialize (s);
          } catch {
              // This constructor opened the stream, so it must not leak on failure.
              s.Close ();
              throw;
          }
      }

      /// <summary>Wraps <paramref name="stream"/> as a document entity.</summary>
      public XmlInputStream (Stream stream)
          : this (stream, true)
      {
      }

      /// <summary>Wraps <paramref name="stream"/>.</summary>
      /// <param name="stream">The stream to inspect and read from.</param>
      /// <param name="docent">Whether the entity is the document entity.</param>
      /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
      /// <exception cref="XmlException">The encoding declaration is invalid.</exception>
      public XmlInputStream (Stream stream, bool docent)
      {
          this.isDocumentEntity = docent;
          Initialize (stream);
      }

      // Created per throw so that each exception carries its own stack trace.
      private static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Appends b to ms unless it is the end-of-stream marker (-1).
      private static void AppendByte (MemoryStream ms, int b)
      {
          if (b >= 0)
              ms.WriteByte ((byte) b);
      }

      // Reads up to count bytes into dest, retrying short reads; returns the
      // number of bytes obtained (less than count only at end of stream).
      private int ReadBlock (byte[] dest, int count)
      {
          int total = 0;
          while (total < count) {
              int n = stream.Read (dest, total, count - total);
              if (n <= 0)
                  break;
              total += n;
          }
          return total;
      }

      // Inspects the head of the stream, sets enc, and stores every consumed
      // byte that is not part of a byte order mark in the look-ahead buffer.
      private void Initialize (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          this.stream = stream;

          MemoryStream ms = new MemoryStream ();
          int c = stream.ReadByte ();
          switch (c) {
          case 0xFF:
              c = stream.ReadByte ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // It doesn't start from "<?xml" then its encoding is utf-8
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFF);
                  AppendByte (ms, c);
              }
              break;
          case 0xFE:
              c = stream.ReadByte ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // It doesn't start from "<?xml" then its encoding is utf-8
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFE);
                  AppendByte (ms, c);
              }
              break;
          case 0xEF:
              enc = Encoding.UTF8;
              c = stream.ReadByte ();
              if (c == 0xBB) {
                  c = stream.ReadByte ();
                  if (c != 0xBF) {
                      // Not a UTF-8 BOM after all: replay what was consumed.
                      ms.WriteByte ((byte) 0xEF);
                      ms.WriteByte ((byte) 0xBB);
                      AppendByte (ms, c);
                  }
              } else {
                  ms.WriteByte ((byte) 0xEF);
                  AppendByte (ms, c);
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              ms.WriteByte ((byte) '<');
              ReadXmlDeclaration (ms);
              break;
          default:
              enc = Encoding.UTF8;
              // c is -1 for an empty stream; nothing is buffered in that case.
              AppendByte (ms, c);
              break;
          }

          buffer = ms.ToArray ();
          bufLength = buffer.Length;
          bufPos = 0;
      }

      // Called after a leading '<' has been consumed. Looks for "?xml", an
      // optional version pseudo-attribute and an encoding pseudo-attribute,
      // copying every consumed byte to ms. Sets enc when an encoding is declared.
      private void ReadXmlDeclaration (MemoryStream ms)
      {
          byte[] probe = new byte [7];
          int size = ReadBlock (probe, 4);
          ms.Write (probe, 0, size);
          if (size != 4 || Encoding.ASCII.GetString (probe, 0, 4) != "?xml")
              return;

          int c = SkipWhitespace (ms);
          // version. It is optional here.
          if (c == 'v') {
              ms.WriteByte ((byte) 'v');
              if (!CopyThroughQuotedValue (ms))
                  return;
              c = SkipWhitespace (ms);
          }

          if (c != 'e') {
              AppendByte (ms, c);
              return;
          }

          ms.WriteByte ((byte) 'e');
          size = ReadBlock (probe, 7);
          ms.Write (probe, 0, size);
          if (size != 7 || Encoding.ASCII.GetString (probe, 0, 7) != "ncoding")
              return;

          c = SkipWhitespace (ms);
          if (c != '=')
              throw CreateEncodingException ();
          ms.WriteByte ((byte) '=');

          c = SkipWhitespace (ms);
          if (c != '"' && c != '\'')
              throw CreateEncodingException ();
          int quoteChar = c;
          ms.WriteByte ((byte) quoteChar);

          int start = (int) ms.Position;
          while (true) {
              c = stream.ReadByte ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              ms.WriteByte ((byte) c);
          }
          string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int) ms.Position - start);
          if (!XmlChar.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          ms.WriteByte ((byte) quoteChar);
          enc = Encoding.GetEncoding (encodingName);
      }

      // Copies bytes to ms up to and including the closing quote of the next
      // quoted value. Returns false when the stream ends, or a '>' is met
      // before any opening quote, i.e. no complete value was found.
      private bool CopyThroughQuotedValue (MemoryStream ms)
      {
          int quote = -1;
          while (true) {
              int c = stream.ReadByte ();
              if (c < 0)
                  return false;
              ms.WriteByte ((byte) c);
              if (quote < 0) {
                  if (c == '"' || c == '\'')
                      quote = c;
                  else if (c == '>')
                      return false;
              } else if (c == quote) {
                  return true;
              }
          }
      }

      // skips whitespace and returns misc char that was read from stream
      // (-1 at end of stream). Skipped whitespace is copied to ms when given.
      private int SkipWhitespace (MemoryStream ms) // ms may be null
      {
          while (true) {
              int c = stream.ReadByte ();
              switch (c) {
              case '\r':
              case '\n':
              case '\t':
              case ' ':
                  if (ms != null)
                      ms.WriteByte ((byte) c);
                  break;
              default:
                  return c;
              }
          }
      }

      /// <summary>
      /// The encoding found during inspection, or null when the entity starts
      /// with '&lt;' and declares no encoding.
      /// </summary>
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      /// <summary>True while look-ahead bytes remain or the wrapped stream is readable.</summary>
      public override bool CanRead {
          get {
              if (bufLength > bufPos)
                  return true;
              return stream.CanRead;
          }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      /// <summary>Length of the wrapped stream.</summary>
      public override long Length {
          get { return stream.Length; }
      }

      /// <summary>
      /// Position of the next byte to be read, expressed in the wrapped
      /// stream's coordinates (unread look-ahead bytes are subtracted).
      /// </summary>
      public override long Position {
          get {
              return stream.Position - bufLength + bufPos;
          }
          set {
              Seek (value, SeekOrigin.Begin);
          }
      }

      /// <summary>Closes the wrapped stream.</summary>
      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      /// <summary>
      /// Reads up to <paramref name="count"/> bytes, serving look-ahead bytes
      /// first and then reading from the wrapped stream.
      /// </summary>
      /// <returns>The number of bytes placed in <paramref name="buffer"/>.</returns>
      public override int Read (byte[] buffer, int offset, int count)
      {
          if (buffer == null)
              throw new ArgumentNullException ("buffer");
          if (offset < 0)
              throw new ArgumentOutOfRangeException ("offset");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count");
          if (buffer.Length - offset < count)
              throw new ArgumentException ("offset and count exceed the buffer length.");

          int bufRest = bufLength - bufPos;
          if (count <= bufRest) { // all from buffer
              Array.Copy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }

          if (bufRest > 0) {
              Array.Copy (this.buffer, bufPos, buffer, offset, bufRest);
              bufPos += bufRest;
          }
          return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
      }

      /// <summary>Returns the next byte, or -1 at end of stream.</summary>
      public override int ReadByte ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];
          return stream.ReadByte ();
      }

      /// <summary>
      /// Moves the read position. Targets are expressed in the wrapped
      /// stream's coordinates. A target that falls inside the unread
      /// look-ahead region only moves the buffer index; any other target
      /// discards the look-ahead bytes and seeks the wrapped stream.
      /// </summary>
      /// <returns>The new position.</returns>
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          long target;
          switch (origin) {
          case SeekOrigin.Begin:
              target = offset;
              break;
          case SeekOrigin.Current:
              target = Position + offset;
              break;
          case SeekOrigin.End:
              target = stream.Length + offset;
              break;
          default:
              throw new ArgumentException ("Invalid seek origin.", "origin");
          }

          if (bufPos < bufLength) {
              // While look-ahead bytes remain the wrapped stream has not been
              // read since initialisation, so it sits right after the buffer.
              long bufEnd = stream.Position;
              long bufStart = bufEnd - bufLength;
              if (target >= bufStart && target <= bufEnd) {
                  bufPos = (int) (target - bufStart);
                  return target;
              }
          }

          long result = stream.Seek (target, SeekOrigin.Begin);
          // The look-ahead bytes no longer describe what follows; drop them.
          bufPos = 0;
          bufLength = 0;
          return result;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      /// <summary>Not supported; this stream is read-only.</summary>
      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      #endregion
  }
  321. }