XmlInputStream.cs 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. //
  2. // System.Xml.XmlInputStream
  3. // encoding-specification-wise XML input stream and reader
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. using System.Xml;
  14. namespace Mono.Xml.Native
  15. {
  16. #region XmlStreamReader
  public class XmlStreamReader : StreamReader
  {
      // The encoding-sniffing stream this reader decodes from; kept so
      // that Close () can be forwarded to it.
      XmlInputStream source;

      // Common constructor: every public overload wraps its argument in
      // an XmlInputStream and ends up here.
      XmlStreamReader (XmlInputStream input)
          : base (input, ChooseEncoding (input))
      {
          source = input;
      }

      // Returns the encoding reported by the input stream, or UTF-8 when
      // the stream reports none (ActualEncoding is null).
      static Encoding ChooseEncoding (XmlInputStream input)
      {
          Encoding detected = input.ActualEncoding;
          if (detected == null)
              return Encoding.UTF8;
          return detected;
      }

      // Reads from an existing stream, treating it as a document entity.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input, true))
      {
      }

      // Reads from an existing stream; docent is passed through to
      // XmlInputStream unchanged.
      public XmlStreamReader (Stream input, bool docent)
          : this (new XmlInputStream (input, docent))
      {
      }

      // public XmlStreamReader (string url)
      //     : this (url, true)
      // {
      // }
      //
      // public XmlStreamReader (string url, bool docent)
      //     : this (new XmlInputStream (url, docent, null, null))
      // {
      // }

      // Opens the given URL through the resolver, treating it as a
      // document entity.
      public XmlStreamReader (string url, XmlResolver resolver, string baseURI)
          : this (url, true, resolver, baseURI)
      {
      }

      // Opens the given URL through the resolver; all arguments are
      // passed through to XmlInputStream unchanged.
      public XmlStreamReader (string url, bool docent, XmlResolver resolver,
          string baseURI)
          : this (new XmlInputStream (url, docent, resolver, baseURI))
      {
      }

      // Closes the wrapped XmlInputStream. Does not call the base
      // implementation.
      public override void Close ()
      {
          source.Close ();
      }

      // Lets the base reader release its resources first and then, on an
      // explicit dispose only, closes the wrapped input stream as well.
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (!disposing)
              return;
          Close ();
      }
  }
  63. #endregion
  // Read-only stream wrapper that inspects the first bytes of an XML
  // entity (byte order mark and/or XML declaration) to determine its
  // encoding. Bytes that were consumed during detection but belong to
  // the document are kept in an internal prefix buffer and are handed
  // out again before any further data is read from the wrapped stream.
  // A recognized byte order mark is skipped and never handed out.
  class XmlInputStream : Stream
  {
      // Detected encoding. Stays null when the input starts with '<' but
      // declares no encoding; callers are expected to fall back to UTF-8.
      Encoding enc;
      // The wrapped stream.
      Stream stream;
      // Prefix consumed during detection that still has to be served.
      byte[] buffer;
      int bufLength;
      // Read cursor inside the prefix; equals bufLength once it is used up.
      int bufPos;
      // Position of the wrapped stream right after detection finished.
      // Only recorded for seekable streams (0 otherwise).
      long streamBase;
      bool isDocumentEntity; // allow omitting "version" or not.

      // A new exception is created for every failure so that each throw
      // carries its own stack trace (the previous shared static instance
      // was reused across throws).
      private static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Resolves url (relative to baseURI when one is given) through the
      // resolver, opens it as a stream and detects its encoding. A null
      // resolver is replaced by a new XmlUrlResolver.
      // Throws XmlException when the resolver yields no Stream.
      public XmlInputStream (string url, bool docent, XmlResolver resolver, string baseURI)
      {
          this.isDocumentEntity = docent;
          // Use XmlResolver to resolve external entity.
#if true // #if REMOVE_IT_AFTER_URI_IMPLEMENTED
          if (resolver == null)
              resolver = new XmlUrlResolver ();
          Uri uri = resolver.ResolveUri (
              baseURI == null || baseURI == String.Empty ?
              null : new Uri (baseURI), url);
          Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
#else
          Stream s = new FileStream (url, FileMode.Open, FileAccess.Read);
#endif
          // "as Stream" yields null when the resolver returns nothing or
          // returns some other type; report that instead of failing
          // later with a NullReferenceException.
          if (s == null)
              throw new XmlException ("Cannot resolve external entity: " + url);
          Initialize (s);
      }

      // Wraps an existing stream, treating it as a document entity.
      public XmlInputStream (Stream stream)
          : this (stream, true)
      {
      }

      // Wraps an existing stream. When docent is true, an XML declaration
      // without a version is rejected.
      public XmlInputStream (Stream stream, bool docent)
      {
          this.isDocumentEntity = docent;
          Initialize (stream);
      }

      // Detects the encoding from the first bytes of the stream and fills
      // the prefix buffer with every consumed byte that is part of the
      // document. Throws ArgumentNullException for a null stream and
      // XmlException for a malformed declaration.
      private void Initialize (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          this.stream = stream;

          // Collects consumed bytes that must be served again later.
          MemoryStream ms = new MemoryStream ();
          int c = stream.ReadByte ();
          switch (c) {
          case 0xFF:
              c = stream.ReadByte ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // It doesn't start from "<?xml" then its encoding is utf-8
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFF);
                  WriteUnlessEof (ms, c);
              }
              break;
          case 0xFE:
              c = stream.ReadByte ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // It doesn't start from "<?xml" then its encoding is utf-8
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFE);
                  WriteUnlessEof (ms, c);
              }
              break;
          case 0xEF:
              enc = Encoding.UTF8;
              c = stream.ReadByte ();
              if (c == 0xBB) {
                  c = stream.ReadByte ();
                  if (c != 0xBF) {
                      // Not a complete utf-8 BOM: keep all three bytes.
                      ms.WriteByte ((byte) 0xEF);
                      ms.WriteByte ((byte) 0xBB);
                      WriteUnlessEof (ms, c);
                  }
              } else {
                  // Keep both bytes; the second one was already consumed.
                  ms.WriteByte ((byte) 0xEF);
                  WriteUnlessEof (ms, c);
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              ms.WriteByte ((byte) '<');
              ReadXmlDeclaration (ms);
              break;
          case -1:
              // Empty stream: nothing to buffer.
              enc = Encoding.UTF8;
              break;
          default:
              enc = Encoding.UTF8;
              ms.WriteByte ((byte) c);
              break;
          }

          // Every branch ends here so that saved bytes are never lost.
          buffer = ms.ToArray ();
          bufLength = buffer.Length;
          bufPos = 0;
          if (stream.CanSeek)
              streamBase = stream.Position;
      }

      // Called after the leading '<' has been consumed. Copies every byte
      // it reads into ms. If the input continues with an XML declaration
      // that carries an encoding pseudo-attribute, sets enc from it;
      // otherwise leaves enc untouched.
      private void ReadXmlDeclaration (MemoryStream ms)
      {
          byte[] scratch = new byte [7];

          int size = ReadFully (scratch, 4);
          ms.Write (scratch, 0, size);
          if (size != 4 || Encoding.ASCII.GetString (scratch, 0, 4) != "?xml")
              return;

          // "<?xml" must be followed by whitespace to be a declaration;
          // otherwise it is some other processing instruction whose
          // target merely starts with "xml" (e.g. "<?xml-stylesheet").
          long lengthBefore = ms.Length;
          int c = SkipWhitespace (ms);
          if (ms.Length == lengthBefore) {
              WriteUnlessEof (ms, c);
              return;
          }

          // version. It is optional here.
          if (c == 'v') {
              ms.WriteByte ((byte) 'v');
              CopyThroughQuotedValue (ms);
              c = SkipWhitespace (ms);
          } else if (isDocumentEntity) {
              throw new XmlException ("invalid xml declaration.");
          }

          if (c != 'e') {
              WriteUnlessEof (ms, c);
              return;
          }
          ms.WriteByte ((byte) 'e');
          size = ReadFully (scratch, 7);
          ms.Write (scratch, 0, size);
          if (size != 7 || Encoding.ASCII.GetString (scratch, 0, 7) != "ncoding")
              return;

          c = SkipWhitespace (ms);
          if (c != '=')
              throw CreateEncodingException ();
          ms.WriteByte ((byte) '=');

          int quoteChar = SkipWhitespace (ms);
          if (quoteChar != '"' && quoteChar != '\'')
              throw CreateEncodingException ();
          ms.WriteByte ((byte) quoteChar);

          int start = (int) ms.Position;
          for (;;) {
              c = stream.ReadByte ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              ms.WriteByte ((byte) c);
          }
          string encodingName = Encoding.UTF8.GetString (
              ms.GetBuffer (), start, (int) ms.Position - start);
          if (!XmlConstructs.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          ms.WriteByte ((byte) quoteChar);
          enc = Encoding.GetEncoding (encodingName);
      }

      // Copies bytes from the stream into ms up to and including the
      // closing quote of the next quoted value (either ' or "), or until
      // end of stream. Used for the version value, so any version
      // literal is accepted, not only one containing '0'.
      private void CopyThroughQuotedValue (MemoryStream ms)
      {
          int quote = -1;
          for (;;) {
              int c = stream.ReadByte ();
              if (c < 0)
                  return;
              ms.WriteByte ((byte) c);
              if (quote < 0) {
                  if (c == '"' || c == '\'')
                      quote = c;
              } else if (c == quote) {
                  return;
              }
          }
      }

      // Reads up to count bytes into target, repeating the read until the
      // requested amount arrived or the stream ended. Returns the number
      // of bytes actually stored.
      private int ReadFully (byte[] target, int count)
      {
          int total = 0;
          while (total < count) {
              int n = stream.Read (target, total, count - total);
              if (n <= 0)
                  break;
              total += n;
          }
          return total;
      }

      // Appends c to ms unless it is the end-of-stream marker (-1), which
      // would otherwise be truncated to a bogus 0xFF byte.
      private static void WriteUnlessEof (MemoryStream ms, int c)
      {
          if (c >= 0)
              ms.WriteByte ((byte) c);
      }

      // skips whitespace and returns misc char that was read from stream
      // (-1 at end of stream). Skipped whitespace is copied into ms.
      private int SkipWhitespace (MemoryStream ms) // ms may be null
      {
          for (;;) {
              int c = stream.ReadByte ();
              switch (c) {
              case '\r':
              case '\n':
              case '\t':
              case ' ':
                  if (ms != null)
                      ms.WriteByte ((byte) c);
                  continue;
              default:
                  return c;
              }
          }
      }

      // The detected encoding, or null when none could be determined
      // from the start of the stream.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides
      public override bool CanRead {
          get { return stream.CanRead; }
      }

      // Always false. Position, Length and Seek below still work on a
      // best-effort basis when the wrapped stream is seekable.
      public override bool CanSeek {
          get { return false; } //stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      // Number of bytes this stream can hand out in total: the buffered
      // prefix plus whatever the wrapped stream holds after the point
      // where detection stopped.
      public override long Length {
          get {
              return bufLength + (stream.Length - streamBase);
          }
      }

      // Position counted in bytes handed out by this stream: 0 is the
      // first byte of the buffered prefix, bufLength is the first byte
      // that comes directly from the wrapped stream.
      public override long Position {
          get {
              return bufPos + (stream.Position - streamBase);
          }
          set {
              if (value < 0)
                  throw new ArgumentOutOfRangeException ("value");
              if (value < bufLength) {
                  // Rewind the wrapped stream as well so that reading
                  // continues seamlessly after the prefix. A wrapped
                  // stream that cannot seek is left where it is.
                  if (stream.CanSeek)
                      stream.Position = streamBase;
                  bufPos = (int) value;
              } else {
                  stream.Position = streamBase + (value - bufLength);
                  bufPos = bufLength;
              }
          }
      }

      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Serves bytes from the buffered prefix first and reads the
      // remainder of the request from the wrapped stream.
      public override int Read (byte[] buffer, int offset, int count)
      {
          int available = bufLength - bufPos;
          if (count <= available) { // all from buffer
              Array.Copy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }
          if (available > 0) {
              Array.Copy (this.buffer, bufPos, buffer, offset, available);
              bufPos = bufLength;
          }
          return available +
              stream.Read (buffer, offset + available, count - available);
      }

      public override int ReadByte ()
      {
          if (bufPos < bufLength)
              return buffer [bufPos++];
          return stream.ReadByte ();
      }

      // Moves the read cursor and returns the new position, using the
      // same coordinates as Position. Seeking that stays inside the
      // buffered prefix does not need a seekable wrapped stream.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          long target;
          switch (origin) {
          case SeekOrigin.Begin:
              target = offset;
              break;
          case SeekOrigin.Current:
              // Inside the prefix the position is simply bufPos; avoid
              // asking the wrapped stream, which may not be seekable.
              target = (bufPos < bufLength ? bufPos : Position) + offset;
              break;
          case SeekOrigin.End:
              target = Length + offset;
              break;
          default:
              throw new ArgumentException ("Invalid seek origin.", "origin");
          }
          Position = target;
          return target;
      }

      // Forwarded to the wrapped stream unchanged.
      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }
      #endregion
  }
  322. }