XmlInputStream.cs 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. //
  2. // System.Xml.XmlInputStream
  3. // encoding-specification-wise XML input stream and reader
  4. //
  5. // Author:
  6. // Atsushi Enomoto ([email protected])
  7. //
  8. // (C)2003 Atsushi Enomoto
  9. //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. using System.Xml;
  14. namespace Mono.Xml.Native
  15. {
  16. #region XmlStreamReader
  // A StreamReader over an XmlInputStream. The text encoding is the one the
  // XmlInputStream detected from the byte order mark or the XML declaration;
  // UTF-8 is used when nothing was detected.
  public class XmlStreamReader : StreamReader
  {
      // The encoding-sniffing stream this reader decodes from.
      XmlInputStream input;

      // Shared constructor: every public constructor builds an XmlInputStream
      // and funnels into this one.
      XmlStreamReader (XmlInputStream input)
          : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
      {
          this.input = input;
      }

      // Reads from the given stream, treating it as a document entity.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input, true))
      {
      }

      // Reads from the given stream; docent tells whether it is the document
      // entity (as opposed to an external parsed entity).
      public XmlStreamReader (Stream input, bool docent)
          : this (new XmlInputStream (input, docent))
      {
      }

      // Resolves url against baseURI with the given resolver and reads the
      // resulting stream as a document entity.
      public XmlStreamReader (string url, XmlResolver resolver, string baseURI)
          : this (url, true, resolver, baseURI)
      {
      }

      // Resolves url against baseURI with the given resolver (a null resolver
      // is replaced by XmlInputStream with an XmlUrlResolver) and reads the
      // resulting stream.
      public XmlStreamReader (string url, bool docent, XmlResolver resolver,
          string baseURI)
          : this (new XmlInputStream (url, docent, resolver, baseURI))
      {
      }

      // Closes the reader and the wrapped input stream. Routed through
      // Dispose (true) so that the StreamReader's own teardown runs as well;
      // Dispose no longer calls back into Close, so there is no recursion.
      public override void Close ()
      {
          Dispose (true);
      }

      // Releases the StreamReader and, when disposing, closes the wrapped
      // input stream directly (not through the virtual Close).
      protected override void Dispose (bool disposing)
      {
          base.Dispose (disposing);
          if (disposing && input != null)
              input.Close ();
      }
  }
  63. #endregion
  // A read-only stream wrapper that inspects the first bytes of an XML
  // entity (byte order mark and/or XML declaration) to determine its
  // encoding. Bytes that were consumed while sniffing, other than a
  // recognized byte order mark, are kept in a prefetch buffer and replayed
  // to the reader before the underlying stream is read again.
  class XmlInputStream : Stream
  {
      // Encoding found by Initialize; null when nothing was detected.
      Encoding enc;
      // The underlying stream.
      Stream stream;
      // Prefetch buffer: the bytes consumed during sniffing that must be
      // replayed. It always holds the bytes that immediately precede the
      // underlying stream's read position at the end of Initialize.
      byte[] buffer;
      int bufLength;
      int bufPos;
      bool isDocumentEntity; // allow omitting "version" or not.

      // Resolves url against baseURI (ignored when null or empty) using
      // resolver, or a new XmlUrlResolver when resolver is null, and sniffs
      // the resulting stream.
      // Throws XmlException when the resolver does not return a Stream.
      public XmlInputStream (string url, bool docent, XmlResolver resolver, string baseURI)
      {
          this.isDocumentEntity = docent;
          // Use XmlResolver to resolve external entity.
          if (resolver == null)
              resolver = new XmlUrlResolver ();
          Uri baseUri = null;
          if (baseURI != null && baseURI.Length > 0)
              baseUri = new Uri (baseURI);
          Uri uri = resolver.ResolveUri (baseUri, url);
          Stream s = resolver.GetEntity (uri, null, typeof (Stream)) as Stream;
          if (s == null)
              throw new XmlException ("could not get a stream for the entity: " + url);
          Initialize (s);
      }

      // Wraps stream, treating it as a document entity.
      public XmlInputStream (Stream stream)
          : this (stream, true)
      {
      }

      // Wraps stream; docent tells whether it is the document entity.
      public XmlInputStream (Stream stream, bool docent)
      {
          this.isDocumentEntity = docent;
          Initialize (stream);
      }

      // Builds a fresh exception for a malformed encoding declaration. A new
      // instance per throw keeps stack traces accurate.
      private static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Appends c to ms unless it is the end-of-stream marker (a negative
      // value), which must never be stored as a data byte.
      private static void WriteUnlessEof (MemoryStream ms, int c)
      {
          if (c >= 0)
              ms.WriteByte ((byte) c);
      }

      // Reads up to count bytes from the underlying stream into dest,
      // retrying until count bytes arrived or the stream ended. Returns the
      // number of bytes actually read.
      private int ReadFully (byte[] dest, int count)
      {
          int total = 0;
          while (total < count) {
              int n = stream.Read (dest, total, count - total);
              if (n <= 0)
                  break;
              total += n;
          }
          return total;
      }

      // Sniffs the head of the stream: recognizes UTF-16 LE/BE and UTF-8
      // byte order marks (which are swallowed) and otherwise looks for an
      // encoding name in the XML declaration. Everything read that is not a
      // byte order mark is stored in the prefetch buffer for replay.
      // Throws ArgumentNullException when stream is null.
      private void Initialize (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          this.stream = stream;

          // FIXME: seems too waste...
          MemoryStream ms = new MemoryStream ();
          int c = stream.ReadByte ();
          switch (c) {
          case 0xFF:
              c = stream.ReadByte ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // Not a BOM and not "<?xml": treat as utf-8 and replay.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFF);
                  WriteUnlessEof (ms, c);
              }
              break;
          case 0xFE:
              c = stream.ReadByte ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // Not a BOM and not "<?xml": treat as utf-8 and replay.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFE);
                  WriteUnlessEof (ms, c);
              }
              break;
          case 0xEF:
              enc = Encoding.UTF8;
              c = stream.ReadByte ();
              if (c == 0xBB) {
                  c = stream.ReadByte ();
                  if (c != 0xBF) {
                      // Incomplete utf-8 BOM: replay what was consumed.
                      ms.WriteByte ((byte) 0xEF);
                      ms.WriteByte ((byte) 0xBB);
                      WriteUnlessEof (ms, c);
                  }
              } else {
                  ms.WriteByte ((byte) 0xEF);
                  WriteUnlessEof (ms, c);
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              ms.WriteByte ((byte) '<');
              ReadXmlDeclaration (ms);
              break;
          default:
              // Anything else (including an empty stream) is taken as utf-8.
              WriteUnlessEof (ms, c);
              enc = Encoding.UTF8;
              break;
          }

          // Publish the replay buffer for every branch, not only for '<'.
          buffer = ms.ToArray ();
          bufLength = buffer.Length;
          bufPos = 0;
      }

      // Called after a leading '<' was read. Copies what it reads into ms
      // and, if the text is an XML declaration carrying an encoding
      // pseudo-attribute, sets enc from it. enc is left untouched when no
      // encoding is declared.
      // Throws XmlException when the encoding declaration is malformed.
      private void ReadXmlDeclaration (MemoryStream ms)
      {
          byte[] scratch = new byte [7];

          int size = ReadFully (scratch, 4);
          ms.Write (scratch, 0, size);
          if (size != 4 || Encoding.ASCII.GetString (scratch, 0, 4) != "?xml")
              return;

          int c = SkipWhitespace (ms);
          // version. It is optional here.
          // FIXME: a missing version should be an error when
          // isDocumentEntity is true; that check is still disabled.
          if (c == 'v') {
              ms.WriteByte ((byte) 'v');
              CopyThroughQuotedValue (ms);
              c = SkipWhitespace (ms);
          }

          if (c != 'e') {
              WriteUnlessEof (ms, c);
              return;
          }
          ms.WriteByte ((byte) 'e');
          size = ReadFully (scratch, 7);
          // Copy exactly what was read, and only once.
          ms.Write (scratch, 0, size);
          if (size != 7 || Encoding.ASCII.GetString (scratch, 0, 7) != "ncoding")
              return;

          c = SkipWhitespace (ms);
          if (c != '=')
              throw CreateEncodingException ();
          ms.WriteByte ((byte) '=');

          c = SkipWhitespace (ms);
          if (c != '"' && c != '\'')
              throw CreateEncodingException ();
          int quoteChar = c;
          ms.WriteByte ((byte) c);

          int start = (int) ms.Position;
          for (;;) {
              c = stream.ReadByte ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              ms.WriteByte ((byte) c);
          }
          string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int) ms.Position - start);
          if (!XmlChar.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          ms.WriteByte ((byte) quoteChar);
          enc = Encoding.GetEncoding (encodingName);
      }

      // Copies bytes from the underlying stream into ms up to and including
      // the closing quote of the next quoted value (the rest of the
      // attribute name, '=', and the quoted literal). Stops early at end of
      // stream. Unlike scanning for a literal '0', this also handles
      // version values other than "1.0".
      private void CopyThroughQuotedValue (MemoryStream ms)
      {
          int quote = -1;
          for (;;) {
              int c = stream.ReadByte ();
              if (c < 0)
                  return;
              ms.WriteByte ((byte) c);
              if (quote < 0) {
                  if (c == '"' || c == '\'')
                      quote = c;
              } else if (c == quote) {
                  return;
              }
          }
      }

      // Skips CR, LF, TAB and SPACE on the underlying stream, copying them
      // into ms when ms is not null. Returns the first other value read,
      // which is negative at end of stream; that value is NOT written to ms.
      private int SkipWhitespace (MemoryStream ms) // ms may be null
      {
          for (;;) {
              int c = stream.ReadByte ();
              switch (c) {
              case '\r':
              case '\n':
              case '\t':
              case ' ':
                  if (ms != null)
                      ms.WriteByte ((byte) c);
                  break;
              default:
                  return c;
              }
          }
      }

      // The encoding detected while sniffing, or null when none was found.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      // True while prefetched bytes remain; otherwise whatever the
      // underlying stream reports.
      public override bool CanRead {
          get {
              if (bufLength > bufPos)
                  return true;
              return stream.CanRead;
          }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      // Length of the underlying stream.
      public override long Length {
          get { return stream.Length; }
      }

      // Position expressed in the underlying stream's coordinates: its
      // position minus the prefetched bytes not yet handed out. The setter
      // moves relative to that value through Seek, so getter and setter
      // agree and stale prefetched bytes are never replayed.
      public override long Position {
          get {
              return stream.Position - bufLength + bufPos;
          }
          set {
              Seek (value - Position, SeekOrigin.Current);
          }
      }

      // Closes the underlying stream.
      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Reads up to count bytes into buffer at offset: prefetched bytes
      // first, then the underlying stream for whatever is still missing.
      // Returns the number of bytes stored.
      public override int Read (byte[] buffer, int offset, int count)
      {
          int bufRest = bufLength - bufPos;
          if (count <= bufRest) { // all from buffer
              Array.Copy (this.buffer, bufPos, buffer, offset, count);
              bufPos += count;
              return count;
          }
          if (bufRest > 0) {
              Array.Copy (this.buffer, bufPos, buffer, offset, bufRest);
              bufPos = bufLength;
          }
          return bufRest + stream.Read (buffer, offset + bufRest, count - bufRest);
      }

      // Returns the next byte, from the prefetch buffer while it lasts and
      // from the underlying stream afterwards.
      public override int ReadByte ()
      {
          if (bufLength > bufPos)
              return buffer [bufPos++];
          return stream.ReadByte ();
      }

      // Moves the read position and returns the new position.
      // A relative move that stays inside the still-unread prefetch window
      // only adjusts the buffer index. Every other move is forwarded to the
      // underlying stream (a relative offset is corrected by the number of
      // unread prefetched bytes) and the prefetch buffer is discarded.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          if (origin == SeekOrigin.Current) {
              int bufRest = bufLength - bufPos;
              long target = bufPos + offset;
              // While unread prefetched bytes remain, the underlying stream
              // has not been read past the prefetch, so the window is valid.
              if (bufRest > 0 && target >= 0 && target <= bufLength) {
                  bufPos = (int) target;
                  return Position;
              }
              offset -= bufRest;
          }
          long result = stream.Seek (offset, origin);
          // Only drop the prefetch once the underlying seek succeeded.
          bufPos = bufLength;
          return result;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      // This stream is read-only.
      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      #endregion
  }
  325. }