// XmlInputStream.cs
  //
  // System.Xml.XmlInputStream
  //   XML input stream and reader that determine the character encoding
  //   from the byte order mark or from the XML declaration.
  //
  // Author:
  //   Atsushi Enomoto ([email protected])
  //
  // (C)2003 Atsushi Enomoto
  //
  10. using System;
  11. using System.IO;
  12. using System.Text;
  13. using System.Xml;
  14. namespace Mono.Xml.Native
  15. {
  16. #region XmlStreamReader
  // A StreamReader for XML documents. The supplied stream is wrapped in an
  // XmlInputStream, and the reader decodes with the encoding that wrapper
  // reports (ActualEncoding), falling back to UTF-8 when it reports none.
  public class XmlStreamReader : StreamReader
  {
      // The encoding-sniffing wrapper around the caller's stream.
      XmlInputStream input;

      // Private: callers go through the Stream overload so that the wrapper
      // is always created (and the encoding detected) by this class.
      XmlStreamReader (XmlInputStream input)
          : base (input, input.ActualEncoding != null ? input.ActualEncoding : Encoding.UTF8)
      {
          this.input = input;
      }

      // Creates a reader over the given stream. Bytes are consumed from the
      // stream immediately, because the XmlInputStream constructor inspects
      // the beginning of the document.
      public XmlStreamReader (Stream input)
          : this (new XmlInputStream (input))
      {
      }

      // Closes the reader and the wrapped stream. This goes through
      // Dispose (true) so that the StreamReader base class is torn down as
      // well, instead of only the wrapped stream being closed.
      public override void Close ()
      {
          Dispose (true);
      }

      // Releases the reader. When disposing is true the wrapped stream is
      // closed even if the base class throws while disposing.
      protected override void Dispose (bool disposing)
      {
          try {
              base.Dispose (disposing);
          } finally {
              if (disposing && input != null) {
                  input.Close ();
              }
          }
      }
  }
  41. #endregion
  // A read-only stream wrapper that inspects the beginning of an XML document
  // to determine its character encoding: either from a byte order mark or
  // from the encoding pseudo-attribute of the XML declaration.
  //
  // A recognized byte order mark is consumed. Every other byte read during
  // detection is kept in an internal buffer and replayed by Read/ReadByte
  // before any further data is taken from the underlying stream.
  class XmlInputStream : Stream
  {
      // Detected encoding. Stays null when the document starts with '<' but
      // declares no encoding; callers are expected to pick a default then.
      Encoding enc;
      // The underlying stream.
      Stream stream;
      // Look-ahead bytes consumed during detection that still have to be
      // delivered to the reader, and the replay cursor into them.
      byte[] buffer;
      int bufLength;
      int bufPos;

      // Wraps the stream and immediately reads its first bytes to detect the
      // encoding. Throws ArgumentNullException when stream is null and
      // XmlException when the encoding declaration is malformed.
      public XmlInputStream (Stream stream)
      {
          Initialize (stream);
      }

      // A fresh exception per failure, so that stack traces are not shared
      // between unrelated callers.
      private static XmlException CreateEncodingException ()
      {
          return new XmlException ("invalid encoding specification.");
      }

      // Appends c to ms unless c is the end-of-stream marker (-1); casting
      // -1 to a byte would otherwise inject a bogus 0xFF.
      private static void AppendByte (MemoryStream ms, int c)
      {
          if (c >= 0)
              ms.WriteByte ((byte) c);
      }

      // True when the first 'length' bytes of data are exactly the ASCII
      // characters of 'expected'.
      private static bool Matches (byte[] data, int length, string expected)
      {
          if (length != expected.Length)
              return false;
          for (int i = 0; i < length; i++) {
              if (data [i] != expected [i])
                  return false;
          }
          return true;
      }

      // Reads up to count bytes from the underlying stream, retrying on
      // short reads. Returns the number of bytes read, which is smaller than
      // count only when the stream ended.
      private int ReadFully (byte[] dest, int offset, int count)
      {
          int total = 0;
          while (total < count) {
              int n = stream.Read (dest, offset + total, count - total);
              if (n <= 0)
                  break;
              total += n;
          }
          return total;
      }

      // Detects the encoding and fills the replay buffer.
      private void Initialize (Stream stream)
      {
          if (stream == null)
              throw new ArgumentNullException ("stream");
          this.stream = stream;

          // Collects every consumed byte that is not part of a byte order mark.
          MemoryStream ms = new MemoryStream ();
          int c = stream.ReadByte ();
          switch (c) {
          case 0xFF:
              c = stream.ReadByte ();
              if (c == 0xFE) {
                  // BOM-ed little endian utf-16
                  enc = Encoding.Unicode;
              } else {
                  // Not a BOM and no "<?xml": treat as utf-8.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFF);
                  AppendByte (ms, c);
              }
              break;
          case 0xFE:
              c = stream.ReadByte ();
              if (c == 0xFF) {
                  // BOM-ed big endian utf-16
                  enc = Encoding.BigEndianUnicode;
              } else {
                  // Not a BOM and no "<?xml": treat as utf-8.
                  enc = Encoding.UTF8;
                  ms.WriteByte ((byte) 0xFE);
                  AppendByte (ms, c);
              }
              break;
          case 0xEF:
              enc = Encoding.UTF8;
              c = stream.ReadByte ();
              if (c != 0xBB) {
                  ms.WriteByte ((byte) 0xEF);
                  AppendByte (ms, c);
                  break;
              }
              c = stream.ReadByte ();
              if (c != 0xBF) {
                  // Incomplete utf-8 BOM: hand all bytes back to the reader.
                  ms.WriteByte ((byte) 0xEF);
                  ms.WriteByte ((byte) 0xBB);
                  AppendByte (ms, c);
              }
              break;
          case '<':
              // try to get encoding name from XMLDecl.
              ms.WriteByte ((byte) '<');
              ReadXmlDeclaration (ms);
              break;
          case -1:
              // Empty stream: nothing to replay.
              enc = Encoding.UTF8;
              break;
          default:
              enc = Encoding.UTF8;
              ms.WriteByte ((byte) c);
              break;
          }

          // Whatever was consumed (in any branch) must be replayed.
          buffer = ms.ToArray ();
          bufLength = buffer.Length;
          bufPos = 0;
      }

      // Called after a leading '<' has been consumed. Scans a possible XML
      // declaration, copying every consumed byte to ms, and sets enc when an
      // encoding pseudo-attribute is found. Throws XmlException when that
      // pseudo-attribute is malformed.
      private void ReadXmlDeclaration (MemoryStream ms)
      {
          byte[] scratch = new byte [7];
          int size = ReadFully (scratch, 0, 4);
          ms.Write (scratch, 0, size);
          if (!Matches (scratch, size, "?xml"))
              return;

          int c = SkipWhitespace (ms);
          // version. It is optional here.
          if (c == 'v') {
              ms.WriteByte ((byte) 'v');
              // Copy the rest of the pseudo-attribute through the closing
              // quote of its value, whatever the version number is.
              int versionQuote = -1;
              while (true) {
                  c = stream.ReadByte ();
                  if (c < 0)
                      return;
                  ms.WriteByte ((byte) c);
                  if (versionQuote >= 0 && c == versionQuote)
                      break;
                  if (c == '>' || c == '<')
                      return; // malformed; do not scan beyond the declaration
                  if (versionQuote < 0 && (c == '"' || c == '\''))
                      versionQuote = c;
              }
              c = SkipWhitespace (ms);
          }

          if (c != 'e') {
              AppendByte (ms, c);
              return;
          }
          ms.WriteByte ((byte) 'e');
          size = ReadFully (scratch, 0, 7);
          // Write exactly what was read, exactly once.
          ms.Write (scratch, 0, size);
          if (!Matches (scratch, size, "ncoding"))
              return;

          c = SkipWhitespace (ms);
          if (c != '=')
              throw CreateEncodingException ();
          ms.WriteByte ((byte) '=');
          c = SkipWhitespace (ms);
          if (c != '"' && c != '\'')
              throw CreateEncodingException ();
          int quoteChar = c;
          ms.WriteByte ((byte) c);

          int start = (int) ms.Position;
          while (true) {
              c = stream.ReadByte ();
              if (c == quoteChar)
                  break;
              if (c < 0)
                  throw CreateEncodingException ();
              ms.WriteByte ((byte) c);
          }
          string encodingName = Encoding.UTF8.GetString (ms.GetBuffer (), start, (int) ms.Position - start);
          if (!XmlChar.IsValidIANAEncoding (encodingName))
              throw CreateEncodingException ();
          ms.WriteByte ((byte) quoteChar);
          enc = Encoding.GetEncoding (encodingName);
      }

      // Skips whitespace (space, tab, CR, LF), copying it to ms when ms is
      // not null, and returns the first other value read from the stream.
      // That value is -1 at end of stream and is NOT copied to ms.
      private int SkipWhitespace (MemoryStream ms) // ms may be null
      {
          while (true) {
              int c = stream.ReadByte ();
              if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
                  return c;
              if (ms != null)
                  ms.WriteByte ((byte) c);
          }
      }

      // The encoding detected at construction time, or null when the
      // document declared none.
      public Encoding ActualEncoding {
          get { return enc; }
      }

      #region Public Overrides

      // Readable while replay bytes remain or the underlying stream is readable.
      public override bool CanRead {
          get { return bufLength > bufPos || stream.CanRead; }
      }

      // FIXME: It should support base stream's CanSeek.
      public override bool CanSeek {
          get { return false; } // stream.CanSeek; }
      }

      public override bool CanWrite {
          get { return false; }
      }

      // Length of the underlying stream.
      public override long Length {
          get { return stream.Length; }
      }

      // Position expressed in the underlying stream's coordinates: the
      // underlying position minus the replay bytes not yet delivered.
      // Setting it repositions the underlying stream and discards the replay
      // buffer, which only ever holds a copy of bytes of that same stream.
      public override long Position {
          get {
              return stream.Position - (bufLength - bufPos);
          }
          set {
              // Assign first: if the underlying stream refuses, the replay
              // buffer is left untouched.
              stream.Position = value;
              bufPos = bufLength;
          }
      }

      // Closes the underlying stream.
      public override void Close ()
      {
          stream.Close ();
      }

      public override void Flush ()
      {
          stream.Flush ();
      }

      // Reads up to count bytes into buffer at offset: replay bytes first,
      // then, if more were requested, bytes from the underlying stream.
      // Returns the total number of bytes stored.
      public override int Read (byte[] buffer, int offset, int count)
      {
          if (buffer == null)
              throw new ArgumentNullException ("buffer");
          if (offset < 0)
              throw new ArgumentOutOfRangeException ("offset");
          if (count < 0)
              throw new ArgumentOutOfRangeException ("count");
          if (buffer.Length - offset < count)
              throw new ArgumentException ("offset and count exceed the length of buffer.");

          int fromBuffer = Math.Min (count, bufLength - bufPos);
          if (fromBuffer > 0) {
              Array.Copy (this.buffer, bufPos, buffer, offset, fromBuffer);
              bufPos += fromBuffer;
          }
          if (fromBuffer == count)
              return fromBuffer; // fully served from the replay buffer

          return fromBuffer +
              stream.Read (buffer, offset + fromBuffer, count - fromBuffer);
      }

      // Returns the next replay byte, or the next byte of the underlying
      // stream once the replay buffer is exhausted (-1 at end of stream).
      public override int ReadByte ()
      {
          if (bufLength > bufPos) {
              return buffer [bufPos++];
          }
          return stream.ReadByte ();
      }

      // Repositions the stream and returns the new position in the
      // underlying stream's coordinates. Requires a seekable underlying
      // stream; the replay buffer is discarded afterwards because the
      // underlying stream can supply the same bytes again.
      public override long Seek (long offset, System.IO.SeekOrigin origin)
      {
          long result;
          if (origin == SeekOrigin.Current) {
              // The logical position lags the underlying one by the number
              // of replay bytes that have not been delivered yet.
              int bufRest = bufLength - bufPos;
              result = stream.Seek (offset - bufRest, SeekOrigin.Current);
          } else {
              result = stream.Seek (offset, origin);
          }
          bufPos = bufLength;
          return result;
      }

      public override void SetLength (long value)
      {
          stream.SetLength (value);
      }

      // This stream is read-only.
      public override void Write (byte[] buffer, int offset, int count)
      {
          throw new NotSupportedException ();
      }

      #endregion
  }
  275. }