EncodingStreamWrapper.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) Microsoft Corporation. All rights reserved.
  3. //-----------------------------------------------------------------------------
  4. namespace System.Xml
  5. {
  6. using System;
  7. using System.IO;
  8. using System.Text;
  9. using System.Runtime.Serialization;
  // This wrapper does not support seeking.
  // The constructors consume (reading) or emit (writing) the byte order mark.
  // Supports: UTF-8, Unicode (UTF-16 little endian), BigEndianUnicode (UTF-16 big endian)
  // ASSUMPTION: This class will only be used for EITHER reading OR writing. Supporting both on one
  // instance is possible, but it would require additional buffers.
  // ASSUMPTION: The byte buffer is large enough to hold the XML declaration.
  // ASSUMPTION: The buffer manipulation methods (FillBuffer/Compare/etc.) will only be used to parse the
  // declaration during construction.
  17. class EncodingStreamWrapper : Stream
  18. {
  // Encodings this wrapper can handle. UTF8 is the first member, so it is also the value of
  // an encodingCode field that has never been assigned.
  enum SupportedEncoding
  {
      UTF8,
      UTF16LE,
      UTF16BE,
      None
  }

  // "Safe" encodings: constructed with throwOnInvalidBytes = false.
  static readonly UTF8Encoding SafeUTF8 = new UTF8Encoding(false, false);
  static readonly UnicodeEncoding SafeUTF16 = new UnicodeEncoding(false, false, false);
  static readonly UnicodeEncoding SafeBEUTF16 = new UnicodeEncoding(true, false, false);

  // "Validating" encodings: constructed with throwOnInvalidBytes = true.
  static readonly UTF8Encoding ValidatingUTF8 = new UTF8Encoding(false, true);
  static readonly UnicodeEncoding ValidatingUTF16 = new UnicodeEncoding(false, false, true);
  static readonly UnicodeEncoding ValidatingBEUTF16 = new UnicodeEncoding(true, false, true);

  // Base size for the working buffers: chars holds BufferLength entries, bytes holds four times as many.
  const int BufferLength = 128;

  // Keywords of the XML declaration, stored as UTF-8 (ASCII) bytes because UTF-8 is the fast path.
  // Documents in the UTF-16 flavours are converted to UTF-8 before being compared against these.
  static readonly byte[] encodingAttr = { (byte)'e', (byte)'n', (byte)'c', (byte)'o', (byte)'d', (byte)'i', (byte)'n', (byte)'g' };
  static readonly byte[] encodingUTF8 = { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'8' };
  static readonly byte[] encodingUnicode = { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'1', (byte)'6' };
  static readonly byte[] encodingUnicodeLE = { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'1', (byte)'6', (byte)'l', (byte)'e' };
  static readonly byte[] encodingUnicodeBE = { (byte)'u', (byte)'t', (byte)'f', (byte)'-', (byte)'1', (byte)'6', (byte)'b', (byte)'e' };

  // Encoding of the wrapped stream, as an enum and as an Encoding object.
  SupportedEncoding encodingCode;
  Encoding encoding;

  // Converters used by the writing path (UTF-8 decoder, target encoder).
  Encoder enc;
  Decoder dec;

  // True for a wrapper created by the reading constructor.
  bool isReading;

  // The wrapped stream (a BufferedStream around the caller's stream).
  Stream stream;

  // Conversion buffers and the window [byteOffset, byteOffset + byteCount) of pending bytes.
  char[] chars;
  byte[] bytes;
  int byteOffset;
  int byteCount;

  // One-byte scratch buffer for ReadByte/WriteByte.
  byte[] byteBuffer = new byte[1];
  45. // Reading constructor
  // Creates a wrapper for reading. The constructor sniffs the encoding from the first four bytes,
  // checks it against the optional out-of-band encoding and against the encoding named in the XML
  // declaration (if any), and leaves the already-consumed bytes in the byte buffer so Read returns them.
  // DecoderFallbackException raised while converting is rethrown as XmlException (XmlInvalidBytes).
  public EncodingStreamWrapper(Stream stream, Encoding encoding)
  {
      try
      {
          this.isReading = true;
          this.stream = new BufferedStream(stream);

          // Encoding the caller told us about (None when no encoding was passed).
          SupportedEncoding outOfBandEnc = GetSupportedEncoding(encoding);

          // Encoding implied by the byte order mark / first bytes of the document.
          SupportedEncoding detectedEnc = ReadBOMEncoding(encoding == null);

          bool hasOutOfBandEnc = outOfBandEnc != SupportedEncoding.None;
          if (hasOutOfBandEnc && outOfBandEnc != detectedEnc)
              ThrowExpectedEncodingMismatch(outOfBandEnc, detectedEnc);

          if (detectedEnc != SupportedEncoding.UTF8)
          {
              // UTF-16 document: convert the first buffer to UTF-8 in place.
              EnsureBuffers();
              FillBuffer((BufferLength - 1) * 2);
              SetReadDocumentEncoding(detectedEnc);
              CleanupCharBreak();

              int charCount = this.encoding.GetChars(bytes, byteOffset, byteCount, chars, 0);
              byteOffset = 0;
              byteCount = ValidatingUTF8.GetBytes(chars, 0, charCount, bytes, 0);

              bool startsWithDeclaration = bytes[0] == '<' && bytes[1] == '?';
              if (startsWithDeclaration)
              {
                  CheckUTF8DeclarationEncoding(bytes, 0, byteCount, detectedEnc, outOfBandEnc);
              }
              else if (!hasOutOfBandEnc)
              {
                  // Declaration required if no out-of-band encoding
                  throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
              }
              return;
          }

          // Fast path: UTF-8. Without a leading "<?" there is no declaration to inspect.
          FillBuffer(2);
          if (bytes[byteOffset] == '<' && bytes[byteOffset + 1] == '?')
          {
              FillBuffer(BufferLength);
              CheckUTF8DeclarationEncoding(bytes, byteOffset, byteCount, detectedEnc, outOfBandEnc);
          }
      }
      catch (DecoderFallbackException ex)
      {
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlInvalidBytes), ex));
      }
  }
  // Records the document encoding for the reading path: makes sure the conversion buffers exist,
  // stores the enum value, then stores the matching validating Encoding.
  void SetReadDocumentEncoding(SupportedEncoding e)
  {
      EnsureBuffers();

      encodingCode = e;
      encoding = GetEncoding(e);
  }
  // Maps a SupportedEncoding to its validating Encoding instance.
  // Any value other than UTF8/UTF16LE/UTF16BE raises XmlException (XmlEncodingNotSupported).
  static Encoding GetEncoding(SupportedEncoding e)
  {
      if (e == SupportedEncoding.UTF8)
          return ValidatingUTF8;

      if (e == SupportedEncoding.UTF16LE)
          return ValidatingUTF16;

      if (e == SupportedEncoding.UTF16BE)
          return ValidatingBEUTF16;

      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
  }
  // Maps a SupportedEncoding to its "safe" Encoding instance (the ones built with throwOnInvalidBytes = false).
  // Any value other than UTF8/UTF16LE/UTF16BE raises XmlException (XmlEncodingNotSupported).
  static Encoding GetSafeEncoding(SupportedEncoding e)
  {
      if (e == SupportedEncoding.UTF8)
          return SafeUTF8;

      if (e == SupportedEncoding.UTF16LE)
          return SafeUTF16;

      if (e == SupportedEncoding.UTF16BE)
          return SafeBEUTF16;

      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
  }
  // Returns the name used in error messages for a supported encoding.
  // Any value other than UTF8/UTF16LE/UTF16BE raises XmlException (XmlEncodingNotSupported).
  static string GetEncodingName(SupportedEncoding enc)
  {
      if (enc == SupportedEncoding.UTF8)
          return "utf-8";

      if (enc == SupportedEncoding.UTF16LE)
          return "utf-16LE";

      if (enc == SupportedEncoding.UTF16BE)
          return "utf-16BE";

      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
  }
  // Classifies an Encoding by comparing its WebName with the WebName of the three supported encodings.
  // A null encoding yields None; an encoding with any other WebName raises XmlException
  // (XmlEncodingNotSupported).
  static SupportedEncoding GetSupportedEncoding(Encoding encoding)
  {
      if (encoding == null)
          return SupportedEncoding.None;

      string webName = encoding.WebName;

      if (webName == ValidatingUTF8.WebName)
          return SupportedEncoding.UTF8;

      if (webName == ValidatingUTF16.WebName)
          return SupportedEncoding.UTF16LE;

      if (webName == ValidatingBEUTF16.WebName)
          return SupportedEncoding.UTF16BE;

      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlEncodingNotSupported)));
  }
  161. // Writing constructor
  // Creates a wrapper for writing. Callers write UTF-8 bytes; for a UTF-8 target they are passed
  // straight through, otherwise they are decoded and re-encoded into the target encoding.
  // The byte order mark is only written for non-UTF-8 targets, and only when emitBOM is true.
  public EncodingStreamWrapper(Stream stream, Encoding encoding, bool emitBOM)
  {
      this.isReading = false;
      this.encoding = encoding;
      this.stream = new BufferedStream(stream);
      this.encodingCode = GetSupportedEncoding(encoding);

      // UTF-8 needs no conversion state at all.
      if (this.encodingCode == SupportedEncoding.UTF8)
          return;

      EnsureBuffers();
      this.dec = ValidatingUTF8.GetDecoder();
      this.enc = this.encoding.GetEncoder();

      if (!emitBOM)
          return;

      byte[] preamble = this.encoding.GetPreamble();
      if (preamble.Length > 0)
      {
          this.stream.Write(preamble, 0, preamble.Length);
      }
  }
  // Reads the first four bytes of the stream, determines the document encoding from them and stores
  // the bytes that are document content (not byte order mark) at the start of the byte buffer.
  // Raises XmlException (UnexpectedEndOfFile) when the stream holds fewer than four bytes.
  SupportedEncoding ReadBOMEncoding(bool notOutOfBand)
  {
      int first = this.stream.ReadByte();
      int second = this.stream.ReadByte();
      int third = this.stream.ReadByte();
      int fourth = this.stream.ReadByte();

      // Once the stream has ended every later ReadByte is -1 too, so testing the last one is enough.
      if (fourth == -1)
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));

      int keep;
      SupportedEncoding detected = ReadBOMEncoding((byte)first, (byte)second, (byte)third, (byte)fourth, notOutOfBand, out keep);

      EnsureByteBuffer();

      // Keep the trailing 'keep' bytes of the four that were read.
      if (keep == 1)
      {
          bytes[0] = (byte)fourth;
      }
      else if (keep == 2)
      {
          bytes[0] = (byte)third;
          bytes[1] = (byte)fourth;
      }
      else if (keep == 4)
      {
          bytes[0] = (byte)first;
          bytes[1] = (byte)second;
          bytes[2] = (byte)third;
          bytes[3] = (byte)fourth;
      }

      byteCount = keep;
      return detected;
  }
  // Determines the encoding from the first four bytes of a document.
  //   b1..b4       the first four bytes
  //   notOutOfBand true when the caller supplied no encoding, which enables the stricter checks
  //   preserve     receives how many of the four bytes (counted from the end) are document content
  //                rather than byte order mark: 4 (no BOM), 2 (UTF-16 BOM) or 1 (UTF-8 BOM)
  // Returns UTF8 when nothing else matches.
  static SupportedEncoding ReadBOMEncoding(byte b1, byte b2, byte b3, byte b4, bool notOutOfBand, out int preserve)
  {
      // Most outcomes keep all four bytes; the BOM cases override this below.
      preserve = 4;

      switch (b1)
      {
          case (byte)'<':
              if (b2 != 0x00)
              {
                  // UTF-8, no BOM
                  return SupportedEncoding.UTF8;
              }

              // UTF-16 little endian, no BOM: must start with "<?" unless the encoding is known
              if (notOutOfBand && !(b3 == '?' && b4 == 0x00))
                  throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclMissing)));
              return SupportedEncoding.UTF16LE;

          case 0x00:
              if (b2 == '<')
              {
                  // UTF-16 big endian, no BOM: must start with "<?" unless the encoding is known
                  if (notOutOfBand && !(b3 == 0x00 && b4 == '?'))
                      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclMissing)));
                  return SupportedEncoding.UTF16BE;
              }
              break;

          case 0xFF:
              if (b2 == 0xFE)
              {
                  // UTF-16 little endian BOM
                  preserve = 2;
                  return SupportedEncoding.UTF16LE;
              }
              break;

          case 0xFE:
              if (b2 == 0xFF)
              {
                  // UTF-16 big endian BOM
                  preserve = 2;
                  return SupportedEncoding.UTF16BE;
              }
              break;

          case 0xEF:
              if (b2 == 0xBB)
              {
                  // UTF-8 BOM; the third byte is only verified when no out-of-band encoding exists
                  if (notOutOfBand && b3 != 0xBF)
                      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlBadBOM)));
                  preserve = 1;
                  return SupportedEncoding.UTF8;
              }
              break;
      }

      // Anything else: assume UTF-8 without BOM
      return SupportedEncoding.UTF8;
  }
  // Reads from the stream until the byte buffer holds 'count' pending bytes or the stream ends.
  // New bytes are appended after the pending window (byteOffset + byteCount).
  void FillBuffer(int count)
  {
      int missing = count - byteCount;

      while (missing > 0)
      {
          int received = stream.Read(bytes, byteOffset + byteCount, missing);
          if (received == 0)
              return; // end of stream

          byteCount += received;
          missing -= received;
      }
  }
  // Allocates the byte buffer and the char buffer if they do not exist yet.
  void EnsureBuffers()
  {
      EnsureByteBuffer();

      if (chars != null)
          return;

      chars = new char[BufferLength];
  }
  // Allocates the byte buffer (four times BufferLength) on first use and resets the pending window.
  // Does nothing when the buffer already exists.
  void EnsureByteBuffer()
  {
      if (bytes == null)
      {
          bytes = new byte[BufferLength * 4];
          byteOffset = 0;
          byteCount = 0;
      }
  }
  // Verifies that the encoding named in a UTF-8 encoded XML declaration agrees with the encoding
  // detected from the bytes of the document.
  //   buffer, offset, count  the document bytes; callers only call this when the data starts with "<?"
  //   e                      the encoding detected from the BOM / first bytes
  //   expectedEnc            the out-of-band encoding, or None
  // Returns normally when the declaration carries no encoding attribute and that is acceptable, or when
  // the attribute matches e. Raises XmlException for a missing-but-required declaration
  // (XmlDeclarationRequired), a malformed declaration (XmlMalformedDecl) or a mismatch (XmlEncodingMismatch).
  // Only the first BufferLength bytes are scanned.
  static void CheckUTF8DeclarationEncoding(byte[] buffer, int offset, int count, SupportedEncoding e, SupportedEncoding expectedEnc)
  {
      byte quot = 0;
      int encEq = -1;
      int end = offset + count;                           // first index past the supplied data
      int max = offset + Math.Min(count, BufferLength);   // end of the window that is scanned

      // Encoding should be second "=", abort at first "?"
      int i = 0;
      int eq = 0;
      for (i = offset + 2; i < max; i++)  // Skip the "<?" so we don't get caught by the first "?"
      {
          if (quot != 0)
          {
              // Inside a quoted value: only the matching quote is interesting
              if (buffer[i] == quot)
              {
                  quot = 0;
              }
              continue;
          }

          if (buffer[i] == (byte)'\'' || buffer[i] == (byte)'"')
          {
              quot = buffer[i];
          }
          else if (buffer[i] == (byte)'=')
          {
              if (eq == 1)
              {
                  encEq = i;
                  break;
              }
              eq++;
          }
          else if (buffer[i] == (byte)'?')  // Not legal character in a decl before second "="
          {
              break;
          }
      }

      // No encoding found
      if (encEq == -1)
      {
          if (e != SupportedEncoding.UTF8 && expectedEnc == SupportedEncoding.None)
              throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
          return;
      }

      // Earliest second "=" can appear, measured from the start of the declaration. The position has to
      // be taken relative to offset; otherwise the check silently weakens for data that does not start
      // at index 0 of the buffer.
      if (encEq - offset < 28)
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlMalformedDecl)));

      // Back off whitespace (never further than the start of the data)
      for (i = encEq - 1; i >= offset && IsWhitespace(buffer[i]); i--) ;

      // Check for encoding attribute. A name that would have to start before the data cannot match.
      int attrStart = i - encodingAttr.Length + 1;
      if (attrStart < offset || !Compare(encodingAttr, buffer, attrStart))
      {
          if (e != SupportedEncoding.UTF8 && expectedEnc == SupportedEncoding.None)
              throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
          return;
      }

      // Move ahead of whitespace
      for (i = encEq + 1; i < max && IsWhitespace(buffer[i]); i++) ;

      // Find the quotes. Running out of data here means the declaration is truncated; report that
      // as malformed instead of indexing past the data.
      if (i >= end || (buffer[i] != '\'' && buffer[i] != '"'))
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlMalformedDecl)));
      quot = buffer[i];

      int q = i;
      // Test the bound before touching the buffer
      for (i = q + 1; i < max && buffer[i] != quot; ++i) ;

      if (i >= end || buffer[i] != quot)
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlMalformedDecl)));

      int encStart = q + 1;
      int encCount = i - encStart;

      // lookup the encoding
      SupportedEncoding declEnc = e;
      if (encCount == encodingUTF8.Length && CompareCaseInsensitive(encodingUTF8, buffer, encStart))
      {
          declEnc = SupportedEncoding.UTF8;
      }
      else if (encCount == encodingUnicodeLE.Length && CompareCaseInsensitive(encodingUnicodeLE, buffer, encStart))
      {
          declEnc = SupportedEncoding.UTF16LE;
      }
      else if (encCount == encodingUnicodeBE.Length && CompareCaseInsensitive(encodingUnicodeBE, buffer, encStart))
      {
          declEnc = SupportedEncoding.UTF16BE;
      }
      else if (encCount == encodingUnicode.Length && CompareCaseInsensitive(encodingUnicode, buffer, encStart))
      {
          // Plain "utf-16" is compatible with either byte order, but never with a UTF-8 document
          if (e == SupportedEncoding.UTF8)
              ThrowEncodingMismatch(SafeUTF8.GetString(buffer, encStart, encCount), SafeUTF8.GetString(encodingUTF8, 0, encodingUTF8.Length));
      }
      else
      {
          ThrowEncodingMismatch(SafeUTF8.GetString(buffer, encStart, encCount), e);
      }

      if (e != declEnc)
          ThrowEncodingMismatch(SafeUTF8.GetString(buffer, encStart, encCount), e);
  }
  // Returns true when buffer, starting at offset, matches key byte for byte, where a buffer byte also
  // matches if its invariant-culture lower-case form equals the key byte. The key is therefore
  // expected to be lower case; the caller must make sure buffer holds key.Length bytes from offset.
  static bool CompareCaseInsensitive(byte[] key, byte[] buffer, int offset)
  {
      int index = 0;
      while (index < key.Length)
      {
          byte expected = key[index];
          byte actual = buffer[offset + index];

          if (expected != actual
              && expected != Char.ToLower((char)actual, System.Globalization.CultureInfo.InvariantCulture))
          {
              return false;
          }

          index++;
      }
      return true;
  }
  // Returns true when buffer, starting at offset, begins with exactly the bytes of key.
  // The caller must make sure buffer holds key.Length bytes from offset.
  static bool Compare(byte[] key, byte[] buffer, int offset)
  {
      int position = offset;
      foreach (byte expected in key)
      {
          if (expected != buffer[position])
              return false;
          position++;
      }
      return true;
  }
  // True for the four whitespace bytes recognised here: space, line feed, tab and carriage return.
  static bool IsWhitespace(byte ch)
  {
      switch (ch)
      {
          case (byte)' ':
          case (byte)'\n':
          case (byte)'\t':
          case (byte)'\r':
              return true;
          default:
              return false;
      }
  }
  // Buffer-based counterpart of the reading constructor: determines the encoding of an in-memory
  // document, validates it against the optional out-of-band encoding and the XML declaration, and
  // returns the document as UTF-8 bytes with the byte order mark removed.
  //   buffer, offset, count  the document bytes (at least four)
  //   encoding               out-of-band encoding, or null when unknown
  // For UTF-8 input the returned segment points into the caller's buffer; for UTF-16 input it wraps a
  // newly allocated array. Raises XmlException for truncated input (UnexpectedEndOfFile), encoding
  // mismatches, a missing required declaration, and invalid byte sequences (XmlInvalidBytes).
  internal static ArraySegment<byte> ProcessBuffer(byte[] buffer, int offset, int count, Encoding encoding)
  {
      if (count < 4)
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));

      try
      {
          int preserve;
          SupportedEncoding expectedEnc = GetSupportedEncoding(encoding);
          SupportedEncoding declEnc = ReadBOMEncoding(buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3], encoding == null, out preserve);
          if (expectedEnc != SupportedEncoding.None && expectedEnc != declEnc)
              ThrowExpectedEncodingMismatch(expectedEnc, declEnc);

          // Step over the byte order mark
          offset += 4 - preserve;
          count -= 4 - preserve;

          // Fastpath: UTF-8
          if (declEnc == SupportedEncoding.UTF8)
          {
              // After a three-byte BOM as little as one byte may be left, so make sure the second
              // byte exists before looking at it. Without "<?" there is no declaration to check.
              if (count >= 2 && buffer[offset] == '<' && buffer[offset + 1] == '?')
              {
                  CheckUTF8DeclarationEncoding(buffer, offset, count, declEnc, expectedEnc);
              }
              return new ArraySegment<byte>(buffer, offset, count);
          }

          // Convert the start of the document to UTF-8 so the declaration can be inspected. The
          // non-throwing encoding is used here because the window may end in the middle of a character.
          Encoding localEnc = GetSafeEncoding(declEnc);
          int inputCount = Math.Min(count, BufferLength * 2);
          char[] chars = new char[localEnc.GetMaxCharCount(inputCount)];
          int ccount = localEnc.GetChars(buffer, offset, inputCount, chars, 0);
          byte[] bytes = new byte[ValidatingUTF8.GetMaxByteCount(ccount)];
          int bcount = ValidatingUTF8.GetBytes(chars, 0, ccount, bytes, 0);

          // Check for declaration
          if (bytes[1] == '?' && bytes[0] == '<')
          {
              CheckUTF8DeclarationEncoding(bytes, 0, bcount, declEnc, expectedEnc);
          }
          else
          {
              // Declaration required if no out-of-band encoding
              if (expectedEnc == SupportedEncoding.None)
                  throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlDeclarationRequired)));
          }

          // Convert the whole document, this time with the validating encoding
          return new ArraySegment<byte>(ValidatingUTF8.GetBytes(GetEncoding(declEnc).GetChars(buffer, offset, count)));
      }
      catch (DecoderFallbackException e)
      {
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlInvalidBytes), e));
      }
  }
  // Always throws: XmlException (XmlExpectedEncoding) naming the expected and the actual encoding.
  static void ThrowExpectedEncodingMismatch(SupportedEncoding expEnc, SupportedEncoding actualEnc)
  {
      string expectedName = GetEncodingName(expEnc);
      string actualName = GetEncodingName(actualEnc);
      string message = SR.GetString(SR.XmlExpectedEncoding, expectedName, actualName);

      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(message));
  }
  // Always throws: converts the document encoding to its name and forwards to the string overload.
  static void ThrowEncodingMismatch(string declEnc, SupportedEncoding enc)
  {
      string documentEncodingName = GetEncodingName(enc);
      ThrowEncodingMismatch(declEnc, documentEncodingName);
  }
  // Always throws: XmlException (XmlEncodingMismatch) naming the declared and the document encoding.
  static void ThrowEncodingMismatch(string declEnc, string docEnc)
  {
      string message = SR.GetString(SR.XmlEncodingMismatch, declEnc, docEnc);
      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(message));
  }
  470. // This stream wrapper does not support duplex
  // Readable only for a wrapper created by the reading constructor whose inner stream is readable.
  public override bool CanRead
  {
      get { return isReading && this.stream.CanRead; }
  }
  480. // The encoding conversion and buffering breaks seeking.
  // Seeking is never available, whatever the inner stream supports.
  public override bool CanSeek
  {
      get { return false; }
  }
  488. // This stream wrapper does not support duplex
  // Writable only for a wrapper created by the writing constructor whose inner stream is writable.
  public override bool CanWrite
  {
      get { return !isReading && this.stream.CanWrite; }
  }
  498. // The encoding conversion and buffering breaks seeking.
  // Not supported: both accessors throw NotSupportedException.
  public override long Position
  {
      get
      {
#pragma warning suppress 56503 // The contract for non seekable stream is to throw exception
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new NotSupportedException());
      }
      set
      {
          NotSupportedException notSupported = new NotSupportedException();
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(notSupported);
      }
  }
  // Flushes pending output and closes this wrapper and the wrapped stream.
  // The wrapped stream is closed even when the flush fails, so a failing flush cannot leave it open;
  // the flush exception still propagates unless closing throws as well.
  public override void Close()
  {
      try
      {
          Flush();
      }
      finally
      {
          base.Close();
          this.stream.Close();
      }
  }
  // Forwards the flush to the wrapped stream; the wrapper itself holds no unwritten output.
  public override void Flush()
  {
      Stream inner = this.stream;
      inner.Flush();
  }
  // Reads one UTF-8 byte, or returns -1 at the end of the stream.
  // With nothing buffered and a UTF-8 document the call goes straight to the wrapped stream.
  public override int ReadByte()
  {
      bool passThrough = byteCount == 0 && encodingCode == SupportedEncoding.UTF8;
      if (passThrough)
          return this.stream.ReadByte();

      int received = Read(byteBuffer, 0, 1);
      return received == 0 ? -1 : byteBuffer[0];
  }
  // Reads up to 'count' UTF-8 bytes into buffer. Buffered bytes are handed out first; when none are
  // buffered, a UTF-8 document is read directly from the wrapped stream, while a UTF-16 document is
  // read in chunks and converted to UTF-8. Returns the number of bytes delivered, 0 at end of stream.
  // DecoderFallbackException raised while converting is rethrown as XmlException (XmlInvalidBytes).
  public override int Read(byte[] buffer, int offset, int count)
  {
      try
      {
          if (byteCount == 0)
          {
              if (encodingCode == SupportedEncoding.UTF8)
                  return this.stream.Read(buffer, offset, count);

              // No more bytes than can be turned into characters (byteCount is zero here, so the
              // chunk is stored at the start of the buffer)
              byteOffset = 0;
              byteCount = this.stream.Read(bytes, 0, (chars.Length - 1) * 2);

              // Check for end of stream
              if (byteCount == 0)
                  return 0;

              // Fix up incomplete chars
              CleanupCharBreak();

              // Change encoding
              int decodedChars = this.encoding.GetChars(bytes, 0, byteCount, chars, 0);
              byteCount = Encoding.UTF8.GetBytes(chars, 0, decodedChars, bytes, 0);
          }

          // Give them bytes, but never more than are buffered
          int delivered = Math.Min(count, byteCount);
          Buffer.BlockCopy(bytes, byteOffset, buffer, offset, delivered);
          byteOffset += delivered;
          byteCount -= delivered;
          return delivered;
      }
      catch (DecoderFallbackException ex)
      {
          throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.XmlInvalidBytes), ex));
      }
  }
  // Makes sure the pending UTF-16 bytes end on a character boundary: reads one more byte when the
  // count is odd, and two more when the last 16-bit unit is the first half of a surrogate pair.
  // Raises XmlException (UnexpectedEndOfFile) when the stream ends before the character is complete.
  void CleanupCharBreak()
  {
      int end = byteOffset + byteCount;

      // Read on 2 byte boundaries
      if ((byteCount % 2) != 0)
      {
          int extra = this.stream.ReadByte();
          if (extra < 0)
              throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));

          bytes[end] = (byte)extra;
          end++;
          byteCount++;
      }

      // Assemble the last 16-bit unit according to the byte order of the document
      bool littleEndian = encodingCode == SupportedEncoding.UTF16LE;
      int lowByte = littleEndian ? bytes[end - 2] : bytes[end - 1];
      int highByte = littleEndian ? bytes[end - 1] : bytes[end - 2];
      int lastUnit = lowByte + (highByte << 8);

      // Don't cut off a surrogate character. Every value in 0xD800..0xDBFF has the 0x0400 bit clear,
      // so the range test alone identifies the first 16-bit number of a surrogate pair.
      if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF)
      {
          int trailFirst = this.stream.ReadByte();
          int trailSecond = this.stream.ReadByte();
          if (trailSecond < 0)
              throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(SR.GetString(SR.UnexpectedEndOfFile)));

          bytes[end] = (byte)trailFirst;
          bytes[end + 1] = (byte)trailSecond;
          byteCount += 2;
      }
  }
  // Not supported: always throws NotSupportedException.
  public override long Seek(long offset, SeekOrigin origin)
  {
      NotSupportedException notSupported = new NotSupportedException();
      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(notSupported);
  }
  // Writes one UTF-8 byte. A UTF-8 target receives it directly; any other target goes through
  // Write so the byte takes part in the encoding conversion.
  public override void WriteByte(byte b)
  {
      if (encodingCode != SupportedEncoding.UTF8)
      {
          byteBuffer[0] = b;
          Write(byteBuffer, 0, 1);
      }
      else
      {
          this.stream.WriteByte(b);
      }
  }
  // Writes 'count' UTF-8 bytes from buffer. A UTF-8 target receives them unchanged; for any other
  // target they are decoded chunk by chunk and re-encoded into the target encoding. Decoder and
  // encoder keep their state between chunks and calls, so characters may be split anywhere.
  public override void Write(byte[] buffer, int offset, int count)
  {
      // Optimize UTF-8 case
      if (encodingCode == SupportedEncoding.UTF8)
      {
          this.stream.Write(buffer, offset, count);
          return;
      }

      // The decoder can hold up to three bytes of an unfinished UTF-8 sequence from the previous
      // chunk. If the first byte of the next chunk completes a four-byte sequence, that single byte
      // produces two chars (a surrogate pair), so N input bytes can decode to N + 1 chars. Keeping
      // one slot of the char buffer in reserve prevents GetChars from failing with
      // "output buffer too small" in that case.
      int maxChunk = chars.Length - 1;

      while (count > 0)
      {
          int size = maxChunk < count ? maxChunk : count;
          int charCount = dec.GetChars(buffer, offset, size, chars, 0, false);
          byteCount = enc.GetBytes(chars, 0, charCount, bytes, 0, false);
          this.stream.Write(bytes, 0, byteCount);
          offset += size;
          count -= size;
      }
  }
  627. // Delegate properties
  // Reports whether the wrapped stream can time out.
  public override bool CanTimeout
  {
      get
      {
          return this.stream.CanTimeout;
      }
  }
  // Returns the Length reported by the wrapped stream (a length in bytes of the stored encoding,
  // not of the UTF-8 view).
  public override long Length
  {
      get
      {
          return this.stream.Length;
      }
  }
  // Gets or sets the read timeout of the wrapped stream.
  public override int ReadTimeout
  {
      get
      {
          return this.stream.ReadTimeout;
      }
      set
      {
          this.stream.ReadTimeout = value;
      }
  }
  // Gets or sets the write timeout of the wrapped stream.
  public override int WriteTimeout
  {
      get
      {
          return this.stream.WriteTimeout;
      }
      set
      {
          this.stream.WriteTimeout = value;
      }
  }
  640. // Delegate methods
  // Not supported: always throws NotSupportedException.
  public override void SetLength(long value)
  {
      NotSupportedException notSupported = new NotSupportedException();
      throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(notSupported);
  }
  645. }
  646. // Add format exceptions
  647. // Do we need to modify the stream position/Seek to account for the buffer?
  648. // ASSUMPTION ([....]): This class will only be used for EITHER reading OR writing.
  649. #if NO
  // Draft stream (compiled out by the surrounding #if NO) that presents a UTF-16 stream as UTF-8:
  // Read returns the UTF-8 form of the UTF-16 bytes of the inner stream, Write accepts UTF-8 bytes and
  // stores them as UTF-16. One instance shares its buffer between both directions, so it must be used
  // for EITHER reading OR writing. Malformed input raises FormatException.
  class UTF16Stream : Stream
  {
      const int BufferLength = 128;

      Stream stream;
      bool bigEndian;

      // UTF-16 bytes: read from the inner stream (Read) or waiting to be written to it (Write).
      byte[] streamBuffer;
      int streamOffset;   // Read: next unconsumed byte. Write: number of bytes waiting.
      int streamMax;      // Read: number of valid bytes in streamBuffer.

      // Start of a UTF-8 sequence whose remaining bytes had not arrived when Write returned.
      byte[] trailBytes = new byte[4];
      int trailCount;

      public UTF16Stream(Stream stream, bool bigEndian)
      {
          this.stream = stream;
          this.bigEndian = bigEndian;
          this.streamBuffer = new byte[BufferLength];
      }

      // Flushes and closes this stream and the inner stream; the inner stream is closed even when
      // the flush fails.
      public override void Close()
      {
          try
          {
              Flush();
          }
          finally
          {
              base.Close();
              this.stream.Close();
          }
      }

      public override void Flush()
      {
          this.stream.Flush();
      }

      // Reads UTF-16 from the inner stream and returns it as UTF-8. Returns the number of bytes stored
      // in buffer, 0 at end of stream.
      // NOTE: when 'count' is too small for the next character (up to four UTF-8 bytes) nothing is
      // stored and 0 is returned, which callers cannot tell apart from end of stream.
      public override int Read(byte[] buffer, int offset, int count)
      {
          // Top up the buffer until it is certain to hold one complete character (at most four
          // bytes) or the inner stream ends. streamOffset is always zero here because every call
          // compacts the buffer before returning, so new bytes go right after the valid ones.
          while (this.streamMax < 4)
          {
              int received = this.stream.Read(this.streamBuffer, this.streamMax, this.streamBuffer.Length - this.streamMax);
              if (received == 0)
                  break;
              this.streamMax += received;
          }

          int totalWritten = 0;
          while (streamOffset < streamMax && count > 0)
          {
              int ch;
              int read = ReadUTF16Char(out ch, streamBuffer, streamOffset, streamMax - streamOffset);
              if (read == 0)
                  break;  // incomplete character at the end of the buffer

              int written = WriteUTF8Char(ch, buffer, offset, count);
              if (written == 0)
                  break;  // caller's buffer is full

              totalWritten += written;
              streamOffset += read;
              offset += written;
              count -= written;
          }

          // Shift down the leftover data; when everything was consumed this simply empties the buffer
          if (this.streamOffset > 0)
          {
              int leftover = this.streamMax - this.streamOffset;
              if (leftover > 0)
                  Buffer.BlockCopy(this.streamBuffer, this.streamOffset, this.streamBuffer, 0, leftover);
              this.streamMax = leftover;
              this.streamOffset = 0;
          }

          return totalWritten;
      }

      // Decodes one UTF-8 sequence starting at buffer[offset], looking at no more than 'count' bytes.
      // Returns the length of the sequence and stores the code point in ch, or returns 0 (ch = -1)
      // when the sequence is not complete yet. Throws FormatException for bytes that cannot start or
      // continue a sequence. Overlong forms are not rejected.
      int ReadUTF8Char(out int ch, byte[] buffer, int offset, int count)
      {
          ch = -1;
          if (count < 1)
              return 0;

          int lead = buffer[offset];
          if (lead < 0x80)
          {
              ch = lead;
              return 1;
          }

          int length;
          int value;
          if ((lead & 0xE0) == 0xC0)        // 110xxxxx: two bytes
          {
              length = 2;
              value = lead & 0x1F;
          }
          else if ((lead & 0xF0) == 0xE0)   // 1110xxxx: three bytes
          {
              length = 3;
              value = lead & 0x0F;
          }
          else if ((lead & 0xF8) == 0xF0)   // 11110xxx: four bytes
          {
              length = 4;
              value = lead & 0x07;
          }
          else
          {
              // Continuation byte in lead position, or a lead byte UTF-8 does not define
              throw new FormatException();
          }

          if (count < length)
              return 0;

          for (int i = 1; i < length; i++)
          {
              int next = buffer[offset + i];
              if ((next & 0xC0) != 0x80)
                  throw new FormatException();
              value = (value << 6) + (next & 0x3F);
          }

          ch = value;
          return length;
      }

      // Decodes one UTF-16 character starting at buffer[offset], looking at no more than 'count' bytes.
      // Returns the number of bytes used (2 or 4) and stores the code point in ch, or returns 0
      // (ch = -1) when the character is not complete yet. Throws FormatException for unpaired surrogates.
      int ReadUTF16Char(out int ch, byte[] buffer, int offset, int count)
      {
          ch = -1;
          if (count < 2)
              return 0;

          int w1 = ReadEndian(buffer, offset);
          if (w1 < 0xD800 || w1 > 0xDFFF)
          {
              ch = w1;
              return 2;
          }

          // A low surrogate must not come first
          if (w1 >= 0xDC00)
              throw new FormatException();

          if (count < 4)
              return 0;

          int w2 = ReadEndian(buffer, offset + 2);
          if (w2 < 0xDC00 || w2 > 0xDFFF)
              throw new FormatException();

          // Surrogate pairs encode the code point minus 0x10000
          ch = 0x10000 + ((w1 & 0x03FF) << 10) + (w2 & 0x03FF);
          return 4;
      }

      // Reads one 16-bit unit in the byte order of the stream.
      int ReadEndian(byte[] buffer, int offset)
      {
          if (bigEndian)
          {
              return (buffer[offset + 0] << 8) + buffer[offset + 1];
          }
          else
          {
              return (buffer[offset + 1] << 8) + buffer[offset + 0];
          }
      }

      // Encodes code point ch as UTF-8 at buffer[offset], using no more than 'count' bytes.
      // Returns the number of bytes written, or 0 when the space is insufficient or ch is above U+10FFFF.
      int WriteUTF8Char(int ch, byte[] buffer, int offset, int count)
      {
          if (ch < 0x80)
          {
              if (count < 1)
                  return 0;
              buffer[offset] = (byte)ch;
              return 1;
          }
          else if (ch < 0x800)
          {
              if (count < 2)
                  return 0;
              buffer[offset + 1] = (byte)(0x80 | (ch & 0x3F));
              ch >>= 6;
              buffer[offset + 0] = (byte)(0xC0 | ch);
              return 2;
          }
          else if (ch < 0x10000)
          {
              if (count < 3)
                  return 0;
              buffer[offset + 2] = (byte)(0x80 | (ch & 0x3F));
              ch >>= 6;
              buffer[offset + 1] = (byte)(0x80 | (ch & 0x3F));
              ch >>= 6;
              buffer[offset + 0] = (byte)(0xE0 | ch);
              return 3;
          }
          else if (ch <= 0x10FFFF)
          {
              if (count < 4)
                  return 0;
              buffer[offset + 3] = (byte)(0x80 | (ch & 0x3F));
              ch >>= 6;
              buffer[offset + 2] = (byte)(0x80 | (ch & 0x3F));
              ch >>= 6;
              buffer[offset + 1] = (byte)(0x80 | (ch & 0x3F));
              ch >>= 6;
              buffer[offset + 0] = (byte)(0xF0 | ch);
              return 4;
          }

          // Beyond the Unicode range
          return 0;
      }

      // Encodes code point ch as UTF-16 at buffer[offset], using no more than 'count' bytes.
      // Returns the number of bytes written (2 or 4), or 0 when the space is insufficient.
      int WriteUTF16Char(int ch, byte[] buffer, int offset, int count)
      {
          if (ch < 0x10000)
          {
              if (count < 2)
                  return 0;
              WriteEndian(ch, buffer, offset);
              return 2;
          }

          if (count < 4)
              return 0;

          ch -= 0x10000;
          int w2 = 0xDC00 | (ch & 0x03FF);
          int w1 = 0xD800 | (ch >> 10);
          WriteEndian(w1, buffer, offset);
          WriteEndian(w2, buffer, offset + 2);
          return 4;
      }

      // Writes one 16-bit unit in the byte order of the stream.
      void WriteEndian(int ch, byte[] buffer, int offset)
      {
          if (bigEndian)
          {
              buffer[offset + 1] = (byte)ch;
              buffer[offset + 0] = (byte)(ch >> 8);
          }
          else
          {
              buffer[offset + 0] = (byte)ch;
              buffer[offset + 1] = (byte)(ch >> 8);
          }
      }

      // Accepts UTF-8 bytes and writes them to the inner stream as UTF-16. A sequence cut off at the
      // end of the input is kept in trailBytes and completed by the next call.
      public override void Write(byte[] buffer, int offset, int count)
      {
          // Finish the sequence left over from the previous call
          if (trailCount > 0)
          {
              int free = trailBytes.Length - trailCount;
              int take = count < free ? count : free;
              Buffer.BlockCopy(buffer, offset, trailBytes, trailCount, take);
              int total = trailCount + take;

              int c;
              int r = ReadUTF8Char(out c, trailBytes, 0, total);
              if (r == 0)
              {
                  // Still incomplete. A sequence has at most four bytes, so 'total' is below four
                  // and all of the input has been absorbed into trailBytes.
                  trailCount = total;
                  return;
              }

              // Only the bytes beyond the old trail came from this call's input
              int used = r - trailCount;
              offset += used;
              count -= used;
              trailCount = 0;
              streamOffset += WriteUTF16Char(c, streamBuffer, streamOffset, streamBuffer.Length - streamOffset);
          }

          while (count > 0)
          {
              // Keep room for the largest UTF-16 character
              if (streamBuffer.Length - streamOffset < 4)
              {
                  this.stream.Write(streamBuffer, 0, streamOffset);
                  streamOffset = 0;
              }

              int ch;
              int read = ReadUTF8Char(out ch, buffer, offset, count);
              if (read == 0)
                  break;  // sequence continues in the next call

              int written = WriteUTF16Char(ch, streamBuffer, streamOffset, streamBuffer.Length - streamOffset);
              if (written == 0)
                  break;

              streamOffset += written;
              offset += read;
              count -= read;
          }

          if (streamOffset > 0)
          {
              this.stream.Write(streamBuffer, 0, streamOffset);
              streamOffset = 0;
          }

          // Save trailing bytes (at most three: an incomplete sequence)
          if (count > 0)
          {
              Buffer.BlockCopy(buffer, offset, trailBytes, 0, count);
              trailCount = count;
          }
      }

      // Delegate properties
      public override bool CanRead { get { return this.stream.CanRead; } }
      public override bool CanSeek { get { return this.stream.CanSeek; } }
      public override bool CanTimeout { get { return this.stream.CanTimeout; } }
      public override bool CanWrite { get { return this.stream.CanWrite; } }
      public override long Length { get { return this.stream.Length; } }

      public override long Position
      {
          get { return this.stream.Position; }
          set { this.stream.Position = value; }
      }

      public override int ReadTimeout
      {
          get { return this.stream.ReadTimeout; }
          set { this.stream.ReadTimeout = value; }
      }

      public override int WriteTimeout
      {
          get { return this.stream.WriteTimeout; }
          set { this.stream.WriteTimeout = value; }
      }

      // Delegate methods
      public override long Seek(long offset, SeekOrigin origin)
      {
          return this.stream.Seek(offset, origin);
      }

      public override void SetLength(long value)
      {
          this.stream.SetLength(value);
      }
  }
  934. #endif
  935. }