UTF7Encoding.cs 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. //
  5. // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
  6. //
  7. using System;
  8. using System.Diagnostics;
  9. using System.Runtime.InteropServices;
  10. namespace System.Text
  11. {
  12. public class UTF7Encoding : Encoding
  13. {
  // The 64 symbols of the modified base 64 alphabet. A symbol's position in the string
  // is the 6-bit value it stands for (index 0 is 'A', index 63 is '/').
  private const string base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  // Characters that can always be written to the output as themselves.
  private const string directChars = "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

  // Characters that are written as themselves only when optionals are allowed.
  private const string optionalChars = "!\"#$%&*;<=>@[]^_`{|}";

  // Shared default instance, used by Encoding.UTF7 for lazy initialization.
  // The initializer does not run until a static member of this class is referenced.
  internal static readonly UTF7Encoding s_default = new UTF7Encoding();

  // Byte value of each base 64 symbol, indexed by its 6-bit value (64 entries).
  private byte[] _base64Bytes;

  // Reverse lookup with 128 entries, indexed by code point: the 6-bit value (0..63)
  // of a base 64 symbol, or -1 when the code point is not a base 64 symbol.
  private sbyte[] _base64Values;

  // 128 flags, indexed by code point: true when a code point below 0x80 can be
  // written directly instead of being base 64 encoded.
  private bool[] _directEncode;

  // Whether the characters in optionalChars are also treated as directly encodable.
  private bool _allowOptionals;

  // Code page identifier passed to the base Encoding constructor.
  private const int UTF7_CODEPAGE = 65000;
  // Creates a UTF7 encoding that does not directly encode the optional characters.
  public UTF7Encoding() : this(allowOptionals: false)
  {
  }
  // Creates a UTF7 encoding.
  //   allowOptionals: when true, the characters in optionalChars are written directly
  //                   instead of being base 64 encoded.
  public UTF7Encoding(bool allowOptionals) : base(UTF7_CODEPAGE)
  {
      // Record the option first: MakeTables reads it to decide which characters are direct.
      this._allowOptionals = allowOptionals;
      this.MakeTables();
  }
  // Builds the three lookup tables used by the encoder and decoder:
  // _base64Bytes (value -> symbol), _base64Values (code point -> value or -1)
  // and _directEncode (code point -> may be written as-is).
  private void MakeTables()
  {
      byte[] symbols = new byte[64];
      sbyte[] values = new sbyte[128];

      // Every code point starts out as "not a base 64 symbol".
      for (int codePoint = 0; codePoint < values.Length; codePoint++)
      {
          values[codePoint] = -1;
      }

      // Fill the forward and reverse base 64 tables in one pass over the alphabet.
      for (int value = 0; value < symbols.Length; value++)
      {
          byte symbol = (byte)base64Chars[value];
          symbols[value] = symbol;
          values[symbol] = (sbyte)value;
      }

      bool[] direct = new bool[128];
      foreach (char ch in directChars)
      {
          direct[ch] = true;
      }

      // The optional set is only direct when the encoding was created with allowOptionals.
      if (_allowOptionals)
      {
          foreach (char ch in optionalChars)
          {
              direct[ch] = true;
          }
      }

      _base64Bytes = symbols;
      _base64Values = values;
      _directEncode = direct;
  }
  74. // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
  // Installs the fallbacks this encoding starts with: an encoder replacement fallback
  // with an empty replacement string, and the UTF7-specific decoder fallback.
  internal sealed override void SetDefaultFallbacks()
  {
      // Per the original note, nothing can fall back while encoding because surrogates
      // are encoded individually and unmatched ones are never checked for.
      encoderFallback = new EncoderReplacementFallback(string.Empty);

      // UTF7 has its own decoder fallback behavior for bad bytes.
      decoderFallback = new DecoderUTF7Fallback();
  }
  // Two UTF7 encodings are equal when they agree on allowOptionals and on both fallbacks.
  // Anything that is not a UTF7Encoding (including null) is not equal.
  public override bool Equals(object value)
  {
      if (!(value is UTF7Encoding that))
      {
          return false;
      }

      return _allowOptionals == that._allowOptionals
          && EncoderFallback.Equals(that.EncoderFallback)
          && DecoderFallback.Equals(that.DecoderFallback);
  }
  93. // Compared to all the other encodings, variations of UTF7 are unlikely
  // Hash built from the code page and the hash codes of both fallbacks.
  // allowOptionals is not part of the hash.
  public override int GetHashCode()
  {
      int hash = this.CodePage;
      hash += this.EncoderFallback.GetHashCode();
      hash += this.DecoderFallback.GetHashCode();
      return hash;
  }
  98. // The following methods are copied from EncodingNLS.cs.
  99. // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  100. // These should be kept in sync for the following classes:
  101. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  102. // Returns the number of bytes required to encode a range of characters in
  103. // a character array.
  104. //
  105. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  106. // So if you fix this, fix the others. Currently those include:
  107. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  108. // parent method is safe
  // Returns the number of bytes needed to encode `count` chars of `chars`, starting at `index`.
  // Throws ArgumentNullException when chars is null, and ArgumentOutOfRangeException when
  // index or count is negative or the range does not fit in the array.
  public override unsafe int GetByteCount(char[] chars, int index, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > chars.Length - index)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; this also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      // Hand the pinned range to the pointer-based worker, with no encoder state.
      fixed (char* start = chars)
      {
          return GetByteCount(start + index, count, null);
      }
  }
  125. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  126. // So if you fix this, fix the others. Currently those include:
  127. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  128. // parent method is safe
  // Returns the number of bytes needed to encode the whole string `s`.
  // Throws ArgumentNullException when s is null.
  public override unsafe int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // Pin the string and count with the pointer-based worker, with no encoder state.
      fixed (char* start = s)
      {
          return GetByteCount(start, s.Length, null);
      }
  }
  137. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  138. // So if you fix this, fix the others. Currently those include:
  139. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Returns the number of bytes needed to encode `count` chars starting at `chars`.
  // Throws ArgumentNullException when chars is null, and ArgumentOutOfRangeException
  // when count is negative.
  [CLSCompliant(false)]
  public override unsafe int GetByteCount(char* chars, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Count with no encoder state.
      return GetByteCount(chars, count, null);
  }
  151. // Parent method is safe.
  152. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  153. // So if you fix this, fix the others. Currently those include:
  154. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Encodes `charCount` chars of `s`, starting at `charIndex`, into `bytes` starting at
  // `byteIndex`, and returns the number of bytes written.
  // Throws ArgumentNullException when s or bytes is null, and ArgumentOutOfRangeException
  // when an index or count is negative or lies outside its buffer.
  public override unsafe int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount > s.Length - charIndex)
      {
          throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Space left in the destination after byteIndex.
      int room = bytes.Length - byteIndex;

      // The destination is pinned through a span reference rather than pinning the array directly.
      fixed (char* source = s)
      fixed (byte* destination = &MemoryMarshal.GetReference((Span<byte>)bytes))
      {
          return GetBytes(source + charIndex, charCount, destination + byteIndex, room, null);
      }
  }
  170. // Encodes a range of characters in a character array into a range of bytes
  171. // in a byte array. An exception occurs if the byte array is not large
  172. // enough to hold the complete encoding of the characters. The
  173. // GetByteCount method can be used to determine the exact number of
  174. // bytes that will be produced for a given range of characters.
  175. // Alternatively, the GetMaxByteCount method can be used to
  176. // determine the maximum number of bytes that will be produced for a given
  177. // number of characters, regardless of the actual character values.
  178. //
  179. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  180. // So if you fix this, fix the others. Currently those include:
  181. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  182. // parent method is safe
  // Encodes `charCount` chars of `chars`, starting at `charIndex`, into `bytes` starting at
  // `byteIndex`, and returns the number of bytes written.
  // Throws ArgumentNullException when chars or bytes is null, and ArgumentOutOfRangeException
  // when an index or count is negative or lies outside its buffer.
  public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount > chars.Length - charIndex)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Nothing to encode; this also avoids pinning an empty array.
      if (charCount == 0)
      {
          return 0;
      }

      // Space left in the destination after byteIndex, not the size of the whole array.
      int room = bytes.Length - byteIndex;

      fixed (char* source = chars)
      fixed (byte* destination = &MemoryMarshal.GetReference((Span<byte>)bytes))
      {
          return GetBytes(source + charIndex, charCount, destination + byteIndex, room, null);
      }
  }
  204. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  205. // So if you fix this, fix the others. Currently those include:
  206. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Encodes `charCount` chars starting at `chars` into at most `byteCount` bytes starting
  // at `bytes`, and returns the number of bytes written.
  // Throws ArgumentNullException when bytes or chars is null, and ArgumentOutOfRangeException
  // when charCount or byteCount is negative.
  [CLSCompliant(false)]
  public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
  {
      // The destination pointer is checked before the source pointer.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Encode with no encoder state.
      return GetBytes(chars, charCount, bytes, byteCount, null);
  }
  217. // Returns the number of characters produced by decoding a range of bytes
  218. // in a byte array.
  219. //
  220. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  221. // So if you fix this, fix the others. Currently those include:
  222. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  223. // parent method is safe
  // Returns the number of chars produced by decoding `count` bytes of `bytes`, starting at `index`.
  // Throws ArgumentNullException when bytes is null, and ArgumentOutOfRangeException when
  // index or count is negative or the range does not fit in the array.
  public override unsafe int GetCharCount(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > bytes.Length - index)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; this also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      // Hand the pinned range to the pointer-based worker, with no decoder state.
      fixed (byte* start = bytes)
      {
          return GetCharCount(start + index, count, null);
      }
  }
  240. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  241. // So if you fix this, fix the others. Currently those include:
  242. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Returns the number of chars produced by decoding `count` bytes starting at `bytes`.
  // Throws ArgumentNullException when bytes is null, and ArgumentOutOfRangeException
  // when count is negative.
  [CLSCompliant(false)]
  public override unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Count with no decoder state.
      return GetCharCount(bytes, count, null);
  }
  253. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  254. // So if you fix this, fix the others. Currently those include:
  255. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  256. // parent method is safe
  // Decodes `byteCount` bytes of `bytes`, starting at `byteIndex`, into `chars` starting at
  // `charIndex`, and returns the number of chars written.
  // Throws ArgumentNullException when bytes or chars is null, and ArgumentOutOfRangeException
  // when an index or count is negative or lies outside its buffer.
  public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
      char[] chars, int charIndex)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (byteIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount > bytes.Length - byteIndex)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (charIndex < 0 || charIndex > chars.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
      }

      // Nothing to decode; this also avoids pinning an empty array.
      if (byteCount == 0)
      {
          return 0;
      }

      // Space left in the destination after charIndex, not the size of the whole array.
      int room = chars.Length - charIndex;

      fixed (byte* source = bytes)
      fixed (char* destination = &MemoryMarshal.GetReference((Span<char>)chars))
      {
          return GetChars(source + byteIndex, byteCount, destination + charIndex, room, null);
      }
  }
  278. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  279. // So if you fix this, fix the others. Currently those include:
  280. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Decodes `byteCount` bytes starting at `bytes` into at most `charCount` chars starting
  // at `chars`, and returns the number of chars written.
  // Throws ArgumentNullException when bytes or chars is null, and ArgumentOutOfRangeException
  // when charCount or byteCount is negative.
  [CLSCompliant(false)]
  public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // The char count is checked before the byte count.
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Decode with no decoder state.
      return GetChars(bytes, byteCount, chars, charCount, null);
  }
  291. // Returns a string containing the decoded representation of a range of
  292. // bytes in a byte array.
  293. //
  294. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  295. // So if you fix this, fix the others. Currently those include:
  296. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  297. // parent method is safe
  // Decodes `count` bytes of `bytes`, starting at `index`, into a new string.
  // Throws ArgumentNullException when bytes is null, and ArgumentOutOfRangeException when
  // index or count is negative or the range does not fit in the array.
  public override unsafe string GetString(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count > bytes.Length - index)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // An empty range decodes to the empty string without touching the buffer.
      if (count == 0)
      {
          return string.Empty;
      }

      fixed (byte* start = bytes)
      {
          return string.CreateStringFromEncoding(start + index, count, this);
      }
  }
  313. //
  314. // End of standard methods copied from EncodingNLS.cs
  315. //
  // Counts the bytes that encoding `count` chars would produce, optionally continuing
  // from the state held by `baseEncoder`. Callers are expected to have validated arguments.
  internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
  {
      Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
      Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");

      // Counting is encoding with a null destination and zero capacity.
      byte* noDestination = null;
      return GetBytes(chars, count, noDestination, 0, baseEncoder);
  }
  // Core encoder. Writes the UTF7 form of `charCount` chars to `bytes` (or only counts
  // when `bytes` is null) and returns buffer.Count.
  //
  // State is tracked in two locals:
  //   pendingBits      - bits of escaped chars that have not been written as base 64 yet
  //   pendingBitCount  - how many of those bits are valid; -1 means "not in base 64 mode"
  // When an encoder is supplied the state is loaded from it, and stored back unless counting.
  internal sealed override unsafe int GetBytes(
      char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
  {
      Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
      Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
      Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");

      UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;

      // Defaults when no encoder carries state: no bits, not in base 64 mode.
      int pendingBits = 0;
      int pendingBitCount = -1;

      Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
          this, encoder, bytes, byteCount, chars, charCount);

      if (encoder != null)
      {
          pendingBits = encoder.bits;
          pendingBitCount = encoder.bitCount;

          // A previous call may have left whole sextets behind; write them first.
          while (pendingBitCount >= 6)
          {
              pendingBitCount -= 6;
              byte symbol = _base64Bytes[(pendingBits >> pendingBitCount) & 0x3F];

              // If even these do not fit there will never be enough room, so this throws.
              if (!buffer.AddByte(symbol))
              {
                  ThrowBytesOverflow(encoder, buffer.Count == 0);
              }
          }
      }

      while (buffer.MoreData)
      {
          char currentChar = buffer.GetNextChar();

          // Case 1: a character that is written as itself.
          if (currentChar < 0x80 && _directEncode[currentChar])
          {
              if (pendingBitCount >= 0)
              {
                  // Leaving base 64 mode: pad the leftover bits into one last symbol ...
                  if (pendingBitCount > 0)
                  {
                      byte padded = _base64Bytes[(pendingBits << (6 - pendingBitCount)) & 0x3F];
                      if (!buffer.AddByte(padded))
                      {
                          break; // Out of room, stop without throwing
                      }

                      pendingBitCount = 0;
                  }

                  // ... then write the '-' terminator.
                  if (!buffer.AddByte((byte)'-'))
                  {
                      break; // Out of room, stop without throwing
                  }

                  pendingBitCount = -1;
              }

              if (!buffer.AddByte((byte)currentChar))
              {
                  break; // Out of room, stop without throwing
              }

              continue;
          }

          // Case 2: a '+' outside base 64 mode is written as the two bytes "+-".
          if (pendingBitCount < 0 && currentChar == '+')
          {
              if (!buffer.AddByte((byte)'+', (byte)'-'))
              {
                  break; // Out of room, stop without throwing
              }

              continue;
          }

          // Case 3: everything else goes through base 64.
          if (pendingBitCount < 0)
          {
              // Write '+' to enter base 64 mode; no data bits are stored yet.
              if (!buffer.AddByte((byte)'+'))
              {
                  break; // Out of room, stop without throwing
              }

              pendingBitCount = 0;
          }

          // Append the 16 bits of this char, then write as many whole sextets as fit.
          pendingBits = (pendingBits << 16) | currentChar;
          pendingBitCount += 16;

          while (pendingBitCount >= 6)
          {
              pendingBitCount -= 6;
              byte symbol = _base64Bytes[(pendingBits >> pendingBitCount) & 0x3F];
              if (!buffer.AddByte(symbol))
              {
                  // These bits were not written, so give them back.
                  pendingBitCount += 6;

                  // This char is still being processed, but the failed AddByte stepped the
                  // input back; step forward again so the char stays consumed.
                  buffer.GetNextChar();
                  break;
              }
          }

          // A whole sextet is still pending only when the output ran out of room.
          if (pendingBitCount >= 6)
          {
              break;
          }
      }

      // Close base 64 mode when flushing. MustFlush may have been cleared earlier by
      // ThrowBytesOverflow when converting.
      if (pendingBitCount >= 0 && (encoder == null || encoder.MustFlush))
      {
          // Pad any leftover bits into a final symbol; on success nothing remains.
          if (pendingBitCount > 0
              && buffer.AddByte(_base64Bytes[(pendingBits << (6 - pendingBitCount)) & 0x3F]))
          {
              pendingBitCount = 0;
          }

          // If the padding byte did not fit, this one will not fit either.
          if (buffer.AddByte((byte)'-'))
          {
              // Base 64 mode is now off.
              pendingBits = 0;
              pendingBitCount = -1;
          }
          else
          {
              // State is kept for the next call. The failed AddByte stepped the input
              // back, and the char count has to stay where it was.
              buffer.GetNextChar();
          }
      }

      // Store state only when really encoding; bytes is null when counting.
      if (bytes != null && encoder != null)
      {
          encoder.bits = pendingBits;
          encoder.bitCount = pendingBitCount;
          encoder._charsUsed = buffer.CharsUsed;
      }

      return buffer.Count;
  }
  // Counts the chars that decoding `count` bytes would produce, optionally continuing
  // from the state held by `baseDecoder`. Callers are expected to have validated arguments.
  internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
  {
      Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
      Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

      // Counting is decoding with a null destination and zero capacity.
      char* noDestination = null;
      return GetChars(bytes, count, noDestination, 0, baseDecoder);
  }
  // Core decoder. Writes the chars decoded from `byteCount` bytes to `chars` (or only
  // counts when `chars` is null) and returns buffer.Count.
  //
  // State is tracked in three locals:
  //   accumulator  - base 64 bits collected so far
  //   bitsHeld     - how many of those bits are valid; -1 means "not in base 64 mode"
  //   afterPlus    - true while no base 64 symbol has followed the opening '+'
  // When a decoder is supplied the state is loaded from it, and stored back unless counting.
  internal sealed override unsafe int GetChars(
      byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
  {
      Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
      Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
      Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

      UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;

      Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
          this, decoder, chars, charCount, bytes, byteCount);

      int accumulator = 0;
      int bitsHeld = -1;
      bool afterPlus = false;

      if (decoder != null)
      {
          accumulator = decoder.bits;
          bitsHeld = decoder.bitCount;
          afterPlus = decoder.firstByte;

          Debug.Assert(!afterPlus || decoder.bitCount <= 0,
              "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
      }

      // A previous call may have decoded a whole char it could not output; emit it now.
      if (bitsHeld >= 16)
      {
          char carried = (char)((accumulator >> (bitsHeld - 16)) & 0xFFFF);

          // Always throws on failure: at least one char of room is needed, even in Convert.
          if (!buffer.AddChar(carried))
          {
              ThrowCharsOverflow(decoder, true);
          }

          bitsHeld -= 16;
      }

      while (buffer.MoreData)
      {
          byte currentByte = buffer.GetNextByte();
          int decoded;

          if (bitsHeld < 0)
          {
              // Outside base 64 mode.
              if (currentByte == '+')
              {
                  // Either the start of a base 64 run or the first half of "+-".
                  bitsHeld = 0;
                  afterPlus = true;
                  continue;
              }

              if (currentByte >= 0x80)
              {
                  // Not a valid UTF7 byte: let the fallback handle it.
                  if (!buffer.Fallback(currentByte))
                  {
                      break; // Stop without throwing
                  }

                  continue;
              }

              // A plain character stands for itself.
              decoded = currentByte;
          }
          else
          {
              // Inside base 64 mode. The table has 128 entries, so bytes >= 0x80 are
              // treated as "not a symbol" without indexing it.
              sbyte sextet = (currentByte < 0x80) ? _base64Values[currentByte] : (sbyte)(-1);

              if (sextet >= 0)
              {
                  afterPlus = false;
                  accumulator = (accumulator << 6) | ((byte)sextet);
                  bitsHeld += 6;

                  if (bitsHeld < 16)
                  {
                      continue; // Not enough bits for a char yet
                  }

                  decoded = (accumulator >> (bitsHeld - 16)) & 0xFFFF;
                  bitsHeld -= 16;
              }
              else
              {
                  // Any byte that is not a base 64 symbol ends base 64 mode.
                  bitsHeld = -1;

                  if (currentByte != '-')
                  {
                      // The byte is not the '-' terminator either, so it goes to the
                      // fallback; output is not updated unless the fallback succeeds.
                      if (!buffer.Fallback(currentByte))
                      {
                          break; // Stop without throwing
                      }

                      continue;
                  }

                  // A '-' that is not directly after '+' only turns the mode off.
                  if (!afterPlus)
                  {
                      continue;
                  }

                  // "+-" is the encoding of a literal '+'.
                  decoded = '+';
              }
          }

          if (decoded >= 0)
          {
              if (!buffer.AddChar((char)decoded))
              {
                  // No room. A plain char will simply be read again later. A base 64 char
                  // is kept in the accumulator, so its byte stays consumed.
                  if (bitsHeld >= 0)
                  {
                      buffer.AdjustBytes(+1); // Re-add the byte that the failed AddChar subtracted
                      bitsHeld += 16;         // The char is still held in the accumulator
                  }

                  break; // Stop without throwing
              }
          }
      }

      // Store state only when really decoding; chars is null when counting.
      if (chars != null && decoder != null)
      {
          // MustFlush may have been cleared by ThrowCharsOverflow during Convert.
          if (decoder.MustFlush)
          {
              // Leftover bits are dropped when flushing.
              decoder.bits = 0;
              decoder.bitCount = -1;
              decoder.firstByte = false;
          }
          else
          {
              decoder.bits = accumulator;
              decoder.bitCount = bitsHeld;
              decoder.firstByte = afterPlus;
          }

          decoder._bytesUsed = buffer.BytesUsed;
      }

      // Without a decoder (or when counting) any hanging bits are ignored.
      return buffer.Count;
  }
  // Returns a new stateful decoder bound to this encoding.
  public override System.Text.Decoder GetDecoder() => new UTF7Encoding.Decoder(this);
  // Returns a new stateful encoder bound to this encoding.
  public override System.Text.Encoder GetEncoder() => new UTF7Encoding.Encoder(this);
  // Returns an upper bound on the bytes needed to encode `charCount` chars: 3 per char plus 2.
  // Throws ArgumentOutOfRangeException when charCount is negative or the bound exceeds int range.
  public override int GetMaxByteCount(int charCount)
  {
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Why 3 * charCount + 2:
      //  - The 2 covers the '+' that turns base 64 mode on and the '-' that turns it off;
      //    with no input a '-' may still be needed when MustFlush is true.
      //  - 3 bytes carry 18 bits, more than the 16 bits a char needs.
      //  - For a directly encoded char, 3 bytes leave room to turn base 64 mode off
      //    and back on around it.
      // Surrogates are encoded individually, so every code point is encodable.
      long worstCase = 3L * charCount + 2;

      // The result must fit in an int.
      if (worstCase > int.MaxValue)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
      }

      return (int)worstCase;
  }
  // Returns an upper bound on the chars produced by decoding `byteCount` bytes:
  // one char per byte, and never less than 1.
  // Throws ArgumentOutOfRangeException when byteCount is negative.
  public override int GetMaxCharCount(int byteCount)
  {
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // The minimum of 1 leaves room for leftover bits when a decoder is being flushed.
      return byteCount == 0 ? 1 : byteCount;
  }
  642. // Of all the amazing things... This MUST be Decoder so that our com name
  643. // for System.Text.Decoder doesn't change
  private sealed class Decoder : DecoderNLS
  {
      // Decoding state. These are internal rather than private (they were originally
      // marked /*private*/); presumably the enclosing encoding's decode routines read
      // and write them directly -- confirm against the callers.
      internal int bits;
      internal int bitCount;
      internal bool firstByte;

      public Decoder(UTF7Encoding encoding) : base(encoding)
      {
          // Nothing to initialise here: the base constructor calls Reset().
      }

      // Puts the decoder back into its initial state and clears any fallback buffer.
      public override void Reset()
      {
          bits = 0;
          bitCount = -1;
          firstByte = false;
          _fallbackBuffer?.Reset();
      }

      // Is anything left over in this decoder?
      // NOTE: This insists on seeing the final '-', which some encoders might not
      // emit. Until it has been seen we do not consider the read finished.
      internal override bool HasState => bitCount != -1;
  }
  675. // Of all the amazing things... This MUST be Encoder so that our com name
  676. // for System.Text.Encoder doesn't change
  private sealed class Encoder : EncoderNLS
  {
      // Encoding state. These are internal rather than private (they were originally
      // marked /*private*/); presumably the enclosing encoding's encode routines read
      // and write them directly -- confirm against the callers.
      internal int bits;
      internal int bitCount;

      public Encoder(UTF7Encoding encoding) : base(encoding)
      {
          // Nothing to initialise here: the base constructor calls Reset().
      }

      // Puts the encoder back into its initial state and clears any fallback buffer.
      public override void Reset()
      {
          bits = 0;
          bitCount = -1;
          _fallbackBuffer?.Reset();
      }

      // Is anything left over in this encoder? True whenever either field differs
      // from the value Reset() assigns to it.
      internal override bool HasState => bitCount != -1 || bits != 0;
  }
  703. // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
  704. // and turn off base64 mode if it was in that mode. We still exit the mode, but now we fallback.
  private sealed class DecoderUTF7Fallback : DecoderFallback
  {
      // This fallback carries no configuration, so there is nothing to construct.
      public DecoderUTF7Fallback()
      {
      }

      // Hands out a fresh buffer bound to this fallback.
      public override DecoderFallbackBuffer CreateFallbackBuffer() =>
          new DecoderUTF7FallbackBuffer(this);

      // Maximum number of characters this fallback can produce: one char per bad byte.
      public override int MaxCharCount => 1;

      // Instances hold no state, so any other DecoderUTF7Fallback is considered equal.
      public override bool Equals(object value) => value is DecoderUTF7Fallback;

      // Constant hash, consistent with Equals treating every instance as equal.
      public override int GetHashCode() => 984;
  }
  // Fallback buffer for UTF7 decoding: a single bad byte is replayed as the char
  // with the same numeric value. A zero byte cannot be replayed (see Fallback).
  private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
  {
      // The single char to replay (the offending byte widened to char).
      private char cFallback = (char)0;

      // Chars still to be handed out by GetNextChar; negative means no fallback is pending.
      private int iCount = -1;

      // Total chars in the current fallback (set to 1 by a successful Fallback call).
      private int iSize;

      // Construction. The owning fallback is accepted for symmetry but is not used.
      public DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)
      {
      }

      // Starts a fallback for one unknown byte.
      // Returns true when a char was queued, false when the byte is 0 and nothing is queued.
      public override bool Fallback(byte[] bytesUnknown, int index)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
          Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");

          // Go ahead and get our fallback
          cFallback = (char)bytesUnknown[0];

          // Any of the fallback characters can be handled except for 0
          if (cFallback == 0)
          {
              return false;
          }

          iCount = iSize = 1;
          return true;
      }

      // Returns the queued fallback char, or (char)0 once the buffer is exhausted.
      public override char GetNextChar()
      {
          if (iCount-- > 0)
          {
              return cFallback;
          }

          // Note: this means that 0 in UTF7 stream will never be emitted.
          return (char)0;
      }

      // Steps back one position so the next GetNextChar repeats the previous char.
      // Returns true if the step was taken, false otherwise.
      public override bool MovePrevious()
      {
          // Only move when there is somewhere to move to. Previously the counter was
          // bumped before the bound was checked, so a rejected call at the start of
          // the buffer still left iCount at iSize + 1; Remaining then over-reported
          // and GetNextChar replayed the fallback char extra times. A failed
          // MovePrevious must leave the buffer untouched.
          if (iCount >= 0 && iCount < iSize)
          {
              iCount++;
              return true;
          }

          return false;
      }

      // Return # of chars left in this fallback
      public override int Remaining
      {
          get
          {
              return (iCount > 0) ? iCount : 0;
          }
      }

      // Clear the buffer
      public override unsafe void Reset()
      {
          iCount = -1;
          byteStart = null;
      }

      // This version just counts the fallback and doesn't actually copy anything.
      // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
      // array, and we might need the index, hence the byte*
      // Returns the number of chars the fallback would produce: 0 for a zero byte, 1 otherwise.
      // Throws ArgumentException when bytes does not hold exactly one byte.
      internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");

          if (bytes.Length != 1)
          {
              throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
          }

          // Can't fallback a byte 0, so return for that case, 1 otherwise.
          return bytes[0] == 0 ? 0 : 1;
      }
  }
  809. }
  810. }