UTF7Encoding.cs 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. //
  5. // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
  6. //
  7. using System;
  8. using System.Diagnostics;
  9. using System.Runtime.InteropServices;
  10. namespace System.Text
  11. {
  12. public class UTF7Encoding : Encoding
  13. {
  // ---- Compile-time constants ------------------------------------------------

  /// <summary>Code page number handed to the base constructor for this encoding.</summary>
  private const int UTF7_CODEPAGE = 65000;

  /// <summary>
  /// The modified base64 alphabet. A character's position in this string is the
  /// 6-bit value that the character stands for.
  /// </summary>
  private const string base64Chars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  //   0000000000111111111122222222223333333333444444444455555555556666   <- index, tens digit
  //   0123456789012345678901234567890123456789012345678901234567890123   <- index, units digit

  /// <summary>Characters that can be directly encoded in UTF7.</summary>
  private const string directChars =
      "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

  /// <summary>Characters that can be optionally directly encoded in UTF7.</summary>
  private const string optionalChars =
      "!\"#$%&*;<=>@[]^_`{|}";

  // ---- Shared instance -------------------------------------------------------

  /// <summary>
  /// Instance used by Encoding.UTF7 for lazy initialization. The initializer does not
  /// run until a static member of this class is referenced.
  /// </summary>
  internal static readonly UTF7Encoding s_default = new UTF7Encoding();

  // ---- Per-instance lookup tables (all populated by MakeTables) ---------------

  /// <summary>The base64 alphabet as bytes: index is the 6-bit value, element is the ASCII byte.</summary>
  private byte[] _base64Bytes;

  /// <summary>
  /// Reverse lookup for base64 decoding. The array has 128 elements and is indexed by
  /// code point: -1 means the code point is not a valid base64 character, any other
  /// entry is the decoded value in the range 0..63.
  /// </summary>
  private sbyte[] _base64Values;

  /// <summary>
  /// 128-element table telling whether a code point below 0x80 can be written to the
  /// output directly instead of being base64-encoded.
  /// </summary>
  private bool[] _directEncode;

  /// <summary>Whether the characters in <see cref="optionalChars"/> are also treated as direct.</summary>
  private bool _allowOptionals;
  /// <summary>
  /// Creates a UTF-7 encoding that does not allow the optional characters to be
  /// written directly; equivalent to <c>new UTF7Encoding(false)</c>.
  /// </summary>
  public UTF7Encoding() : this(allowOptionals: false)
  {
  }
  /// <summary>
  /// Creates a UTF-7 encoding, registering it with the base class under code page
  /// <see cref="UTF7_CODEPAGE"/>.
  /// </summary>
  /// <param name="allowOptionals">
  /// <c>true</c> to mark the characters in <see cref="optionalChars"/> as directly
  /// encodable in addition to those in <see cref="directChars"/>.
  /// </param>
  public UTF7Encoding(bool allowOptionals) : base(UTF7_CODEPAGE)
  {
      // The flag has to be stored before the tables are built: MakeTables() reads it
      // to decide whether the optional characters go into the direct-encode table.
      _allowOptionals = allowOptionals;

      MakeTables();
  }
  /// <summary>
  /// Builds the three lookup tables used while encoding and decoding: the base64
  /// encode table, the base64 decode table and the direct-encode table.
  /// </summary>
  private void MakeTables()
  {
      // Decode table: every code point starts out as "not a base64 character" (-1).
      _base64Values = new sbyte[128];
      for (int codePoint = 0; codePoint < 128; codePoint++)
      {
          _base64Values[codePoint] = -1;
      }

      // Encode table (6-bit value -> ASCII byte) and its inverse, filled side by side.
      _base64Bytes = new byte[64];
      for (int sextet = 0; sextet < 64; sextet++)
      {
          byte symbol = (byte)base64Chars[sextet];
          _base64Bytes[sextet] = symbol;
          _base64Values[symbol] = (sbyte)sextet;
      }

      // Direct-encode table: the mandatory set always, the optional set on request.
      _directEncode = new bool[128];
      foreach (char direct in directChars)
      {
          _directEncode[direct] = true;
      }

      if (_allowOptionals)
      {
          foreach (char optional in optionalChars)
          {
              _directEncode[optional] = true;
          }
      }
  }
  74. // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
  /// <summary>
  /// Installs the fallbacks for this encoding: an encoder replacement fallback with an
  /// empty replacement string and the UTF-7 specific decoder fallback.
  /// </summary>
  internal sealed override void SetDefaultFallbacks()
  {
      // The encoder fallback is irrelevant in practice: surrogates are encoded
      // individually and unmatched ones are never checked for, so nothing can fall
      // back while encoding.
      encoderFallback = new EncoderReplacementFallback(string.Empty);

      // UTF7 has an odd decoder fallback behavior, hence its own fallback type.
      decoderFallback = new DecoderUTF7Fallback();
  }
  /// <summary>
  /// Determines whether <paramref name="value"/> is a <see cref="UTF7Encoding"/> with the
  /// same optional-character setting and equal encoder and decoder fallbacks.
  /// </summary>
  /// <param name="value">The object to compare with this instance.</param>
  /// <returns>
  /// <c>true</c> when <paramref name="value"/> is an equivalent UTF-7 encoding;
  /// <c>false</c> otherwise, including when it is <c>null</c> or of another type.
  /// </returns>
  public override bool Equals(object value)
  {
      UTF7Encoding other = value as UTF7Encoding;
      if (other == null)
      {
          return false;
      }

      if (_allowOptionals != other._allowOptionals)
      {
          return false;
      }

      return EncoderFallback.Equals(other.EncoderFallback)
          && DecoderFallback.Equals(other.DecoderFallback);
  }
  94. // Compared to all the other encodings, variations of UTF7 are unlikely
  /// <summary>
  /// Returns a hash code combining the code page with the hash codes of the encoder
  /// and decoder fallbacks.
  /// </summary>
  /// <returns>The sum of the three values.</returns>
  public override int GetHashCode()
  {
      // The optional-characters flag is not mixed in; instances that differ only in
      // that flag share a hash code, which is allowed (Equals still tells them apart).
      int hash = CodePage;
      hash += EncoderFallback.GetHashCode();
      hash += DecoderFallback.GetHashCode();
      return hash;
  }
  99. // The following methods are copied from EncodingNLS.cs.
  100. // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  101. // These should be kept in sync for the following classes:
  102. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  103. // Returns the number of bytes required to encode a range of characters in
  104. // a character array.
  105. //
  106. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  107. // So if you fix this, fix the others. Currently those include:
  108. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  109. // parent method is safe
  /// <summary>
  /// Returns the number of bytes required to encode a range of characters in a
  /// character array.
  /// </summary>
  /// <param name="chars">The array containing the characters to encode.</param>
  /// <param name="index">Index of the first character to encode.</param>
  /// <param name="count">Number of characters to encode.</param>
  /// <returns>The byte count reported by the pointer-based core; 0 when <paramref name="count"/> is 0.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
  /// extends past the end of <paramref name="chars"/>.
  /// </exception>
  public override unsafe int GetByteCount(char[] chars, int index, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // index is reported in preference to count when both are negative.
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; this also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      fixed (char* pChars = chars)
      {
          return GetByteCount(pChars + index, count, null);
      }
  }
  126. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  127. // So if you fix this, fix the others. Currently those include:
  128. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  129. // parent method is safe
  /// <summary>
  /// Returns the number of bytes required to encode all characters of a string.
  /// </summary>
  /// <param name="s">The string to measure.</param>
  /// <returns>The byte count reported by the pointer-based core.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
  public override unsafe int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // Pin the string and count with no encoder state.
      fixed (char* pChars = s)
      {
          return GetByteCount(pChars, s.Length, null);
      }
  }
  138. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  139. // So if you fix this, fix the others. Currently those include:
  140. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Returns the number of bytes required to encode <paramref name="count"/> characters
  /// starting at <paramref name="chars"/>.
  /// </summary>
  /// <param name="chars">Pointer to the first character to encode.</param>
  /// <param name="count">Number of characters to encode.</param>
  /// <returns>The byte count reported by the core routine.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
  [CLSCompliant(false)]
  public override unsafe int GetByteCount(char* chars, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // No encoder: count as a stand-alone, stateless operation.
      return GetByteCount(chars, count, null);
  }
  152. // Parent method is safe.
  153. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  154. // So if you fix this, fix the others. Currently those include:
  155. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Encodes a range of characters from a string into a byte array.
  /// </summary>
  /// <param name="s">The string containing the characters to encode.</param>
  /// <param name="charIndex">Index of the first character to encode.</param>
  /// <param name="charCount">Number of characters to encode.</param>
  /// <param name="bytes">The array that receives the encoded bytes.</param>
  /// <param name="byteIndex">Index in <paramref name="bytes"/> at which writing starts.</param>
  /// <returns>The value returned by the pointer-based core routine.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="s"/> or <paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// An index or count is negative, the character range extends past the end of
  /// <paramref name="s"/>, or <paramref name="byteIndex"/> is greater than the array length.
  /// </exception>
  public override unsafe int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      // s is reported in preference to bytes when both are null.
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (s.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Space left in the destination from byteIndex to the end of the array.
      int room = bytes.Length - byteIndex;

      fixed (char* pChars = s)
      fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
      {
          return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, room, null);
      }
  }
  171. // Encodes a range of characters in a character array into a range of bytes
  172. // in a byte array. An exception occurs if the byte array is not large
  173. // enough to hold the complete encoding of the characters. The
  174. // GetByteCount method can be used to determine the exact number of
  175. // bytes that will be produced for a given range of characters.
  176. // Alternatively, the GetMaxByteCount method can be used to
  177. // determine the maximum number of bytes that will be produced for a given
  178. // number of characters, regardless of the actual character values.
  179. //
  180. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  181. // So if you fix this, fix the others. Currently those include:
  182. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  183. // parent method is safe
  /// <summary>
  /// Encodes a range of characters in a character array into a range of bytes in a
  /// byte array. GetByteCount gives the exact size needed for a given input and
  /// GetMaxByteCount an upper bound that depends only on the number of characters.
  /// </summary>
  /// <param name="chars">The array containing the characters to encode.</param>
  /// <param name="charIndex">Index of the first character to encode.</param>
  /// <param name="charCount">Number of characters to encode.</param>
  /// <param name="bytes">The array that receives the encoded bytes.</param>
  /// <param name="byteIndex">Index in <paramref name="bytes"/> at which writing starts.</param>
  /// <returns>
  /// 0 when <paramref name="charCount"/> is 0; otherwise the value returned by the
  /// pointer-based core routine.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// An index or count is negative, the character range extends past the end of
  /// <paramref name="chars"/>, or <paramref name="byteIndex"/> is greater than the array length.
  /// </exception>
  public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      // chars is reported in preference to bytes when both are null.
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Nothing to encode; this also avoids pinning an empty array.
      if (charCount == 0)
      {
          return 0;
      }

      // The core routine is given the space remaining after byteIndex,
      // not the size of the whole array.
      int room = bytes.Length - byteIndex;

      fixed (char* pChars = chars)
      fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
      {
          return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, room, null);
      }
  }
  205. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  206. // So if you fix this, fix the others. Currently those include:
  207. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters starting at <paramref name="chars"/>
  /// into the buffer starting at <paramref name="bytes"/>.
  /// </summary>
  /// <param name="chars">Pointer to the first character to encode.</param>
  /// <param name="charCount">Number of characters to encode.</param>
  /// <param name="bytes">Pointer to the start of the output buffer.</param>
  /// <param name="byteCount">Size of the output buffer in bytes.</param>
  /// <returns>The value returned by the core routine.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="charCount"/> or <paramref name="byteCount"/> is negative.
  /// </exception>
  [CLSCompliant(false)]
  public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
  {
      // bytes is reported in preference to chars when both are null.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // charCount is reported in preference to byteCount when both are negative.
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetBytes(chars, charCount, bytes, byteCount, null);
  }
  218. // Returns the number of characters produced by decoding a range of bytes
  219. // in a byte array.
  220. //
  221. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  222. // So if you fix this, fix the others. Currently those include:
  223. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  224. // parent method is safe
  /// <summary>
  /// Returns the number of characters produced by decoding a range of bytes in a
  /// byte array.
  /// </summary>
  /// <param name="bytes">The array containing the bytes to decode.</param>
  /// <param name="index">Index of the first byte to decode.</param>
  /// <param name="count">Number of bytes to decode.</param>
  /// <returns>The character count reported by the pointer-based core; 0 when <paramref name="count"/> is 0.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
  /// extends past the end of <paramref name="bytes"/>.
  /// </exception>
  public override unsafe int GetCharCount(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      // index is reported in preference to count when both are negative.
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; this also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      fixed (byte* pBytes = bytes)
      {
          return GetCharCount(pBytes + index, count, null);
      }
  }
  241. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  242. // So if you fix this, fix the others. Currently those include:
  243. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Returns the number of characters produced by decoding <paramref name="count"/>
  /// bytes starting at <paramref name="bytes"/>.
  /// </summary>
  /// <param name="bytes">Pointer to the first byte to decode.</param>
  /// <param name="count">Number of bytes to decode.</param>
  /// <returns>The character count reported by the core routine.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
  [CLSCompliant(false)]
  public override unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // No decoder: count as a stand-alone, stateless operation.
      return GetCharCount(bytes, count, null);
  }
  254. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  255. // So if you fix this, fix the others. Currently those include:
  256. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  257. // parent method is safe
  /// <summary>
  /// Decodes a range of bytes from a byte array into a character array.
  /// </summary>
  /// <param name="bytes">The array containing the bytes to decode.</param>
  /// <param name="byteIndex">Index of the first byte to decode.</param>
  /// <param name="byteCount">Number of bytes to decode.</param>
  /// <param name="chars">The array that receives the decoded characters.</param>
  /// <param name="charIndex">Index in <paramref name="chars"/> at which writing starts.</param>
  /// <returns>
  /// 0 when <paramref name="byteCount"/> is 0; otherwise the value returned by the
  /// pointer-based core routine.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// An index or count is negative, the byte range extends past the end of
  /// <paramref name="bytes"/>, or <paramref name="charIndex"/> is greater than the array length.
  /// </exception>
  public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
      char[] chars, int charIndex)
  {
      // bytes is reported in preference to chars when both are null.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (byteIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - byteIndex < byteCount)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (charIndex < 0 || charIndex > chars.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
      }

      // Nothing to decode; this also avoids pinning an empty array.
      if (byteCount == 0)
      {
          return 0;
      }

      // The core routine is given the space remaining after charIndex,
      // not the size of the whole array.
      int room = chars.Length - charIndex;

      fixed (byte* pBytes = bytes)
      fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
      {
          return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, room, null);
      }
  }
  279. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  280. // So if you fix this, fix the others. Currently those include:
  281. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="bytes"/> into
  /// the character buffer starting at <paramref name="chars"/>.
  /// </summary>
  /// <param name="bytes">Pointer to the first byte to decode.</param>
  /// <param name="byteCount">Number of bytes to decode.</param>
  /// <param name="chars">Pointer to the start of the output buffer.</param>
  /// <param name="charCount">Size of the output buffer in characters.</param>
  /// <returns>The value returned by the core routine.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="charCount"/> or <paramref name="byteCount"/> is negative.
  /// </exception>
  [CLSCompliant(false)]
  public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
  {
      // Modifier order is "override unsafe" to agree with every other override in this class.

      // bytes is reported in preference to chars when both are null.
      if (bytes == null || chars == null)
      {
          throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
      }

      // charCount is reported in preference to byteCount when both are negative.
      if (charCount < 0 || byteCount < 0)
      {
          throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // No decoder: decode as a stand-alone, stateless operation.
      return GetChars(bytes, byteCount, chars, charCount, null);
  }
  292. // Returns a string containing the decoded representation of a range of
  293. // bytes in a byte array.
  294. //
  295. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  296. // So if you fix this, fix the others. Currently those include:
  297. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  298. // parent method is safe
  /// <summary>
  /// Returns a string containing the decoded representation of a range of bytes in a
  /// byte array.
  /// </summary>
  /// <param name="bytes">The array containing the bytes to decode.</param>
  /// <param name="index">Index of the first byte to decode.</param>
  /// <param name="count">Number of bytes to decode.</param>
  /// <returns>The decoded string, or <see cref="string.Empty"/> when <paramref name="count"/> is 0.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range
  /// extends past the end of <paramref name="bytes"/>.
  /// </exception>
  public override unsafe string GetString(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      // index is reported in preference to count when both are negative.
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // An empty range needs no decoding (and no pinning of an empty array).
      if (count == 0)
      {
          return string.Empty;
      }

      fixed (byte* pBytes = bytes)
      {
          return string.CreateStringFromEncoding(pBytes + index, count, this);
      }
  }
  314. //
  315. // End of standard methods copied from EncodingNLS.cs
  316. //
  /// <summary>
  /// Counts the bytes needed to encode <paramref name="count"/> characters by running
  /// the core encoder with a null output pointer.
  /// </summary>
  /// <param name="chars">Pointer to the first character; asserted non-null.</param>
  /// <param name="count">Number of characters; asserted non-negative.</param>
  /// <param name="baseEncoder">Encoder passed through to the core routine; may be null.</param>
  /// <returns>The value returned by the core GetBytes routine.</returns>
  internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
  {
      Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
      Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");

      // A null byte pointer with a zero-length buffer puts the core routine in counting mode.
      return GetBytes(chars, count, bytes: null, byteCount: 0, baseEncoder: baseEncoder);
  }
  /// <summary>
  /// Core UTF-7 encoder behind every GetBytes/GetByteCount overload. Directly encodable
  /// characters are copied through, '+' outside base64 mode becomes "+-", and everything
  /// else is written as modified base64 between a leading '+' and a closing '-'.
  /// </summary>
  /// <param name="chars">Pointer to the input characters; asserted non-null.</param>
  /// <param name="charCount">Number of input characters; asserted non-negative.</param>
  /// <param name="bytes">Pointer to the output buffer; null when only counting.</param>
  /// <param name="byteCount">Size of the output buffer; asserted non-negative.</param>
  /// <param name="baseEncoder">
  /// Optional stateful encoder (cast to the nested Encoder type). When present, leftover
  /// bits are read from it on entry and, unless only counting, written back on exit.
  /// </param>
  /// <returns>The Count reported by the byte buffer helper.</returns>
  internal sealed override unsafe int GetBytes(
      char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
  {
      Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
      Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
      Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");

      // Selects the low six bits, i.e. one base64 digit.
      const int SextetMask = 0x3F;

      UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;

      // pendingBits holds input bits not yet emitted; pendingCount says how many of them
      // are valid. pendingCount == -1 means "not in base64 mode".
      int pendingBits = 0;
      int pendingCount = -1;

      Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer(
          this, encoder, bytes, byteCount, chars, charCount);

      if (encoder != null)
      {
          pendingBits = encoder.bits;
          pendingCount = encoder.bitCount;

          // A previous call may have left whole sextets behind; emit them first.
          while (pendingCount >= 6)
          {
              pendingCount -= 6;

              // If this fails there will never really be enough room.
              bool stored = output.AddByte(_base64Bytes[(pendingBits >> pendingCount) & SextetMask]);
              if (!stored)
              {
                  ThrowBytesOverflow(encoder, output.Count == 0);
              }
          }
      }

      while (output.MoreData)
      {
          char ch = output.GetNextChar();

          if (ch < 0x80 && _directEncode[ch])
          {
              // Directly encodable character: leave base64 mode first if we are in it.
              if (pendingCount >= 0)
              {
                  if (pendingCount > 0)
                  {
                      // Flush the partial sextet, left-aligned and zero padded.
                      int padded = (pendingBits << (6 - pendingCount)) & SextetMask;
                      if (!output.AddByte(_base64Bytes[padded]))
                      {
                          break; // out of room; stop without throwing
                      }

                      pendingCount = 0;
                  }

                  // Close the base64 run.
                  if (!output.AddByte((byte)'-'))
                  {
                      break; // out of room; stop without throwing
                  }

                  pendingCount = -1;
              }

              // Now the character itself.
              if (!output.AddByte((byte)ch))
              {
                  break; // out of room; stop without throwing
              }
          }
          else if (pendingCount < 0 && ch == '+')
          {
              // A literal plus sign outside base64 mode is written as "+-".
              if (!output.AddByte((byte)'+', (byte)'-'))
              {
                  break; // out of room; stop without throwing
              }
          }
          else
          {
              if (pendingCount < 0)
              {
                  // Open a base64 run. Only 12 of the 16 bits of this char fit in the
                  // next two digits; the other 4 wait for more input or for the flush.
                  if (!output.AddByte((byte)'+'))
                  {
                      break; // out of room; stop without throwing
                  }

                  // In base64 mode now, with no bits stored yet.
                  pendingCount = 0;
              }

              // Append the 16 bits of this char to the accumulator.
              pendingBits = (pendingBits << 16) | ch;
              pendingCount += 16;

              while (pendingCount >= 6)
              {
                  pendingCount -= 6;
                  if (!output.AddByte(_base64Bytes[(pendingBits >> pendingCount) & SextetMask]))
                  {
                      // Those six bits were not written after all.
                      pendingCount += 6;

                      // Per the original note, the failed AddByte stepped the char position
                      // back, yet this char is still being processed; step forward again.
                      output.GetNextChar();
                      break; // not enough room for more digits
                  }
              }

              if (pendingCount >= 6)
              {
                  break; // did not have room to emit all complete sextets
              }
          }
      }

      // Leftover bits have to be written out when flushing. MustFlush may have been
      // cleared by ThrowBytesOverflow earlier if converting.
      if (pendingCount >= 0 && (encoder == null || encoder.MustFlush))
      {
          if (pendingCount > 0)
          {
              if (output.AddByte(_base64Bytes[(pendingBits << (6 - pendingCount)) & SextetMask]))
              {
                  // Spare bits emitted; nothing pending any more.
                  pendingCount = 0;
              }
          }

          // If the AddByte above failed while converting, this one fails as well.
          if (output.AddByte((byte)'-'))
          {
              // Base64 mode is switched off.
              pendingBits = 0;
              pendingCount = -1;
          }
          else
          {
              // Not successful: the state is kept for the next call. Per the original note,
              // AddByte decremented the char count, which has to stay the same.
              output.GetNextChar();
          }
      }

      // bytes == null means we were only counting, so the encoder must not be touched then.
      if (bytes != null && encoder != null)
      {
          // In the flush case the pending state was already cleared above.
          encoder.bits = pendingBits;
          encoder.bitCount = pendingCount;
          encoder._charsUsed = output.CharsUsed;
      }

      return output.Count;
  }
  /// <summary>
  /// Counts the characters produced by decoding <paramref name="count"/> bytes by running
  /// the core decoder with a null output pointer.
  /// </summary>
  /// <param name="bytes">Pointer to the first byte; asserted non-null.</param>
  /// <param name="count">Number of bytes; asserted non-negative.</param>
  /// <param name="baseDecoder">Decoder passed through to the core routine; may be null.</param>
  /// <returns>The value returned by the core GetChars routine.</returns>
  internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
  {
      Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
      Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

      // A null char pointer with a zero-length buffer puts the core routine in counting mode.
      return GetChars(bytes, count, chars: null, charCount: 0, baseDecoder: baseDecoder);
  }
  /// <summary>
  /// Core UTF-7 decoder behind every GetChars/GetCharCount overload. Bytes outside a
  /// base64 run are copied through, '+' opens a base64 run, "+-" yields a literal '+',
  /// and base64 digits are accumulated six bits at a time until 16 bits form a char.
  /// Bytes of 0x80 and above, and non-base64 bytes other than '-' inside a run, are
  /// handed to the fallback.
  /// </summary>
  /// <param name="bytes">Pointer to the input bytes; asserted non-null.</param>
  /// <param name="byteCount">Number of input bytes; asserted non-negative.</param>
  /// <param name="chars">Pointer to the output buffer; null when only counting.</param>
  /// <param name="charCount">Size of the output buffer; asserted non-negative.</param>
  /// <param name="baseDecoder">
  /// Optional stateful decoder (cast to the nested Decoder type). When present, its state
  /// is read on entry and, unless only counting, written back on exit.
  /// </param>
  /// <returns>The Count reported by the char buffer helper.</returns>
  internal sealed override unsafe int GetChars(
      byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
  {
      Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
      Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
      Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

      UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;

      Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(
          this, decoder, chars, charCount, bytes, byteCount);

      // pendingBits accumulates base64 payload; pendingCount is the number of valid bits,
      // with -1 meaning "not in base64 mode". afterPlus is true while the '+' that opened
      // the run has not yet been followed by a base64 digit.
      int pendingBits = 0;
      int pendingCount = -1;
      bool afterPlus = false;

      if (decoder != null)
      {
          pendingBits = decoder.bits;
          pendingCount = decoder.bitCount;
          afterPlus = decoder.firstByte;

          Debug.Assert(!afterPlus || decoder.bitCount <= 0,
              "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
      }

      // The decoder may carry a complete char that could not be output last time.
      if (pendingCount >= 16)
      {
          char carried = (char)((pendingBits >> (pendingCount - 16)) & 0xFFFF);
          if (!output.AddChar(carried))
          {
              // Always throw: at least one char of room is needed, even in Convert.
              ThrowCharsOverflow(decoder, true);
          }

          // That char is used up; drop its bits.
          pendingCount -= 16;
      }

      while (output.MoreData)
      {
          byte current = output.GetNextByte();
          int decoded;

          if (pendingCount >= 0)
          {
              // ---- Inside a modified base64 run ----
              sbyte sextet;
              if (current < 0x80 && (sextet = _base64Values[current]) >= 0)
              {
                  afterPlus = false;
                  pendingBits = (pendingBits << 6) | ((byte)sextet);
                  pendingCount += 6;

                  if (pendingCount < 16)
                  {
                      continue; // not enough bits for a char yet
                  }

                  decoded = (pendingBits >> (pendingCount - 16)) & 0xFFFF;
                  pendingCount -= 16;
              }
              else
              {
                  // Anything that is not a base64 digit ends the run.
                  pendingCount = -1;

                  if (current != '-')
                  {
                      // Either >= 0x80 (the table lookup above requires a value below 0x80)
                      // or simply not in the base64 table. RFC 1642 and the UTF-7 sample
                      // code in Unicode 2.0 would zero-extend such a byte; here it goes to
                      // the fallback. Chars are not updated unless the fallback succeeds.
                      if (!output.Fallback(current))
                      {
                          break; // stop here without throwing
                      }

                      continue; // that byte is dealt with
                  }

                  // A '-' directly after the opening '+' encodes a literal plus sign;
                  // otherwise the '-' only closes the run and produces nothing.
                  if (!afterPlus)
                  {
                      continue;
                  }

                  decoded = '+';
              }
          }
          else if (current == '+')
          {
              // Start of a base64 run, or the first half of "+-".
              pendingCount = 0;
              afterPlus = true;
              continue;
          }
          else
          {
              // ---- Plain byte outside any base64 run ----
              if (current >= 0x80)
              {
                  if (!output.Fallback(current))
                  {
                      break; // stop here without throwing
                  }

                  continue; // done falling back
              }

              decoded = current;
          }

          if (decoded >= 0 && !output.AddChar((char)decoded))
          {
              // No room. A plain char is simply retried on the next call. Inside a base64
              // run the byte is consumed anyway and its char is remembered in the bits.
              if (pendingCount >= 0)
              {
                  output.AdjustBytes(+1); // re-add the byte that the failed AddChar subtracted
                  pendingCount += 16;     // the char still sitting in pendingBits is needed later
              }

              break; // stop without throwing
          }
      }

      // chars == null means we were only counting, so the decoder must not be touched then.
      if (chars != null && decoder != null)
      {
          // MustFlush could have been cleared by ThrowCharsOverflow if converting and the
          // end of the buffer was not reached.
          if (decoder.MustFlush)
          {
              // The RFC does not say what to do with non-zero leftover bits; they are dropped.
              decoder.bits = 0;
              decoder.bitCount = -1;
              decoder.firstByte = false;
          }
          else
          {
              decoder.bits = pendingBits;
              decoder.bitCount = pendingCount;
              decoder.firstByte = afterPlus;
          }

          decoder._bytesUsed = output.BytesUsed;
      }

      // Without a decoder (or when counting) any hanging bits are simply ignored.
      return output.Count;
  }
  /// <summary>Creates a new stateful UTF-7 decoder bound to this encoding.</summary>
  /// <returns>A new instance of the nested Decoder class.</returns>
  public override System.Text.Decoder GetDecoder() => new UTF7Encoding.Decoder(this);
  /// <summary>Creates a new stateful UTF-7 encoder bound to this encoding.</summary>
  /// <returns>A new instance of the nested Encoder class.</returns>
  public override System.Text.Encoder GetEncoder() => new UTF7Encoding.Encoder(this);
  /// <summary>
  /// Returns an upper bound on the number of bytes produced by encoding
  /// <paramref name="charCount"/> characters: three bytes per character plus two.
  /// </summary>
  /// <param name="charCount">Number of characters to be encoded.</param>
  /// <returns><c>charCount * 3 + 2</c>.</returns>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="charCount"/> is negative, or the result does not fit in an <see cref="int"/>.
  /// </exception>
  public override int GetMaxByteCount(int charCount)
  {
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Reasoning behind the bound:
      //  * If no char can be encoded directly, each output byte carries 6 bits of a
      //    16-bit char, and two more bytes are needed for the shift-in ('+') and
      //    shift-out ('-') marks: 2 + Ceiling(charCount * 16 / 6) <= 2 + 3 * charCount.
      //  * The longest case alternates encoded and direct chars (5 + 1 + 5 ... bytes).
      //    Three bytes per char cover that too: a direct char can afford to turn base64
      //    mode off and back on.
      //  * With no input at all, a lone '-' may still be needed to leave base64 mode
      //    when MustFlush is true.
      //  * UTF7 encodes surrogates individually and does not care about mismatches,
      //    so every code point is encodable in UTF7.
      // The arithmetic is done in 64 bits so that overflow can be detected.
      long worstCase = 3L * charCount + 2L;

      if (worstCase > int.MaxValue)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
      }

      return (int)worstCase;
  }
  /// <summary>
  /// Returns an upper bound on the number of characters produced by decoding
  /// <paramref name="byteCount"/> bytes.
  /// </summary>
  /// <param name="byteCount">Number of bytes to be decoded.</param>
  /// <returns><paramref name="byteCount"/>, or 1 when <paramref name="byteCount"/> is 0.</returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
  public override int GetMaxCharCount(int byteCount)
  {
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // The worst case is one char per byte. The minimum of 1 leaves room for leftover
      // bits when a decoder is being flushed. Extra bits are ignored (per spec), so
      // there are no unknown chars to account for in this direction.
      return byteCount == 0 ? 1 : byteCount;
  }
  // Of all the amazing things... this class MUST be named Decoder so that our com name
  // for System.Text.Decoder doesn't change.
  private sealed class Decoder : DecoderNLS
  {
      /*private*/
      internal int bits;

      /*private*/
      internal int bitCount;

      /*private*/
      internal bool firstByte;

      public Decoder(UTF7Encoding encoding) : base(encoding)
      {
          // base calls reset
      }

      // Puts the decoder back into its initial state: no accumulated bits, bitCount at its
      // -1 sentinel, firstByte cleared, and any existing fallback buffer reset.
      public override void Reset()
      {
          this.firstByte = false;
          this.bitCount = -1;
          this.bits = 0;
          _fallbackBuffer?.Reset();
      }

      // Anything left in our decoder?
      // NOTE: This forces the last -, which some encoder might not encode. If we
      // don't see it we don't think we're done reading.
      internal override bool HasState => this.bitCount != -1;
  }
  // Of all the amazing things... this class MUST be named Encoder so that our com name
  // for System.Text.Encoder doesn't change.
  private sealed class Encoder : EncoderNLS
  {
      /*private*/
      internal int bits;

      /*private*/
      internal int bitCount;

      public Encoder(UTF7Encoding encoding) : base(encoding)
      {
          // base calls reset
      }

      // Puts the encoder back into its initial state: bits cleared, bitCount at its -1
      // sentinel, and any existing fallback buffer reset.
      public override void Reset()
      {
          this.bits = 0;
          this.bitCount = -1;
          _fallbackBuffer?.Reset();
      }

      // Anything left in our encoder?
      // There is no state only when bits is 0 and bitCount is at its -1 sentinel.
      internal override bool HasState => !(this.bits == 0 && this.bitCount == -1);
  }
  // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
  // and turn off base64 mode if it was in that mode. We still exit the mode, but now we
  // go through a fallback.
  private sealed class DecoderUTF7Fallback : DecoderFallback
  {
      // Construction. There is nothing to configure.
      public DecoderUTF7Fallback()
      {
      }

      // Hands out a new buffer bound to this fallback.
      public override DecoderFallbackBuffer CreateFallbackBuffer()
      {
          return new DecoderUTF7FallbackBuffer(this);
      }

      // Maximum number of characters that this instance of this fallback could return:
      // 1 char per bad byte.
      public override int MaxCharCount => 1;

      // Any other DecoderUTF7Fallback compares equal; anything else (including null)
      // does not.
      public override bool Equals(object value)
      {
          return value is DecoderUTF7Fallback;
      }

      // Constant hash code, matching the fact that all instances compare equal.
      public override int GetHashCode() => 984;
  }
  // Fallback buffer that hands the single unknown byte back as a char.
  private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
  {
      // The char produced from the unknown byte.
      private char _fallbackChar = '\0';

      // Chars still to be returned; negative when no fallback is in progress.
      private int _remaining = -1;

      // Number of chars in the current fallback; used as the upper bound in MovePrevious.
      private int _size;

      // Construction. The fallback argument is not used.
      public DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)
      {
      }

      // Fallback Methods

      // Starts a fallback for the given unknown byte. Returns false (buffering nothing)
      // when the byte is 0, true otherwise.
      public override bool Fallback(byte[] bytesUnknown, int index)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(_remaining < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
          Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");

          // Go ahead and get our fallback
          _fallbackChar = (char)bytesUnknown[0];

          // Any of the fallback characters can be handled except for 0
          if (_fallbackChar == '\0')
          {
              return false;
          }

          _size = 1;
          _remaining = 1;
          return true;
      }

      // Returns the buffered char while any remain, then (char)0. The counter is
      // decremented on every call, whether or not a char was returned.
      public override char GetNextChar()
      {
          int before = _remaining;
          _remaining = before - 1;

          if (before > 0)
          {
              return _fallbackChar;
          }

          // Note: this means that 0 in UTF7 stream will never be emitted.
          return '\0';
      }

      // Steps back one char if the counter is non-negative; returns true if we were
      // allowed to do this.
      public override bool MovePrevious()
      {
          if (_remaining < 0)
          {
              return false;
          }

          _remaining++;
          return _remaining >= 0 && _remaining <= _size;
      }

      // Return # of chars left in this fallback
      public override int Remaining
      {
          get
          {
              return _remaining <= 0 ? 0 : _remaining;
          }
      }

      // Clear the buffer
      public override unsafe void Reset()
      {
          _remaining = -1;
          byteStart = null;
      }

      // This version just counts the fallback and doesn't actually copy anything.
      // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
      // array, and we might need the index, hence the byte*
      internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(_remaining < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");

          if (bytes.Length != 1)
          {
              throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
          }

          // Can't fallback a byte 0, so return 0 for that case, 1 otherwise.
          if (bytes[0] == 0)
          {
              return 0;
          }

          return 1;
      }
  }
  810. }
  811. }