UTF7Encoding.cs 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. //
  5. // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
  6. //
  7. using System.Diagnostics;
  8. using System.Runtime.InteropServices;
  9. namespace System.Text
  10. {
  11. public class UTF7Encoding : Encoding
  12. {
  // The base64 alphabet; a symbol's position in this string is its 6-bit value (0..63).
  private const string base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  // Characters that are always written directly (never base64-shifted) in UTF7.
  private const string directChars = "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

  // Characters that are written directly only when the instance allows optionals.
  private const string optionalChars = "!\"#$%&*;<=>@[]^_`{|}";

  // Shared default instance used by Encoding.UTF7. Being a static field initializer,
  // it is not constructed until a static member of this class is first referenced.
  internal static readonly UTF7Encoding s_default = new UTF7Encoding();

  // base64Chars as bytes, indexed by 6-bit value. Filled in by MakeTables.
  private byte[] _base64Bytes = null!;

  // Reverse lookup with 128 entries, indexed by code point: the 6-bit value (0..63)
  // of a base64 symbol, or -1 when the code point is not a base64 symbol.
  // Filled in by MakeTables.
  private sbyte[] _base64Values = null!;

  // 128 entries, indexed by code point: true when that code point (below 0x80)
  // may be written directly. Filled in by MakeTables.
  private bool[] _directEncode = null!;

  // Whether the characters in optionalChars are also written directly.
  private readonly bool _allowOptionals;

  // Code page number passed to the Encoding base constructor.
  private const int UTF7_CODEPAGE = 65000;
  // Creates a UTF7 encoding that does not write the optional characters directly.
  public UTF7Encoding() : this(allowOptionals: false)
  {
  }
  // Creates a UTF7 encoding.
  //   allowOptionals: when true, the characters listed in optionalChars are written
  //                   directly instead of being base64-shifted.
  public UTF7Encoding(bool allowOptionals) : base(UTF7_CODEPAGE)
  {
      // Record the option first: MakeTables reads it when building the direct-encode table.
      this._allowOptionals = allowOptionals;
      this.MakeTables();
  }
  // Builds the three lookup tables used by the encoder and decoder:
  // _base64Bytes (value -> symbol), _base64Values (code point -> value or -1)
  // and _directEncode (code point -> may be written directly).
  private void MakeTables()
  {
      byte[] symbols = new byte[64];
      sbyte[] values = new sbyte[128];

      // Every code point starts out as "not a base64 symbol".
      for (int codePoint = 0; codePoint < values.Length; codePoint++)
      {
          values[codePoint] = -1;
      }

      // Fill the forward and reverse base64 tables in one pass.
      for (int value = 0; value < symbols.Length; value++)
      {
          byte symbol = (byte)base64Chars[value];
          symbols[value] = symbol;
          values[symbol] = (sbyte)value;
      }

      bool[] direct = new bool[128];
      foreach (char ch in directChars)
      {
          direct[ch] = true;
      }

      if (_allowOptionals)
      {
          foreach (char ch in optionalChars)
          {
              direct[ch] = true;
          }
      }

      _base64Bytes = symbols;
      _base64Values = values;
      _directEncode = direct;
  }
  73. // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
  internal sealed override void SetDefaultFallbacks()
  {
      // The encoder fallback is set only because Encoding expects one: surrogates are
      // encoded individually and unmatched ones are never checked for, so encoding
      // never falls back.
      encoderFallback = new EncoderReplacementFallback(string.Empty);

      // Decoding uses the UTF7-specific fallback.
      decoderFallback = new DecoderUTF7Fallback();
  }
  // Two UTF7 encodings are equal when they agree on the allow-optionals setting
  // and on both fallbacks. Anything that is not a UTF7Encoding is unequal.
  public override bool Equals(object? value)
  {
      if (!(value is UTF7Encoding other))
      {
          return false;
      }

      if (_allowOptionals != other._allowOptionals)
      {
          return false;
      }

      if (!EncoderFallback.Equals(other.EncoderFallback))
      {
          return false;
      }

      return DecoderFallback.Equals(other.DecoderFallback);
  }
  92. // Compared to all the other encodings, variations of UTF7 are unlikely
  // Combines the code page with the hash codes of both fallbacks.
  // The allow-optionals setting is not part of the hash.
  public override int GetHashCode()
  {
      int hash = this.CodePage;
      hash += this.EncoderFallback.GetHashCode();
      hash += this.DecoderFallback.GetHashCode();
      return hash;
  }
  97. // The following methods are copied from EncodingNLS.cs.
  98. // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  99. // These should be kept in sync for the following classes:
  100. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  101. // Returns the number of bytes required to encode a range of characters in
  102. // a character array.
  103. //
  104. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  105. // So if you fix this, fix the others. Currently those include:
  106. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  107. // parent method is safe
  // Counts the bytes needed to encode `count` characters of `chars` starting at `index`.
  // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
  // for a negative index/count or a range that does not fit in the array.
  public override unsafe int GetByteCount(char[] chars, int index, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      // Pin the array and delegate to the pointer-based worker (no encoder state).
      fixed (char* pChars = chars)
      {
          return GetByteCount(pChars + index, count, null);
      }
  }
  124. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  125. // So if you fix this, fix the others. Currently those include:
  126. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  127. // parent method is safe
  // Counts the bytes needed to encode the whole string.
  // Throws ArgumentNullException when `s` is null.
  public override unsafe int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      int length = s.Length;

      // Pin the string and delegate to the pointer-based worker (no encoder state).
      fixed (char* pChars = s)
      {
          return GetByteCount(pChars, length, null);
      }
  }
  136. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  137. // So if you fix this, fix the others. Currently those include:
  138. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Pointer overload: counts the bytes needed to encode `count` characters at `chars`.
  // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException
  // for a negative count.
  [CLSCompliant(false)]
  public override unsafe int GetByteCount(char* chars, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Delegate to the worker with no encoder state.
      return GetByteCount(chars, count, null);
  }
  150. // Parent method is safe.
  151. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  152. // So if you fix this, fix the others. Currently those include:
  153. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Encodes `charCount` characters of `s` starting at `charIndex` into `bytes`
  // starting at `byteIndex`, and returns the number of bytes written.
  // Throws ArgumentNullException for a null string/array and
  // ArgumentOutOfRangeException for negative or out-of-range indices and counts.
  public override unsafe int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (s.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Room available in the destination from byteIndex onwards.
      int byteCount = bytes.Length - byteIndex;

      // Pin both buffers and delegate to the pointer-based worker (no encoder state).
      fixed (char* pChars = s)
      {
          fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
          {
              return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
          }
      }
  }
  169. // Encodes a range of characters in a character array into a range of bytes
  170. // in a byte array. An exception occurs if the byte array is not large
  171. // enough to hold the complete encoding of the characters. The
  172. // GetByteCount method can be used to determine the exact number of
  173. // bytes that will be produced for a given range of characters.
  174. // Alternatively, the GetMaxByteCount method can be used to
  175. // determine the maximum number of bytes that will be produced for a given
  176. // number of characters, regardless of the actual character values.
  177. //
  178. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  179. // So if you fix this, fix the others. Currently those include:
  180. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  181. // parent method is safe
  // Encodes `charCount` characters of `chars` starting at `charIndex` into `bytes`
  // starting at `byteIndex`, and returns the number of bytes written.
  // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
  // for negative or out-of-range indices and counts.
  public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Nothing to encode; also avoids pinning an empty array.
      if (charCount == 0)
      {
          return 0;
      }

      // byteCount is the space left after byteIndex, not the size of the whole array.
      int byteCount = bytes.Length - byteIndex;

      // Pin both buffers and delegate to the pointer-based worker (no encoder state).
      fixed (char* pChars = chars)
      {
          fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
          {
              return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
          }
      }
  }
  203. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  204. // So if you fix this, fix the others. Currently those include:
  205. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Pointer overload: encodes `charCount` characters at `chars` into at most
  // `byteCount` bytes at `bytes` and returns the number of bytes written.
  // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException
  // for a negative count.
  [CLSCompliant(false)]
  public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
  {
      // `bytes` is checked before `chars`, so it is the one reported when both are null.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Delegate to the worker with no encoder state.
      return GetBytes(chars, charCount, bytes, byteCount, null);
  }
  216. // Returns the number of characters produced by decoding a range of bytes
  217. // in a byte array.
  218. //
  219. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  220. // So if you fix this, fix the others. Currently those include:
  221. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  222. // parent method is safe
  // Counts the characters produced by decoding `count` bytes of `bytes` starting at `index`.
  // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
  // for a negative index/count or a range that does not fit in the array.
  public override unsafe int GetCharCount(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      // Pin the array and delegate to the pointer-based worker (no decoder state).
      fixed (byte* pBytes = bytes)
      {
          return GetCharCount(pBytes + index, count, null);
      }
  }
  239. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  240. // So if you fix this, fix the others. Currently those include:
  241. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Pointer overload: counts the characters produced by decoding `count` bytes at `bytes`.
  // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException
  // for a negative count.
  [CLSCompliant(false)]
  public override unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Delegate to the worker with no decoder state.
      return GetCharCount(bytes, count, null);
  }
  252. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  253. // So if you fix this, fix the others. Currently those include:
  254. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  255. // parent method is safe
  // Decodes `byteCount` bytes of `bytes` starting at `byteIndex` into `chars`
  // starting at `charIndex`, and returns the number of characters written.
  // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
  // for negative or out-of-range indices and counts.
  public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
      char[] chars, int charIndex)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (byteIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - byteIndex < byteCount)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (charIndex < 0 || charIndex > chars.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
      }

      // Nothing to decode; also avoids pinning an empty array.
      if (byteCount == 0)
      {
          return 0;
      }

      // charCount is the space left after charIndex, not the size of the whole array.
      int charCount = chars.Length - charIndex;

      // Pin both buffers and delegate to the pointer-based worker (no decoder state).
      fixed (byte* pBytes = bytes)
      {
          fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
          {
              return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
          }
      }
  }
  277. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  278. // So if you fix this, fix the others. Currently those include:
  279. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  // Pointer overload: decodes `byteCount` bytes at `bytes` into at most `charCount`
  // characters at `chars` and returns the number of characters written.
  // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException
  // for a negative count.
  [CLSCompliant(false)]
  public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // `charCount` is checked before `byteCount`, so it is the one reported when both are negative.
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Delegate to the worker with no decoder state.
      return GetChars(bytes, byteCount, chars, charCount, null);
  }
  290. // Returns a string containing the decoded representation of a range of
  291. // bytes in a byte array.
  292. //
  293. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  294. // So if you fix this, fix the others. Currently those include:
  295. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  296. // parent method is safe
  // Decodes `count` bytes of `bytes` starting at `index` into a new string.
  // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
  // for a negative index/count or a range that does not fit in the array.
  public override unsafe string GetString(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Empty input yields the empty string; also avoids pinning an empty array.
      if (count == 0)
      {
          return string.Empty;
      }

      fixed (byte* pBytes = bytes)
      {
          return string.CreateStringFromEncoding(pBytes + index, count, this);
      }
  }
  312. //
  313. // End of standard methods copied from EncodingNLS.cs
  314. //
  // Counting worker: runs the encoder with no output buffer so that only the
  // byte count is produced.
  internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS? baseEncoder)
  {
      Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
      Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");

      // A null destination with zero capacity puts GetBytes in counting mode.
      byte* noDestination = null;
      return GetBytes(chars, count, noDestination, 0, baseEncoder);
  }
  // Encoding worker shared by all GetBytes/GetByteCount overloads.
  //   chars/charCount  : input characters.
  //   bytes/byteCount  : destination; bytes == null means "count only".
  //   baseEncoder      : optional stateful encoder whose leftover bits are resumed
  //                      here and stored back at the end (not when only counting).
  // Returns the number of bytes written (or that would be written when counting).
  internal sealed override unsafe int GetBytes(
      char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS? baseEncoder)
  {
      Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
      Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
      Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");

      UTF7Encoding.Encoder? state = (UTF7Encoding.Encoder?)baseEncoder;

      // pendingCount == -1 means "not in base64 mode"; otherwise it is the number of
      // low-order bits of pendingBits that have not been written yet.
      int pendingBits = 0;
      int pendingCount = -1;

      Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer(
          this, state, bytes, byteCount, chars, charCount);

      if (state != null)
      {
          // Resume from the state left by a previous call.
          pendingBits = state.bits;
          pendingCount = state.bitCount;

          // A previous call may have left more than one symbol's worth of bits.
          while (pendingCount >= 6)
          {
              pendingCount -= 6;

              // No room even for the leftovers: report overflow.
              if (!output.AddByte(_base64Bytes[(pendingBits >> pendingCount) & 0x3F]))
              {
                  ThrowBytesOverflow(state, output.Count == 0);
              }
          }
      }

      while (output.MoreData)
      {
          char ch = output.GetNextChar();

          if (ch < 0x80 && _directEncode[ch])
          {
              // Directly encodable character: leave base64 mode first if we are in it.
              if (pendingCount >= 0)
              {
                  if (pendingCount > 0)
                  {
                      // Pad the remaining bits on the right to form one last symbol.
                      if (!output.AddByte(_base64Bytes[(pendingBits << (6 - pendingCount)) & 0x3F]))
                      {
                          break; // Out of room; stop without throwing
                      }

                      pendingCount = 0;
                  }

                  // Terminate the shifted run with '-'.
                  if (!output.AddByte((byte)'-'))
                  {
                      break; // Out of room; stop without throwing
                  }

                  pendingCount = -1;
              }

              // Now write the character itself.
              if (!output.AddByte((byte)ch))
              {
                  break; // Out of room; stop without throwing
              }
          }
          else if (pendingCount < 0 && ch == '+')
          {
              // A '+' outside base64 mode is written as the two bytes "+-".
              if (!output.AddByte((byte)'+', (byte)'-'))
              {
                  break; // Out of room; stop without throwing
              }
          }
          else
          {
              if (pendingCount < 0)
              {
                  // Enter base64 mode with '+'.
                  if (!output.AddByte((byte)'+'))
                  {
                      break; // Out of room; stop without throwing
                  }

                  // In base64 mode now, with no bits stored yet.
                  pendingCount = 0;
              }

              // Append the 16 bits of this character to the pending bits.
              pendingBits = (pendingBits << 16) | ch;
              pendingCount += 16;

              // Write every complete 6-bit group; the remainder waits for the next character.
              while (pendingCount >= 6)
              {
                  pendingCount -= 6;
                  if (!output.AddByte(_base64Bytes[(pendingBits >> pendingCount) & 0x3F]))
                  {
                      pendingCount += 6;     // These bits were not written after all
                      output.GetNextChar();  // AddByte backed up one char on failure, but this
                                             // char is still being processed, so step forward again
                      break;                 // Not enough room for the bytes
                  }
              }

              if (pendingCount >= 6)
              {
                  break; // The inner loop ran out of room
              }
          }
      }

      // Flush leftover bits and close the shifted run when there is no encoder or it must flush.
      // (MustFlush may have been cleared by ThrowBytesOverflow earlier when converting.)
      if (pendingCount >= 0 && (state == null || state.MustFlush))
      {
          // Write the spare bits, padded on the right, if there are any and there is room.
          if (pendingCount > 0 && output.AddByte(_base64Bytes[(pendingBits << (6 - pendingCount)) & 0x3F]))
          {
              pendingCount = 0;
          }

          // If the write above failed for lack of room, this one fails as well.
          if (output.AddByte((byte)'-'))
          {
              // Base64 mode is now off.
              pendingBits = 0;
              pendingCount = -1;
          }
          else
          {
              // State is kept for the next call. AddByte decremented the char count
              // on failure, but it has to stay the same, so step forward again.
              output.GetNextChar();
          }
      }

      // Store state back only when really encoding (bytes == null means counting).
      if (bytes != null && state != null)
      {
          // In the flushed case bits and bit count were already cleared above.
          state.bits = pendingBits;
          state.bitCount = pendingCount;
          state._charsUsed = output.CharsUsed;
      }

      return output.Count;
  }
  // Counting worker: runs the decoder with no output buffer so that only the
  // character count is produced.
  internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? baseDecoder)
  {
      Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
      Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

      // A null destination with zero capacity puts GetChars in counting mode.
      char* noDestination = null;
      return GetChars(bytes, count, noDestination, 0, baseDecoder);
  }
  // Decoding worker shared by all GetChars/GetCharCount overloads.
  //   bytes/byteCount  : input bytes.
  //   chars/charCount  : destination; chars == null means "count only".
  //   baseDecoder      : optional stateful decoder whose leftover bits are resumed
  //                      here and stored back at the end (not when only counting).
  // Returns the number of characters written (or that would be written when counting).
  internal sealed override unsafe int GetChars(
      byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS? baseDecoder)
  {
      Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
      Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
      Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

      UTF7Encoding.Decoder? state = (UTF7Encoding.Decoder?)baseDecoder;

      Encoding.EncodingCharBuffer output = new Encoding.EncodingCharBuffer(
          this, state, chars, charCount, bytes, byteCount);

      // pendingCount == -1 means "not in base64 mode"; otherwise it is the number of
      // low-order bits of pendingBits that have not been turned into a char yet.
      // justShifted is true while the previous byte was the '+' that opened the run.
      int pendingBits = 0;
      int pendingCount = -1;
      bool justShifted = false;

      if (state != null)
      {
          // Resume from the state left by a previous call.
          pendingBits = state.bits;
          pendingCount = state.bitCount;
          justShifted = state.firstByte;

          Debug.Assert(!justShifted || state.bitCount <= 0,
              "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
      }

      // A previous call may have decoded a full char that did not fit; write it first.
      if (pendingCount >= 16)
      {
          char carried = (char)((pendingBits >> (pendingCount - 16)) & 0xFFFF);
          if (!output.AddChar(carried))
          {
              ThrowCharsOverflow(state, true); // Always throw: at least 1 char of room is needed, even in Convert
          }

          // That char is consumed; keep only the extra bits.
          pendingCount -= 16;
      }

      while (output.MoreData)
      {
          byte input = output.GetNextByte();
          int decoded;

          if (pendingCount >= 0)
          {
              // Inside a modified-base64 run.
              sbyte sextet;
              if (input < 0x80 && ((sextet = _base64Values[input]) >= 0))
              {
                  justShifted = false;
                  pendingBits = (pendingBits << 6) | ((byte)sextet);
                  pendingCount += 6;

                  // Not enough bits for a char yet; read the next byte.
                  if (pendingCount < 16)
                  {
                      continue;
                  }

                  decoded = (pendingBits >> (pendingCount - 16)) & 0xFFFF;
                  pendingCount -= 16;
              }
              else
              {
                  // Any byte that is not a base64 symbol ends the run.
                  pendingCount = -1;

                  if (input != '-')
                  {
                      // Not a base64 symbol and not the '-' terminator (this includes
                      // bytes >= 0x80): hand it to the fallback. Chars are only
                      // updated if the fallback succeeds.
                      if (!output.Fallback(input))
                      {
                          break; // Stop here without throwing
                      }

                      // That byte has been dealt with.
                      continue;
                  }

                  // '-' that merely closes a run produces no output.
                  if (!justShifted)
                  {
                      continue;
                  }

                  // '-' immediately after '+': the sequence "+-" stands for '+'.
                  decoded = '+';
              }
          }
          else if (input == '+')
          {
              // Start of a modified-base64 run, or the first half of "+-".
              pendingCount = 0;
              justShifted = true;
              continue;
          }
          else
          {
              // Outside a run: a plain byte.
              if (input >= 0x80)
              {
                  // Not valid as a direct character: hand it to the fallback.
                  if (!output.Fallback(input))
                  {
                      break; // Stop here without throwing
                  }

                  continue;
              }

              decoded = input;
          }

          if (decoded >= 0)
          {
              if (!output.AddChar((char)decoded))
              {
                  // No room. A plain char is simply retried on the next call. Inside a
                  // run the byte is still consumed and its char is kept in the pending bits.
                  if (pendingCount >= 0)
                  {
                      output.AdjustBytes(+1); // Re-add the byte that AddChar subtracted when it failed
                      pendingCount += 16;     // The char still sitting in the bits is needed later
                  }

                  break; // Stop here without throwing
              }
          }
      }

      // Store state back only when really decoding (chars == null means counting).
      if (chars != null && state != null)
      {
          // MustFlush may have been cleared by ThrowCharsOverflow when converting
          // and the end of the buffer was not reached.
          if (state.MustFlush)
          {
              // Leftover bits are dropped when flushing.
              state.bits = 0;
              state.bitCount = -1;
              state.firstByte = false;
          }
          else
          {
              state.bits = pendingBits;
              state.bitCount = pendingCount;
              state.firstByte = justShifted;
          }

          state._bytesUsed = output.BytesUsed;
      }

      // Without a decoder (or when counting) any hanging bits are ignored.
      return output.Count;
  }
  // Returns a new stateful decoder bound to this encoding.
  public override System.Text.Decoder GetDecoder() => new UTF7Encoding.Decoder(this);
  // Returns a new stateful encoder bound to this encoding.
  public override System.Text.Encoder GetEncoder() => new UTF7Encoding.Encoder(this);
  // Returns an upper bound on the bytes needed to encode `charCount` characters.
  // Throws ArgumentOutOfRangeException when `charCount` is negative or the bound
  // does not fit in an int.
  public override int GetMaxByteCount(int charCount)
  {
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Budget: 2 bytes to switch base64 mode on ('+') and off ('-'), plus 3 bytes per char.
      // A shifted char needs ceil(16 / 6) = 3 bytes at most, and for a directly encoded
      // char 3 bytes are enough to leave base64 mode, write the char and re-enter it.
      // The extra 2 also cover the lone '-' that a flush can produce with no input.
      // Surrogates are encoded individually, so every code point is encodable.
      // Computed in 64 bits so the overflow check below is reliable.
      long worstCase = 2L + 3L * charCount;

      if (worstCase > int.MaxValue)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
      }

      return (int)worstCase;
  }
  // Returns an upper bound on the characters produced by decoding `byteCount` bytes.
  // Throws ArgumentOutOfRangeException when `byteCount` is negative.
  public override int GetMaxCharCount(int byteCount)
  {
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Worst case is one char per byte, but never less than 1: a flushing decoder
      // may still have to emit a char from leftover bits when given no input.
      return byteCount == 0 ? 1 : byteCount;
  }
  641. // Of all the amazing things... This MUST be Decoder so that our com name
  642. // for System.Text.Decoder doesn't change
  private sealed class Decoder : DecoderNLS
  {
      // Bits read from base64 symbols that have not been turned into chars yet.
      internal int bits;

      // Number of valid bits in `bits`; -1 means "not inside a base64 run".
      internal int bitCount;

      // True while the last byte seen was the '+' that opened a base64 run.
      internal bool firstByte;

      // NOTE(review): the original comment says the base constructor calls Reset();
      // that is not visible from here - confirm against DecoderNLS.
      public Decoder(UTF7Encoding encoding) : base(encoding)
      {
      }

      // Returns the decoder to its initial state (outside any base64 run).
      public override void Reset()
      {
          bits = 0;
          bitCount = -1;
          firstByte = false;
          _fallbackBuffer?.Reset();
      }

      // State is pending whenever we are inside a base64 run. This effectively
      // requires the closing '-', which some encoders might not emit; without it
      // the decoder does not consider the input finished.
      internal override bool HasState => bitCount != -1;
  }
  // Of all the amazing things... This MUST be named Encoder so that our COM name
  // for System.Text.Encoder doesn't change.
  private sealed class Encoder : EncoderNLS
  {
      // Both state fields are declared internal although they were marked as conceptually
      // private in the original source (presumably so the enclosing encoding can read and
      // write them directly -- confirm against the callers).

      // Accumulated bits of encoder state; 0 after a reset.
      internal int bits;

      // Bit counter for the encoder state; -1 is the reset value.
      internal int bitCount;

      public Encoder(UTF7Encoding encoding) : base(encoding)
      {
          // Nothing to do here: the base constructor calls Reset().
      }

      // Returns the encoder to its initial state and clears any pending fallback.
      public override void Reset()
      {
          bits = 0;
          bitCount = -1;
          _fallbackBuffer?.Reset();
      }

      // Anything left in our encoder? True unless both fields still hold their reset values.
      internal override bool HasState
      {
          get
          {
              bool isPristine = bits == 0 && bitCount == -1;
              return !isPristine;
          }
      }
  }
  // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
  // and turn off base64 mode if it was in that mode. We still exit the mode, but now we fallback.
  private sealed class DecoderUTF7Fallback : DecoderFallback
  {
      // Maximum number of characters that this instance of this fallback could return:
      // 1 char per bad byte.
      public override int MaxCharCount
      {
          get { return 1; }
      }

      // Hands out a fresh buffer that performs the actual UTF7 fallback.
      public override DecoderFallbackBuffer CreateFallbackBuffer()
      {
          return new DecoderUTF7FallbackBuffer();
      }

      // Every DecoderUTF7Fallback is equal to every other one.
      public override bool Equals(object? value)
      {
          return value is DecoderUTF7Fallback;
      }

      // Constant hash code, consistent with Equals treating all instances as equal.
      public override int GetHashCode()
      {
          return 984;
      }
  }
  // Fallback buffer used by DecoderUTF7Fallback: a bad byte is handed back as the char with
  // the same value, except for byte 0, which cannot be fallen back.
  private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
  {
      // The char produced by the most recent Fallback call (the bad byte widened to a char).
      private char cFallback = (char)0;

      // Number of fallback chars still to be returned; negative when no fallback is pending.
      private int iCount = -1;

      // Number of chars produced by the most recent successful Fallback call.
      private int iSize;

      // Fallback Methods

      // Starts a fallback for a single unknown byte.
      // Returns false (nothing to emit) when the byte is 0, true otherwise.
      public override bool Fallback(byte[] bytesUnknown, int index)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
          Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");

          // Go ahead and get our fallback
          cFallback = (char)bytesUnknown[0];

          // Any of the fallback characters can be handled except for 0
          if (cFallback == 0)
          {
              return false;
          }

          iCount = iSize = 1;
          return true;
      }

      // Returns the pending fallback char, or (char)0 when nothing is left.
      public override char GetNextChar()
      {
          if (iCount-- > 0)
          {
              return cFallback;
          }

          // Note: this means that 0 in UTF7 stream will never be emitted.
          return (char)0;
      }

      // Steps back one char so that it is returned again by GetNextChar.
      // Returns true if we were allowed to do this.
      public override bool MovePrevious()
      {
          // Only rewind when a fallback is active and at least one char has been consumed.
          // The counter is deliberately left untouched when the rewind is refused: bumping
          // it first (as was done previously) pushed iCount past iSize, so Remaining
          // over-reported and GetNextChar handed out the fallback char more than once.
          if (iCount >= 0 && iCount < iSize)
          {
              iCount++;
              return true;
          }

          return false;
      }

      // Return # of chars left in this fallback
      public override int Remaining => (iCount > 0) ? iCount : 0;

      // Clear the buffer
      public override unsafe void Reset()
      {
          iCount = -1;
          byteStart = null;
      }

      // This version just counts the fallback and doesn't actually copy anything.
      // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
      // array, and we might need the index, hence the byte*
      internal override unsafe int InternalFallback(byte[] bytes, byte* pBytes)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");

          if (bytes.Length != 1)
          {
              throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
          }

          // Can't fallback a byte 0, so return for that case, 1 otherwise.
          return bytes[0] == 0 ? 0 : 1;
      }
  }
  764. }
  765. }