EncodingTable.cs 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Collections;
  5. using System.Diagnostics;
  6. using System.Threading;
  7. namespace System.Text
  8. {
  9. //
  10. // Data table for encoding classes. Used by System.Text.Encoding.
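  // This class keeps a synchronized hashtable that maps encoding names to code pages
  // and an array of code page data items that is filled in on demand, so that
  // System.Text.Encoding can retrieve the data either by code page value or by name.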
  13. //
  14. internal static partial class EncodingTable
  15. {
  16. private static readonly Hashtable s_nameToCodePage = Hashtable.Synchronized(new Hashtable(StringComparer.OrdinalIgnoreCase));
  17. private static CodePageDataItem?[]? s_codePageToCodePageData;
  // GetCodePageFromName
  //
  // Given an encoding name, returns the code page number for that encoding.
  // Results are remembered in s_nameToCodePage, whose keys are compared with
  // StringComparer.OrdinalIgnoreCase.
  //
  // Arguments:
  //   name - the name of the encoding.
  // Returns:
  //   The code page for the encoding.
  // Exceptions:
  //   ArgumentNullException if name is null.
  //   ArgumentException (thrown by InternalGetCodePageFromName) if name is not
  //   a valid encoding name.
  internal static int GetCodePageFromName(string name)
  {
      if (name is null)
      {
          throw new ArgumentNullException(nameof(name));
      }

      // Fast path: this name has been resolved before. Only boxed ints are ever
      // stored in the table, so a hit always unboxes to the code page.
      if (s_nameToCodePage[name] is int cachedCodePage)
      {
          return cachedCodePage;
      }

      // Slow path: search the static name table, then remember the answer.
      int resolvedCodePage = InternalGetCodePageFromName(name);
      s_nameToCodePage[name] = resolvedCodePage;
      return resolvedCodePage;
  }
  // Finds the code page for an encoding name by binary searching the name table.
  //
  // Entry i of the table is the slice of s_encodingNames that starts at
  // s_encodingNameIndices[i] and ends just before s_encodingNameIndices[i + 1];
  // its code page is s_codePagesByName[i]. The input is lower-cased with the
  // invariant culture and compared ordinally.
  // NOTE(review): the search presumes the table is stored in ascending ordinal
  // order of lower-case names - confirm against the table definition.
  //
  // Throws ArgumentException if no entry matches name.
  private static int InternalGetCodePageFromName(string name)
  {
      int low = 0;
      int high = s_encodingNameIndices.Length - 2;

      Debug.Assert(s_encodingNameIndices.Length == s_codePagesByName.Length + 1);
      Debug.Assert(s_encodingNameIndices[^1] == s_encodingNames.Length);

      ReadOnlySpan<char> target = name.ToLowerInvariant().AsSpan();

      // Narrow the window by halving until only a handful of entries remain;
      // the bounds stay inclusive, so the probed entry is never discarded.
      while ((high - low) > 3)
      {
          int mid = low + ((high - low) / 2);
          Debug.Assert(mid < s_encodingNameIndices.Length - 1);

          int midStart = s_encodingNameIndices[mid];
          int midLength = s_encodingNameIndices[mid + 1] - midStart;
          int comparison = string.CompareOrdinal(target, s_encodingNames.AsSpan(midStart, midLength));

          if (comparison == 0)
          {
              // Exact match: return the code page that belongs to this entry.
              return s_codePagesByName[mid];
          }

          if (comparison < 0)
          {
              // The target sorts before the probed entry.
              high = mid;
          }
          else
          {
              // The target sorts after the probed entry.
              low = mid;
          }
      }

      // Check the remaining entries one by one (at most four, because the
      // bounds are inclusive and differ by no more than three).
      for (int candidate = low; candidate <= high; candidate++)
      {
          Debug.Assert(candidate < s_encodingNameIndices.Length - 1);

          int candidateStart = s_encodingNameIndices[candidate];
          int candidateLength = s_encodingNameIndices[candidate + 1] - candidateStart;
          if (string.CompareOrdinal(target, s_encodingNames.AsSpan(candidateStart, candidateLength)) == 0)
          {
              return s_codePagesByName[candidate];
          }
      }

      // Nothing matched: the encoding name is not valid.
      throw new ArgumentException(
          SR.Format(SR.Argument_EncodingNotSupported, name),
          nameof(name));
  }
  // Builds a new array holding one EncodingInfo per entry of s_mappedCodePages.
  // Each EncodingInfo is created from the code page, the web name sliced out of
  // s_webNames, and the display name produced by GetDisplayName.
  internal static EncodingInfo[] GetEncodings()
  {
      int count = s_mappedCodePages.Length;
      EncodingInfo[] encodings = new EncodingInfo[count];

      for (int i = 0; i < count; i++)
      {
          int codePage = s_mappedCodePages[i];
          string webName = s_webNames[s_webNameIndices[i]..s_webNameIndices[i + 1]];
          string displayName = GetDisplayName(codePage, i);

          encodings[i] = new EncodingInfo(codePage, webName, displayName);
      }

      return encodings;
  }
  // Returns the CodePageDataItem for the given code page, creating and caching
  // it on first request. Returns null for any code page that is not one of the
  // eight values handled by the switch below.
  internal static CodePageDataItem? GetCodePageDataItem(int codePage)
  {
      // Allocate the cache array on first use. CompareExchange stores the new
      // array only if the field is still null, so an array published by another
      // thread in the meantime is not overwritten.
      if (s_codePageToCodePageData == null)
      {
          Interlocked.CompareExchange<CodePageDataItem?[]?>(ref s_codePageToCodePageData, new CodePageDataItem[s_mappedCodePages.Length], null);
      }

      // Keep in sync with s_mappedCodePages
      int slot = codePage switch
      {
          1200 => 0,  // utf-16
          1201 => 1,  // utf-16be
          12000 => 2, // utf-32
          12001 => 3, // utf-32be
          20127 => 4, // us-ascii
          28591 => 5, // iso-8859-1
          65000 => 6, // utf-7
          65001 => 7, // utf-8
          _ => -1,    // not a code page this table knows about
      };

      if (slot < 0)
      {
          return null;
      }

      CodePageDataItem? cached = s_codePageToCodePageData[slot];
      if (cached != null)
      {
          return cached;
      }

      // Build the item and store it only if the slot is still null, then read
      // the slot back so the caller gets whichever instance ended up stored.
      Interlocked.CompareExchange<CodePageDataItem?>(ref s_codePageToCodePageData[slot], InternalGetCodePageDataItem(codePage, slot), null);
      return s_codePageToCodePageData[slot];
  }
  // Creates the CodePageDataItem for the table entry at the given index.
  // The UI family code page and flags come from s_uiFamilyCodePages and s_flags,
  // the web name is sliced out of s_webNames, and the display name comes from
  // GetDisplayName.
  private static CodePageDataItem InternalGetCodePageDataItem(int codePage, int index)
  {
      int familyCodePage = s_uiFamilyCodePages[index];

      int webNameStart = s_webNameIndices[index];
      int webNameEnd = s_webNameIndices[index + 1];
      string webName = s_webNames[webNameStart..webNameEnd];

      string displayName = GetDisplayName(codePage, index);
      uint encodingFlags = s_flags[index];

      // All supported code pages have identical header names and body names,
      // so the web name is passed for both of those arguments as well.
      return new CodePageDataItem(familyCodePage, webName, webName, webName, displayName, encodingFlags);
  }
  // Returns the display name for a code page: the resource string named
  // "Globalization_cp_" followed by the code page number when that lookup yields
  // a non-empty string, otherwise the English name stored at englishNameIndex
  // in s_englishNames.
  private static string GetDisplayName(int codePage, int englishNameIndex)
  {
      string? localizedName = SR.GetResourceString("Globalization_cp_" + codePage.ToString());
      if (!string.IsNullOrEmpty(localizedName))
      {
          return localizedName;
      }

      // No usable resource string: fall back to the built-in English name.
      int englishStart = s_englishNameIndices[englishNameIndex];
      int englishEnd = s_englishNameIndices[englishNameIndex + 1];
      return s_englishNames[englishStart..englishEnd];
  }
  164. }
  165. }