  // Licensed to the .NET Foundation under one or more agreements.
  // The .NET Foundation licenses this file to you under the MIT license.
  // See the LICENSE file in the project root for more information.
  //
  // THIS IS AN AUTOGENERATED FILE CREATED BY
  // https://github.com/dotnet/buildtools/blob/6736870b84e06b75e7df32bb84d442db1b2afa10/src/Microsoft.DotNet.Build.Tasks/PackageFiles/encoding.targets
  //
  namespace System.Text
  {
      //
      // Static lookup data for the built-in encodings. Two groups of parallel tables live here:
      //
      //   1. Name -> code page: s_encodingNames / s_encodingNameIndices / s_codePagesByName.
      //      Entry i of s_encodingNameIndices and entry i of s_codePagesByName describe the same name.
      //
      //   2. Code page -> properties: s_mappedCodePages is the key table; s_uiFamilyCodePages,
      //      s_webNameIndices (into s_webNames), s_englishNameIndices (into s_englishNames) and
      //      s_flags are all indexed by the position of the code page in s_mappedCodePages.
      //
      // The tables must stay in lock-step: adding or removing an entry in one table of a group
      // requires the matching change in every other table of that group. The file header marks
      // this file as autogenerated, so prefer changing the generator over hand-editing.
      //
      // NOTE(review): the code that searches these tables is not in this file; it is presumably
      // in another part of this partial class.
      //
      internal static partial class EncodingTable
      {
          //
          // s_encodingNames is the concatenation of all supported IANA names for each codepage.
          // This is done rather than using a large readonly array of strings to avoid
          // generating a large amount of code in the static constructor.
          // Using indices from s_encodingNameIndices, we binary search this string when mapping
          // an encoding name to a codepage. Note that these names are all lowercase and are
          // sorted in ordinal (character code) order, e.g. '-' sorts before digits, which sort
          // before '_', which sorts before letters.
          //
          private const string s_encodingNames =
              "ansi_x3.4-1968" + // 20127
              "ansi_x3.4-1986" + // 20127
              "ascii" + // 20127
              "cp367" + // 20127
              "cp819" + // 28591
              "csascii" + // 20127
              "csisolatin1" + // 28591
              "csunicode11utf7" + // 65000
              "ibm367" + // 20127
              "ibm819" + // 28591
              "iso-10646-ucs-2" + // 1200
              "iso-8859-1" + // 28591
              "iso-ir-100" + // 28591
              "iso-ir-6" + // 20127
              "iso646-us" + // 20127
              "iso8859-1" + // 28591
              "iso_646.irv:1991" + // 20127
              "iso_8859-1" + // 28591
              "iso_8859-1:1987" + // 28591
              "l1" + // 28591
              "latin1" + // 28591
              "ucs-2" + // 1200
              "unicode" + // 1200
              "unicode-1-1-utf-7" + // 65000
              "unicode-1-1-utf-8" + // 65001
              "unicode-2-0-utf-7" + // 65000
              "unicode-2-0-utf-8" + // 65001
              "unicodefffe" + // 1201
              "us" + // 20127
              "us-ascii" + // 20127
              "utf-16" + // 1200
              "utf-16be" + // 1201
              "utf-16le" + // 1200
              "utf-32" + // 12000
              "utf-32be" + // 12001
              "utf-32le" + // 12000
              "utf-7" + // 65000
              "utf-8" + // 65001
              "x-unicode-1-1-utf-7" + // 65000
              "x-unicode-1-1-utf-8" + // 65001
              "x-unicode-2-0-utf-7" + // 65000
              "x-unicode-2-0-utf-8"; // 65001

          //
          // s_encodingNameIndices contains the start index of every encoding name in the string
          // s_encodingNames. We infer the length of each string by looking at the start index
          // of the next string. For that reason the array has one more entry than there are
          // names: the final entry is the total length of s_encodingNames.
          //
          private static readonly int[] s_encodingNameIndices = new int[]
          {
              0, // ansi_x3.4-1968 (20127)
              14, // ansi_x3.4-1986 (20127)
              28, // ascii (20127)
              33, // cp367 (20127)
              38, // cp819 (28591)
              43, // csascii (20127)
              50, // csisolatin1 (28591)
              61, // csunicode11utf7 (65000)
              76, // ibm367 (20127)
              82, // ibm819 (28591)
              88, // iso-10646-ucs-2 (1200)
              103, // iso-8859-1 (28591)
              113, // iso-ir-100 (28591)
              123, // iso-ir-6 (20127)
              131, // iso646-us (20127)
              140, // iso8859-1 (28591)
              149, // iso_646.irv:1991 (20127)
              165, // iso_8859-1 (28591)
              175, // iso_8859-1:1987 (28591)
              190, // l1 (28591)
              192, // latin1 (28591)
              198, // ucs-2 (1200)
              203, // unicode (1200)
              210, // unicode-1-1-utf-7 (65000)
              227, // unicode-1-1-utf-8 (65001)
              244, // unicode-2-0-utf-7 (65000)
              261, // unicode-2-0-utf-8 (65001)
              278, // unicodefffe (1201)
              289, // us (20127)
              291, // us-ascii (20127)
              299, // utf-16 (1200)
              305, // utf-16be (1201)
              313, // utf-16le (1200)
              321, // utf-32 (12000)
              327, // utf-32be (12001)
              335, // utf-32le (12000)
              343, // utf-7 (65000)
              348, // utf-8 (65001)
              353, // x-unicode-1-1-utf-7 (65000)
              372, // x-unicode-1-1-utf-8 (65001)
              391, // x-unicode-2-0-utf-7 (65000)
              410, // x-unicode-2-0-utf-8 (65001)
              429 // end sentinel: total length of s_encodingNames
          };

          //
          // s_codePagesByName contains the list of supported codepages which match the encoding
          // names listed in s_encodingNames. The way mapping works is we binary search
          // s_encodingNames using s_encodingNameIndices until we find a match for a given name.
          // The index of the entry in s_encodingNameIndices will be the index of codepage in
          // s_codePagesByName.
          //
          private static readonly ushort[] s_codePagesByName = new ushort[]
          {
              20127, // ansi_x3.4-1968
              20127, // ansi_x3.4-1986
              20127, // ascii
              20127, // cp367
              28591, // cp819
              20127, // csascii
              28591, // csisolatin1
              65000, // csunicode11utf7
              20127, // ibm367
              28591, // ibm819
              1200, // iso-10646-ucs-2
              28591, // iso-8859-1
              28591, // iso-ir-100
              20127, // iso-ir-6
              20127, // iso646-us
              28591, // iso8859-1
              20127, // iso_646.irv:1991
              28591, // iso_8859-1
              28591, // iso_8859-1:1987
              28591, // l1
              28591, // latin1
              1200, // ucs-2
              1200, // unicode
              65000, // unicode-1-1-utf-7
              65001, // unicode-1-1-utf-8
              65000, // unicode-2-0-utf-7
              65001, // unicode-2-0-utf-8
              1201, // unicodefffe
              20127, // us
              20127, // us-ascii
              1200, // utf-16
              1201, // utf-16be
              1200, // utf-16le
              12000, // utf-32
              12001, // utf-32be
              12000, // utf-32le
              65000, // utf-7
              65001, // utf-8
              65000, // x-unicode-1-1-utf-7
              65001, // x-unicode-1-1-utf-8
              65000, // x-unicode-2-0-utf-7
              65001 // x-unicode-2-0-utf-8
          };

          //
          // When retrieving the value for System.Text.Encoding.WebName or
          // System.Text.Encoding.EncodingName given System.Text.Encoding.CodePage,
          // we perform a linear search on s_mappedCodePages to find the index of the
          // given codepage. This is used to index s_webNameIndices to get the start
          // index of the web name in the string s_webNames, and to index
          // s_englishNameIndices to get the start of the English name in
          // s_englishNames. In addition, this array's indices correspond to the indices
          // into s_uiFamilyCodePages and s_flags.
          //
          private static readonly ushort[] s_mappedCodePages = new ushort[]
          {
              1200, // utf-16
              1201, // utf-16be
              12000, // utf-32
              12001, // utf-32be
              20127, // us-ascii
              28591, // iso-8859-1
              65000, // utf-7
              65001 // utf-8
          };

          //
          // s_uiFamilyCodePages is indexed by the corresponding index in s_mappedCodePages.
          //
          private static readonly int[] s_uiFamilyCodePages = new int[]
          {
              1200, // utf-16 (1200)
              1200, // utf-16be (1201)
              1200, // utf-32 (12000)
              1200, // utf-32be (12001)
              1252, // us-ascii (20127)
              1252, // iso-8859-1 (28591)
              1200, // utf-7 (65000)
              1200 // utf-8 (65001)
          };

          //
          // s_webNames is a concatenation of the default encoding names
          // for each code page. It is used in retrieving the value for
          // System.Text.Encoding.WebName given System.Text.Encoding.CodePage.
          // This is done rather than using a large readonly array of strings to avoid
          // generating a large amount of code in the static constructor.
          // Unlike s_encodingNames, these strings are not all lowercase ("utf-16BE", "utf-32BE").
          //
          private const string s_webNames =
              "utf-16" + // 1200
              "utf-16BE" + // 1201
              "utf-32" + // 12000
              "utf-32BE" + // 12001
              "us-ascii" + // 20127
              "iso-8859-1" + // 28591
              "utf-7" + // 65000
              "utf-8"; // 65001

          //
          // s_webNameIndices contains the start index of each code page's default
          // web name in the string s_webNames. It is indexed by an index into
          // s_mappedCodePages. The extra final entry is the total length of s_webNames,
          // so that the length of the last name can be computed like all the others.
          //
          private static readonly int[] s_webNameIndices = new int[]
          {
              0, // utf-16 (1200)
              6, // utf-16be (1201)
              14, // utf-32 (12000)
              20, // utf-32be (12001)
              28, // us-ascii (20127)
              36, // iso-8859-1 (28591)
              46, // utf-7 (65000)
              51, // utf-8 (65001)
              56 // end sentinel: total length of s_webNames
          };

          //
          // s_englishNames is the concatenation of the English names for each codepage.
          // It is used in retrieving the value for System.Text.Encoding.EncodingName
          // given System.Text.Encoding.CodePage.
          // This is done rather than using a large readonly array of strings to avoid
          // generating a large amount of code in the static constructor.
          //
          private const string s_englishNames =
              "Unicode" + // 1200
              "Unicode (Big-Endian)" + // 1201
              "Unicode (UTF-32)" + // 12000
              "Unicode (UTF-32 Big-Endian)" + // 12001
              "US-ASCII" + // 20127
              "Western European (ISO)" + // 28591
              "Unicode (UTF-7)" + // 65000
              "Unicode (UTF-8)"; // 65001

          //
          // s_englishNameIndices contains the start index of each code page's English
          // name in the string s_englishNames. It is indexed by an index into
          // s_mappedCodePages. The extra final entry is the total length of s_englishNames,
          // so that the length of the last name can be computed like all the others.
          //
          private static readonly int[] s_englishNameIndices = new int[]
          {
              0, // Unicode (1200)
              7, // Unicode (Big-Endian) (1201)
              27, // Unicode (UTF-32) (12000)
              43, // Unicode (UTF-32 Big-Endian) (12001)
              70, // US-ASCII (20127)
              78, // Western European (ISO) (28591)
              100, // Unicode (UTF-7) (65000)
              115, // Unicode (UTF-8) (65001)
              130 // end sentinel: total length of s_englishNames
          };

          // redeclaring these constants here for readability below
          private const uint MIMECONTF_MAILNEWS = Encoding.MIMECONTF_MAILNEWS;
          private const uint MIMECONTF_BROWSER = Encoding.MIMECONTF_BROWSER;
          private const uint MIMECONTF_SAVABLE_MAILNEWS = Encoding.MIMECONTF_SAVABLE_MAILNEWS;
          private const uint MIMECONTF_SAVABLE_BROWSER = Encoding.MIMECONTF_SAVABLE_BROWSER;

          //
          // s_flags is indexed by the corresponding index in s_mappedCodePages.
          // Each entry is a bitwise OR of the MIMECONTF_* constants above (0 = no flags set).
          //
          private static readonly uint[] s_flags = new uint[]
          {
              MIMECONTF_SAVABLE_BROWSER, // utf-16 (1200)
              0, // utf-16be (1201)
              0, // utf-32 (12000)
              0, // utf-32be (12001)
              MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS, // us-ascii (20127)
              MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER, // iso-8859-1 (28591)
              MIMECONTF_MAILNEWS | MIMECONTF_SAVABLE_MAILNEWS, // utf-7 (65000)
              MIMECONTF_MAILNEWS | MIMECONTF_BROWSER | MIMECONTF_SAVABLE_MAILNEWS | MIMECONTF_SAVABLE_BROWSER // utf-8 (65001)
          };
      }
  }