Char.cs 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. /*============================================================
  5. **
  6. **
  7. **
  8. ** Purpose: This is the value class representing a Unicode character,
  9. ** together with the static Char methods that classify and convert characters.
  10. **
  11. **
  12. ===========================================================*/
  13. using System.Diagnostics;
  14. using System.Globalization;
  15. using System.Runtime.InteropServices;
  16. using System.Text;
  17. namespace System
  18. {
  19. [Serializable]
  20. [StructLayout(LayoutKind.Sequential)]
  21. [System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
  22. public readonly struct Char : IComparable, IComparable<char>, IEquatable<char>, IConvertible
  23. {
  24. //
  25. // Member Variables
  26. //
  27. private readonly char m_value; // The character value wrapped by this struct. Do not rename (binary serialization)
  28. //
  29. // Public Constants
  30. //
  31. // The largest value a char can hold (U+FFFF).
  32. public const char MaxValue = (char)0xFFFF;
  33. // The smallest value a char can hold (U+0000).
  34. public const char MinValue = (char)0x00;
  35. // Unicode category for every code point from U+0000 through U+00FF (ASCII + Latin-1 Supplement), indexed by code point. Each entry is a UnicodeCategory value stored as a byte to save space; the trailing comment on each row gives the code point range that row covers.
  36. private static ReadOnlySpan<byte> CategoryForLatin1 => new byte[] { // uses C# compiler's optimization for static byte[] data
  37. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0000 - 0007
  38. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0008 - 000F
  39. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0010 - 0017
  40. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0018 - 001F
  41. (byte)UnicodeCategory.SpaceSeparator, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 0020 - 0027
  42. (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.DashPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, // 0028 - 002F
  43. (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, // 0030 - 0037
  44. (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.DecimalDigitNumber, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherPunctuation, // 0038 - 003F
  45. (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0040 - 0047
  46. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0048 - 004F
  47. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 0050 - 0057
  48. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.ConnectorPunctuation, // 0058 - 005F
  49. (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0060 - 0067
  50. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0068 - 006F
  51. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 0070 - 0077
  52. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.OpenPunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.ClosePunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.Control, // 0078 - 007F
  53. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0080 - 0087
  54. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0088 - 008F
  55. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0090 - 0097
  56. (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, (byte)UnicodeCategory.Control, // 0098 - 009F
  57. (byte)UnicodeCategory.SpaceSeparator, (byte)UnicodeCategory.OtherPunctuation, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.CurrencySymbol, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.OtherSymbol, // 00A0 - 00A7
  58. (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.InitialQuotePunctuation, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.DashPunctuation, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.ModifierSymbol, // 00A8 - 00AF
  59. (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.MathSymbol, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.OtherSymbol, (byte)UnicodeCategory.OtherPunctuation, // 00B0 - 00B7
  60. (byte)UnicodeCategory.ModifierSymbol, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.FinalQuotePunctuation, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherNumber, (byte)UnicodeCategory.OtherPunctuation, // 00B8 - 00BF
  61. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 00C0 - 00C7
  62. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, // 00C8 - 00CF
  63. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.MathSymbol, // 00D0 - 00D7
  64. (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.UppercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00D8 - 00DF
  65. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00E0 - 00E7
  66. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00E8 - 00EF
  67. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.MathSymbol, // 00F0 - 00F7
  68. (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, (byte)UnicodeCategory.LowercaseLetter, // 00F8 - 00FF
  69. };
  // Reports whether ch lies in U+0000..U+00FF, i.e. ASCII plus the Latin-1 Supplement.
  private static bool IsLatin1(char ch) => ch <= '\x00ff';
  // Reports whether ch lies in U+0000..U+007F, i.e. the ASCII range.
  private static bool IsAscii(char ch) => ch <= '\x007f';
  // Looks up the Unicode category of a Latin-1 character (U+0000..U+00FF) in the
  // CategoryForLatin1 table, which is indexed by code point.
  // The caller is expected to have checked IsLatin1(ch); debug builds assert it.
  private static UnicodeCategory GetLatin1UnicodeCategory(char ch)
  {
      // The message names U+00FF because that is the bound IsLatin1 actually checks.
      Debug.Assert(IsLatin1(ch), "char.GetLatin1UnicodeCategory(): ch should be <= 00ff");
      return (UnicodeCategory)CategoryForLatin1[(int)ch];
  }
  86. //
  87. // Private Constants
  88. //
  89. //
  90. // Overridden Instance Methods
  91. //
  // Builds the hash code by placing the 16-bit character value in both the low
  // and the high half of the resulting int.
  public override int GetHashCode()
  {
      int code = m_value;
      return (code << 16) | code;
  }
  // Compares this instance with a boxed value. Returns true only when obj is a
  // char holding the same value; any other object (including null) yields false.
  public override bool Equals(object obj)
  {
      return obj is char other && other.m_value == m_value;
  }
  // Strongly typed equality: true when obj holds the same character value as this instance.
  [System.Runtime.Versioning.NonVersionable]
  public bool Equals(char obj) => m_value == obj;
  // Compares this instance with a boxed char and returns the difference of the two
  // character values: negative when this instance is smaller, zero when they are
  // equal, positive when this instance is larger.
  // null is considered to be less than any instance, so the result is 1.
  // Throws ArgumentException when value is neither null nor a char.
  public int CompareTo(object value)
  {
      if (value is char other)
      {
          return m_value - other.m_value;
      }

      if (value == null)
      {
          return 1;
      }

      throw new ArgumentException(SR.Arg_MustBeChar);
  }
  // Compares this instance with value by subtracting the character values: the
  // result is negative, zero or positive when this instance is respectively
  // smaller than, equal to or larger than value.
  public int CompareTo(char value) => m_value - value;
  // Overrides System.Object.ToString; delegates to the static char.ToString(char)
  // with this instance's value.
  public override string ToString() => char.ToString(m_value);
  // Same result as the parameterless ToString(); the provider argument is not used.
  public string ToString(IFormatProvider provider) => char.ToString(m_value);
  143. //
  144. // Formatting Methods
  145. //
  /*===================================ToString===================================
  **Static helper that converts the character c to its string representation
  **by calling string.CreateFromChar.
  ==============================================================================*/
  public static string ToString(char c)
  {
      return string.CreateFromChar(c);
  }
  // Converts a string of exactly one character to that character.
  // Throws ArgumentNullException when s is null and FormatException when the
  // length of s is anything other than 1.
  public static char Parse(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      if (s.Length == 1)
      {
          return s[0];
      }

      throw new FormatException(SR.Format_NeedSingleChar);
  }
  // Non-throwing counterpart of Parse. Succeeds only when s is non-null and exactly
  // one character long, in which case result receives that character. On failure
  // result is set to '\0' and the method returns false.
  public static bool TryParse(string s, out char result)
  {
      if (s != null && s.Length == 1)
      {
          result = s[0];
          return true;
      }

      result = '\0';
      return false;
  }
  177. //
  178. // Static Methods
  179. //
  /*=================================ISDIGIT======================================
  **Returns a boolean indicating whether character c is considered to be a      **
  **decimal digit.                                                              **
  ==============================================================================*/
  // For Latin-1 characters only '0'..'9' qualify; every other character is a digit
  // when its Unicode category is DecimalDigitNumber.
  public static bool IsDigit(char c)
  {
      return IsLatin1(c)
          ? IsInRange(c, '0', '9')
          : CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.DecimalDigitNumber;
  }
  // Reports whether min <= c <= max using one unsigned comparison: when c is below
  // min the subtraction is negative and becomes a very large unsigned value.
  internal static bool IsInRange(char c, char min, char max)
  {
      uint offset = (uint)(c - min);
      uint width = (uint)(max - min);
      return offset <= width;
  }
  // Reports whether the category c lies between min and max (both inclusive),
  // using the same single unsigned comparison as the char overload.
  private static bool IsInRange(UnicodeCategory c, UnicodeCategory min, UnicodeCategory max)
  {
      uint offset = (uint)(c - min);
      uint width = (uint)(max - min);
      return offset <= width;
  }
  /*=================================CheckLetter=====================================
  ** Returns true when uc is one of the letter categories, i.e. lies in the
  ** enum range UppercaseLetter..OtherLetter.
  ==============================================================================*/
  internal static bool CheckLetter(UnicodeCategory uc) =>
      IsInRange(uc, UnicodeCategory.UppercaseLetter, UnicodeCategory.OtherLetter);
  /*=================================ISLETTER=====================================
  **Returns a boolean indicating whether character c is considered to be a      **
  **letter.                                                                     **
  ==============================================================================*/
  // ASCII is answered with a range check, the rest of Latin-1 with the category
  // table, and everything else through CharUnicodeInfo.
  public static bool IsLetter(char c)
  {
      if (!IsLatin1(c))
      {
          return CheckLetter(CharUnicodeInfo.GetUnicodeCategory(c));
      }

      if (!IsAscii(c))
      {
          return CheckLetter(GetLatin1UnicodeCategory(c));
      }

      // Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z', so one range check covers both cases.
      char folded = (char)(c | 0x20);
      return IsInRange(folded, 'a', 'z');
  }
  // Reports whether a Latin-1 character counts as white space.
  // Some characters in UnicodeCategory.Control are treated as white space, so the
  // check is done on code points rather than categories:
  //   U+0009 = <control> HORIZONTAL TAB
  //   U+000a = <control> LINE FEED
  //   U+000b = <control> VERTICAL TAB
  //   U+000c = <control> FORM FEED
  //   U+000d = <control> CARRIAGE RETURN
  //   U+0020 = SPACE
  //   U+0085 = <control> NEXT LINE
  //   U+00a0 = NO-BREAK SPACE
  private static bool IsWhiteSpaceLatin1(char c)
  {
      if (c == ' ' || c == '\x00a0' || c == '\x0085')
      {
          return true;
      }

      // Contiguous block HORIZONTAL TAB .. CARRIAGE RETURN.
      return c >= '\x0009' && c <= '\x000d';
  }
  /*===============================ISWHITESPACE===================================
  **Returns a boolean indicating whether character c is considered to be a      **
  **whitespace character.                                                       **
  ==============================================================================*/
  // Latin-1 characters are answered by IsWhiteSpaceLatin1; for all others the
  // Unicode category is passed to CheckSeparator.
  public static bool IsWhiteSpace(char c)
  {
      return IsLatin1(c)
          ? IsWhiteSpaceLatin1(c)
          : CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c));
  }
  /*===================================IsUpper====================================
  **Arguments: c -- the character to be checked.
  **Returns: True if c is an uppercase character.
  ==============================================================================*/
  // ASCII is answered with a range check; otherwise the character's Unicode
  // category (table lookup for Latin-1, CharUnicodeInfo beyond it) must be
  // UppercaseLetter.
  public static bool IsUpper(char c)
  {
      if (IsAscii(c))
      {
          return IsInRange(c, 'A', 'Z');
      }

      UnicodeCategory category = IsLatin1(c)
          ? GetLatin1UnicodeCategory(c)
          : CharUnicodeInfo.GetUnicodeCategory(c);
      return category == UnicodeCategory.UppercaseLetter;
  }
  /*===================================IsLower====================================
  **Arguments: c -- the character to be checked.
  **Returns: True if c is a lowercase character.
  ==============================================================================*/
  // ASCII is answered with a range check; otherwise the character's Unicode
  // category (table lookup for Latin-1, CharUnicodeInfo beyond it) must be
  // LowercaseLetter.
  public static bool IsLower(char c)
  {
      if (IsAscii(c))
      {
          return IsInRange(c, 'a', 'z');
      }

      UnicodeCategory category = IsLatin1(c)
          ? GetLatin1UnicodeCategory(c)
          : CharUnicodeInfo.GetUnicodeCategory(c);
      return category == UnicodeCategory.LowercaseLetter;
  }
  // Returns true when uc is one of the punctuation categories, i.e. lies in the
  // enum range ConnectorPunctuation..OtherPunctuation.
  internal static bool CheckPunctuation(UnicodeCategory uc) =>
      IsInRange(uc, UnicodeCategory.ConnectorPunctuation, UnicodeCategory.OtherPunctuation);
  /*================================IsPunctuation=================================
  **Arguments: c -- the character to be checked.
  **Returns: True if c is a punctuation mark.
  ==============================================================================*/
  // The category comes from the Latin-1 table when possible and from
  // CharUnicodeInfo otherwise.
  public static bool IsPunctuation(char c)
  {
      UnicodeCategory category = IsLatin1(c)
          ? GetLatin1UnicodeCategory(c)
          : CharUnicodeInfo.GetUnicodeCategory(c);
      return CheckPunctuation(category);
  }
  /*=================================CheckLetterOrDigit=====================================
  ** Returns true when uc is DecimalDigitNumber or one of the letter categories
  ** accepted by CheckLetter.
  ==============================================================================*/
  internal static bool CheckLetterOrDigit(UnicodeCategory uc)
  {
      if (uc == UnicodeCategory.DecimalDigitNumber)
      {
          return true;
      }

      return CheckLetter(uc);
  }
  // Determines whether c is a letter or a decimal digit. The category comes from
  // the Latin-1 table when possible and from CharUnicodeInfo otherwise.
  public static bool IsLetterOrDigit(char c)
  {
      UnicodeCategory category = IsLatin1(c)
          ? GetLatin1UnicodeCategory(c)
          : CharUnicodeInfo.GetUnicodeCategory(c);
      return CheckLetterOrDigit(category);
  }
  /*===================================ToUpper====================================
  **Converts c to upper case using the TextInfo of the supplied culture.
  **Throws ArgumentNullException when culture is null.
  ==============================================================================*/
  public static char ToUpper(char c, CultureInfo culture)
  {
      if (culture == null)
      {
          throw new ArgumentNullException(nameof(culture));
      }

      TextInfo casing = culture.TextInfo;
      return casing.ToUpper(c);
  }
  /*=================================TOUPPER======================================
  **Converts character c to its uppercase equivalent using the TextInfo of      **
  **CultureInfo.CurrentCulture.                                                 **
  ==============================================================================*/
  public static char ToUpper(char c)
  {
      TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
      return casing.ToUpper(c);
  }
  // Converts c to upper case using the TextInfo of CultureInfo.InvariantCulture.
  public static char ToUpperInvariant(char c)
  {
      TextInfo casing = CultureInfo.InvariantCulture.TextInfo;
      return casing.ToUpper(c);
  }
  /*===================================ToLower====================================
  **Converts c to lower case using the TextInfo of the supplied culture.
  **Throws ArgumentNullException when culture is null.
  ==============================================================================*/
  public static char ToLower(char c, CultureInfo culture)
  {
      if (culture == null)
      {
          throw new ArgumentNullException(nameof(culture));
      }

      TextInfo casing = culture.TextInfo;
      return casing.ToLower(c);
  }
  /*=================================TOLOWER======================================
  **Converts character c to its lowercase equivalent using the TextInfo of      **
  **CultureInfo.CurrentCulture.                                                 **
  ==============================================================================*/
  public static char ToLower(char c)
  {
      TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
      return casing.ToLower(c);
  }
  // Converts c to lower case using the TextInfo of CultureInfo.InvariantCulture.
  public static char ToLowerInvariant(char c)
  {
      TextInfo casing = CultureInfo.InvariantCulture.TextInfo;
      return casing.ToLower(c);
  }
  370. //
  371. // IConvertible implementation
  372. //
  // IConvertible: identifies this type as TypeCode.Char.
  public TypeCode GetTypeCode() => TypeCode.Char;
  // Conversion from Char to Boolean is not supported; always throws InvalidCastException.
  bool IConvertible.ToBoolean(IFormatProvider provider) =>
      throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Boolean"));
  // Returns the wrapped character unchanged; the provider is not used.
  char IConvertible.ToChar(IFormatProvider provider) => m_value;
  // Delegates to Convert.ToSByte(char); the provider is not used.
  sbyte IConvertible.ToSByte(IFormatProvider provider) => Convert.ToSByte(m_value);
  // Delegates to Convert.ToByte(char); the provider is not used.
  byte IConvertible.ToByte(IFormatProvider provider) => Convert.ToByte(m_value);
  // Delegates to Convert.ToInt16(char); the provider is not used.
  short IConvertible.ToInt16(IFormatProvider provider) => Convert.ToInt16(m_value);
  // Delegates to Convert.ToUInt16(char); the provider is not used.
  ushort IConvertible.ToUInt16(IFormatProvider provider) => Convert.ToUInt16(m_value);
  // Delegates to Convert.ToInt32(char); the provider is not used.
  int IConvertible.ToInt32(IFormatProvider provider) => Convert.ToInt32(m_value);
  // Delegates to Convert.ToUInt32(char); the provider is not used.
  uint IConvertible.ToUInt32(IFormatProvider provider) => Convert.ToUInt32(m_value);
  // Delegates to Convert.ToInt64(char); the provider is not used.
  long IConvertible.ToInt64(IFormatProvider provider) => Convert.ToInt64(m_value);
  // Delegates to Convert.ToUInt64(char); the provider is not used.
  ulong IConvertible.ToUInt64(IFormatProvider provider) => Convert.ToUInt64(m_value);
  // Conversion from Char to Single is not supported; always throws InvalidCastException.
  float IConvertible.ToSingle(IFormatProvider provider) =>
      throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Single"));
  // Conversion from Char to Double is not supported; always throws InvalidCastException.
  double IConvertible.ToDouble(IFormatProvider provider) =>
      throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Double"));
  // Conversion from Char to Decimal is not supported; always throws InvalidCastException.
  decimal IConvertible.ToDecimal(IFormatProvider provider) =>
      throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Decimal"));
  // Conversion from Char to DateTime is not supported; always throws InvalidCastException.
  DateTime IConvertible.ToDateTime(IFormatProvider provider) =>
      throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "DateTime"));
  // Converts this value to the requested type by handing this instance (as an
  // IConvertible), the target type and the provider to Convert.DefaultToType.
  object IConvertible.ToType(Type type, IFormatProvider provider)
  {
      IConvertible self = this;
      return Convert.DefaultToType(self, type, provider);
  }
  // Determines whether c is a control character, i.e. its Unicode category is
  // Control. The category comes from the Latin-1 table when possible and from
  // CharUnicodeInfo otherwise.
  public static bool IsControl(char c)
  {
      UnicodeCategory category = IsLatin1(c)
          ? GetLatin1UnicodeCategory(c)
          : CharUnicodeInfo.GetUnicodeCategory(c);
      return category == UnicodeCategory.Control;
  }
  // Determines whether the character at position index in s is a control character.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index is not a valid position in s.
  public static bool IsControl(string s, int index)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // One unsigned comparison rejects both negative and too-large indices.
      if ((uint)index >= (uint)s.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }

      char current = s[index];
      UnicodeCategory category = IsLatin1(current)
          ? GetLatin1UnicodeCategory(current)
          : CharUnicodeInfo.GetUnicodeCategory(s, index);
      return category == UnicodeCategory.Control;
  }
  // Determines whether the character at position index in s is a decimal digit.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index is not a valid position in s.
  public static bool IsDigit(string s, int index)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // One unsigned comparison rejects both negative and too-large indices.
      if ((uint)index >= (uint)s.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }

      char current = s[index];
      return IsLatin1(current)
          ? IsInRange(current, '0', '9')
          : CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.DecimalDigitNumber;
  }
  // Determines whether the character at position index in s is a letter.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index is not a valid position in s.
  public static bool IsLetter(string s, int index)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // One unsigned comparison rejects both negative and too-large indices.
      if ((uint)index >= (uint)s.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }

      char current = s[index];
      if (!IsLatin1(current))
      {
          return CheckLetter(CharUnicodeInfo.GetUnicodeCategory(s, index));
      }

      if (!IsAscii(current))
      {
          return CheckLetter(GetLatin1UnicodeCategory(current));
      }

      // Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z', so one range check covers both cases.
      char folded = (char)(current | 0x20);
      return IsInRange(folded, 'a', 'z');
  }
  // Determines whether the character at position index in s is a letter or a
  // decimal digit.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index is not a valid position in s.
  public static bool IsLetterOrDigit(string s, int index)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // One unsigned comparison rejects both negative and too-large indices.
      if ((uint)index >= (uint)s.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }

      char current = s[index];
      UnicodeCategory category = IsLatin1(current)
          ? GetLatin1UnicodeCategory(current)
          : CharUnicodeInfo.GetUnicodeCategory(s, index);
      return CheckLetterOrDigit(category);
  }
  // Determines whether the character at position index in s is a lowercase letter.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index is not a valid position in s.
  public static bool IsLower(string s, int index)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      // One unsigned comparison rejects both negative and too-large indices.
      if ((uint)index >= (uint)s.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }

      char current = s[index];
      if (IsAscii(current))
      {
          return IsInRange(current, 'a', 'z');
      }

      UnicodeCategory category = IsLatin1(current)
          ? GetLatin1UnicodeCategory(current)
          : CharUnicodeInfo.GetUnicodeCategory(s, index);
      return category == UnicodeCategory.LowercaseLetter;
  }
  /*=================================CheckNumber=====================================
  ** Returns true when uc is one of the number categories, i.e. lies in the
  ** enum range DecimalDigitNumber..OtherNumber.
  ==============================================================================*/
  internal static bool CheckNumber(UnicodeCategory uc) =>
      IsInRange(uc, UnicodeCategory.DecimalDigitNumber, UnicodeCategory.OtherNumber);
  536. public static bool IsNumber(char c)
  537. {
  538. if (IsLatin1(c))
  539. {
  540. if (IsAscii(c))
  541. {
  542. return IsInRange(c, '0', '9');
  543. }
  544. return (CheckNumber(GetLatin1UnicodeCategory(c)));
  545. }
  546. return (CheckNumber(CharUnicodeInfo.GetUnicodeCategory(c)));
  547. }
  548. public static bool IsNumber(string s, int index)
  549. {
  550. if (s == null)
  551. throw new ArgumentNullException(nameof(s));
  552. if (((uint)index) >= ((uint)s.Length))
  553. {
  554. throw new ArgumentOutOfRangeException(nameof(index));
  555. }
  556. char c = s[index];
  557. if (IsLatin1(c))
  558. {
  559. if (IsAscii(c))
  560. {
  561. return IsInRange(c, '0', '9');
  562. }
  563. return (CheckNumber(GetLatin1UnicodeCategory(c)));
  564. }
  565. return (CheckNumber(CharUnicodeInfo.GetUnicodeCategory(s, index)));
  566. }
  567. ////////////////////////////////////////////////////////////////////////
  568. //
  569. // IsPunctuation
  570. //
  571. // Determines if the given character is a punctuation character.
  572. //
  573. ////////////////////////////////////////////////////////////////////////
  574. public static bool IsPunctuation(string s, int index)
  575. {
  576. if (s == null)
  577. throw new ArgumentNullException(nameof(s));
  578. if (((uint)index) >= ((uint)s.Length))
  579. {
  580. throw new ArgumentOutOfRangeException(nameof(index));
  581. }
  582. char c = s[index];
  583. if (IsLatin1(c))
  584. {
  585. return (CheckPunctuation(GetLatin1UnicodeCategory(c)));
  586. }
  587. return (CheckPunctuation(CharUnicodeInfo.GetUnicodeCategory(s, index)));
  588. }
  589. /*================================= CheckSeparator ============================
  590. ** Check if the specified UnicodeCategory belongs to the seprator categories.
  591. ==============================================================================*/
  592. internal static bool CheckSeparator(UnicodeCategory uc)
  593. {
  594. return IsInRange(uc, UnicodeCategory.SpaceSeparator, UnicodeCategory.ParagraphSeparator);
  595. }
  596. private static bool IsSeparatorLatin1(char c)
  597. {
  598. // U+00a0 = NO-BREAK SPACE
  599. // There is no LineSeparator or ParagraphSeparator in Latin 1 range.
  600. return (c == '\x0020' || c == '\x00a0');
  601. }
  602. public static bool IsSeparator(char c)
  603. {
  604. if (IsLatin1(c))
  605. {
  606. return (IsSeparatorLatin1(c));
  607. }
  608. return (CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c)));
  609. }
  610. public static bool IsSeparator(string s, int index)
  611. {
  612. if (s == null)
  613. throw new ArgumentNullException(nameof(s));
  614. if (((uint)index) >= ((uint)s.Length))
  615. {
  616. throw new ArgumentOutOfRangeException(nameof(index));
  617. }
  618. char c = s[index];
  619. if (IsLatin1(c))
  620. {
  621. return (IsSeparatorLatin1(c));
  622. }
  623. return (CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index)));
  624. }
  625. public static bool IsSurrogate(char c)
  626. {
  627. return IsInRange(c, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END);
  628. }
  629. public static bool IsSurrogate(string s, int index)
  630. {
  631. if (s == null)
  632. {
  633. throw new ArgumentNullException(nameof(s));
  634. }
  635. if (((uint)index) >= ((uint)s.Length))
  636. {
  637. throw new ArgumentOutOfRangeException(nameof(index));
  638. }
  639. return (IsSurrogate(s[index]));
  640. }
  641. /*================================= CheckSymbol ============================
  642. ** Check if the specified UnicodeCategory belongs to the symbol categories.
  643. ==============================================================================*/
  644. internal static bool CheckSymbol(UnicodeCategory uc)
  645. {
  646. return IsInRange(uc, UnicodeCategory.MathSymbol, UnicodeCategory.OtherSymbol);
  647. }
  648. public static bool IsSymbol(char c)
  649. {
  650. if (IsLatin1(c))
  651. {
  652. return (CheckSymbol(GetLatin1UnicodeCategory(c)));
  653. }
  654. return (CheckSymbol(CharUnicodeInfo.GetUnicodeCategory(c)));
  655. }
  656. public static bool IsSymbol(string s, int index)
  657. {
  658. if (s == null)
  659. throw new ArgumentNullException(nameof(s));
  660. if (((uint)index) >= ((uint)s.Length))
  661. {
  662. throw new ArgumentOutOfRangeException(nameof(index));
  663. }
  664. char c = s[index];
  665. if (IsLatin1(c))
  666. {
  667. return (CheckSymbol(GetLatin1UnicodeCategory(c)));
  668. }
  669. return (CheckSymbol(CharUnicodeInfo.GetUnicodeCategory(s, index)));
  670. }
  671. public static bool IsUpper(string s, int index)
  672. {
  673. if (s == null)
  674. throw new ArgumentNullException(nameof(s));
  675. if (((uint)index) >= ((uint)s.Length))
  676. {
  677. throw new ArgumentOutOfRangeException(nameof(index));
  678. }
  679. char c = s[index];
  680. if (IsLatin1(c))
  681. {
  682. if (IsAscii(c))
  683. {
  684. return IsInRange(c, 'A', 'Z');
  685. }
  686. return (GetLatin1UnicodeCategory(c) == UnicodeCategory.UppercaseLetter);
  687. }
  688. return (CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.UppercaseLetter);
  689. }
  690. public static bool IsWhiteSpace(string s, int index)
  691. {
  692. if (s == null)
  693. throw new ArgumentNullException(nameof(s));
  694. if (((uint)index) >= ((uint)s.Length))
  695. {
  696. throw new ArgumentOutOfRangeException(nameof(index));
  697. }
  698. if (IsLatin1(s[index]))
  699. {
  700. return IsWhiteSpaceLatin1(s[index]);
  701. }
  702. return CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index));
  703. }
  704. public static UnicodeCategory GetUnicodeCategory(char c)
  705. {
  706. if (IsLatin1(c))
  707. {
  708. return (GetLatin1UnicodeCategory(c));
  709. }
  710. return CharUnicodeInfo.GetUnicodeCategory((int)c);
  711. }
  712. public static UnicodeCategory GetUnicodeCategory(string s, int index)
  713. {
  714. if (s == null)
  715. throw new ArgumentNullException(nameof(s));
  716. if (((uint)index) >= ((uint)s.Length))
  717. {
  718. throw new ArgumentOutOfRangeException(nameof(index));
  719. }
  720. if (IsLatin1(s[index]))
  721. {
  722. return (GetLatin1UnicodeCategory(s[index]));
  723. }
  724. return CharUnicodeInfo.InternalGetUnicodeCategory(s, index);
  725. }
  726. public static double GetNumericValue(char c)
  727. {
  728. return CharUnicodeInfo.GetNumericValue(c);
  729. }
  730. public static double GetNumericValue(string s, int index)
  731. {
  732. if (s == null)
  733. throw new ArgumentNullException(nameof(s));
  734. if (((uint)index) >= ((uint)s.Length))
  735. {
  736. throw new ArgumentOutOfRangeException(nameof(index));
  737. }
  738. return CharUnicodeInfo.GetNumericValue(s, index);
  739. }
  740. /*================================= IsHighSurrogate ============================
  741. ** Check if a char is a high surrogate.
  742. ==============================================================================*/
  743. public static bool IsHighSurrogate(char c)
  744. {
  745. return IsInRange(c, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END);
  746. }
  747. public static bool IsHighSurrogate(string s, int index)
  748. {
  749. if (s == null)
  750. {
  751. throw new ArgumentNullException(nameof(s));
  752. }
  753. if (index < 0 || index >= s.Length)
  754. {
  755. throw new ArgumentOutOfRangeException(nameof(index));
  756. }
  757. return (IsHighSurrogate(s[index]));
  758. }
  759. /*================================= IsLowSurrogate ============================
  760. ** Check if a char is a low surrogate.
  761. ==============================================================================*/
  762. public static bool IsLowSurrogate(char c)
  763. {
  764. return IsInRange(c, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END);
  765. }
  766. public static bool IsLowSurrogate(string s, int index)
  767. {
  768. if (s == null)
  769. {
  770. throw new ArgumentNullException(nameof(s));
  771. }
  772. if (index < 0 || index >= s.Length)
  773. {
  774. throw new ArgumentOutOfRangeException(nameof(index));
  775. }
  776. return (IsLowSurrogate(s[index]));
  777. }
  778. /*================================= IsSurrogatePair ============================
  779. ** Check if the string specified by the index starts with a surrogate pair.
  780. ==============================================================================*/
  781. public static bool IsSurrogatePair(string s, int index)
  782. {
  783. if (s == null)
  784. {
  785. throw new ArgumentNullException(nameof(s));
  786. }
  787. if (index < 0 || index >= s.Length)
  788. {
  789. throw new ArgumentOutOfRangeException(nameof(index));
  790. }
  791. if (index + 1 < s.Length)
  792. {
  793. return (IsSurrogatePair(s[index], s[index + 1]));
  794. }
  795. return (false);
  796. }
  797. public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate)
  798. {
  799. // Since both the high and low surrogate ranges are exactly 0x400 elements
  800. // wide, and since this is a power of two, we can perform a single comparison
  801. // by baselining each value to the start of its respective range and taking
  802. // the logical OR of them.
  803. uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
  804. uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
  805. return (highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE;
  806. }
  807. internal const int UNICODE_PLANE00_END = 0x00ffff;
  808. // The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff.
  809. internal const int UNICODE_PLANE01_START = 0x10000;
  810. // The end codepoint for Unicode plane 16. This is the maximum code point value allowed for Unicode.
  811. // Plane 16 contains 0x100000 ~ 0x10ffff.
  812. internal const int UNICODE_PLANE16_END = 0x10ffff;
  813. /*================================= ConvertFromUtf32 ============================
  814. ** Convert an UTF32 value into a surrogate pair.
  815. ==============================================================================*/
  816. public static string ConvertFromUtf32(int utf32)
  817. {
  818. if (!UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
  819. {
  820. throw new ArgumentOutOfRangeException(nameof(utf32), SR.ArgumentOutOfRange_InvalidUTF32);
  821. }
  822. return Rune.UnsafeCreate((uint)utf32).ToString();
  823. }
  824. /*=============================ConvertToUtf32===================================
  825. ** Convert a surrogate pair to UTF32 value
  826. ==============================================================================*/
  827. public static int ConvertToUtf32(char highSurrogate, char lowSurrogate)
  828. {
  829. // First, extend both to 32 bits, then calculate the offset of
  830. // each candidate surrogate char from the start of its range.
  831. uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
  832. uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
  833. // This is a single comparison which allows us to check both for validity at once since
  834. // both the high surrogate range and the low surrogate range are the same length.
  835. // If the comparison fails, we call to a helper method to throw the correct exception message.
  836. if ((highSurrogateOffset | lowSurrogateOffset) > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
  837. {
  838. ConvertToUtf32_ThrowInvalidArgs(highSurrogateOffset);
  839. }
  840. // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding.
  841. return ((int)highSurrogateOffset << 10) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40 << 10);
  842. }
  843. [StackTraceHidden]
  844. private static void ConvertToUtf32_ThrowInvalidArgs(uint highSurrogateOffset)
  845. {
  846. // If the high surrogate is not within its expected range, throw an exception
  847. // whose message fingers it as invalid. If it's within the expected range,
  848. // change the message to read that the low surrogate was the problem.
  849. if (highSurrogateOffset > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
  850. {
  851. throw new ArgumentOutOfRangeException(
  852. paramName: "highSurrogate",
  853. message: SR.ArgumentOutOfRange_InvalidHighSurrogate);
  854. }
  855. else
  856. {
  857. throw new ArgumentOutOfRangeException(
  858. paramName: "lowSurrogate",
  859. message: SR.ArgumentOutOfRange_InvalidLowSurrogate);
  860. }
  861. }
  862. /*=============================ConvertToUtf32===================================
  863. ** Convert a character or a surrogate pair starting at index of the specified string
  864. ** to UTF32 value.
  865. ** The char pointed by index should be a surrogate pair or a BMP character.
  866. ** This method throws if a high-surrogate is not followed by a low surrogate.
  867. ** This method throws if a low surrogate is seen without preceding a high-surrogate.
  868. ==============================================================================*/
  869. public static int ConvertToUtf32(string s, int index)
  870. {
  871. if (s == null)
  872. {
  873. throw new ArgumentNullException(nameof(s));
  874. }
  875. if (index < 0 || index >= s.Length)
  876. {
  877. throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
  878. }
  879. // Check if the character at index is a high surrogate.
  880. int temp1 = (int)s[index] - CharUnicodeInfo.HIGH_SURROGATE_START;
  881. if (temp1 >= 0 && temp1 <= 0x7ff)
  882. {
  883. // Found a surrogate char.
  884. if (temp1 <= 0x3ff)
  885. {
  886. // Found a high surrogate.
  887. if (index < s.Length - 1)
  888. {
  889. int temp2 = (int)s[index + 1] - CharUnicodeInfo.LOW_SURROGATE_START;
  890. if (temp2 >= 0 && temp2 <= 0x3ff)
  891. {
  892. // Found a low surrogate.
  893. return ((temp1 * 0x400) + temp2 + UNICODE_PLANE01_START);
  894. }
  895. else
  896. {
  897. throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
  898. }
  899. }
  900. else
  901. {
  902. // Found a high surrogate at the end of the string.
  903. throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
  904. }
  905. }
  906. else
  907. {
  908. // Find a low surrogate at the character pointed by index.
  909. throw new ArgumentException(SR.Format(SR.Argument_InvalidLowSurrogate, index), nameof(s));
  910. }
  911. }
  912. // Not a high-surrogate or low-surrogate. Genereate the UTF32 value for the BMP characters.
  913. return ((int)s[index]);
  914. }
  915. }
  916. }