String.cs 24 KB


  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Buffers;
  5. using System.Collections;
  6. using System.Collections.Generic;
  7. using System.Diagnostics;
  8. using System.Diagnostics.CodeAnalysis;
  9. using System.Globalization;
  10. using System.Runtime.CompilerServices;
  11. using System.Runtime.InteropServices;
  12. using System.Runtime.Versioning;
  13. using System.Text;
  14. using Internal.Runtime.CompilerServices;
  15. namespace System
  16. {
  17. // The String class represents a static string of characters. Many of
  18. // the string methods perform some type of transformation on the current
  19. // instance and return the result as a new string. As with arrays, character
  20. // positions (indices) are zero-based.
  21. [Serializable]
  22. [System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
  23. public sealed partial class String : IComparable, IEnumerable, IConvertible, IEnumerable<char>, IComparable<string?>,
  24. // IEquatable<string> is invariant by design. However, the lack of covariance means that String?
  25. // couldn't be used in places constrained to T : IEquatable<String>. As a workaround, until the
  26. // language provides a mechanism for this, we make the generic type argument oblivious, in conjunction
  27. // with making all such constraints oblivious as well.
  28. #nullable disable
  29. IEquatable<string>,
  30. #nullable restore
  31. ICloneable
  32. {
  33. //
  34. // These fields map directly onto the fields in an EE StringObject. See object.h for the layout.
  35. //
  36. [NonSerialized]
  37. private int _stringLength;
  38. // For empty strings, this will be '\0' since
  39. // strings are both null-terminated and length prefixed
  40. [NonSerialized]
  41. private char _firstChar;
  42. /*
  43. * CONSTRUCTORS
  44. *
  45. * Defining a new constructor for string-like types (like String) requires changes both
  46. * to the managed code below and to the native VM code. See the comment at the top of
  47. * src/vm/ecall.cpp for instructions on how to add new overloads.
  48. */
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern String(char[] value);

  // Managed implementation for the String(char[]) constructor declared above.
  // A null or zero-length array yields Empty; otherwise every character of the
  // array is copied into a freshly allocated string.
  #if !CORECLR
  static
  #endif
  private string Ctor(char[]? value)
  {
      if (value == null || value.Length == 0)
      {
          return Empty;
      }

      int charCount = value.Length;
      string copy = FastAllocateString(charCount);

      unsafe
      {
          fixed (char* dest = &copy._firstChar)
          fixed (char* source = value)
          {
              wstrcpy(dest, source, charCount);
          }
      }

      return copy;
  }
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern String(char[] value, int startIndex, int length);

  // Managed implementation for the String(char[], int, int) constructor declared above.
  // Copies `length` characters of `value`, beginning at `startIndex`, into a new string.
  // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException for a
  // negative index/length or a range that runs past the end of the array.
  #if !CORECLR
  static
  #endif
  private string Ctor(char[] value, int startIndex, int length)
  {
      if (value == null)
      {
          throw new ArgumentNullException(nameof(value));
      }

      if (startIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
      }

      if (length < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
      }

      // Both operands are non-negative here, so the subtraction cannot overflow.
      int lastValidStart = value.Length - length;
      if (startIndex > lastValidStart)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
      }

      if (length == 0)
      {
          return Empty;
      }

      string slice = FastAllocateString(length);

      unsafe
      {
          fixed (char* dest = &slice._firstChar)
          fixed (char* source = value)
          {
              wstrcpy(dest, source + startIndex, length);
          }
      }

      return slice;
  }
  [CLSCompliant(false)]
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern unsafe String(char* value);

  // Managed implementation for the String(char*) constructor declared above.
  // Copies the null-terminated UTF-16 sequence at `ptr` into a new string.
  #if !CORECLR
  static
  #endif
  private unsafe string Ctor(char* ptr)
  {
      // A null pointer and a pointer straight at a terminator both produce Empty.
      int charCount = (ptr == null) ? 0 : wcslen(ptr);
      if (charCount == 0)
      {
          return Empty;
      }

      string copy = FastAllocateString(charCount);
      fixed (char* dest = &copy._firstChar)
      {
          wstrcpy(dest, ptr, charCount);
      }

      return copy;
  }
  [CLSCompliant(false)]
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern unsafe String(char* value, int startIndex, int length);

  // Managed implementation for the String(char*, int, int) constructor declared above.
  // Copies `length` characters starting at `ptr + startIndex` into a new string.
  // The order of the checks below determines which exception a caller sees, so it is
  // kept exactly: length, startIndex, pointer wrap-around, empty result, null pointer.
  #if !CORECLR
  static
  #endif
  private unsafe string Ctor(char* ptr, int startIndex, int length)
  {
      if (length < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
      }

      if (startIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
      }

      char* firstChar = ptr + startIndex;

      // If adding the offset wrapped the address, the result lands below the base pointer.
      if (firstChar < ptr)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_PartialWCHAR);
      }

      if (length == 0)
      {
          return Empty;
      }

      // A null base pointer is only rejected once a non-empty copy is actually requested.
      if (ptr == null)
      {
          throw new ArgumentOutOfRangeException(nameof(ptr), SR.ArgumentOutOfRange_PartialWCHAR);
      }

      string slice = FastAllocateString(length);
      fixed (char* dest = &slice._firstChar)
      {
          wstrcpy(dest, firstChar, length);
      }

      return slice;
  }
  [CLSCompliant(false)]
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern unsafe String(sbyte* value);

  // Managed implementation for the String(sbyte*) constructor declared above.
  // Measures the null-terminated byte sequence at `value` and hands it to
  // CreateStringForSByteConstructor for decoding; a null pointer yields Empty.
  #if !CORECLR
  static
  #endif
  private unsafe string Ctor(sbyte* value)
  {
      byte* bytes = (byte*)value;
      if (bytes == null)
      {
          return Empty;
      }

      int byteCount = strlen(bytes);
      return CreateStringForSByteConstructor(bytes, byteCount);
  }
  [CLSCompliant(false)]
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern unsafe String(sbyte* value, int startIndex, int length);

  // Managed implementation for the String(sbyte*, int, int) constructor declared above.
  // Decodes `length` bytes starting at `value + startIndex` via
  // CreateStringForSByteConstructor.
  #if !CORECLR
  static
  #endif
  private unsafe string Ctor(sbyte* value, int startIndex, int length)
  {
      if (startIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
      }

      if (length < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
      }

      if (value == null)
      {
          // A null pointer is tolerated only when nothing would be read from it.
          if (length != 0)
          {
              throw new ArgumentNullException(nameof(value));
          }

          return Empty;
      }

      byte* firstByte = (byte*)(value + startIndex);

      // If adding the offset wrapped the address, the result lands below the base pointer.
      if (firstByte < value)
      {
          throw new ArgumentOutOfRangeException(nameof(value), SR.ArgumentOutOfRange_PartialWCHAR);
      }

      return CreateStringForSByteConstructor(firstByte, length);
  }
  // Shared decoder behind String..ctor(sbyte*) and String..ctor(sbyte*, int, int).
  // With PLATFORM_WINDOWS defined the bytes go through MultiByteToWideChar using CP_ACP;
  // otherwise they are decoded with Encoding.UTF8.
  private static unsafe string CreateStringForSByteConstructor(byte* pb, int numBytes)
  {
      Debug.Assert(numBytes >= 0);
      Debug.Assert(pb <= (pb + numBytes));

      if (numBytes == 0)
      {
          return Empty;
      }

  #if PLATFORM_WINDOWS
      // First pass: null output buffer of size 0, used to obtain the number of chars needed.
      int charCount = Interop.Kernel32.MultiByteToWideChar(Interop.Kernel32.CP_ACP, Interop.Kernel32.MB_PRECOMPOSED, pb, numBytes, (char*)null, 0);
      if (charCount == 0)
      {
          throw new ArgumentException(SR.Arg_InvalidANSIString);
      }

      string decoded = FastAllocateString(charCount);

      // Second pass: convert directly into the new string's character storage.
      fixed (char* dest = &decoded._firstChar)
      {
          charCount = Interop.Kernel32.MultiByteToWideChar(Interop.Kernel32.CP_ACP, Interop.Kernel32.MB_PRECOMPOSED, pb, numBytes, dest, charCount);
      }

      if (charCount == 0)
      {
          throw new ArgumentException(SR.Arg_InvalidANSIString);
      }

      return decoded;
  #else
      return Encoding.UTF8.GetString(pb, numBytes);
  #endif
  }
  [CLSCompliant(false)]
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern unsafe String(sbyte* value, int startIndex, int length, Encoding enc);

  // Managed implementation for the String(sbyte*, int, int, Encoding) constructor declared above.
  // Decodes `length` bytes starting at `value + startIndex` with `enc`. When `enc` is null
  // the work is delegated to the (sbyte*, int, int) constructor instead.
  #if !CORECLR
  static
  #endif
  private unsafe string Ctor(sbyte* value, int startIndex, int length, Encoding? enc)
  {
      if (enc == null)
      {
          return new string(value, startIndex, length);
      }

      if (length < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (startIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
      }

      if (value == null)
      {
          // A null pointer is tolerated only when nothing would be read from it.
          if (length != 0)
          {
              throw new ArgumentNullException(nameof(value));
          }

          return Empty;
      }

      byte* firstByte = (byte*)(value + startIndex);

      // If adding the offset wrapped the address, the result lands below the base pointer.
      if (firstByte < value)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_PartialWCHAR);
      }

      ReadOnlySpan<byte> encoded = new ReadOnlySpan<byte>(firstByte, length);
      return enc.GetString(encoded);
  }
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern String(char c, int count);

  // Managed implementation for the String(char, int) constructor declared above.
  // Produces a string made of `c` repeated `count` times.
  //   count == 0 -> Empty
  //   count <  0 -> ArgumentOutOfRangeException (paramName "count")
  #if !CORECLR
  static
  #endif
  private string Ctor(char c, int count)
  {
      if (count <= 0)
      {
          if (count == 0)
          {
              return Empty;
          }

          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NegativeCount);
      }

      string result = FastAllocateString(count);

      // Fast path kept from the original: for '\0' no fill is performed at all, i.e. the
      // freshly allocated string is relied upon to already contain null characters.
      if (c != '\0')
      {
          // Span<char>.Fill replaces the previous hand-unrolled loop that wrote pairs of
          // chars through a uint* alias of the string data. Same result, no pointer punning
          // and no tail-bit bookkeeping.
          new Span<char>(ref result.GetRawStringData(), count).Fill(c);
      }

      return result;
  }
  [MethodImpl(MethodImplOptions.InternalCall)]
  public extern String(ReadOnlySpan<char> value);

  // Managed implementation for the String(ReadOnlySpan<char>) constructor declared above.
  // An empty span yields Empty; otherwise the span's characters are copied into a new string.
  #if !CORECLR
  static
  #endif
  private unsafe string Ctor(ReadOnlySpan<char> value)
  {
      int charCount = value.Length;
      if (charCount == 0)
      {
          return Empty;
      }

      string copy = FastAllocateString(charCount);
      Buffer.Memmove(
          ref copy._firstChar,
          ref MemoryMarshal.GetReference(value),
          (uint)charCount);

      return copy;
  }
  /// <summary>
  /// Allocates a string of <paramref name="length"/> characters and lets
  /// <paramref name="action"/> fill it in through a writable span before it is returned.
  /// </summary>
  /// <param name="length">Number of characters in the new string; 0 returns Empty.</param>
  /// <param name="state">Value passed through unchanged to <paramref name="action"/>.</param>
  /// <param name="action">Callback that receives the string's character span and <paramref name="state"/>.</param>
  /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
  public static string Create<TState>(int length, TState state, SpanAction<char, TState> action)
  {
      if (action == null)
      {
          throw new ArgumentNullException(nameof(action));
      }

      if (length < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(length));
      }

      if (length == 0)
      {
          return Empty;
      }

      string created = FastAllocateString(length);
      Span<char> writable = new Span<char>(ref created.GetRawStringData(), length);
      action(writable, state);
      return created;
  }
  /// <summary>
  /// Converts a string to a read-only span over its characters; a null string converts to the default (empty) span.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static implicit operator ReadOnlySpan<char>(string? value)
  {
      if (value == null)
      {
          return default;
      }

      return new ReadOnlySpan<char>(ref value.GetRawStringData(), value.Length);
  }
  /// <summary>Returns this same instance; no copy is made.</summary>
  public object Clone() => this;
  /// <summary>
  /// Creates a new string instance holding the same characters as <paramref name="str"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
  public static unsafe string Copy(string str)
  {
      if (str == null)
      {
          throw new ArgumentNullException(nameof(str));
      }

      int charCount = str.Length;
      string duplicate = FastAllocateString(charCount);

      fixed (char* dest = &duplicate._firstChar)
      fixed (char* src = &str._firstChar)
      {
          wstrcpy(dest, src, charCount);
      }

      return duplicate;
  }
  // Copies `count` characters of this string, starting at `sourceIndex`, into
  // `destination` starting at `destinationIndex`.
  //
  // Throws ArgumentNullException when `destination` is null, and
  // ArgumentOutOfRangeException when `count` or an index is negative or when either
  // range does not fit inside its string/array.
  public unsafe void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
  {
      if (destination == null)
      {
          throw new ArgumentNullException(nameof(destination));
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NegativeCount);
      }

      if (sourceIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(sourceIndex), SR.ArgumentOutOfRange_Index);
      }

      int availableInSource = Length - sourceIndex;
      if (count > availableInSource)
      {
          throw new ArgumentOutOfRangeException(nameof(sourceIndex), SR.ArgumentOutOfRange_IndexCount);
      }

      bool destinationFits = destinationIndex >= 0 && destinationIndex <= destination.Length - count;
      if (!destinationFits)
      {
          throw new ArgumentOutOfRangeException(nameof(destinationIndex), SR.ArgumentOutOfRange_IndexCount);
      }

      fixed (char* src = &_firstChar)
      fixed (char* dest = destination)
      {
          wstrcpy(dest + destinationIndex, src + sourceIndex, count);
      }
  }
  // Copies all characters of this string into a new array.
  // An empty string returns the shared Array.Empty<char>() instance.
  public unsafe char[] ToCharArray()
  {
      int charCount = Length;
      if (charCount == 0)
      {
          return Array.Empty<char>();
      }

      char[] copy = new char[charCount];

      fixed (char* src = &_firstChar)
      fixed (char* dest = &copy[0])
      {
          wstrcpy(dest, src, charCount);
      }

      return copy;
  }
  // Copies `length` characters of this string, starting at `startIndex`, into a new array.
  // A zero `length` returns the shared Array.Empty<char>() instance.
  // Throws ArgumentOutOfRangeException when the index or length is out of range.
  public unsafe char[] ToCharArray(int startIndex, int length)
  {
      // The start index is validated first; it is the parameter reported when the range is invalid.
      bool startOutOfRange = startIndex < 0 || startIndex > Length || startIndex > Length - length;
      if (startOutOfRange)
      {
          throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
      }

      if (length < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_Index);
      }

      if (length == 0)
      {
          return Array.Empty<char>();
      }

      char[] copy = new char[length];

      fixed (char* src = &_firstChar)
      fixed (char* dest = &copy[0])
      {
          wstrcpy(dest, src + startIndex, length);
      }

      return copy;
  }
  /// <summary>
  /// Returns true when <paramref name="value"/> is null or has length zero.
  /// </summary>
  [NonVersionable]
  public static bool IsNullOrEmpty([NotNullWhen(false)] string? value)
  {
      // The exact shape of this expression is deliberate:
      //  * `0u >= (uint)value.Length` is used instead of `value.Length == 0` so that a
      //    following `value[0]` access can have its bounds check elided, at the same cost.
      //  * The explicit `? true : false` avoids redundant asm generation, see
      //    https://github.com/dotnet/coreclr/issues/914
      return (value is null || 0u >= (uint)value.Length) ? true : false;
  }
  /// <summary>
  /// Returns true when <paramref name="value"/> is null, empty, or made up solely of
  /// characters for which <see cref="char.IsWhiteSpace(char)"/> is true.
  /// </summary>
  public static bool IsNullOrWhiteSpace([NotNullWhen(false)] string? value)
  {
      if (value == null)
      {
          return true;
      }

      foreach (char current in value)
      {
          if (!char.IsWhiteSpace(current))
          {
              return false;
          }
      }

      return true;
  }
  /// <summary>
  /// Returns a read-only reference to the first character of this string.
  /// If the string is null, an access will throw a NullReferenceException.
  /// </summary>
  [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)]
  [NonVersionable]
  public ref readonly char GetPinnableReference()
  {
      return ref _firstChar;
  }
  // Hands out a writable reference to the first character field.
  internal ref char GetRawStringData()
  {
      return ref _firstChar;
  }
  // Helper that lets encodings decode straight into a new string's character storage.
  // The char count reported by encoding.GetCharCount is used as the exact length of the
  // allocated string, and GetChars is expected to produce that same number of chars.
  internal static unsafe string CreateStringFromEncoding(
      byte* bytes, int byteLength, Encoding encoding)
  {
      Debug.Assert(bytes != null);
      Debug.Assert(byteLength >= 0);

      // Ask the encoding how many chars the input decodes to.
      int charCount = encoding.GetCharCount(bytes, byteLength);
      Debug.Assert(charCount >= 0, "stringLength >= 0");

      // Nothing to decode: hand back Empty.
      // (A zero byteLength can still matter if an encoder holds something.)
      if (charCount == 0)
      {
          return Empty;
      }

      string decoded = FastAllocateString(charCount);
      fixed (char* dest = &decoded._firstChar)
      {
          int written = encoding.GetChars(bytes, byteLength, dest, charCount);
          Debug.Assert(charCount == written,
              "Expected encoding.GetChars to return same length as encoding.GetCharCount");
      }

      return decoded;
  }
  // Builds a one-character string. Intended only for char.ToString.
  // The code lives here rather than in Char because _firstChar is private; making it
  // internal would be dangerous, since it would make String's immutability much easier to break.
  internal static string CreateFromChar(char c)
  {
      string single = FastAllocateString(1);
      single._firstChar = c;
      return single;
  }
  // Builds a two-character string consisting of c1 followed by c2.
  internal static string CreateFromChar(char c1, char c2)
  {
      string pair = FastAllocateString(2);
      ref char first = ref pair._firstChar;
      first = c1;
      Unsafe.Add(ref first, 1) = c2;
      return pair;
  }
  // Copies charCount UTF-16 code units from smem to dmem using Buffer.Memmove.
  internal static unsafe void wstrcpy(char* dmem, char* smem, int charCount)
  {
      // Memmove takes a byte count; each char is two bytes.
      uint byteCount = (uint)charCount * sizeof(char);
      Buffer.Memmove((byte*)dmem, (byte*)smem, byteCount);
  }
  // A string is its own string representation: the same instance is returned.
  public override string ToString() => this;
  // The provider is ignored; the same instance is returned.
  public string ToString(IFormatProvider? provider) => this;
  /// <summary>Returns a <see cref="CharEnumerator"/> constructed over this string.</summary>
  public CharEnumerator GetEnumerator() => new CharEnumerator(this);
  // Generic enumerable contract, served by a CharEnumerator over this string.
  IEnumerator<char> IEnumerable<char>.GetEnumerator() => new CharEnumerator(this);
  // Non-generic enumerable contract, served by a CharEnumerator over this string.
  IEnumerator IEnumerable.GetEnumerator() => new CharEnumerator(this);
  /// <summary>
  /// Returns an enumeration of <see cref="Rune"/> from this string.
  /// </summary>
  /// <remarks>
  /// Invalid sequences will be represented in the enumeration by <see cref="Rune.ReplacementChar"/>.
  /// </remarks>
  public StringRuneEnumerator EnumerateRunes() => new StringRuneEnumerator(this);
  // Returns the number of chars that precede the first '\0' at ptr.
  // Throws (via ThrowMustBeNullTerminatedString) if no terminator is found within int.MaxValue chars.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  internal static unsafe int wcslen(char* ptr)
  {
      // IndexOf processes memory in aligned chunks, and thus it won't crash even if it
      // accesses memory beyond the null terminator.
      int terminatorIndex = SpanHelpers.IndexOf(ref *ptr, '\0', int.MaxValue);
      if (terminatorIndex >= 0)
      {
          return terminatorIndex;
      }

      ThrowMustBeNullTerminatedString();
      return terminatorIndex;
  }
  // Returns the number of bytes that precede the first zero byte at ptr.
  // Throws (via ThrowMustBeNullTerminatedString) if no terminator is found within int.MaxValue bytes.
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  internal static unsafe int strlen(byte* ptr)
  {
      // IndexOf processes memory in aligned chunks, and thus it won't crash even if it
      // accesses memory beyond the null terminator.
      int terminatorIndex = SpanHelpers.IndexOf(ref *ptr, (byte)'\0', int.MaxValue);
      if (terminatorIndex >= 0)
      {
          return terminatorIndex;
      }

      ThrowMustBeNullTerminatedString();
      return terminatorIndex;
  }
  // Throw helper used by wcslen/strlen when no terminator was found.
  [DoesNotReturn]
  private static void ThrowMustBeNullTerminatedString() =>
      throw new ArgumentException(SR.Arg_MustBeNullTerminatedString);
  486. //
  487. // IConvertible implementation
  488. //
  /// <summary>Always returns <see cref="TypeCode.String"/>.</summary>
  public TypeCode GetTypeCode() => TypeCode.String;
  // Forwards to Convert.ToBoolean(string, IFormatProvider).
  bool IConvertible.ToBoolean(IFormatProvider? provider) => Convert.ToBoolean(this, provider);
  // Forwards to Convert.ToChar(string, IFormatProvider).
  char IConvertible.ToChar(IFormatProvider? provider) => Convert.ToChar(this, provider);
  // Forwards to Convert.ToSByte(string, IFormatProvider).
  sbyte IConvertible.ToSByte(IFormatProvider? provider) => Convert.ToSByte(this, provider);
  // Forwards to Convert.ToByte(string, IFormatProvider).
  byte IConvertible.ToByte(IFormatProvider? provider) => Convert.ToByte(this, provider);
  // Forwards to Convert.ToInt16(string, IFormatProvider).
  short IConvertible.ToInt16(IFormatProvider? provider) => Convert.ToInt16(this, provider);
  // Forwards to Convert.ToUInt16(string, IFormatProvider).
  ushort IConvertible.ToUInt16(IFormatProvider? provider) => Convert.ToUInt16(this, provider);
  // Forwards to Convert.ToInt32(string, IFormatProvider).
  int IConvertible.ToInt32(IFormatProvider? provider) => Convert.ToInt32(this, provider);
  // Forwards to Convert.ToUInt32(string, IFormatProvider).
  uint IConvertible.ToUInt32(IFormatProvider? provider) => Convert.ToUInt32(this, provider);
  // Forwards to Convert.ToInt64(string, IFormatProvider).
  long IConvertible.ToInt64(IFormatProvider? provider) => Convert.ToInt64(this, provider);
  // Forwards to Convert.ToUInt64(string, IFormatProvider).
  ulong IConvertible.ToUInt64(IFormatProvider? provider) => Convert.ToUInt64(this, provider);
  // Forwards to Convert.ToSingle(string, IFormatProvider).
  float IConvertible.ToSingle(IFormatProvider? provider) => Convert.ToSingle(this, provider);
  // Forwards to Convert.ToDouble(string, IFormatProvider).
  double IConvertible.ToDouble(IFormatProvider? provider) => Convert.ToDouble(this, provider);
  // Forwards to Convert.ToDecimal(string, IFormatProvider).
  decimal IConvertible.ToDecimal(IFormatProvider? provider) => Convert.ToDecimal(this, provider);
  // Forwards to Convert.ToDateTime(string, IFormatProvider).
  DateTime IConvertible.ToDateTime(IFormatProvider? provider) => Convert.ToDateTime(this, provider);
  // Forwards to Convert.DefaultToType, passing this string as the IConvertible source.
  object IConvertible.ToType(Type type, IFormatProvider? provider) =>
      Convert.DefaultToType((IConvertible)this, type, provider);
  553. // Normalization Methods
  554. // These just wrap calls to Normalization class
  /// <summary>Checks whether this string is normalized, using <see cref="NormalizationForm.FormC"/>.</summary>
  public bool IsNormalized() => IsNormalized(NormalizationForm.FormC);
  /// <summary>
  /// Checks whether this string is normalized according to <paramref name="normalizationForm"/>.
  /// </summary>
  public bool IsNormalized(NormalizationForm normalizationForm)
  {
      if (this.IsAscii())
      {
          // ASCII-only text is already normalized under each of the four main forms,
          // so the Normalization call can be skipped for those.
          switch (normalizationForm)
          {
              case NormalizationForm.FormC:
              case NormalizationForm.FormKC:
              case NormalizationForm.FormD:
              case NormalizationForm.FormKD:
                  return true;
          }
      }

      return Normalization.IsNormalized(this, normalizationForm);
  }
  /// <summary>Normalizes this string using <see cref="NormalizationForm.FormC"/>.</summary>
  public string Normalize() => Normalize(NormalizationForm.FormC);
  /// <summary>
  /// Returns this string normalized according to <paramref name="normalizationForm"/>.
  /// An ASCII-only string is returned as the same instance for the four main forms.
  /// </summary>
  public string Normalize(NormalizationForm normalizationForm)
  {
      if (this.IsAscii())
      {
          // ASCII-only text is already normalized under each of the four main forms,
          // so the Normalization call can be skipped for those.
          switch (normalizationForm)
          {
              case NormalizationForm.FormC:
              case NormalizationForm.FormKC:
              case NormalizationForm.FormD:
              case NormalizationForm.FormKD:
                  return this;
          }
      }

      return Normalization.Normalize(this, normalizationForm);
  }
  // True when ASCIIUtility finds no non-ASCII char in this string, i.e. the reported
  // index of the first non-ASCII char equals the string length.
  private unsafe bool IsAscii()
  {
      uint charCount = (uint)Length;

      fixed (char* chars = &_firstChar)
      {
          return ASCIIUtility.GetIndexOfFirstNonAsciiChar(chars, charCount) == charCount;
      }
  }
  596. }
  597. }