StringTests.cs 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. namespace TextTests;
  2. #nullable enable
  3. public class StringTests
  4. {
  // A null string must measure as zero columns. GetColumns is an extension method,
  // so calling it on a null receiver reaches the method instead of throwing at the call site.
  [Fact]
  public void TestGetColumns_Null ()
  {
      string? missing = null;
      var width = missing!.GetColumns ();

      Assert.Equal (0, width);
  }
  // The empty string occupies zero columns.
  [Fact]
  public void TestGetColumns_Empty () => Assert.Equal (0, string.Empty.GetColumns ());
  // A single ASCII letter occupies exactly one column.
  [Fact]
  public void TestGetColumns_SingleRune () => Assert.Equal (1, "a".GetColumns ());
  // U+200D (ZERO WIDTH JOINER) on its own must not take up any column.
  [Fact]
  public void TestGetColumns_Zero_Width ()
  {
      var joiner = "\u200D";
      var width = joiner.GetColumns ();

      Assert.Equal (0, width);
  }
  // Plain and accented Latin letters: each input is expected to measure one column per letter.
  [Theory]
  [InlineData ("a", 1)]
  [InlineData ("á", 1)]
  [InlineData ("ab", 2)]
  [InlineData ("áé", 2)]
  [InlineData ("abc", 3)]
  [InlineData ("áéí", 3)]
  [InlineData ("abcd", 4)]
  public void TestGetColumns_MultiRune (string str, int expected)
  {
      var actual = str.GetColumns ();

      Assert.Equal (expected, actual);
  }
  // Code points outside the Basic Multilingual Plane.
  // U+1F602 (Face with Tears of Joy, 😂) is expected to be 2 columns wide, so two of them are 4.
  [Theory]
  [InlineData ("😂", 2)]
  [InlineData ("😂😂", 4)]
  public void TestGetColumns_MultiRune_NonBMP (string str, int expected)
  {
      var actual = str.GetColumns ();

      Assert.Equal (expected, actual);
  }
  // Known wide code points and multi-rune graphemes.
  // Each row supplies: the text, the sum of the per-rune widths, the number of graphemes
  // reported by GraphemeHelper, and the width reported by GetColumns for the whole string.
  [Theory]
  [InlineData ("🙂", 2, 1, 2)]
  [InlineData ("a🙂", 3, 2, 3)]
  [InlineData ("🙂a", 3, 2, 3)]
  [InlineData ("👨‍👩‍👦‍👦", 8, 1, 2)]
  [InlineData ("👨‍👩‍👦‍👦🙂", 10, 2, 4)]
  [InlineData ("👨‍👩‍👦‍👦🙂a", 11, 3, 5)]
  [InlineData ("👨‍👩‍👦‍👦a🙂", 11, 3, 5)]
  [InlineData ("👨‍👩‍👦‍👦👨‍👩‍👦‍👦", 16, 2, 4)]
  [InlineData ("าำ", 2, 1, 2)] // า U+0E32 - THAI CHARACTER SARA AA with ำ U+0E33 - THAI CHARACTER SARA AM
  [InlineData ("山", 2, 1, 2)] // The character for "mountain" in Chinese/Japanese/Korean (山), Unicode U+5C71
  [InlineData ("山🙂", 4, 2, 4)] // The character for "mountain" in Chinese/Japanese/Korean (山), Unicode U+5C71
  // U+FE20 COMBINING LIGATURE LEFT HALF and U+FE21 COMBINING LIGATURE RIGHT HALF, see
  // https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ("a\ufe20e\ufe21", 2, 2, 2)]
  // Devanagari rows are currently disabled; they are kept here for reference.
  //[InlineData ("क", 1, 1, 1)] // क U+0915 Devanagari Letter Ka
  //[InlineData ("ि", 1, 1, 1)] // U+093F Devanagari Vowel Sign I ि (i-kar).
  //[InlineData ("कि", 2, 1, 2)] // "कि" is U+0915 for the base consonant "क" with U+093F for the vowel sign "ि" (i-kar).
  [InlineData ("ᄀ", 2, 1, 2)] // ᄀ U+1100 HANGUL CHOSEONG KIYEOK (consonant)
  [InlineData ("ᅡ", 0, 1, 0)] // ᅡ U+1161 HANGUL JUNGSEONG A (vowel)
  [InlineData ("가", 2, 1, 2)] // ᄀ U+1100 HANGUL CHOSEONG KIYEOK (consonant) with ᅡ U+1161 HANGUL JUNGSEONG A (vowel)
  [InlineData ("ᄒ", 2, 1, 2)] // ᄒ U+1112 Hangul Choseong Hieuh
  [InlineData ("ᅵ", 0, 1, 0)] // ᅵ U+1175 Hangul Jungseong I
  [InlineData ("ᇂ", 0, 1, 0)] // ᇂ U+11C2 Hangul Jongseong Hieuh
  [InlineData ("힣", 2, 1, 2)] // ᄒ (choseong h) + ᅵ (jungseong i) + ᇂ (jongseong h)
  [InlineData ("ힰ", 0, 1, 0)] // U+D7B0 ힰ Hangul Jungseong O-Yeo
  [InlineData ("ᄀힰ", 2, 1, 2)] // ᄀ U+1100 HANGUL CHOSEONG KIYEOK (consonant) with U+D7B0 ힰ Hangul Jungseong O-Yeo
  //[InlineData ("षि", 2, 1, 2)] // U+0937 ष DEVANAGARI LETTER SSA with U+093F ि COMBINING DEVANAGARI VOWEL SIGN I
  [InlineData ("🇵🇹", 2, 1, 2)] // 🇵 U+1F1F5 — REGIONAL INDICATOR SYMBOL LETTER P with 🇹 U+1F1F9 — REGIONAL INDICATOR SYMBOL LETTER T (flag of Portugal)
  public void TestGetColumns_MultiRune_WideBMP_Graphemes (string str, int expectedRunesWidth, int expectedGraphemesCount, int expectedWidth)
  {
      // Width obtained by measuring every rune on its own and adding the results.
      var summedRuneWidth = str.EnumerateRunes ().Sum (rune => rune.GetColumns ());
      Assert.Equal (expectedRunesWidth, summedRuneWidth);

      // Number of graphemes the helper splits the text into.
      var graphemes = GraphemeHelper.GetGraphemes (str).ToArray ();
      Assert.Equal (expectedGraphemesCount, graphemes.Length);

      // Width of the string as a whole.
      var totalWidth = str.GetColumns ();
      Assert.Equal (expectedWidth, totalWidth);
  }
  // Calls GetColumns through its static class (not as an extension) with null and with "",
  // and expects zero columns in both cases.
  [Theory]
  [InlineData (null)]
  [InlineData ("")]
  public void TestGetColumns_Does_Not_Throws_With_Null_And_Empty_String (string? text)
  {
      // ReSharper disable once InvokeAsExtensionMethod
      var width = StringExtensions.GetColumns (text!);

      Assert.Equal (0, width);
  }
  // Tests for IsAllAsciiDigits when called on a ReadOnlySpan<char>.
  public class ReadOnlySpanExtensionsTests
  {
      // Only non-empty spans made up entirely of the ASCII digits '0'..'9' are expected to pass.
      [Theory]
      [InlineData ("12345", true)] // all ASCII digits
      [InlineData ("0", true)] // single ASCII digit
      [InlineData ("", false)] // empty span
      [InlineData ("12a45", false)] // contains a letter
      // Full-width digits U+FF11..U+FF13 (not ASCII). Written as escapes so the literal cannot be
      // silently normalized to ASCII "123", which would contradict the "12345" row above.
      [InlineData ("\uFF11\uFF12\uFF13", false)]
      [InlineData ("12 34", false)] // contains space
      [InlineData ("١٢٣", false)] // Arabic-Indic digits
      public void IsAllAsciiDigits_WorksAsExpected (string input, bool expected)
      {
          // Arrange
          ReadOnlySpan<char> span = input.AsSpan ();

          // Act
          bool result = span.IsAllAsciiDigits ();

          // Assert
          Assert.Equal (expected, result);
      }
  }
  // IsAllAsciiHexDigits on a ReadOnlySpan<char>: only non-empty input consisting solely of
  // 0-9, A-F or a-f is expected to be accepted.
  [Theory]
  [InlineData ("0", true)]
  [InlineData ("9", true)]
  [InlineData ("A", true)]
  [InlineData ("F", true)]
  [InlineData ("a", true)]
  [InlineData ("f", true)]
  [InlineData ("123ABC", true)]
  [InlineData ("abcdef", true)]
  [InlineData ("G", false)] // 'G' not hex
  [InlineData ("Z9", false)] // 'Z' not hex
  [InlineData ("12 34", false)] // space not hex
  [InlineData ("", false)] // empty string
  // Full-width digits U+FF11..U+FF13, not ASCII. Written as escapes so the literal cannot be
  // silently normalized to ASCII "123", which is a valid hex string (see the "123ABC" row).
  [InlineData ("\uFF11\uFF12\uFF13", false)]
  [InlineData ("0xFF", false)] // includes 'x'
  public void IsAllAsciiHexDigits_ReturnsExpected (string input, bool expected)
  {
      // Arrange
      ReadOnlySpan<char> span = input.AsSpan ();

      // Act
      bool result = span.IsAllAsciiHexDigits ();

      // Assert
      Assert.Equal (expected, result);
  }
  // StringExtensions.ToString applied to a sequence of strings must yield the expected
  // single string; the cases come from GetStringConcatCases.
  [Theory]
  [MemberData (nameof (GetStringConcatCases))]
  public void ToString_ReturnsExpected (IEnumerable<string> input, string expected)
  {
      string combined = StringExtensions.ToString (input);

      Assert.Equal (expected, combined);
  }
  // Data rows for ToString_ReturnsExpected. Each row is { parts, expected }, where expected is
  // the parts joined with no separator (a null part contributes nothing).
  public static IEnumerable<object []> GetStringConcatCases ()
  {
      // Empty sequence
      yield return new object [] { Array.Empty<string> (), string.Empty };

      // Single empty string
      yield return new object [] { new string [] { "" }, string.Empty };

      // Single element
      yield return new object [] { new string [] { "A" }, "A" };

      // Simple concatenation
      yield return new object [] { new string [] { "A", "B" }, "AB" };

      // Multiple parts
      yield return new object [] { new string [] { "Hello", " ", "World" }, "Hello World" };

      // Numeric strings
      yield return new object [] { new string [] { "123", "456", "789" }, "123456789" };

      // Grapheme sequence
      yield return new object [] { new string [] { "👩‍", "🧒" }, "👩‍🧒" };

      // Unicode letters
      yield return new object [] { new string [] { "α", "β", "γ" }, "αβγ" };

      // Null ignored by string.Concat
      yield return new object [] { new [] { "A", null, "B" }, "AB" };

      // Grapheme sequence
      yield return new object [] { new string [] { "🇵", "🇹" }, "🇵🇹" };
  }
  // IsSurrogatePair is expected to be true only when the whole string is one surrogate pair.
  [Theory]
  [InlineData ("", false)] // Empty string
  [InlineData ("A", false)] // Single BMP character
  [InlineData ("AB", false)] // Two BMP chars, not a surrogate pair
  [InlineData ("👩", true)] // Single emoji surrogate pair (U+1F469)
  [InlineData ("🧒", true)] // Another emoji surrogate pair (U+1F9D2)
  [InlineData ("𐍈", true)] // Gothic letter hwair (U+10348)
  [InlineData ("A👩", false)] // BMP char followed by a full surrogate pair (three UTF-16 units)
  [InlineData ("👩‍", false)] // Surrogate pair + ZWJ (length != 2)
  public void IsSurrogatePair_ReturnsExpected (string input, bool expected)
  {
      bool isPair = input.IsSurrogatePair ();

      Assert.Equal (expected, isPair);
  }
  // MakePrintable: a lone control character is expected to map to its Unicode
  // "Control Pictures" counterpart; everything else is expected back unchanged.
  [Theory]
  // Single control characters -> Control Pictures block
  [InlineData ("\u0000", "\u2400")] // NULL → ␀
  [InlineData ("\u0009", "\u2409")] // TAB → ␉
  [InlineData ("\u000A", "\u240A")] // LF → ␊
  [InlineData ("\u000D", "\u240D")] // CR → ␍
  // Single printable characters stay as they are
  [InlineData ("A", "A")]
  [InlineData (" ", " ")]
  [InlineData ("~", "~")]
  // Strings longer than one character are returned untouched
  [InlineData ("AB", "AB")]
  [InlineData ("Hello", "Hello")]
  [InlineData ("\u0009A", "\u0009A")] // includes a control char, but length > 1
  public void MakePrintable_ReturnsExpected (string input, string expected)
  {
      string printable = input.MakePrintable ();

      Assert.Equal (expected, printable);
  }
  190. }