// StringTests.cs (8.8 KB)
  1. namespace UnitTests_Parallelizable.TextTests;
  2. #nullable enable
  3. public class StringTests
  4. {
  // GetColumns is invoked on a null string reference and is expected to report zero columns.
  [Fact]
  public void TestGetColumns_Null ()
  {
      string? nullText = null;

      // The null-forgiving operator only silences the compiler; the callee still receives null.
      var columns = nullText!.GetColumns ();

      Assert.Equal (0, columns);
  }
  // The empty string is expected to report zero columns.
  [Fact]
  public void TestGetColumns_Empty () { Assert.Equal (0, string.Empty.GetColumns ()); }
  // A single ASCII letter is expected to report exactly one column.
  [Fact]
  public void TestGetColumns_SingleRune () { Assert.Equal (1, "a".GetColumns ()); }
  // U+200D (ZERO WIDTH JOINER) on its own is expected to report zero columns.
  [Fact]
  public void TestGetColumns_Zero_Width ()
  {
      const string zeroWidthJoiner = "\u200D";

      var columns = zeroWidthJoiner.GetColumns ();

      Assert.Equal (0, columns);
  }
  // Plain and accented letters: every case expects one column per letter.
  [Theory]
  [InlineData ("a", 1)]
  [InlineData ("á", 1)]
  [InlineData ("ab", 2)]
  [InlineData ("áé", 2)]
  [InlineData ("abc", 3)]
  [InlineData ("áéí", 3)]
  [InlineData ("abcd", 4)]
  public void TestGetColumns_MultiRune (string str, int expected)
  {
      var actual = str.GetColumns ();

      Assert.Equal (expected, actual);
  }
  // Code points outside the Basic Multilingual Plane.
  // U+1F602 (😂, Face with Tears of Joy) is expected to be 2 columns wide.
  [Theory]
  [InlineData ("😂", 2)]
  [InlineData ("😂😂", 4)]
  public void TestGetColumns_MultiRune_NonBMP (string str, int expected)
  {
      var actual = str.GetColumns ();

      Assert.Equal (expected, actual);
  }
  // Known wide code points and multi-rune grapheme clusters.
  // Each case lists: the summed width of the individual runes, the number of graphemes
  // reported by GraphemeHelper, and the column width reported for the whole string.
  [Theory]
  [InlineData ("🙂", 2, 1, 2)]
  [InlineData ("a🙂", 3, 2, 3)]
  [InlineData ("🙂a", 3, 2, 3)]
  [InlineData ("👨‍👩‍👦‍👦", 8, 1, 2)]
  [InlineData ("👨‍👩‍👦‍👦🙂", 10, 2, 4)]
  [InlineData ("👨‍👩‍👦‍👦🙂a", 11, 3, 5)]
  [InlineData ("👨‍👩‍👦‍👦a🙂", 11, 3, 5)]
  [InlineData ("👨‍👩‍👦‍👦👨‍👩‍👦‍👦", 16, 2, 4)]
  [InlineData ("าำ", 2, 1, 2)] // า U+0E32 - THAI CHARACTER SARA AA with ำ U+0E33 - THAI CHARACTER SARA AM
  [InlineData ("山", 2, 1, 2)] // The character for "mountain" in Chinese/Japanese/Korean (山), Unicode U+5C71
  [InlineData ("山🙂", 4, 2, 4)] // The character for "mountain" in Chinese/Japanese/Korean (山), Unicode U+5C71
  // U+FE20 Combining Ligature Left Half and U+FE21 Combining Ligature Right Half -
  // https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ("a\ufe20e\ufe21", 2, 2, 2)]
  //[InlineData ("क", 1, 1, 1)] // क U+0915 Devanagari Letter Ka
  //[InlineData ("ि", 1, 1, 1)] // U+093F Devanagari Vowel Sign I ि (i-kar).
  //[InlineData ("कि", 2, 1, 2)] // "कि" is U+0915 for the base consonant "क" with U+093F for the vowel sign "ि" (i-kar).
  [InlineData ("ᄀ", 2, 1, 2)] // ᄀ U+1100 HANGUL CHOSEONG KIYEOK (consonant)
  [InlineData ("ᅡ", 0, 1, 0)] // ᅡ U+1161 HANGUL JUNGSEONG A (vowel)
  [InlineData ("가", 2, 1, 2)] // ᄀ U+1100 HANGUL CHOSEONG KIYEOK (consonant) with ᅡ U+1161 HANGUL JUNGSEONG A (vowel)
  [InlineData ("ᄒ", 2, 1, 2)] // ᄒ U+1112 Hangul Choseong Hieuh
  [InlineData ("ᅵ", 0, 1, 0)] // ᅵ U+1175 Hangul Jungseong I
  [InlineData ("ᇂ", 0, 1, 0)] // ᇂ U+11C2 Hangul Jongseong Hieuh
  [InlineData ("힣", 2, 1, 2)] // ᄒ (choseong h) + ᅵ (jungseong i) + ᇂ (jongseong h)
  [InlineData ("ힰ", 0, 1, 0)] // U+D7B0 ힰ Hangul Jungseong O-Yeo
  [InlineData ("ᄀힰ", 2, 1, 2)] // ᄀ U+1100 HANGUL CHOSEONG KIYEOK (consonant) with U+D7B0 ힰ Hangul Jungseong O-Yeo
  //[InlineData ("षि", 2, 1, 2)] // U+0937 ष DEVANAGARI LETTER SSA with U+093F ि COMBINING DEVANAGARI VOWEL SIGN I
  public void TestGetColumns_MultiRune_WideBMP_Graphemes (string str, int expectedRunesWidth, int expectedGraphemesCount, int expectedWidth)
  {
      // Width obtained by adding up each rune measured in isolation.
      var runesWidth = str.EnumerateRunes ().Sum (rune => rune.GetColumns ());
      Assert.Equal (expectedRunesWidth, runesWidth);

      // Number of graphemes the helper splits the string into.
      var graphemesCount = GraphemeHelper.GetGraphemes (str).Count ();
      Assert.Equal (expectedGraphemesCount, graphemesCount);

      // Width reported for the string as a whole.
      var width = str.GetColumns ();
      Assert.Equal (expectedWidth, width);
  }
  // Calls GetColumns in its static form so that the null argument is passed explicitly;
  // both null and the empty string are expected to report zero columns.
  [Theory]
  [InlineData (null)]
  [InlineData ("")]
  public void TestGetColumns_Does_Not_Throws_With_Null_And_Empty_String (string? text)
  {
      // ReSharper disable once InvokeAsExtensionMethod
      var columns = StringExtensions.GetColumns (text!);

      Assert.Equal (0, columns);
  }
  // Tests for IsAllAsciiDigits as called on a ReadOnlySpan<char>.
  public class ReadOnlySpanExtensionsTests
  {
      // Only non-empty input consisting solely of ASCII '0'-'9' is expected to return true.
      [Theory]
      [InlineData ("12345", true)] // all ASCII digits
      [InlineData ("0", true)] // single ASCII digit
      [InlineData ("", false)] // empty span
      [InlineData ("12a45", false)] // contains a letter
      // U+FF11..U+FF13 FULLWIDTH DIGIT ONE..THREE (not ASCII). Written as escapes so the literal
      // cannot be silently normalized to ASCII "123", which would contradict the expected result.
      [InlineData ("\uFF11\uFF12\uFF13", false)]
      [InlineData ("12 34", false)] // contains space
      [InlineData ("١٢٣", false)] // Arabic-Indic digits
      public void IsAllAsciiDigits_WorksAsExpected (string input, bool expected)
      {
          // Arrange
          ReadOnlySpan<char> span = input.AsSpan ();

          // Act
          bool result = span.IsAllAsciiDigits ();

          // Assert
          Assert.Equal (expected, result);
      }
  }
  // Only non-empty input consisting solely of ASCII 0-9, A-F or a-f is expected to return true.
  [Theory]
  [InlineData ("0", true)]
  [InlineData ("9", true)]
  [InlineData ("A", true)]
  [InlineData ("F", true)]
  [InlineData ("a", true)]
  [InlineData ("f", true)]
  [InlineData ("123ABC", true)]
  [InlineData ("abcdef", true)]
  [InlineData ("G", false)] // 'G' not hex
  [InlineData ("Z9", false)] // 'Z' not hex
  [InlineData ("12 34", false)] // space not hex
  [InlineData ("", false)] // empty string
  // U+FF11..U+FF13 FULLWIDTH DIGIT ONE..THREE (not ASCII). Written as escapes so the literal
  // cannot be silently normalized to ASCII "123", which is valid hex and would contradict the expected result.
  [InlineData ("\uFF11\uFF12\uFF13", false)]
  [InlineData ("0xFF", false)] // includes 'x'
  public void IsAllAsciiHexDigits_ReturnsExpected (string input, bool expected)
  {
      // Arrange
      ReadOnlySpan<char> span = input.AsSpan ();

      // Act
      bool result = span.IsAllAsciiHexDigits ();

      // Assert
      Assert.Equal (expected, result);
  }
  // Verifies StringExtensions.ToString against the cases supplied by GetStringConcatCases.
  [Theory]
  [MemberData (nameof (GetStringConcatCases))]
  public void ToString_ReturnsExpected (IEnumerable<string> input, string expected)
  {
      Assert.Equal (expected, StringExtensions.ToString (input));
  }
  // Supplies the theory rows for ToString_ReturnsExpected: each row is
  // { input sequence of strings, expected resulting string }.
  public static IEnumerable<object []> GetStringConcatCases ()
  {
      object [] [] cases =
      [
          [new string [] { }, string.Empty], // Empty sequence
          [new [] { "" }, string.Empty], // Single empty string
          [new [] { "A" }, "A"], // Single element
          [new [] { "A", "B" }, "AB"], // Simple concatenation
          [new [] { "Hello", " ", "World" }, "Hello World"], // Multiple parts
          [new [] { "123", "456", "789" }, "123456789"], // Numeric strings
          [new [] { "👩‍", "🧒" }, "👩‍🧒"], // Grapheme sequence
          [new [] { "α", "β", "γ" }, "αβγ"], // Unicode letters
          [new [] { "A", null, "B" }, "AB"] // Null element is expected to contribute nothing
      ];

      return cases;
  }
  // Only strings made up of exactly one surrogate pair are expected to return true.
  [Theory]
  [InlineData ("", false)] // Empty string
  [InlineData ("A", false)] // Single BMP character
  [InlineData ("AB", false)] // Two BMP chars, not a surrogate pair
  [InlineData ("👩", true)] // Single emoji surrogate pair (U+1F469)
  [InlineData ("🧒", true)] // Another emoji surrogate pair (U+1F9D2)
  [InlineData ("𐍈", true)] // Gothic letter hwair (U+10348)
  [InlineData ("A👩", false)] // BMP char followed by a surrogate pair (length != 2)
  [InlineData ("👩‍", false)] // Surrogate pair + ZWJ (length != 2)
  public void IsSurrogatePair_ReturnsExpected (string input, bool expected) { Assert.Equal (expected, input.IsSurrogatePair ()); }
  [Theory]

  // Single control characters: expected to map to their "Control Pictures" counterparts
  [InlineData ("\u0000", "\u2400")] // NULL → ␀
  [InlineData ("\u0009", "\u2409")] // TAB → ␉
  [InlineData ("\u000A", "\u240A")] // LF → ␊
  [InlineData ("\u000D", "\u240D")] // CR → ␍

  // Single printable characters: expected to come back unchanged
  [InlineData ("A", "A")]
  [InlineData (" ", " ")]
  [InlineData ("~", "~")]

  // Strings longer than one character: expected to come back unchanged
  [InlineData ("AB", "AB")]
  [InlineData ("Hello", "Hello")]
  [InlineData ("\u0009A", "\u0009A")] // includes a control char, but length > 1
  public void MakePrintable_ReturnsExpected (string input, string expected) { Assert.Equal (expected, input.MakePrintable ()); }
  188. }