// RuneTests.cs
  1. using System;
  2. using System.Buffers;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Linq;
  6. using System.Text;
  7. using Xunit;
  8. using static Terminal.Gui.SpinnerStyle;
  9. namespace Terminal.Gui.TextTests;
  10. public class RuneTests {
  // IsCombiningMark: combining code points are expected to be reported as marks;
  // a precomposed letter and a plain letter are expected not to be.
  [Theory]
  [InlineData (0x0338, true)] // U+0338 Combining Long Solidus Overlay (as used in ≠)
  [InlineData (0x0300, true)] // U+0300 Combining Grave Accent
  [InlineData (0x0301, true)] // U+0301 Combining Acute Accent (as used in é)
  [InlineData (0x0302, true)] // U+0302 Combining Circumflex Accent
  [InlineData (0x0328, true)] // U+0328 Combining Ogonek (a small hook or comma shape)
  [InlineData (0x00E9, false)] // U+00E9 Latin Small Letter E With Acute (precomposed é)
  [InlineData (0x0061, false)] // U+0061 Latin Small Letter A
  [InlineData ('\uFE20', true)] // U+FE20 Combining Ligature Left Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21', true)] // U+FE21 Combining Ligature Right Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void IsCombiningMark (int codepoint, bool expected)
  {
    var isMark = new Rune (codepoint).IsCombiningMark ();

    Assert.Equal (expected, isMark);
  }
  /// <summary>
  /// Feeds code points to <c>MakePrintable</c> and compares the resulting scalar value with the expected one.
  /// In the data below U+001F, U+007F and U+009F are expected to come back offset by 0x2400,
  /// while U+1001A is expected to come back unchanged.
  /// </summary>
  /// <param name="code">Scalar value converted to a rune and passed to <c>MakePrintable</c>.</param>
  /// <param name="expected">Scalar value the returned rune must have.</param>
  [Theory]
  [InlineData (0x0000001F, 0x241F)]
  [InlineData (0x0000007F, 0x247F)]
  [InlineData (0x0000009F, 0x249F)]
  [InlineData (0x0001001A, 0x1001A)]
  public void MakePrintable_Converts_Control_Chars_To_Proper_Unicode (int code, int expected)
  {
    Rune source = (Rune)code;
    var printable = source.MakePrintable ();

    Assert.Equal (expected, printable.Value);
  }
  /// <summary>
  /// Verifies that <c>MakePrintable</c> hands back the same scalar value for each of the listed code points.
  /// </summary>
  /// <param name="code">Scalar value that must survive <c>MakePrintable</c> unchanged.</param>
  [Theory]
  [InlineData (0x20)]
  [InlineData (0x7E)]
  [InlineData (0xA0)]
  [InlineData (0x010020)]
  public void MakePrintable_Does_Not_Convert_Ansi_Chars_To_Unicode (int code)
  {
    Rune source = (Rune)code;
    var printable = source.MakePrintable ();

    Assert.Equal (code, printable.Value);
  }
  /// <summary>
  /// Verifies that <c>MakePrintable</c> returns each listed code point with its scalar value unchanged.
  /// </summary>
  /// <remarks>
  /// The case previously labelled "Combining ogonek" actually passed 0x0061 (Latin Small Letter A), so the
  /// ogonek itself was never exercised. U+0328 is now listed explicitly and 0x0061 is kept, correctly
  /// labelled, so no existing coverage is lost.
  /// </remarks>
  /// <param name="code">Scalar value that must survive <c>MakePrintable</c> unchanged.</param>
  [Theory]
  [InlineData (0x0338)] // U+0338 Combining Long Solidus Overlay (as used in ≠)
  [InlineData (0x0300)] // U+0300 Combining Grave Accent
  [InlineData (0x0301)] // U+0301 Combining Acute Accent (as used in é)
  [InlineData (0x0302)] // U+0302 Combining Circumflex Accent
  [InlineData (0x0328)] // U+0328 Combining Ogonek (a small hook or comma shape)
  [InlineData (0x0061)] // U+0061 Latin Small Letter A - not a combining mark; plain-letter baseline
  [InlineData ('\uFE20')] // U+FE20 Combining Ligature Left Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21')] // U+FE21 Combining Ligature Right Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void MakePrintable_Combining_Character_Is_Not_Printable (int code)
  {
    var rune = new Rune (code);

    var actual = rune.MakePrintable ();

    Assert.Equal (code, actual.Value);
  }
  /// <summary>
  /// For a single code point, checks the rune's string form, the column width reported by
  /// <c>GetColumns</c>, the UTF-16 length of the string form and the UTF-8 sequence length.
  /// </summary>
  /// <param name="code">Scalar value used to construct the rune.</param>
  /// <param name="str">Expected result of <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected result of <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected number of UTF-16 code units in the string form.</param>
  /// <param name="utf8Length">Expected <c>Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData (0, "\0", 0, 1, 1)]
  [InlineData ('\u1dc0', "᷀", 0, 1, 3)] // ◌᷀ Combining Dotted Grave Accent
  [InlineData ('\u20D0', "⃐", 0, 1, 3)] // ◌⃐ Combining Left Harpoon Above
  [InlineData (1, "\u0001", -1, 1, 1)]
  [InlineData (2, "\u0002", -1, 1, 1)]
  [InlineData (31, "\u001f", -1, 1, 1)] // non-printable character - Information Separator One
  [InlineData (127, "\u007f", -1, 1, 1)] // non-printable character - Delete
  [InlineData (32, " ", 1, 1, 1)] // space
  [InlineData ('a', "a", 1, 1, 1)]
  [InlineData ('b', "b", 1, 1, 1)]
  [InlineData (123, "{", 1, 1, 1)] // { Left Curly Bracket
  [InlineData ('\u231c', "⌜", 1, 1, 3)] // ⌜ Top Left Corner
  // BUGBUG: the glyphs below are clearly wide, yet GetColumns () is expected to return 1 for them.
  // However, most terminals (including Windows Terminal) treat them as narrow, so they overlap the next cell when drawn.
  [InlineData ('\u1161', "ᅡ", 1, 1, 3)] // ᅡ Hangul Jungseong A - a conjoining Hangul jamo; the width expected here is 1
  [InlineData ('\u2103', "℃", 1, 1, 3)] // ℃ Degree Celsius
  [InlineData ('\u2501', "━", 1, 1, 3)] // ━ Box Drawings Heavy Horizontal
  [InlineData ('\u25a0', "■", 1, 1, 3)] // ■ Black Square
  [InlineData ('\u25a1', "□", 1, 1, 3)] // □ White Square
  [InlineData ('\u277f', "❿", 1, 1, 3)] // ❿ Dingbat Negative Circled Number Ten - U+277f
  [InlineData ('\u4dc0', "䷀", 1, 1, 3)] // ䷀ Hexagram For The Creative Heaven - U+4dc0 - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\ud7b0', "ힰ", 1, 1, 3)] // ힰ Hangul Jungseong O-Yeo - U+d7b0
  // Private Use Area: written as an escape because the raw glyph is invisible in most editors
  // and is easily lost when the file is copied or re-encoded.
  [InlineData ('\uf61e', "\uf61e", 1, 1, 3)]
  [InlineData ('\u23f0', "⏰", 2, 1, 3)] // ⏰ Alarm Clock - U+23f0
  [InlineData ('\u1100', "ᄀ", 2, 1, 3)] // ᄀ Hangul Choseong Kiyeok
  [InlineData ('\u1150', "ᅐ", 2, 1, 3)] // ᅐ Hangul Choseong Ceongchieumcieuc
  [InlineData ('\u2615', "☕", 2, 1, 3)] // ☕ Hot Beverage
  [InlineData ('\u231a', "⌚", 2, 1, 3)] // ⌚ Watch
  [InlineData ('\u231b', "⌛", 2, 1, 3)] // ⌛ Hourglass
  // The remaining cases mirror Windows Terminal's CodepointWidthDetector tests
  // (https://github.com/microsoft/terminal/blob/main/src/types/CodepointWidthDetector.cpp):
  //   static constexpr std::wstring_view emoji = L"\xD83E\xDD22"; // U+1F922 nauseated face
  //   static constexpr std::wstring_view ambiguous = L"\x414"; // U+0414 cyrillic capital de
  //   { 0x414, L"\x414", CodepointWidth::Narrow }, // U+0414 cyrillic capital de
  [InlineData ('\u0414', "Д", 1, 1, 2)] // U+0414 cyrillic capital de
  //   { 0x1104, L"\x1104", CodepointWidth::Wide }, // U+1104 hangul choseong ssangtikeut
  [InlineData ('\u1104', "ᄄ", 2, 1, 3)]
  //   { 0x306A, L"\x306A", CodepointWidth::Wide }, // U+306A hiragana na な
  [InlineData (0x306A, "な", 2, 1, 3)]
  //   { 0x30CA, L"\x30CA", CodepointWidth::Wide }, // U+30CA katakana na ナ
  [InlineData (0x30CA, "ナ", 2, 1, 3)]
  //   { 0x72D7, L"\x72D7", CodepointWidth::Wide }, // U+72D7
  [InlineData (0x72D7, "狗", 2, 1, 3)]
  public void GetColumns_With_Single_Code (int code, string str, int columns, int stringLength, int utf8Length)
  {
    var rune = new Rune (code);

    Assert.Equal (str, rune.ToString ());
    Assert.Equal (columns, rune.GetColumns ());
    Assert.Equal (stringLength, rune.ToString ().Length);
    Assert.Equal (utf8Length, rune.Utf8SequenceLength);
    Assert.True (Rune.IsValid (rune.Value));
  }
  /// <summary>
  /// Decodes one rune from a UTF-8 byte sequence and checks its string form, column width,
  /// UTF-16 length, and that the number of bytes consumed equals the rune's UTF-8 sequence length.
  /// </summary>
  /// <param name="code">UTF-8 bytes of a single code point.</param>
  /// <param name="str">Expected string form of the decoded rune.</param>
  /// <param name="columns">Expected result of <c>GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected number of UTF-16 code units in the string form.</param>
  [Theory]
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa8, 0x81 }, "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData (new byte [] { 0xf3, 0xa0, 0xbf, 0xa1 }, "󠿡", 1, 2)] // Undefined Character
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, "🍕", 2, 2)] // 🍕 Slice of Pizza
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa4, 0x96 }, "🤖", 2, 2)] // 🤖 Robot Face
  [InlineData (new byte [] { 0xf0, 0x90, 0x90, 0xa1 }, "𐐡", 1, 2)] // 𐐡 Deseret Capital Letter Er
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8c, 0xb9 }, "🌹", 2, 2)] // 🌹 Rose
  public void GetColumns_Utf8_Encode (byte [] code, string str, int columns, int stringLength)
  {
    OperationStatus status = Rune.DecodeFromUtf8 (code, out Rune decoded, out int consumed);

    Assert.Equal (OperationStatus.Done, status);
    Assert.Equal (str, decoded.ToString ());
    Assert.Equal (columns, decoded.GetColumns ());
    Assert.Equal (stringLength, decoded.ToString ().Length);
    Assert.Equal (consumed, decoded.Utf8SequenceLength);
    Assert.True (Rune.IsValid (decoded.Value));
  }
  /// <summary>
  /// Builds a rune from a UTF-16 surrogate pair and checks its string form, column width,
  /// UTF-16 length and UTF-8 sequence length.
  /// </summary>
  /// <param name="code">Two chars: the high surrogate followed by the low surrogate.</param>
  /// <param name="str">Expected string form of the rune.</param>
  /// <param name="columns">Expected result of <c>GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected number of UTF-16 code units in the string form.</param>
  /// <param name="utf8Length">Expected <c>Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData (new char [] { '\ud83e', '\ude01' }, "🨁", 1, 2, 4)] // Neutral Chess Queen
  [InlineData (new char [] { '\udb43', '\udfe1' }, "󠿡", 1, 2, 4)] // Undefined Character
  [InlineData (new char [] { '\ud83c', '\udf55' }, "🍕", 2, 2, 4)] // 🍕 Slice of Pizza
  [InlineData (new char [] { '\ud83e', '\udd16' }, "🤖", 2, 2, 4)] // 🤖 Robot Face
  [InlineData (new char [] { '\ud83e', '\udde0' }, "🧠", 2, 2, 4)] // 🧠 Brain
  [InlineData (new char [] { '\ud801', '\udc21' }, "𐐡", 1, 2, 4)] // 𐐡 Deseret Capital Letter Er
  [InlineData (new char [] { '\ud83c', '\udf39' }, "🌹", 2, 2, 4)] // 🌹 Rose
  [InlineData (new char [] { '\uD83D', '\uDC7E' }, "👾", 2, 2, 4)] // U+1F47E alien monster (CodepointWidth::Wide)
  [InlineData (new char [] { '\uD83D', '\uDD1C' }, "🔜", 2, 2, 4)] // 🔜 Soon With Rightwards Arrow Above (CodepointWidth::Wide)
  public void GetColumns_Utf16_Encode (char [] code, string str, int columns, int stringLength, int utf8Length)
  {
    char high = code [0];
    char low = code [1];
    var paired = new Rune (high, low);

    Assert.Equal (str, paired.ToString ());
    Assert.Equal (columns, paired.GetColumns ());
    Assert.Equal (stringLength, paired.ToString ().Length);
    Assert.Equal (utf8Length, paired.Utf8SequenceLength);
    Assert.True (Rune.IsValid (paired.Value));
  }
  /// <summary>
  /// Decodes a string written with a <c>\U</c> escape in two ways - <c>Rune.DecodeFromUtf16</c> and the
  /// <c>DecodeRune</c> string extension - and checks string form, column width and lengths for both results.
  /// Each adjacent pair of chars in the input is also converted with <c>char.ConvertToUtf32</c> and
  /// <c>RuneExtensions.EncodeSurrogatePair</c> and compared with the decoded runes.
  /// </summary>
  /// <param name="code">Input string holding one supplementary code point.</param>
  /// <param name="str">Expected string form of the decoded rune.</param>
  /// <param name="columns">Expected result of <c>GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected number of UTF-16 code units in the string form.</param>
  [Theory]
  [InlineData ("\U0001fa01", "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData ("\U000e0fe1", "󠿡", 1, 2)] // Undefined Character
  [InlineData ("\U0001F355", "🍕", 2, 2)] // 🍕 Slice of Pizza
  [InlineData ("\U0001F916", "🤖", 2, 2)] // 🤖 Robot Face
  [InlineData ("\U0001f9e0", "🧠", 2, 2)] // 🧠 Brain
  [InlineData ("\U00010421", "𐐡", 1, 2)] // 𐐡 Deseret Capital Letter Er
  [InlineData ("\U0001f339", "🌹", 2, 2)] // 🌹 Rose
  // Disabled case, kept for reference:
  //[InlineData ("\uFE20FE21", "", 1, 1)] // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void GetColumns_Utf32_Encode (string code, string str, int columns, int stringLength)
  {
    // Path 1: the framework decoder.
    OperationStatus status = Rune.DecodeFromUtf16 (code, out Rune decoded, out int consumedChars);
    Assert.Equal (OperationStatus.Done, status);
    Assert.Equal (str, decoded.ToString ());
    Assert.Equal (columns, decoded.GetColumns ());
    Assert.Equal (stringLength, decoded.ToString ().Length);
    Assert.Equal (consumedChars, decoded.Utf16SequenceLength);
    Assert.True (Rune.IsValid (decoded.Value));

    // Path 2: the DecodeRune string extension.
    var (extensionRune, byteSize) = code.DecodeRune ();
    Assert.Equal (str, extensionRune.ToString ());
    Assert.Equal (columns, extensionRune.GetColumns ());
    Assert.Equal (stringLength, extensionRune.ToString ().Length);
    Assert.Equal (byteSize, extensionRune.Utf8SequenceLength);

    // Every adjacent (high, low) pair must convert to the same scalar value.
    for (int index = 0; index + 1 < code.Length; index++) {
      char high = code [index];
      char low = code [index + 1];
      Assert.Equal (extensionRune.Value, char.ConvertToUtf32 (high, low));
      Assert.True (RuneExtensions.EncodeSurrogatePair (high, low, out Rune result));
      Assert.Equal (decoded, result);
    }
    Assert.True (Rune.IsValid (extensionRune.Value));
  }
  /// <summary>
  /// Checks that the escaped input normalizes to the expected literal, then checks the input's
  /// UTF-16 length and the literal's column width and UTF-16 length.
  /// </summary>
  /// <param name="code">Input written with escape sequences.</param>
  /// <param name="str">Expected result of <c>code.Normalize ()</c>.</param>
  /// <param name="codeLength">Expected <c>code.Length</c>.</param>
  /// <param name="columns">Expected <c>str.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected <c>str.Length</c>.</param>
  [Theory]
  [InlineData ("\u2615\ufe0f", "☕️", 2, 2, 2)] // \ufe0f requests the colorful emoji presentation rather than the monochrome text variant.
  [InlineData ("\u1107\u1165\u11b8", "법", 3, 2, 1)] // three conjoining jamo that normalize to the single syllable U+BC95 법 (read from top left to bottom right)
  [InlineData ("\U0001F468\u200D\U0001F469\u200D\U0001F467", "👨‍👩‍👧", 8, 6, 8)] // Man, Woman and Girl emoji.
  [InlineData ("\u0915\u093f", "कि", 2, 2, 2)] // Hindi कि: DEVANAGARI LETTER KA followed by DEVANAGARI VOWEL SIGN I
  [InlineData ("\u0e4d\u0e32", "ํา", 2, 1, 2)] // Decomposition: ํ (U+0E4D) - า (U+0E32) = U+0E33 ำ Thai Character Sara Am
  [InlineData ("\u0e33", "ำ", 1, 1, 1)] // Decomposition: ํ (U+0E4D) - า (U+0E32) = U+0E33 ำ Thai Character Sara Am
  public void GetColumns_String_Without_SurrogatePair (string code, string str, int codeLength, int columns, int stringLength)
  {
    string normalized = code.Normalize ();
    Assert.Equal (str, normalized);

    Assert.Equal (codeLength, code.Length);

    // Disabled check, kept for reference:
    //Assert.Equal (columns, code.EnumerateRunes ().Sum (x => x.GetColumns ()));
    Assert.Equal (columns, str.GetColumns ());
    Assert.Equal (stringLength, str.Length);
  }
  /// <summary>
  /// A pair whose first char is not a high surrogate must be rejected both by
  /// <c>RuneExtensions.EncodeSurrogatePair</c> (returns false) and by the <see cref="Rune"/>
  /// two-char constructor (throws <see cref="ArgumentOutOfRangeException"/>).
  /// </summary>
  /// <param name="code">Two chars that do not form a valid surrogate pair.</param>
  [Theory]
  [InlineData (new char [] { '\ud799', '\udc21' })]
  public void Rune_Exceptions_Utf16_Encode (char [] code)
  {
    char first = code [0];
    char second = code [1];

    bool encoded = RuneExtensions.EncodeSurrogatePair (first, second, out Rune _);

    Assert.False (encoded);
    Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (first, second));
  }
  /// <summary>
  /// The <see cref="Rune"/> integer constructor must throw <see cref="ArgumentOutOfRangeException"/>
  /// for a value beyond the Unicode range and for a lone surrogate code unit.
  /// </summary>
  /// <param name="code">Value that is not a valid Unicode scalar.</param>
  [Theory]
  [InlineData (0x12345678)]
  [InlineData ('\ud801')]
  public void Rune_Exceptions_Integers (int code)
  {
    Func<object> construct = () => new Rune (code);

    Assert.Throws<ArgumentOutOfRangeException> (construct);
  }
  /// <summary>
  /// Runs <c>PrintTextElementCount</c> over precomposed, decomposed, emoji-sequence and surrogate-pair
  /// strings, pinning column width, rune count, UTF-16 length and text element count for each.
  /// </summary>
  /// <remarks>
  /// The decomposed "a + U+0301" case used to be listed twice with identical arguments; the redundant
  /// copy has been removed.
  /// </remarks>
  [Fact]
  public void GetColumns_GetRuneCount ()
  {
    // Precomposed U+00E1: one char, one rune.
    PrintTextElementCount ('\u00e1'.ToString (), "á", 1, 1, 1, 1);
    // Base letter + combining acute: two chars, two runes, still one column and one text element.
    PrintTextElementCount ("\u0061\u0301", "á", 1, 2, 2, 1);
    PrintTextElementCount ("\u0065\u0301", "é", 1, 2, 2, 1);
    // Emoji + skin-tone modifier + ZWJ + emoji: four runes, seven chars, one text element.
    PrintTextElementCount ("\U0001f469\U0001f3fd\u200d\U0001f692", "👩🏽‍🚒", 6, 4, 7, 1);
    // A single surrogate pair: one rune, two chars.
    PrintTextElementCount ("\ud801\udccf", "𐓏", 1, 1, 2, 1);
  }
  /// <summary>
  /// Asserts that the escaped string and the literal string are the same text and that the text has
  /// the expected column width, rune count, UTF-16 length and text element count.
  /// </summary>
  /// <param name="us">The text written with escape sequences.</param>
  /// <param name="s">The same text written as a literal.</param>
  /// <param name="consoleWidth">Expected <c>us.GetColumns ()</c>.</param>
  /// <param name="runeCount">Expected <c>us.GetRuneCount ()</c>.</param>
  /// <param name="stringCount">Expected <c>s.Length</c>.</param>
  /// <param name="txtElementCount">Expected number of text elements enumerated from <paramref name="s"/>.</param>
  private void PrintTextElementCount (string us, string s, int consoleWidth, int runeCount, int stringCount, int txtElementCount)
  {
    Assert.Equal (us.Length, s.Length);
    Assert.Equal (us, s);
    Assert.Equal (consoleWidth, us.GetColumns ());
    Assert.Equal (runeCount, us.GetRuneCount ());
    Assert.Equal (stringCount, s.Length);

    // Count text elements by walking the enumerator.
    // For versions prior to Net5.0 the StringInfo class might handle some grapheme clusters incorrectly.
    int counted = 0;
    for (TextElementEnumerator elements = StringInfo.GetTextElementEnumerator (s); elements.MoveNext ();) {
      counted++;
    }

    Assert.Equal (txtElementCount, counted);
  }
  /// <summary>
  /// Checks the letter count of an ASCII word and of a text whose letters are all encoded as surrogate pairs.
  /// </summary>
  [Fact]
  public void TestRuneIsLetter ()
  {
    int asciiLetters = CountLettersInString ("Hello");
    Assert.Equal (5, asciiLetters);

    // Eight letters plus one space.
    int supplementaryLetters = CountLettersInString ("𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟");
    Assert.Equal (8, supplementaryLetters);
  }
  /// <summary>
  /// Counts the runes of <paramref name="s"/> for which <see cref="Rune.IsLetter(Rune)"/> is true.
  /// </summary>
  /// <param name="s">Text to scan rune by rune.</param>
  /// <returns>The number of letter runes found.</returns>
  private int CountLettersInString (string s)
  {
    return s.EnumerateRunes ().Count (Rune.IsLetter);
  }
  /// <summary>
  /// Both string walkers must accept well-formed text and throw <see cref="Exception"/> for a string
  /// that consists of a single unpaired high surrogate.
  /// </summary>
  [Fact]
  public void Test_SurrogatePair_From_String ()
  {
    const string wellFormed = "𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟";
    const string loneHighSurrogate = "\ud801";

    Assert.True (ProcessTestStringUseChar (wellFormed));
    Assert.Throws<Exception> (() => ProcessTestStringUseChar (loneHighSurrogate));

    Assert.True (ProcessStringUseRune (wellFormed));
    Assert.Throws<Exception> (() => ProcessStringUseRune (loneHighSurrogate));
  }
  /// <summary>
  /// Walks <paramref name="s"/> one char at a time. Non-surrogate chars are checked as single-char runes;
  /// a surrogate is remembered and combined with the next surrogate into a rune, which is checked against
  /// <c>char.ConvertToUtf32</c> and <c>RuneExtensions.EncodeSurrogatePair</c>.
  /// </summary>
  /// <param name="s">Text to walk.</param>
  /// <returns>Always <c>true</c> when the walk completes.</returns>
  /// <exception cref="Exception">
  /// The last char is a surrogate and no surrogate is pending to pair it with.
  /// </exception>
  private bool ProcessTestStringUseChar (string s)
  {
    // Surrogate seen on the previous iteration that still waits for its partner.
    char pending = default;

    for (int index = 0; index < s.Length; index++) {
      char current = s [index];

      if (!char.IsSurrogate (current)) {
        // Plain char: it must round-trip through Rune and encode to itself as the first byte.
        var plain = new Rune (current);
        byte [] encoded = new byte [4];
        ((Rune)current).Encode (encoded);
        Assert.Equal ((int)current, encoded [0]);
        Assert.Equal (current, plain.Value);
        Assert.True (Rune.IsValid (plain.Value));
        Assert.False (plain.IsSurrogatePair ());
        continue;
      }

      bool hasPending = pending != default && char.IsSurrogate (pending);
      if (hasPending) {
        // Second half of a pair: build the rune both ways and compare.
        var paired = new Rune (pending, current);
        Assert.True (paired.IsSurrogatePair ());

        int codePoint = char.ConvertToUtf32 (pending, current);
        RuneExtensions.EncodeSurrogatePair (pending, current, out Rune encodedPair);
        Assert.Equal (codePoint, encodedPair.Value);

        string sp = new string (new [] { pending, current });
        paired = (Rune)codePoint;
        Assert.Equal (sp, paired.ToString ());
        Assert.True (paired.IsSurrogatePair ());

        pending = default;
        continue;
      }

      if (index < s.Length - 1) {
        // First half of a pair: remember it and look at the next char.
        pending = current;
        continue;
      }

      // A surrogate in last position with nothing pending cannot be completed.
      Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (current));
      throw new Exception ("String was not well-formed UTF-16.");
    }

    return true;
  }
  /// <summary>
  /// Walks <paramref name="s"/>, turning each valid char or surrogate pair into a rune, and finally checks
  /// that the summed rune widths equal the string's width and that the runes rebuild the original text.
  /// </summary>
  /// <param name="s">Text to walk.</param>
  /// <returns>Always <c>true</c> when the walk completes.</returns>
  /// <exception cref="Exception">
  /// A char is neither a valid scalar on its own nor the start of a pair accepted by
  /// <c>RuneExtensions.EncodeSurrogatePair</c>.
  /// </exception>
  private bool ProcessStringUseRune (string s)
  {
    var rebuilt = new StringBuilder ();
    var collected = new List<Rune> ();
    int totalColumns = 0;

    for (int index = 0; index < s.Length; index++) {
      char current = s [index];
      Rune rune = default;

      if (Rune.IsValid (current)) {
        // The char is a scalar value by itself.
        rune = new Rune (current);
        Assert.True (Rune.IsValid (rune.Value));
        collected.Add (rune);
        Assert.Equal (current, rune.Value);
        Assert.False (rune.IsSurrogatePair ());
      } else if (index + 1 < s.Length && RuneExtensions.EncodeSurrogatePair (current, s [index + 1], out Rune paired)) {
        // The char and its successor form a surrogate pair.
        Assert.Equal (0, rune.Value);
        Assert.False (Rune.IsValid (current));
        rune = paired;
        collected.Add (rune);
        string sp = new string (new [] { current, s [index + 1] });
        Assert.Equal (sp, paired.ToString ());
        Assert.True (paired.IsSurrogatePair ());
        index++; // Skip the low surrogate that was just consumed
      } else {
        Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (current));
        throw new Exception ("String was not well-formed UTF-16.");
      }

      totalColumns += rune.GetColumns (); // Accumulate the column width of this rune
      rebuilt.Append (rune.ToString ());
    }

    Assert.Equal (s.GetColumns (), totalColumns);
    Assert.Equal (s, rebuilt.ToString ());
    Assert.Equal (s, StringExtensions.ToString (collected));
    return true;
  }
  /// <summary>
  /// Splitting text that contains surrogate-pair emoji on a space or on a comma must yield three parts.
  /// </summary>
  [Fact]
  public void TestSplit ()
  {
    const string inputString = "🐂, 🐄, 🐆";

    int partsBySpace = inputString.Split (' ').Length;
    int partsByComma = inputString.Split (',').Length;

    Assert.Equal (3, partsBySpace);
    Assert.Equal (3, partsByComma);
  }
  /// <summary>
  /// Checks whether one char, or a pair of chars, can be turned into a <see cref="Rune"/>.
  /// </summary>
  /// <remarks>
  /// The previous version asserted <paramref name="valid"/> against itself inside each branch, which can
  /// never fail, and caught every exception type. The outcome of the construction is now compared with
  /// <paramref name="valid"/> directly, and only the <see cref="ArgumentOutOfRangeException"/> thrown by
  /// the constructor for a surrogate char is treated as "invalid".
  /// </remarks>
  /// <param name="valid">Whether the chars are expected to form a valid rune.</param>
  /// <param name="chars">One char, or a high/low surrogate pair.</param>
  [Theory]
  [InlineData (true, '\u1100')]
  [InlineData (true, '\ud83c', '\udf39')]
  [InlineData (true, '\udbff', '\udfff')]
  [InlineData (false, '\ud801')]
  [InlineData (false, '\ud83e')]
  public void Rune_IsValid (bool valid, params char [] chars)
  {
    Rune rune = default;
    bool constructed = true;

    if (chars.Length == 1) {
      try {
        rune = new Rune (chars [0]);
      } catch (ArgumentOutOfRangeException) {
        // A lone surrogate char cannot be a rune.
        constructed = false;
      }
    } else {
      rune = new Rune (chars [0], chars [1]);
    }

    Assert.Equal (valid, constructed);
    if (valid) {
      Assert.NotEqual (default, rune);
      Assert.True (Rune.IsValid (rune.Value));
    }
  }
  /// <summary>
  /// Checks a base letter and a combining mark separately (column width and string form), then checks the
  /// normalized combination: its text, its column width, and the first rune and size returned by
  /// <c>DecodeRune</c>.
  /// </summary>
  /// <param name="code1">Scalar value of the base letter.</param>
  /// <param name="code2">Scalar value of the combining mark.</param>
  /// <param name="code">Both code points as one string.</param>
  /// <param name="rune1Length">Expected column width of the base letter.</param>
  /// <param name="rune2Length">Expected column width of the combining mark.</param>
  /// <param name="codeLength">Expected <c>code.Length</c>.</param>
  /// <param name="code1String">Expected string form of the base letter.</param>
  /// <param name="code2String">Expected string form of the combining mark.</param>
  /// <param name="joinString">Expected normalized combination.</param>
  /// <param name="joinLength">Expected column width of the combination.</param>
  /// <param name="bytesLength">Expected size returned by <c>DecodeRune</c> for the combination.</param>
  [Theory]
  [InlineData ('\u006f', '\u0302', "\u006f\u0302", 1, 0, 2, "o", "̂", "ô", 1, 2)]
  [InlineData ('\u0065', '\u0301', "\u0065\u0301", 1, 0, 2, "e", "́", "é", 1, 2)]
  public void Test_NonSpacingChar (int code1, int code2, string code, int rune1Length, int rune2Length, int codeLength, string code1String, string code2String, string joinString, int joinLength, int bytesLength)
  {
    var baseRune = new Rune (code1);
    var markRune = new Rune (code2);

    // Each rune on its own.
    Assert.Equal (rune1Length, baseRune.GetColumns ());
    Assert.Equal (rune2Length, markRune.GetColumns ());
    Assert.Equal (code1String, baseRune.ToString ());
    Assert.Equal (code2String, markRune.ToString ());

    // The two runes combined and normalized.
    string composed = $"{baseRune}{markRune}".Normalize ();
    Assert.Equal (composed, joinString);
    Assert.Equal (composed, code.Normalize ());
    Assert.Equal (joinLength, composed.GetColumns ());

    int summedColumns = code.EnumerateRunes ().Sum (c => c.GetColumns ());
    Assert.Equal (joinLength, summedColumns);
    Assert.Equal (codeLength, code.Length);

    var (firstRune, byteCount) = composed.DecodeRune ();
    Assert.Equal (composed.ToRunes () [0], firstRune);
    Assert.Equal (bytesLength, byteCount);
  }
  /// <summary>
  /// Constructing a <see cref="Rune"/> from an out-of-range integer, or from two chars that do not form a
  /// surrogate pair, must throw <see cref="ArgumentOutOfRangeException"/>.
  /// </summary>
  /// <param name="codes">One integer, or two values that are cast to chars.</param>
  [Theory]
  [InlineData (500000000)]
  [InlineData (0xf801, 0xdfff)]
  public void Test_MaxRune (params int [] codes)
  {
    Func<object> construct;
    if (codes.Length == 1) {
      construct = () => new Rune (codes [0]);
    } else {
      construct = () => new Rune ((char)codes [0], (char)codes [1]);
    }

    Assert.Throws<ArgumentOutOfRangeException> (construct);
  }
  /// <summary>
  /// Walks every Unicode scalar value and compares the rune's column width with the column width of its
  /// string form. For BMP runes with a negative width the two are expected to differ; in every other
  /// case they are expected to match.
  /// </summary>
  /// <remarks>
  /// The surrogate filter used to be <c>char.IsSurrogate ((char)i)</c>. Casting to <c>char</c> keeps only
  /// the low 16 bits, so every supplementary code point whose low 16 bits fall in D800..DFFF (for example
  /// U+1D800) was skipped as well. <see cref="Rune.IsValid(int)"/> rejects exactly the surrogate block
  /// within this loop's range, so those code points are now covered.
  /// </remarks>
  [Fact]
  public void Sum_Of_Rune_GetColumns_Is_Not_Always_Equal_To_String_GetColumns ()
  {
    const int start = 0x000000;
    const int end = 0x10ffff;

    for (int i = start; i <= end; i++) {
      // Only U+D800..U+DFFF are not scalar values inside [start, end].
      if (!Rune.IsValid (i)) {
        continue;
      }

      Rune r = new Rune ((uint)i);
      string us = r.ToString ();
      // The same text produced independently of Rune.
      string s = char.ConvertFromUtf32 (i);

      Assert.Equal (r.ToString (), us);
      Assert.Equal (us, s);

      if (!r.IsSurrogatePair ()) {
        if (r.GetColumns () < 0) {
          // Negative rune widths are expected not to carry over to the string width.
          Assert.NotEqual (r.GetColumns (), us.GetColumns ());
          Assert.NotEqual (s.EnumerateRunes ().Sum (c => c.GetColumns ()), us.GetColumns ());
        } else {
          Assert.Equal (r.GetColumns (), us.GetColumns ());
          Assert.Equal (s.EnumerateRunes ().Sum (c => c.GetColumns ()), us.GetColumns ());
        }
        Assert.Equal (us.GetRuneCount (), s.Length);
      } else {
        Assert.Equal (r.GetColumns (), us.GetColumns ());
        Assert.Equal (s.GetColumns (), us.GetColumns ());
        Assert.Equal (1, us.GetRuneCount ()); // A valid surrogate pair is a single rune in U+10000..U+10FFFF
        Assert.Equal (2, s.Length); // The string keeps both UTF-16 code units of the pair
      }
    }
  }
  /// <summary>
  /// For every code point in the inclusive range, compares the rune with its string form: same text,
  /// same column width, and the rune count / UTF-16 length expected for a BMP or supplementary rune.
  /// </summary>
  /// <param name="start">First code point of the range.</param>
  /// <param name="end">Last code point of the range (inclusive).</param>
  [Theory]
  [InlineData (0x20D0, 0x20EF)]
  [InlineData (0x2310, 0x231F)]
  [InlineData (0x1D800, 0x1D80F)]
  public void Test_Range (int start, int end)
  {
    for (int codePoint = start; codePoint <= end; codePoint++) {
      var rune = new Rune ((uint)codePoint);
      string fromRune = rune.ToString ();

      // Round-trip the value through its six-digit hex form before converting it to text.
      string hexDigits = codePoint.ToString ("x6");
      int parsed = int.Parse (hexDigits, NumberStyles.HexNumber);
      string fromScalar = char.ConvertFromUtf32 (parsed);

      bool isPair = rune.IsSurrogatePair ();

      Assert.Equal (rune.ToString (), fromRune);
      Assert.Equal (fromRune, fromScalar);
      Assert.Equal (rune.GetColumns (), fromRune.GetColumns ());

      if (isPair) {
        Assert.Equal (1, fromRune.GetRuneCount ()); // A valid surrogate pair is a single rune in U+10000..U+10FFFF
        Assert.Equal (2, fromScalar.Length); // The string keeps both UTF-16 code units of the pair
      } else {
        Assert.Equal (fromRune.GetRuneCount (), fromScalar.Length); // Without surrogate pairs the rune count equals String.Length
      }

      Assert.Equal (fromScalar.GetColumns (), fromRune.GetColumns ());
    }
  }
  /// <summary>
  /// Checks <c>char.IsSurrogate (string, int)</c> at index 0 of a one-char string.
  /// </summary>
  /// <param name="code">The char to classify.</param>
  /// <param name="isSurrogate">Whether <paramref name="code"/> is expected to be a surrogate.</param>
  [Theory]
  [InlineData ('\ue0fd', false)]
  [InlineData ('\ud800', true)]
  [InlineData ('\udfff', true)]
  public void Test_IsSurrogate (char code, bool isSurrogate)
  {
    string text = code.ToString ();

    Assert.Equal (isSurrogate, char.IsSurrogate (text, 0));
  }
  /// <summary>
  /// Checks the return value of <c>RuneExtensions.EncodeSurrogatePair</c> and the rune it produces.
  /// </summary>
  /// <remarks>
  /// The positive branch used to call the method with the hard-coded chars <c>'\ud83c', '\udf56'</c>
  /// instead of the test's own parameters, so any further positive case would have silently tested the
  /// wrong input. Both outcomes now go through the supplied chars.
  /// </remarks>
  /// <param name="highSurrogate">Candidate high surrogate.</param>
  /// <param name="lowSurrogate">Candidate low surrogate.</param>
  /// <param name="runeValue">Expected scalar value of the output rune.</param>
  /// <param name="runeString">Expected string form of the output rune.</param>
  /// <param name="isSurrogatePair">Expected return value of <c>EncodeSurrogatePair</c>.</param>
  [Theory]
  [InlineData (unchecked((char)0x40D7C0), (char)0xDC20, 0, "\0", false)]
  [InlineData ((char)0x0065, (char)0x0301, 0, "\0", false)]
  [InlineData ('\ud83c', '\udf56', 0x1F356, "🍖", true)] // 🍖 Meat On Bone
  public void Test_EncodeSurrogatePair (char highSurrogate, char lowSurrogate, int runeValue, string runeString, bool isSurrogatePair)
  {
    bool encoded = RuneExtensions.EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune);

    Assert.Equal (isSurrogatePair, encoded);
    Assert.Equal (runeValue, rune.Value);
    Assert.Equal (runeString, rune.ToString ());
  }
  /// <summary>
  /// Checks <c>DecodeSurrogatePair</c>: a supplementary rune must yield its two surrogates, and a BMP rune
  /// must yield <c>false</c> with a null array.
  /// </summary>
  /// <remarks>
  /// The Private Use Area expectation is written as the escape <c>\uea85</c> because the raw glyph is
  /// invisible in most editors and is easily lost when the file is copied or re-encoded. The final
  /// comparison now passes the expected array first, as <c>Assert.Equal</c> expects.
  /// </remarks>
  /// <param name="code">Scalar value used to construct the rune.</param>
  /// <param name="charsValue">Expected surrogate chars, or null when the rune is not a pair.</param>
  /// <param name="runeString">Expected string form of the rune.</param>
  /// <param name="isSurrogatePair">Expected return value of <c>DecodeSurrogatePair</c>.</param>
  [Theory]
  [InlineData ('\uea85', null, "\uea85", false)] // Private Use Area
  [InlineData (0x1F356, new char [] { '\ud83c', '\udf56' }, "🍖", true)] // 🍖 Meat On Bone
  public void Test_DecodeSurrogatePair (int code, char [] charsValue, string runeString, bool isSurrogatePair)
  {
    Rune rune = new Rune (code);
    char [] chars;

    if (isSurrogatePair) {
      Assert.True (rune.DecodeSurrogatePair (out chars));
      Assert.Equal (2, chars.Length);
      Assert.Equal (charsValue [0], chars [0]);
      Assert.Equal (charsValue [1], chars [1]);
      // Rebuilding a rune from the decoded chars must give the same text.
      Assert.Equal (runeString, new Rune (chars [0], chars [1]).ToString ());
    } else {
      Assert.False (rune.DecodeSurrogatePair (out chars));
      Assert.Null (chars);
      Assert.Equal (runeString, rune.ToString ());
    }

    Assert.Equal (charsValue, chars);
  }
  /// <summary>
  /// Builds a rune from every high/low surrogate combination and checks that its value lies between
  /// 0x10000 and <c>RuneExtensions.MaxUnicodeCodePoint</c>, and that the rune and its string form agree
  /// on text, column width, rune count and UTF-16 length.
  /// </summary>
  [Fact]
  public void Test_All_Surrogate_Pairs_Range ()
  {
    for (uint high = 0xd800; high <= 0xdbff; high++) {
      for (uint low = 0xdc00; low <= 0xdfff; low++) {
        var pair = new Rune ((char)high, (char)low);
        string fromRune = pair.ToString ();

        // Round-trip the value through its six-digit hex form before converting it to text.
        string hexDigits = pair.Value.ToString ("x6");
        int parsed = int.Parse (hexDigits, NumberStyles.HexNumber);
        string fromScalar = char.ConvertFromUtf32 (parsed);

        Assert.False (parsed < 0x10000 || parsed > RuneExtensions.MaxUnicodeCodePoint);
        Assert.Equal (pair.ToString (), fromRune);
        Assert.Equal (fromRune, fromScalar);
        Assert.Equal (pair.GetColumns (), fromRune.GetColumns ());
        Assert.Equal (fromScalar.GetColumns (), fromRune.GetColumns ());
        Assert.Equal (1, fromRune.GetRuneCount ()); // A valid surrogate pair is a single rune in U+10000..U+10FFFF
        Assert.Equal (2, fromScalar.Length); // The string keeps both UTF-16 code units of the pair
      }
    }
  }
  /// <summary>
  /// Calls <c>DecodeRune</c> once per rune of the text, collecting the runes and summing the returned
  /// sizes, then checks the rebuilt text, the total size, the column width and the UTF-16 length.
  /// </summary>
  /// <param name="text">Text to decode.</param>
  /// <param name="bytesLength">Expected sum of the sizes returned by <c>DecodeRune</c>.</param>
  /// <param name="colsLength">Expected column width of the rebuilt text.</param>
  /// <param name="textLength">Expected UTF-16 length of the rebuilt text.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9)] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13)] // With Surrogate Pairs
  public void Test_DecodeRune_Extension (string text, int bytesLength, int colsLength, int textLength)
  {
    var decoded = new List<Rune> ();
    int totalSize = 0;

    for (int index = 0; index < text.GetRuneCount (); index++) {
      var (rune, size) = text.DecodeRune (index);
      decoded.Add (rune);
      totalSize += size;
    }

    string rebuilt = StringExtensions.ToString (decoded);
    Assert.Equal (text, rebuilt);
    Assert.Equal (bytesLength, totalSize);
    Assert.Equal (colsLength, rebuilt.GetColumns ());
    Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  /// Calls <c>DecodeLastRune</c> for each rune index from last to first, collecting the runes and summing
  /// the returned sizes, then checks the collected text against the reversed text and checks the total
  /// size, the column width and the UTF-16 length.
  /// </summary>
  /// <param name="text">Text to decode from the end.</param>
  /// <param name="bytesLength">Expected sum of the sizes returned by <c>DecodeLastRune</c>.</param>
  /// <param name="colsLength">Expected column width of the collected text.</param>
  /// <param name="textLength">Expected UTF-16 length of the collected text.</param>
  /// <param name="encoded">Expected collected text (the input with its runes in reverse order).</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9, "界世 ,olleH")] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13, "𝔹𝕆𝔹 ,olleH")] // With Surrogate Pairs
  public void Test_DecodeLastRune_Extension (string text, int bytesLength, int colsLength, int textLength, string encoded)
  {
    var decoded = new List<Rune> ();
    int totalSize = 0;

    int index = text.GetRuneCount () - 1;
    while (index >= 0) {
      var (rune, size) = text.DecodeLastRune (index);
      decoded.Add (rune);
      totalSize += size;
      index--;
    }

    string collectedText = StringExtensions.ToString (decoded);
    Assert.Equal (encoded, collectedText);
    Assert.Equal (bytesLength, totalSize);
    Assert.Equal (colsLength, collectedText.GetColumns ());
    Assert.Equal (textLength, collectedText.Length);
  }
  /// <summary>
  /// Turns the input into a string (directly, or through <c>StringExtensions.ToString</c> for a byte
  /// array), encodes that string with <see cref="Encoding.Unicode"/> and checks the result of
  /// <c>RuneExtensions.CanBeEncodedAsRune</c> on those bytes.
  /// </summary>
  /// <param name="text">A <see cref="string"/> or a <c>byte []</c>; any other value is not checked.</param>
  /// <param name="canBeEncodedAsRune">Expected result of <c>CanBeEncodedAsRune</c>.</param>
  [Theory]
  [InlineData ("���", false)]
  [InlineData ("Hello, 世界", true)]
  [InlineData (new byte [] { 0xff, 0xfe, 0xfd }, false)]
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, true)]
  public void Test_CanBeEncodedAsRune_Extension (object text, bool canBeEncodedAsRune)
  {
    string str;
    switch (text) {
    case string literal:
      str = literal;
      break;
    case byte [] raw:
      str = StringExtensions.ToString (raw);
      break;
    default:
      // Neither supported input kind: nothing to verify.
      return;
    }

    byte [] utf16Bytes = Encoding.Unicode.GetBytes (str.ToCharArray ());
    if (canBeEncodedAsRune) {
      Assert.True (RuneExtensions.CanBeEncodedAsRune (utf16Bytes));
    } else {
      Assert.False (RuneExtensions.CanBeEncodedAsRune (utf16Bytes));
    }
  }
  /// <summary>
  /// Contrasts content equality and reference equality for rune lists produced by <c>ToRuneList</c>:
  /// lists built from the same text are equal in content but are distinct objects, a copy is equal but
  /// distinct, and an alias is the very same object.
  /// </summary>
  [Fact]
  public void Equals_ToRuneList ()
  {
    var oneLine = new List<List<Rune>> () { "First line.".ToRuneList () };
    var twoLines = new List<List<Rune>> () { "First line.".ToRuneList (), "Second line.".ToRuneList () };
    var copy = new List<Rune> (oneLine [0]);
    var alias = oneLine [0];

    // Same text, separately built lists.
    Assert.Equal (oneLine [0], twoLines [0]);
    Assert.NotSame (oneLine [0], twoLines [0]);
    Assert.NotEqual (oneLine [0], twoLines [1]);
    Assert.NotSame (oneLine [0], twoLines [1]);

    // A copy has equal content but is never the same reference.
    Assert.Equal (copy, oneLine [0]);
    Assert.NotSame (copy, oneLine [0]);
    Assert.Equal (copy, twoLines [0]);
    Assert.NotSame (copy, twoLines [0]);
    Assert.NotEqual (copy, twoLines [1]);
    Assert.NotSame (copy, twoLines [1]);

    // An alias is the same reference as the list it was taken from.
    Assert.Equal (alias, oneLine [0]);
    Assert.Same (alias, oneLine [0]);
    Assert.Equal (alias, twoLines [0]);
    Assert.NotSame (alias, twoLines [0]);
    Assert.NotEqual (alias, twoLines [1]);
    Assert.NotSame (alias, twoLines [1]);

    // SequenceEqual looks at content only.
    Assert.True (oneLine [0].SequenceEqual (twoLines [0]));
    Assert.False (oneLine [0].SequenceEqual (twoLines [1]));
    Assert.True (copy.SequenceEqual (oneLine [0]));
    Assert.True (copy.SequenceEqual (twoLines [0]));
    Assert.False (copy.SequenceEqual (twoLines [1]));
    Assert.True (alias.SequenceEqual (oneLine [0]));
    Assert.True (alias.SequenceEqual (twoLines [0]));
    Assert.False (alias.SequenceEqual (twoLines [1]));
  }
  /// <summary>
  /// Checks the column width reported for each control character from U+0000 to U+001F,
  /// both for the rune on its own and for the string produced from it.
  /// </summary>
  /// <remarks>
  /// Shows the difference between using Wcwidth.UnicodeCalculator and our
  /// own port of wcwidth. Specifically, the UnicodeCalculator is more accurate to spec
  /// where null has a width of 0, and our port says it's -1.
  /// </remarks>
  /// <param name="expectedColumns">Width expected from <c>GetColumns</c> on the rune itself.</param>
  /// <param name="scalar">Scalar value of the control character under test.</param>
  [Theory]
  [InlineData (0, 0)]
  [InlineData (-1, 1)]
  [InlineData (-1, 2)]
  [InlineData (-1, 3)]
  [InlineData (-1, 4)]
  [InlineData (-1, 5)]
  [InlineData (-1, 6)]
  [InlineData (-1, 7)]
  [InlineData (-1, 8)]
  [InlineData (-1, 9)]
  [InlineData (-1, 10)]
  [InlineData (-1, 11)]
  [InlineData (-1, 12)]
  [InlineData (-1, 13)]
  [InlineData (-1, 14)]
  [InlineData (-1, 15)]
  [InlineData (-1, 16)]
  [InlineData (-1, 17)]
  [InlineData (-1, 18)]
  [InlineData (-1, 19)]
  [InlineData (-1, 20)]
  [InlineData (-1, 21)]
  [InlineData (-1, 22)]
  [InlineData (-1, 23)]
  [InlineData (-1, 24)]
  [InlineData (-1, 25)]
  [InlineData (-1, 26)]
  [InlineData (-1, 27)]
  [InlineData (-1, 28)]
  [InlineData (-1, 29)]
  [InlineData (-1, 30)]
  [InlineData (-1, 31)]
  public void Rune_GetColumns_Non_Printable (int expectedColumns, int scalar)
  {
      Rune controlRune = new Rune (scalar);

      int runeColumns = controlRune.GetColumns ();
      Assert.Equal (expectedColumns, runeColumns);

      // The string form is expected to occupy no columns for every row.
      int stringColumns = controlRune.ToString ().GetColumns ();
      Assert.Equal (0, stringColumns);
  }
  /// <summary>
  /// Sums the column widths of the 32 runes with scalar values 0 to 31, once measured per
  /// rune and once measured on each rune's string form, and checks both totals.
  /// </summary>
  [Fact]
  public void Rune_GetColumns_Versus_String_GetColumns_With_Non_Printable_Characters ()
  {
      var controlRunes = Enumerable.Range (0, 32).Select (code => (Rune)(uint)code).ToArray ();

      int runeTotal = controlRunes.Sum (rune => rune.GetColumns ());
      int stringTotal = controlRunes.Sum (rune => rune.ToString ().GetColumns ());

      Assert.Equal (-31, runeTotal);
      Assert.Equal (0, stringTotal);
  }
  /// <summary>
  /// Compares the column width of a whole string with the sum of the widths of its runes.
  /// In the data rows, each '\t' or '\n' adds nothing to the string width and lowers the
  /// per-rune sum by one.
  /// </summary>
  /// <param name="text">Text under test.</param>
  /// <param name="stringLength">Expected <c>Length</c> and rune count of <paramref name="text"/>.</param>
  /// <param name="strCols">Expected result of <c>GetColumns</c> on the whole string.</param>
  /// <param name="runeCols">Expected sum of <c>GetColumns</c> over the individual runes.</param>
  [Theory]
  [InlineData ("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", 200, 200, 200)]
  [InlineData ("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789\n", 201, 200, 199)] // has a '\n' newline
  [InlineData ("\t01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789\n", 202, 200, 198)] // has a '\t' and a '\n' newline
  public void Rune_ColumnWidth_Versus_String_ConsoleWidth (string text, int stringLength, int strCols, int runeCols)
  {
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (stringLength, text.GetRuneCount ());
      Assert.Equal (strCols, text.GetColumns ());

      int runeColumnTotal = 0;
      foreach (var rune in text.EnumerateRunes ()) {
          runeColumnTotal += rune.GetColumns ();
      }
      Assert.Equal (runeCols, runeColumnTotal);
  }
  /// <summary>
  /// Checks how <c>char.IsHighSurrogate</c> and <c>char.IsLowSurrogate</c> classify a UTF-16 code unit.
  /// </summary>
  /// <param name="code">Code unit under test.</param>
  /// <param name="isHighSurrogate">
  /// <c>true</c> when a high surrogate is expected, <c>false</c> when a low surrogate is
  /// expected, and <c>null</c> when the code unit must be neither.
  /// </param>
  [Theory]
  [InlineData ('\ud800', true)]
  [InlineData ('\udbff', true)]
  [InlineData ('\udc00', false)]
  [InlineData ('\udfff', false)]
  [InlineData ('\uefff', null)]
  public void Rune_IsHighSurrogate_IsLowSurrogate (char code, bool? isHighSurrogate)
  {
      switch (isHighSurrogate) {
      case true:
          Assert.True (char.IsHighSurrogate (code));
          break;
      case false:
          Assert.True (char.IsLowSurrogate (code));
          break;
      default:
          // Not a surrogate of either kind.
          Assert.False (char.IsHighSurrogate (code));
          Assert.False (char.IsLowSurrogate (code));
          break;
      }
  }
  /// <summary>
  /// Checks that <c>ToRunes</c> yields the same runes, in the same order and number, as
  /// <c>string.EnumerateRunes</c>.
  /// </summary>
  /// <param name="text">Text to split into runes.</param>
  [Theory]
  [InlineData ("First line.")]
  [InlineData ("Hello, 𝔹𝕆𝔹")]
  public void Rune_ToRunes (string text)
  {
      var runes = text.ToRunes ();
      // Build the reference array once instead of on every loop iteration.
      var expected = text.EnumerateRunes ().ToArray ();

      // Without this check a result that is too short (or empty) would pass unnoticed.
      Assert.Equal (expected.Length, runes.Length);
      for (int i = 0; i < runes.Length; i++) {
          Assert.Equal (expected [i].Value, runes [i].Value);
      }
  }
  /// <summary>
  /// Checks the UTF-16 and UTF-8 sequence lengths that <see cref="System.Text.Rune"/>
  /// reports for a scalar value.
  /// </summary>
  /// <param name="code">Scalar value used to construct the rune.</param>
  /// <param name="utf16Length">Expected <c>Utf16SequenceLength</c>.</param>
  /// <param name="utf8Length">Expected <c>Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData ('a', 1, 1)]
  [InlineData (31, 1, 1)]
  [InlineData (123, 1, 1)]
  [InlineData (127, 1, 1)]
  [InlineData ('\u1150', 1, 3)]
  [InlineData ('\u1161', 1, 3)]
  [InlineData (0x16fe0, 2, 4)]
  public void System_Text_Rune_SequenceLength (int code, int utf16Length, int utf8Length)
  {
      System.Text.Rune rune = new System.Text.Rune (code);

      int actualUtf16Length = rune.Utf16SequenceLength;
      int actualUtf8Length = rune.Utf8SequenceLength;

      Assert.Equal (utf16Length, actualUtf16Length);
      Assert.Equal (utf8Length, actualUtf8Length);
  }
  /// <summary>
  /// Shows that casting the scalar 0x1d539 to <c>char</c> before building a rune gives the
  /// same text as the rune for 0xd539, while passing the scalar directly gives "𝔹".
  /// </summary>
  [Fact]
  public void Cast_To_Char_Durrogate_Pair_Return_UTF16 ()
  {
      // The unchecked cast to char cannot hold 0x1d539; the assertions below show the
      // resulting rune formats the same as the one built from 0xd539.
      var fromCharCast = new Rune (unchecked((char)0x1d539));
      var fromLowBits = new Rune (0xd539);
      var fromFullScalar = new Rune (0x1d539);

      Assert.NotEqual ("𝔹", $"{fromCharCast}");
      Assert.Equal ("픹", $"{fromCharCast}");
      Assert.Equal ("픹", $"{fromLowBits}");
      Assert.Equal ("𝔹", $"{fromFullScalar}");
  }
  741. }