RuneTests.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062
  1. using System.Buffers;
  2. using System.Globalization;
  3. using System.Text;
  4. namespace Terminal.Gui.TextTests;
  5. public class RuneTests
  6. {
  /// <summary>
  /// Narrowing the supplementary code point 0x1d539 to <see cref="char"/> keeps only its low 16 bits, so the
  /// resulting rune is U+D539 and not U+1D539.
  /// </summary>
  [Fact]
  public void Cast_To_Char_Durrogate_Pair_Return_UTF16 ()
  {
      var truncated = new Rune (unchecked ((char)0x1d539));
      var basicPlane = new Rune (0xd539);
      var supplementary = new Rune (0x1d539);

      // The truncated rune renders as the Hangul syllable, never as the double-struck B.
      Assert.NotEqual ("𝔹", truncated.ToString ());
      Assert.Equal ("픹", truncated.ToString ());

      // Building the runes straight from the integer values gives the expected glyphs.
      Assert.Equal ("픹", basicPlane.ToString ());
      Assert.Equal ("𝔹", supplementary.ToString ());
  }
  /// <summary>
  /// Separates value equality from reference equality for the rune lists returned by <c>ToRuneList</c>.
  /// </summary>
  [Fact]
  public void Equals_ToRuneList ()
  {
      List<List<Rune>> single = new () { "First line.".ToRuneList () };
      List<List<Rune>> pair = new () { "First line.".ToRuneList (), "Second line.".ToRuneList () };
      List<Rune> copy = new (single [0]);
      List<Rune> alias = single [0];

      // Independently built lists: equal content where the text matches, never the same instance.
      Assert.Equal (single [0], pair [0]);
      Assert.NotSame (single [0], pair [0]);
      Assert.NotEqual (single [0], pair [1]);
      Assert.NotSame (single [0], pair [1]);

      // A copy constructed from the list has the same content but is a new instance.
      Assert.Equal (copy, single [0]);
      Assert.NotSame (copy, single [0]);
      Assert.Equal (copy, pair [0]);
      Assert.NotSame (copy, pair [0]);
      Assert.NotEqual (copy, pair [1]);
      Assert.NotSame (copy, pair [1]);

      // A plain assignment is the very same instance.
      Assert.Equal (alias, single [0]);
      Assert.Same (alias, single [0]);
      Assert.Equal (alias, pair [0]);
      Assert.NotSame (alias, pair [0]);
      Assert.NotEqual (alias, pair [1]);
      Assert.NotSame (alias, pair [1]);

      // Element-wise comparison agrees with the value-equality results above.
      Assert.True (single [0].SequenceEqual (pair [0]));
      Assert.False (single [0].SequenceEqual (pair [1]));
      Assert.True (copy.SequenceEqual (single [0]));
      Assert.True (copy.SequenceEqual (pair [0]));
      Assert.False (copy.SequenceEqual (pair [1]));
      Assert.True (alias.SequenceEqual (single [0]));
      Assert.True (alias.SequenceEqual (pair [0]));
      Assert.False (alias.SequenceEqual (pair [1]));
  }
  /// <summary>
  /// Passes precomposed, decomposed, emoji and surrogate-pair samples, together with their expected counts,
  /// to <c>PrintTextElementCount</c> (a helper defined elsewhere in this class).
  /// </summary>
  [Fact]
  public void GetColumns_GetRuneCount ()
  {
      // Precomposed U+00E1: a single UTF-16 code unit.
      PrintTextElementCount ('\u00e1'.ToString (), "\u00e1", 1, 1, 1, 1);

      // Decomposed forms (base letter + U+0301). They render exactly like the precomposed glyphs, so they
      // are spelled with escapes to keep the two-code-unit form explicit.
      // NOTE(review): assumes the second argument is the decomposed string (its expected length is 2) -
      // confirm against PrintTextElementCount. The original listed the "a" case twice; one copy was removed.
      PrintTextElementCount ("\u0061\u0301", "a\u0301", 1, 2, 2, 1);
      PrintTextElementCount ("\u0065\u0301", "e\u0301", 1, 2, 2, 1);

      // Emoji joined with U+200D, and a single supplementary-plane letter.
      PrintTextElementCount ("\U0001f469\U0001f3fd\u200d\U0001f692", "👩🏽‍🚒", 6, 4, 7, 1);
      PrintTextElementCount ("\ud801\udccf", "𐓏", 1, 1, 2, 1);
  }
  /// <summary>
  /// Checks the normalized form, the UTF-16 lengths and the column width of multi-code-point strings.
  /// </summary>
  /// <param name="code">Escaped source sequence.</param>
  /// <param name="str">Expected result of normalizing <paramref name="code"/>.</param>
  /// <param name="codeLength">Expected <see cref="string.Length"/> of <paramref name="code"/>.</param>
  /// <param name="columns">Expected column width of <paramref name="str"/>.</param>
  /// <param name="stringLength">Expected <see cref="string.Length"/> of <paramref name="str"/>.</param>
  [Theory]

  // \ufe0f forces it to be rendered as a colorful image as compared to a monochrome text variant.
  [InlineData ("\u2615\ufe0f", "☕️", 2, 2, 2)]

  // Three conjoining jamo; the expected string is the single composed syllable U+BC95.
  [InlineData ("\u1107\u1165\u11b8", "법", 3, 2, 1)]
  [InlineData ("\U0001F468\u200D\U0001F469\u200D\U0001F467", "👨‍👩‍👧", 8, 6, 8)] // Man, Woman and Girl emoji.
  [InlineData ("\u0915\u093f", "कि", 2, 2, 2)] // Hindi: DEVANAGARI LETTER KA + DEVANAGARI VOWEL SIGN I

  // U+0E4D followed by U+0E32, i.e. the decomposition of U+0E33 Thai Character Sara Am.
  [InlineData ("\u0e4d\u0e32", "ํา", 2, 1, 2)]
  [InlineData ("\u0e33", "ำ", 1, 1, 1)] // U+0E33 Thai Character Sara Am itself
  public void GetColumns_String_Without_SurrogatePair (
      string code,
      string str,
      int codeLength,
      int columns,
      int stringLength
  )
  {
      string normalized = code.Normalize ();
      Assert.Equal (str, normalized);
      Assert.Equal (codeLength, code.Length);

      // The per-rune column sum check is currently disabled:
      //Assert.Equal (columns, code.EnumerateRunes ().Sum (x => x.GetColumns ()));
      Assert.Equal (columns, str.GetColumns ());
      Assert.Equal (stringLength, str.Length);
  }
  /// <summary>
  /// Builds a rune from a UTF-16 surrogate pair and checks its text, column width and encoded lengths.
  /// </summary>
  /// <param name="code">High and low surrogate, in that order.</param>
  /// <param name="str">Expected text of the rune.</param>
  /// <param name="columns">Expected column width.</param>
  /// <param name="stringLength">Expected UTF-16 length of the rune's text.</param>
  /// <param name="utf8Length">Expected UTF-8 sequence length.</param>
  [Theory]
  [InlineData (new [] { '\ud83e', '\ude01' }, "🨁", 1, 2, 4)] // Neutral Chess Queen
  [InlineData (new [] { '\udb43', '\udfe1' }, "󠿡", 1, 2, 4)] // Undefined Character
  [InlineData (new [] { '\ud83c', '\udf55' }, "🍕", 2, 2, 4)] // 🍕 Slice of Pizza
  [InlineData (new [] { '\ud83e', '\udd16' }, "🤖", 2, 2, 4)] // 🤖 Robot Face
  [InlineData (new [] { '\ud83e', '\udde0' }, "🧠", 2, 2, 4)] // 🧠 Brain
  [InlineData (new [] { '\ud801', '\udc21' }, "𐐡", 1, 2, 4)] // 𐐡 Deseret Capital Letter Er
  [InlineData (new [] { '\ud83c', '\udf39' }, "🌹", 2, 2, 4)] // 🌹 Rose
  [InlineData (new [] { '\uD83D', '\uDC7E' }, "👾", 2, 2, 4)] // U+1F47E alien monster (CodepointWidth::Wide)

  // 🔜 Soon With Rightwards Arrow Above (CodepointWidth::Wide)
  [InlineData (new [] { '\uD83D', '\uDD1C' }, "🔜", 2, 2, 4)]
  public void GetColumns_Utf16_Encode (char [] code, string str, int columns, int stringLength, int utf8Length)
  {
      Rune fromPair = new (code [0], code [1]);

      Assert.Equal (str, fromPair.ToString ());
      Assert.Equal (columns, fromPair.GetColumns ());
      Assert.Equal (stringLength, fromPair.ToString ().Length);
      Assert.Equal (utf8Length, fromPair.Utf8SequenceLength);
      Assert.True (Rune.IsValid (fromPair.Value));
  }
  /// <summary>
  /// Decodes one supplementary code point from a string, first with <see cref="Rune.DecodeFromUtf16"/> and then
  /// with the <c>DecodeRune</c> extension, and checks both results against the expected text and widths.
  /// </summary>
  /// <param name="code">String holding the code point, written as a \U escape.</param>
  /// <param name="str">Expected text of the decoded rune.</param>
  /// <param name="columns">Expected column width.</param>
  /// <param name="stringLength">Expected UTF-16 length of the rune's text.</param>
  [Theory]
  [InlineData ("\U0001fa01", "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData ("\U000e0fe1", "󠿡", 1, 2)] // Undefined Character
  [InlineData ("\U0001F355", "🍕", 2, 2)] // 🍕 Slice of Pizza
  [InlineData ("\U0001F916", "🤖", 2, 2)] // 🤖 Robot Face
  [InlineData ("\U0001f9e0", "🧠", 2, 2)] // 🧠 Brain
  [InlineData ("\U00010421", "𐐡", 1, 2)] // 𐐡 Deseret Capital Letter Er
  [InlineData ("\U0001f339", "🌹", 2, 2)] // 🌹 Rose

  //[InlineData ("\uFE20FE21", "", 1, 1)] // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void GetColumns_Utf32_Encode (string code, string str, int columns, int stringLength)
  {
      // Framework decoder.
      OperationStatus status = Rune.DecodeFromUtf16 (code, out Rune decoded, out int consumedChars);
      Assert.Equal (OperationStatus.Done, status);
      Assert.Equal (str, decoded.ToString ());
      Assert.Equal (columns, decoded.GetColumns ());
      Assert.Equal (stringLength, decoded.ToString ().Length);
      Assert.Equal (consumedChars, decoded.Utf16SequenceLength);
      Assert.True (Rune.IsValid (decoded.Value));

      // Same checks through the DecodeRune extension.
      (Rune extensionRune, int extensionSize) = code.DecodeRune ();
      Assert.Equal (str, extensionRune.ToString ());
      Assert.Equal (columns, extensionRune.GetColumns ());
      Assert.Equal (stringLength, extensionRune.ToString ().Length);
      Assert.Equal (extensionSize, extensionRune.Utf8SequenceLength);

      // Every adjacent pair of UTF-16 code units must combine into the same rune.
      for (var index = 0; index + 1 < code.Length; index++)
      {
          char high = code [index];
          char low = code [index + 1];
          Assert.Equal (extensionRune.Value, char.ConvertToUtf32 (high, low));
          Assert.True (RuneExtensions.EncodeSurrogatePair (high, low, out Rune combined));
          Assert.Equal (decoded, combined);
      }

      Assert.True (Rune.IsValid (extensionRune.Value));
  }
  /// <summary>
  /// Decodes one rune from UTF-8 bytes and checks its text, column width and encoded lengths.
  /// </summary>
  /// <param name="code">UTF-8 bytes of a single code point.</param>
  /// <param name="str">Expected text of the decoded rune.</param>
  /// <param name="columns">Expected column width.</param>
  /// <param name="stringLength">Expected UTF-16 length of the rune's text.</param>
  [Theory]
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa8, 0x81 }, "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData (new byte [] { 0xf3, 0xa0, 0xbf, 0xa1 }, "󠿡", 1, 2)] // Undefined Character
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, "🍕", 2, 2)] // 🍕 Slice of Pizza
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa4, 0x96 }, "🤖", 2, 2)] // 🤖 Robot Face
  [InlineData (new byte [] { 0xf0, 0x90, 0x90, 0xa1 }, "𐐡", 1, 2)] // 𐐡 Deseret Capital Letter Er
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8c, 0xb9 }, "🌹", 2, 2)] // 🌹 Rose
  public void GetColumns_Utf8_Encode (byte [] code, string str, int columns, int stringLength)
  {
      OperationStatus status = Rune.DecodeFromUtf8 (code, out Rune decoded, out int consumedBytes);
      string decodedText = decoded.ToString ();

      Assert.Equal (OperationStatus.Done, status);
      Assert.Equal (str, decodedText);
      Assert.Equal (columns, decoded.GetColumns ());
      Assert.Equal (stringLength, decodedText.Length);
      Assert.Equal (consumedBytes, decoded.Utf8SequenceLength);
      Assert.True (Rune.IsValid (decoded.Value));
  }
  /// <summary>
  /// Builds a rune from a single BMP code point and checks its text, column width and encoded lengths.
  /// </summary>
  /// <remarks>
  /// Expected strings for combining marks, conjoining jamo and private-use code points are written as escapes:
  /// those characters are invisible or merge with neighbouring glyphs and are easily lost when the file is
  /// copied or re-encoded.
  /// </remarks>
  /// <param name="code">Code point to test.</param>
  /// <param name="str">Expected text of the rune.</param>
  /// <param name="columns">Expected column width.</param>
  /// <param name="stringLength">Expected UTF-16 length of the rune's text.</param>
  /// <param name="utf8Length">Expected UTF-8 sequence length.</param>
  [Theory]
  [InlineData (0, "\0", 0, 1, 1)]
  [InlineData ('\u1dc0', "\u1dc0", 0, 1, 3)] // Combining Dotted Grave Accent
  [InlineData ('\u20D0', "\u20d0", 0, 1, 3)] // Combining Left Harpoon Above
  [InlineData (1, "\u0001", -1, 1, 1)]
  [InlineData (2, "\u0002", -1, 1, 1)]
  [InlineData (31, "\u001f", -1, 1, 1)] // non printable character - Information Separator One
  [InlineData (127, "\u007f", -1, 1, 1)] // non printable character - Delete
  [InlineData (32, " ", 1, 1, 1)] // space
  [InlineData ('a', "a", 1, 1, 1)]
  [InlineData ('b', "b", 1, 1, 1)]
  [InlineData (123, "{", 1, 1, 1)] // { Left Curly Bracket
  [InlineData ('\u231c', "⌜", 1, 1, 3)] // ⌜ Top Left Corner

  // BUGBUG: These are CLEARLY wide glyphs, but GetColumns() returns 1
  // However, most terminals treat these as narrow and they overlap the next cell when drawn (including Windows Terminal)
  [InlineData ('\u1161', "\u1161", 1, 1, 3)] // ᅡ Hangul Jungseong A (conjoining jamo); width 1 is expected here
  [InlineData ('\u2103', "℃", 1, 1, 3)] // ℃ Degree Celsius
  [InlineData ('\u2501', "━", 1, 1, 3)] // ━ Box Drawings Heavy Horizontal
  [InlineData ('\u25a0', "■", 1, 1, 3)] // ■ Black Square
  [InlineData ('\u25a1', "□", 1, 1, 3)] // □ White Square
  [InlineData ('\u277f', "❿", 1, 1, 3)] // ❿ Dingbat Negative Circled Number Ten - U+277f

  // ䷀ Hexagram For The Creative Heaven - U+4dc0 - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\u4dc0', "䷀", 1, 1, 3)]
  [InlineData ('\ud7b0', "\ud7b0", 1, 1, 3)] // ힰ Hangul Jungseong O-Yeo - U+d7b0
  [InlineData ('\uf61e', "\uf61e", 1, 1, 3)] // Private Use Area (no visible glyph, so it must be escaped)
  [InlineData ('\u23f0', "⏰", 2, 1, 3)] // ⏰ Alarm Clock - U+23f0
  [InlineData ('\u1100', "\u1100", 2, 1, 3)] // ᄀ Hangul Choseong Kiyeok
  [InlineData ('\u1150', "\u1150", 2, 1, 3)] // ᅐ Hangul Choseong Ceongchieumcieuc
  [InlineData ('\u2615', "☕", 2, 1, 3)] // ☕ Hot Beverage
  [InlineData ('\u231a', "⌚", 2, 1, 3)] // ⌚ Watch
  [InlineData ('\u231b', "⌛", 2, 1, 3)] // ⌛ Hourglass

  // From WindowsTerminal's CodepointWidthDetector tests (https://github.com/microsoft/terminal/blob/main/src/types/CodepointWidthDetector.cpp)
  //static constexpr std::wstring_view emoji = L"\xD83E\xDD22"; // U+1F922 nauseated face
  //static constexpr std::wstring_view ambiguous = L"\x414"; // U+0414 cyrillic capital de
  //{ 0x414, L"\x414", CodepointWidth::Narrow }, // U+0414 cyrillic capital de
  [InlineData ('\u0414', "Д", 1, 1, 2)] // U+0414 cyrillic capital de

  //{ 0x1104, L"\x1104", CodepointWidth::Wide }, // U+1104 hangul choseong ssangtikeut
  [InlineData ('\u1104', "\u1104", 2, 1, 3)] // ᄄ

  //{ 0x306A, L"\x306A", CodepointWidth::Wide }, // U+306A hiragana na な
  [InlineData (0x306A, "な", 2, 1, 3)]

  //{ 0x30CA, L"\x30CA", CodepointWidth::Wide }, // U+30CA katakana na ナ
  [InlineData (0x30CA, "ナ", 2, 1, 3)]

  //{ 0x72D7, L"\x72D7", CodepointWidth::Wide }, // U+72D7
  [InlineData (0x72D7, "狗", 2, 1, 3)]
  public void GetColumns_With_Single_Code (int code, string str, int columns, int stringLength, int utf8Length)
  {
      var rune = new Rune (code);
      string text = rune.ToString ();

      Assert.Equal (str, text);
      Assert.Equal (columns, rune.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (utf8Length, rune.Utf8SequenceLength);
      Assert.True (Rune.IsValid (rune.Value));
  }
  // IsCombiningMark tests

  /// <summary>
  /// Checks that the <c>IsCombiningMark</c> extension reports the expected result for each code point.
  /// </summary>
  /// <param name="codepoint">Code point to classify.</param>
  /// <param name="expected">Expected result of <c>IsCombiningMark</c>.</param>
  [Theory]
  [InlineData (0x0338, true)] // Combining Long Solidus Overlay (U+0338) (e.g. ≠)
  [InlineData (0x0300, true)] // Combining Grave Accent
  [InlineData (0x0301, true)] // Combining acute accent (é)
  [InlineData (0x0302, true)] // Combining Circumflex Accent
  [InlineData (0x0328, true)] // Combining ogonek (a small hook or comma shape) U+0328
  [InlineData (0x00E9, false)] // Latin Small Letter E with Acute, Unicode U+00E9 é
  [InlineData (0x0061, false)] // Latin Small Letter A is U+0061.

  // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE20', true)]

  // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21', true)]
  public void IsCombiningMark (int codepoint, bool expected)
  {
      bool actual = new Rune (codepoint).IsCombiningMark ();
      Assert.Equal (expected, actual);
  }
  /// <summary>
  /// Checks that <c>MakePrintable</c> returns combining characters unchanged.
  /// </summary>
  /// <param name="code">Code point of a combining character.</param>
  [Theory]
  [InlineData (0x0338)] // Combining Long Solidus Overlay (U+0338) (e.g. ≠)
  [InlineData (0x0300)] // Combining Grave Accent
  [InlineData (0x0301)] // Combining acute accent (é)
  [InlineData (0x0302)] // Combining Circumflex Accent

  // Was 0x0061 (Latin small letter a), which contradicted this label; U+0328 is the ogonek.
  [InlineData (0x0328)] // Combining ogonek (a small hook or comma shape)

  // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE20')]

  // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21')]
  public void MakePrintable_Combining_Character_Is_Not_Printable (int code)
  {
      var rune = new Rune (code);
      Rune actual = rune.MakePrintable ();

      // The rune must come back with the same value it went in with.
      Assert.Equal (code, actual.Value);
  }
  /// <summary>
  /// Checks the value <c>MakePrintable</c> produces for each input code point.
  /// </summary>
  /// <param name="code">Input code point.</param>
  /// <param name="expected">Expected value of the rune returned by <c>MakePrintable</c>.</param>
  [Theory]
  [InlineData (0x0000001F, 0x241F)]
  [InlineData (0x0000007F, 0x247F)]
  [InlineData (0x0000009F, 0x249F)]
  [InlineData (0x0001001A, 0x1001A)]
  public void MakePrintable_Converts_Control_Chars_To_Proper_Unicode (int code, int expected)
  {
      var source = new Rune (code);
      Rune printable = source.MakePrintable ();

      Assert.Equal (expected, printable.Value);
  }
  /// <summary>
  /// Checks that <c>MakePrintable</c> leaves these code points unchanged.
  /// </summary>
  /// <param name="code">Code point expected to come back unchanged.</param>
  [Theory]
  [InlineData (0x20)]
  [InlineData (0x7E)]
  [InlineData (0xA0)]
  [InlineData (0x010020)]
  public void MakePrintable_Does_Not_Convert_Ansi_Chars_To_Unicode (int code)
  {
      var source = new Rune (code);
      Rune printable = source.MakePrintable ();

      Assert.Equal (code, printable.Value);
  }
  // 200 ASCII digits ("0123456789" repeated 20 times), shared by the cases below.
  private const string Digits200 =
      "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789";

  /// <summary>
  /// Compares the column width of a whole string with the sum of the column widths of its runes.
  /// </summary>
  /// <param name="text">Text under test.</param>
  /// <param name="stringLength">Expected UTF-16 length and expected rune count.</param>
  /// <param name="strCols">Expected column width of the string as a whole.</param>
  /// <param name="runeCols">Expected sum of the per-rune column widths.</param>
  [Theory]
  [InlineData (Digits200, 200, 200, 200)]
  [InlineData (Digits200 + "\n", 201, 200, 199)] // has a '\n' newline
  [InlineData ("\t" + Digits200 + "\n", 202, 200, 198)] // has a '\t' and a '\n' newline
  public void Rune_ColumnWidth_Versus_String_ConsoleWidth (string text, int stringLength, int strCols, int runeCols)
  {
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (stringLength, text.GetRuneCount ());
      Assert.Equal (strCols, text.GetColumns ());

      var perRuneTotal = 0;

      foreach (Rune rune in text.EnumerateRunes ())
      {
          perRuneTotal += rune.GetColumns ();
      }

      Assert.Equal (runeCols, perRuneTotal);
  }
  /// <summary>
  /// Checks that the <see cref="Rune"/> constructor rejects an out-of-range value and a lone surrogate.
  /// </summary>
  /// <param name="code">Value that is not a valid Unicode scalar.</param>
  [Theory]
  [InlineData (0x12345678)]
  [InlineData ('\ud801')]
  public void Rune_Exceptions_Integers (int code)
  {
      Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (code));
  }
  /// <summary>
  /// Checks that a pair whose first char is not a high surrogate is rejected both by
  /// <c>RuneExtensions.EncodeSurrogatePair</c> and by the <see cref="Rune"/> constructor.
  /// </summary>
  /// <param name="code">The two UTF-16 code units to combine.</param>
  [Theory]
  [InlineData (new [] { '\ud799', '\udc21' })]
  public void Rune_Exceptions_Utf16_Encode (char [] code)
  {
      char first = code [0];
      char second = code [1];

      Assert.False (RuneExtensions.EncodeSurrogatePair (first, second, out Rune _));
      Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (first, second));
  }
  /// <summary>
  /// Checks the column width reported for the code points U+0000 to U+001F: the rune-level
  /// <c>GetColumns</c> is expected to be 0 for U+0000 and -1 for the rest, while the string-level
  /// <c>GetColumns</c> of the same character is expected to be 0 in every case.
  /// </summary>
  /// <param name="expectedColumns">Expected result of <c>GetColumns</c> on the rune.</param>
  /// <param name="scalar">Code point under test.</param>
  [Theory]
  [InlineData (0, 0)]
  [InlineData (-1, 1)]
  [InlineData (-1, 2)]
  [InlineData (-1, 3)]
  [InlineData (-1, 4)]
  [InlineData (-1, 5)]
  [InlineData (-1, 6)]
  [InlineData (-1, 7)]
  [InlineData (-1, 8)]
  [InlineData (-1, 9)]
  [InlineData (-1, 10)]
  [InlineData (-1, 11)]
  [InlineData (-1, 12)]
  [InlineData (-1, 13)]
  [InlineData (-1, 14)]
  [InlineData (-1, 15)]
  [InlineData (-1, 16)]
  [InlineData (-1, 17)]
  [InlineData (-1, 18)]
  [InlineData (-1, 19)]
  [InlineData (-1, 20)]
  [InlineData (-1, 21)]
  [InlineData (-1, 22)]
  [InlineData (-1, 23)]
  [InlineData (-1, 24)]
  [InlineData (-1, 25)]
  [InlineData (-1, 26)]
  [InlineData (-1, 27)]
  [InlineData (-1, 28)]
  [InlineData (-1, 29)]
  [InlineData (-1, 30)]
  [InlineData (-1, 31)]
  public void Rune_GetColumns_Non_Printable (int expectedColumns, int scalar)
  {
      Rune control = new (scalar);

      int runeColumns = control.GetColumns ();
      Assert.Equal (expectedColumns, runeColumns);

      int stringColumns = control.ToString ().GetColumns ();
      Assert.Equal (0, stringColumns);
  }
  /// <summary>
  /// Sums the rune-level and the string-level column widths over U+0000..U+001F and checks that the two
  /// totals are -31 and 0 respectively.
  /// </summary>
  [Fact]
  public void Rune_GetColumns_Versus_String_GetColumns_With_Non_Printable_Characters ()
  {
      Rune [] controls = Enumerable.Range (0, 32).Select (value => new Rune ((uint)value)).ToArray ();

      int runeTotal = controls.Sum (rune => rune.GetColumns ());
      int stringTotal = controls.Sum (rune => rune.ToString ().GetColumns ());

      Assert.Equal (-31, runeTotal);
      Assert.Equal (0, stringTotal);
  }
  /// <summary>
  /// Checks how <see cref="char.IsHighSurrogate(char)"/> and <see cref="char.IsLowSurrogate(char)"/> classify
  /// a UTF-16 code unit.
  /// </summary>
  /// <param name="code">Code unit to classify.</param>
  /// <param name="isHighSurrogate">
  /// <see langword="true"/> for a high surrogate, <see langword="false"/> for a low surrogate,
  /// <see langword="null"/> when the code unit is neither.
  /// </param>
  [Theory]
  [InlineData ('\ud800', true)]
  [InlineData ('\udbff', true)]
  [InlineData ('\udc00', false)]
  [InlineData ('\udfff', false)]
  [InlineData ('\uefff', null)]
  public void Rune_IsHighSurrogate_IsLowSurrogate (char code, bool? isHighSurrogate)
  {
      switch (isHighSurrogate)
      {
          case true:
              Assert.True (char.IsHighSurrogate (code));

              break;

          case false:
              Assert.True (char.IsLowSurrogate (code));

              break;

          default:
              Assert.False (char.IsHighSurrogate (code));
              Assert.False (char.IsLowSurrogate (code));

              break;
      }
  }
  /// <summary>
  /// Checks whether a <see cref="Rune"/> can be built from one char or from a surrogate pair.
  /// </summary>
  /// <param name="valid">Whether construction is expected to succeed.</param>
  /// <param name="chars">One char, or a high/low surrogate pair.</param>
  [Theory]
  [InlineData (true, '\u1100')]
  [InlineData (true, '\ud83c', '\udf39')]
  [InlineData (true, '\udbff', '\udfff')]
  [InlineData (false, '\ud801')]
  [InlineData (false, '\ud83e')]
  public void Rune_IsValid (bool valid, params char [] chars)
  {
      Rune rune = default;
      bool constructed;

      if (chars.Length == 1)
      {
          try
          {
              rune = new Rune (chars [0]);
              constructed = true;
          }
          catch (ArgumentOutOfRangeException)
          {
              // Only the documented rejection of a lone surrogate counts as "invalid"; any other
              // exception is a real failure and is allowed to propagate.
              constructed = false;
          }
      }
      else
      {
          rune = new Rune (chars [0], chars [1]);
          constructed = true;
      }

      // The outcome of the construction is the thing being tested, so compare it with the expectation
      // directly instead of asserting on the expectation itself.
      Assert.Equal (valid, constructed);

      if (valid)
      {
          Assert.NotEqual (default (Rune), rune);
          Assert.True (Rune.IsValid (rune.Value));
      }
  }
  /// <summary>
  /// Checks that the <c>ToRunes</c> extension yields the same runes, in the same order, as
  /// <see cref="string.EnumerateRunes"/>.
  /// </summary>
  /// <param name="text">Text to split into runes.</param>
  [Theory]
  [InlineData ("First line.")]
  [InlineData ("Hello, 𝔹𝕆𝔹")]
  public void Rune_ToRunes (string text)
  {
      // Build the reference array once rather than re-enumerating the string on every iteration.
      Rune [] expected = text.EnumerateRunes ().ToArray ();
      Rune [] runes = text.ToRunes ();

      // Without this check a result that is too short would pass the loop below unnoticed.
      Assert.Equal (expected.Length, runes.Length);

      for (var i = 0; i < expected.Length; i++)
      {
          Assert.Equal (expected [i].Value, runes [i].Value);
      }
  }
  /// <summary>
  /// Walks every Unicode scalar value and compares the rune-level column width with the string-level one.
  /// They are expected to agree except when the rune-level width is negative.
  /// </summary>
  [Fact]
  public void Sum_Of_Rune_GetColumns_Is_Not_Always_Equal_To_String_GetColumns ()
  {
      const int start = 0x000000;
      const int end = 0x10ffff;

      for (int i = start; i <= end; i++)
      {
          // Skip only the surrogate code points U+D800..U+DFFF. The check must use the full int:
          // narrowing to char first keeps just the low 16 bits and would also skip valid code points
          // such as U+1D800..U+1DFFF in every supplementary plane.
          if (!Rune.IsValid (i))
          {
              continue;
          }

          var r = new Rune (i);
          var us = r.ToString ();
          string s = char.ConvertFromUtf32 (i);

          Assert.Equal (us, s);

          if (r.IsSurrogatePair ())
          {
              Assert.Equal (r.GetColumns (), us.GetColumns ());
              Assert.Equal (s.GetColumns (), us.GetColumns ());

              // Here returns 1 because is a valid surrogate pair resulting in only rune >=U+10000..U+10FFFF
              Assert.Equal (1, us.GetRuneCount ());
              Assert.Equal (2, s.Length); // String always preserves the originals values of each surrogate pair

              continue;
          }

          int runeColumns = r.GetColumns ();
          int stringColumns = us.GetColumns ();
          int summedColumns = s.EnumerateRunes ().Sum (c => c.GetColumns ());

          if (runeColumns < 0)
          {
              // A negative rune width is never reported by the string-level measurement.
              Assert.NotEqual (runeColumns, stringColumns);
              Assert.NotEqual (summedColumns, stringColumns);
          }
          else
          {
              Assert.Equal (runeColumns, stringColumns);
              Assert.Equal (summedColumns, stringColumns);
          }

          Assert.Equal (us.GetRuneCount (), s.Length);
      }
  }
  /// <summary>
  /// Checks the UTF-16 and UTF-8 sequence lengths that <see cref="Rune"/> reports for a code point.
  /// </summary>
  /// <param name="code">Code point under test.</param>
  /// <param name="utf16Length">Expected <see cref="Rune.Utf16SequenceLength"/>.</param>
  /// <param name="utf8Length">Expected <see cref="Rune.Utf8SequenceLength"/>.</param>
  [Theory]
  [InlineData ('a', 1, 1)]
  [InlineData (31, 1, 1)]
  [InlineData (123, 1, 1)]
  [InlineData (127, 1, 1)]
  [InlineData ('\u1150', 1, 3)]
  [InlineData ('\u1161', 1, 3)]
  [InlineData (0x16fe0, 2, 4)]
  public void System_Text_Rune_SequenceLength (int code, int utf16Length, int utf8Length)
  {
      Rune rune = new (code);

      Assert.Equal (utf16Length, rune.Utf16SequenceLength);
      Assert.Equal (utf8Length, rune.Utf8SequenceLength);
  }
  /// <summary>
  /// Combines every high surrogate with every low surrogate and checks the resulting supplementary-plane
  /// rune: its value range, its text, its column width and its rune/char counts.
  /// </summary>
  [Fact]
  public void Test_All_Surrogate_Pairs_Range ()
  {
      for (var high = '\ud800'; high <= '\udbff'; high++)
      {
          for (var low = '\udc00'; low <= '\udfff'; low++)
          {
              Rune rune = new (high, low);
              string runeText = rune.ToString ();

              // Round-trip the value through its six-digit hexadecimal text.
              int parsed = int.Parse (rune.Value.ToString ("x6"), NumberStyles.HexNumber);
              string converted = char.ConvertFromUtf32 (parsed);

              Assert.False (parsed < 0x10000 || parsed > RuneExtensions.MaxUnicodeCodePoint);
              Assert.Equal (rune.ToString (), runeText);
              Assert.Equal (runeText, converted);
              Assert.Equal (rune.GetColumns (), runeText.GetColumns ());
              Assert.Equal (converted.GetColumns (), runeText.GetColumns ());

              // Here returns 1 because is a valid surrogate pair resulting in only rune >=U+10000..U+10FFFF
              Assert.Equal (1, runeText.GetRuneCount ());

              // String always preserves the originals values of each surrogate pair
              Assert.Equal (2, converted.Length);
          }
      }
  }
  /// <summary>
  /// Checks <c>RuneExtensions.CanBeEncodedAsRune</c> against text supplied either as a string or as raw bytes.
  /// The text is re-encoded as UTF-16 bytes before being handed to the method under test.
  /// </summary>
  /// <param name="text">A <see cref="string"/> or a <see cref="byte"/> array; other types are ignored.</param>
  /// <param name="canBeEncodedAsRune">Expected result.</param>
  [Theory]
  [InlineData ("���", false)]
  [InlineData ("Hello, 世界", true)]
  [InlineData (new byte [] { 0xff, 0xfe, 0xfd }, false)]
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, true)]
  public void Test_CanBeEncodedAsRune_Extension (object text, bool canBeEncodedAsRune)
  {
      string str;

      switch (text)
      {
          case string literal:
              str = literal;

              break;

          case byte [] bytes:
              str = StringExtensions.ToString (bytes);

              break;

          default:
              // Nothing to check for any other input type.
              return;
      }

      var actual = RuneExtensions.CanBeEncodedAsRune (Encoding.Unicode.GetBytes (str.ToCharArray ()));
      Assert.Equal (canBeEncodedAsRune, actual);
  }
  /// <summary>
  /// Calls the <c>DecodeLastRune</c> extension with every index from the last rune down to 0 and checks the
  /// text rebuilt from the returned runes.
  /// </summary>
  /// <param name="text">Text to decode.</param>
  /// <param name="bytesLength">Expected sum of the sizes returned by <c>DecodeLastRune</c>.</param>
  /// <param name="colsLength">Expected column width of the rebuilt text.</param>
  /// <param name="textLength">Expected UTF-16 length of the rebuilt text.</param>
  /// <param name="encoded">Expected rebuilt text.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9, "界世 ,olleH")] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13, "𝔹𝕆𝔹 ,olleH")] // With Surrogate Pairs
  public void Test_DecodeLastRune_Extension (
      string text,
      int bytesLength,
      int colsLength,
      int textLength,
      string encoded
  )
  {
      List<Rune> collected = new ();
      var totalSize = 0;
      int index = text.GetRuneCount ();

      while (index-- > 0)
      {
          (Rune decoded, int decodedSize) = text.DecodeLastRune (index);
          collected.Add (decoded);
          totalSize += decodedSize;
      }

      string rebuilt = StringExtensions.ToString (collected);

      Assert.Equal (encoded, rebuilt);
      Assert.Equal (bytesLength, totalSize);
      Assert.Equal (colsLength, rebuilt.GetColumns ());
      Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  /// Calls the <c>DecodeRune</c> extension with every index from 0 up to the rune count and checks that the
  /// returned runes rebuild the original text.
  /// </summary>
  /// <param name="text">Text to decode.</param>
  /// <param name="bytesLength">Expected sum of the sizes returned by <c>DecodeRune</c>.</param>
  /// <param name="colsLength">Expected column width of the rebuilt text.</param>
  /// <param name="textLength">Expected UTF-16 length of the rebuilt text.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9)] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13)] // With Surrogate Pairs
  public void Test_DecodeRune_Extension (string text, int bytesLength, int colsLength, int textLength)
  {
      List<Rune> collected = new ();
      var totalSize = 0;
      int runeCount = text.GetRuneCount ();

      for (var index = 0; index < runeCount; index++)
      {
          (Rune decoded, int decodedSize) = text.DecodeRune (index);
          collected.Add (decoded);
          totalSize += decodedSize;
      }

      string rebuilt = StringExtensions.ToString (collected);

      Assert.Equal (text, rebuilt);
      Assert.Equal (bytesLength, totalSize);
      Assert.Equal (colsLength, rebuilt.GetColumns ());
      Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  /// Checks that the <c>DecodeSurrogatePair</c> extension splits a supplementary-plane rune into its two
  /// UTF-16 code units, and reports failure with a null array for a rune that is not a pair.
  /// </summary>
  /// <param name="code">Code point under test.</param>
  /// <param name="charsValue">Expected surrogate pair, or <see langword="null"/> when there is none.</param>
  /// <param name="runeString">Expected text of the rune.</param>
  /// <param name="isSurrogatePair">Whether <paramref name="code"/> needs a surrogate pair.</param>
  [Theory]

  // Private Use Area: the character has no visible glyph, so the expected text is written as an escape.
  [InlineData ('\uea85', null, "\uea85", false)]
  [InlineData (0x1F356, new [] { '\ud83c', '\udf56' }, "🍖", true)] // 🍖 Meat On Bone
  public void Test_DecodeSurrogatePair (int code, char [] charsValue, string runeString, bool isSurrogatePair)
  {
      var rune = new Rune (code);
      char [] chars;

      if (isSurrogatePair)
      {
          Assert.True (rune.DecodeSurrogatePair (out chars));
          Assert.Equal (2, chars.Length);
          Assert.Equal (charsValue [0], chars [0]);
          Assert.Equal (charsValue [1], chars [1]);

          // Recombining the two halves must give back the same character.
          Assert.Equal (runeString, new Rune (chars [0], chars [1]).ToString ());
      }
      else
      {
          Assert.False (rune.DecodeSurrogatePair (out chars));
          Assert.Null (chars);
          Assert.Equal (runeString, rune.ToString ());
      }

      // Expected value first, so a failure message labels the two sides correctly.
      Assert.Equal (charsValue, chars);
  }
  /// <summary>
  /// Checks that <c>RuneExtensions.EncodeSurrogatePair</c> combines a valid high/low surrogate pair into one
  /// rune, and returns <see langword="false"/> with a U+0000 rune for anything else.
  /// </summary>
  /// <param name="highSurrogate">First UTF-16 code unit.</param>
  /// <param name="lowSurrogate">Second UTF-16 code unit.</param>
  /// <param name="runeValue">Expected value of the output rune.</param>
  /// <param name="runeString">Expected text of the output rune.</param>
  /// <param name="isSurrogatePair">Whether the two code units form a valid surrogate pair.</param>
  [Theory]
  [InlineData (unchecked ((char)0x40D7C0), (char)0xDC20, 0, "\0", false)]
  [InlineData ((char)0x0065, (char)0x0301, 0, "\0", false)]
  [InlineData ('\ud83c', '\udf56', 0x1F356, "🍖", true)] // 🍖 Meat On Bone
  public void Test_EncodeSurrogatePair (
      char highSurrogate,
      char lowSurrogate,
      int runeValue,
      string runeString,
      bool isSurrogatePair
  )
  {
      // Always encode the pair that was passed in. The success branch used to hard-code
      // '\ud83c', '\udf56', so any other valid pair added to the data would never have been exercised.
      bool encoded = RuneExtensions.EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune);

      Assert.Equal (isSurrogatePair, encoded);
      Assert.Equal (runeValue, rune.Value);
      Assert.Equal (runeString, rune.ToString ());
  }
  /// <summary>
  /// Checks <see cref="char.IsSurrogate(string, int)"/> on a one-character string.
  /// </summary>
  /// <param name="code">Code unit under test.</param>
  /// <param name="isSurrogate">Whether <paramref name="code"/> is expected to be a surrogate.</param>
  [Theory]
  [InlineData ('\ue0fd', false)]
  [InlineData ('\ud800', true)]
  [InlineData ('\udfff', true)]
  public void Test_IsSurrogate (char code, bool isSurrogate)
  {
      bool actual = char.IsSurrogate (code.ToString (), 0);

      Assert.Equal (isSurrogate, actual);
  }
  /// <summary>
  /// Checks that the <see cref="Rune"/> constructors throw for an out-of-range integer and for an invalid
  /// pair of code units.
  /// </summary>
  /// <param name="codes">One integer value, or two values that are narrowed to chars and used as a pair.</param>
  [Theory]
  [InlineData (500000000)]
  [InlineData (0xf801, 0xdfff)]
  public void Test_MaxRune (params int [] codes)
  {
      if (codes.Length != 1)
      {
          // Two values: exercise the (char, char) constructor.
          Assert.Throws<ArgumentOutOfRangeException> (() => new Rune ((char)codes [0], (char)codes [1]));

          return;
      }

      // Single value: exercise the integer constructor.
      Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (codes [0]));
  }
  701. [Theory]
  702. [InlineData (
  703. '\u006f',
  704. '\u0302',
  705. "\u006f\u0302",
  706. 1,
  707. 0,
  708. 2,
  709. "o",
  710. "̂",
  711. "ô",
  712. 1,
  713. 2
  714. )]
  715. [InlineData (
  716. '\u0065',
  717. '\u0301',
  718. "\u0065\u0301",
  719. 1,
  720. 0,
  721. 2,
  722. "e",
  723. "́",
  724. "é",
  725. 1,
  726. 2
  727. )]
  public void Test_NonSpacingChar (
      int code1,
      int code2,
      string code,
      int rune1Length,
      int rune2Length,
      int codeLength,
      string code1String,
      string code2String,
      string joinString,
      int joinLength,
      int bytesLength
  )
  {
      // Build one rune per code point and check each one's column width and text.
      var rune = new Rune (code1);
      var nsRune = new Rune (code2);
      Assert.Equal (rune1Length, rune.GetColumns ());
      Assert.Equal (rune2Length, nsRune.GetColumns ());
      Assert.Equal (code1String, rune.ToString ());
      Assert.Equal (code2String, nsRune.ToString ());

      // Normalizing the two-rune sequence must give the expected joined string, and the same
      // result as normalizing the raw input. Expected values go first so that a failure
      // message labels "Expected" and "Actual" correctly.
      string f = $"{rune}{nsRune}".Normalize ();
      Assert.Equal (joinString, f);
      Assert.Equal (code.Normalize (), f);

      // Column width of the normalized string, and of the raw input summed rune by rune.
      Assert.Equal (joinLength, f.GetColumns ());
      Assert.Equal (joinLength, code.EnumerateRunes ().Sum (c => c.GetColumns ()));
      Assert.Equal (codeLength, code.Length);

      // Decoding the normalized string must return its first rune together with the expected size.
      (Rune nrune, int size) = f.DecodeRune ();
      Assert.Equal (f.ToRunes () [0], nrune);
      Assert.Equal (bytesLength, size);
  }
  760. [Theory]
  761. [InlineData (0x20D0, 0x20EF)]
  762. [InlineData (0x2310, 0x231F)]
  763. [InlineData (0x1D800, 0x1D80F)]
  public void Test_Range (int start, int end)
  {
      for (int codePoint = start; codePoint <= end; codePoint++)
      {
          var rune = new Rune ((uint)codePoint);
          string runeText = rune.ToString ();

          // Round-trip the code point through six-digit hex text, then build the
          // reference string with char.ConvertFromUtf32.
          string hexText = codePoint.ToString ("x6");
          int parsed = int.Parse (hexText, NumberStyles.HexNumber);
          string reference = char.ConvertFromUtf32 (parsed);

          bool isPair = rune.IsSurrogatePair ();

          // Checks that apply whether or not the rune needs a surrogate pair.
          Assert.Equal (rune.ToString (), runeText);
          Assert.Equal (runeText, reference);
          Assert.Equal (rune.GetColumns (), runeText.GetColumns ());

          if (isPair)
          {
              // A valid surrogate pair is a single rune in the range U+10000..U+10FFFF...
              Assert.Equal (1, runeText.GetRuneCount ());

              // ...while the string still holds both surrogate code units.
              Assert.Equal (2, reference.Length);
          }
          else
          {
              // Without a surrogate pair the rune count equals the string length.
              Assert.Equal (runeText.GetRuneCount (), reference.Length);
          }

          Assert.Equal (reference.GetColumns (), runeText.GetColumns ());
      }
  }
  797. [Fact]
  public void Test_SurrogatePair_From_String ()
  {
      // The same two inputs are fed to both helpers: a string the helpers accept,
      // and a single unpaired surrogate they must reject.
      const string wellFormed = "𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟";
      const string loneSurrogate = "\ud801";

      Assert.True (ProcessTestStringUseChar (wellFormed));
      Assert.Throws<Exception> (() => ProcessTestStringUseChar (loneSurrogate));

      Assert.True (ProcessStringUseRune (wellFormed));
      Assert.Throws<Exception> (() => ProcessStringUseRune (loneSurrogate));
  }
  805. [Fact]
  public void TestRuneIsLetter ()
  {
      // Plain ASCII word: every char is a letter.
      int asciiLetters = CountLettersInString ("Hello");
      Assert.Equal (5, asciiLetters);

      // Eight letters are expected here; the space in the middle is not counted.
      int surrogateLetters = CountLettersInString ("𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟");
      Assert.Equal (8, surrogateLetters);
  }
  811. [Fact]
  public void TestSplit ()
  {
      // Three emoji separated by ", ": splitting on either separator char yields three pieces.
      const string input = "🐂, 🐄, 🐆";

      int piecesBySpace = input.Split (' ').Length;
      int piecesByComma = input.Split (',').Length;

      Assert.Equal (3, piecesBySpace);
      Assert.Equal (3, piecesByComma);
  }
  820. [Theory]
  821. [InlineData ("a", "utf-8", 1)]
  822. [InlineData ("a", "utf-16", 1)]
  823. [InlineData ("a", "utf-32", 3)]
  824. [InlineData ("𝔹", "utf-8", 4)]
  825. [InlineData ("𝔹", "utf-16", 4)]
  826. [InlineData ("𝔹", "utf-32", 3)]
  public void GetEncodingLength_ReturnsLengthBasedOnSelectedEncoding (string runeStr, string encodingName, int expectedLength)
  {
      // runeStr must contain exactly one rune (Single () throws otherwise); the encoding
      // is looked up by name before the length is requested.
      int actualLength = runeStr.EnumerateRunes ()
                                .Single ()
                                .GetEncodingLength (Encoding.GetEncoding (encodingName));

      Assert.Equal (expectedLength, actualLength);
  }
  /// <summary>Counts the runes in <paramref name="s"/> for which <see cref="Rune.IsLetter"/> is true.</summary>
  /// <param name="s">The string whose runes are examined.</param>
  /// <returns>The number of letter runes found.</returns>
  private int CountLettersInString (string s)
  {
      return s.EnumerateRunes ().Count (rune => Rune.IsLetter (rune));
  }
  /// <summary>
  ///     Asserts that <paramref name="us"/> and <paramref name="s"/> are equal and that the column width,
  ///     rune count, string length and text-element count match the expected values. Despite its name,
  ///     nothing is printed.
  /// </summary>
  /// <param name="us">First string; its columns and rune count are checked.</param>
  /// <param name="s">Second string; its length and text elements are checked.</param>
  /// <param name="consoleWidth">Expected result of <c>us.GetColumns ()</c>.</param>
  /// <param name="runeCount">Expected result of <c>us.GetRuneCount ()</c>.</param>
  /// <param name="stringCount">Expected <c>s.Length</c>.</param>
  /// <param name="txtElementCount">Expected number of text elements in <paramref name="s"/>.</param>
  private void PrintTextElementCount (
      string us,
      string s,
      int consoleWidth,
      int runeCount,
      int stringCount,
      int txtElementCount
  )
  {
      Assert.Equal (us.Length, s.Length);
      Assert.Equal (us, s);
      Assert.Equal (consoleWidth, us.GetColumns ());
      Assert.Equal (runeCount, us.GetRuneCount ());
      Assert.Equal (stringCount, s.Length);

      // Count text elements by walking the enumerator. For versions prior to .NET 5 the
      // StringInfo class might handle some grapheme clusters incorrectly.
      var textElementCount = 0;

      for (TextElementEnumerator elements = StringInfo.GetTextElementEnumerator (s); elements.MoveNext ();)
      {
          textElementCount++;
      }

      Assert.Equal (txtElementCount, textElementCount);
  }
  /// <summary>
  ///     Walks <paramref name="s"/> one UTF-16 code unit at a time, turning each valid unit or surrogate
  ///     pair into a <see cref="Rune"/> and asserting along the way. It then checks that the collected
  ///     runes reproduce the input string and its column width.
  /// </summary>
  /// <param name="s">The string to process.</param>
  /// <returns>Always <see langword="true"/> when the whole string was processed.</returns>
  /// <exception cref="Exception">
  ///     A code unit is neither valid by itself nor the start of a pair accepted by
  ///     <c>RuneExtensions.EncodeSurrogatePair</c>.
  /// </exception>
  private bool ProcessStringUseRune (string s)
  {
      List<Rune> decoded = new ();
      var rebuilt = new StringBuilder ();
      var totalColumns = 0;
      var index = 0;

      while (index < s.Length)
      {
          char unit = s [index];
          Rune rune = default;

          if (Rune.IsValid (unit))
          {
              // The code unit is a valid scalar value by itself, so it maps to one rune.
              rune = new Rune (unit);
              Assert.True (Rune.IsValid (rune.Value));
              decoded.Add (rune);
              Assert.Equal (unit, rune.Value);
              Assert.False (rune.IsSurrogatePair ());
          }
          else if (index + 1 < s.Length && RuneExtensions.EncodeSurrogatePair (unit, s [index + 1], out Rune pair))
          {
              // The rune is still at its default value until the pair is assigned.
              Assert.Equal (0, rune.Value);
              Assert.False (Rune.IsValid (unit));
              rune = pair;
              decoded.Add (rune);
              var pairText = new string (new [] { unit, s [index + 1] });
              Assert.Equal (pairText, pair.ToString ());
              Assert.True (pair.IsSurrogatePair ());

              // Skip the second code unit of the pair that was just consumed.
              index++;
          }
          else
          {
              // Neither a valid unit nor an encodable pair: the char constructor must reject it too.
              Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (unit));

              throw new Exception ("String was not well-formed UTF-16.");
          }

          // Accumulate this rune's column width and text.
          totalColumns += rune.GetColumns ();
          rebuilt.Append (rune.ToString ());
          index++;
      }

      Assert.Equal (s.GetColumns (), totalColumns);
      Assert.Equal (s, rebuilt.ToString ());
      Assert.Equal (s, StringExtensions.ToString (decoded));

      return true;
  }
  /// <summary>
  ///     Walks <paramref name="s"/> char by char. A surrogate is remembered until the next surrogate
  ///     arrives, and the two are then checked as a pair; every non-surrogate char is checked as a
  ///     single-unit rune.
  /// </summary>
  /// <param name="s">The string to process.</param>
  /// <returns>Always <see langword="true"/> when the whole string was processed.</returns>
  /// <exception cref="Exception">
  ///     The last char of the string is a surrogate and no earlier surrogate is waiting to be paired with it.
  /// </exception>
  private bool ProcessTestStringUseChar (string s)
  {
      // Surrogate seen earlier and not yet paired; '\0' means none is pending.
      char pendingSurrogate = default;

      for (var index = 0; index < s.Length; index++)
      {
          char current = s [index];

          if (!char.IsSurrogate (current))
          {
              // Non-surrogate char: it converts directly to a rune.
              var single = new Rune (current);
              var buffer = new byte [4];
              ((Rune)current).Encode (buffer);
              Assert.Equal ((int)current, buffer [0]);
              Assert.Equal (current, single.Value);
              Assert.True (Rune.IsValid (single.Value));
              Assert.False (single.IsSurrogatePair ());

              continue;
          }

          if (pendingSurrogate != '\0' && char.IsSurrogate (pendingSurrogate))
          {
              // Second surrogate of a pair: combine it with the pending one.
              var combined = new Rune (pendingSurrogate, current);
              Assert.True (combined.IsSurrogatePair ());

              int codePoint = char.ConvertToUtf32 (pendingSurrogate, current);
              RuneExtensions.EncodeSurrogatePair (pendingSurrogate, current, out Rune encoded);
              Assert.Equal (codePoint, encoded.Value);

              var pairText = new string (new [] { pendingSurrogate, current });
              var fromCodePoint = (Rune)codePoint;
              Assert.Equal (pairText, fromCodePoint.ToString ());
              Assert.True (fromCodePoint.IsSurrogatePair ());

              pendingSurrogate = default;
          }
          else if (index < s.Length - 1)
          {
              // First surrogate with more input to come: hold it for the next iteration.
              pendingSurrogate = current;
          }
          else
          {
              // Surrogate at the very end with nothing to pair it with.
              Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (current));

              throw new Exception ("String was not well-formed UTF-16.");
          }
      }

      return true;
  }
  954. }