// RuneTests.cs
  1. using System.Buffers;
  2. using System.Globalization;
  3. using System.Text;
  4. namespace UnitTests_Parallelizable.TextTests;
  5. public class RuneTests
  6. {
  /// <summary>
  /// Casting the supplementary code point 0x1D539 to <see cref="char"/> keeps only the low 16 bits (0xD539), so the
  /// resulting rune renders as the BMP character rather than the supplementary one.
  /// </summary>
  [Fact]
  public void Cast_To_Char_Surrogate_Pair_Return_UTF16 ()
  {
      var truncated = new Rune (unchecked ((char)0x1d539));
      var bmp = new Rune (0xd539);
      var supplementary = new Rune (0x1d539);

      // The truncated rune is the BMP character, not the supplementary one.
      Assert.NotEqual ("𝔹", $"{truncated}");
      Assert.Equal ("픹", $"{truncated}");

      // Building from the integer values directly gives each character unchanged.
      Assert.Equal ("픹", $"{bmp}");
      Assert.Equal ("𝔹", $"{supplementary}");
  }
  /// <summary>
  /// Contrasts content equality (<c>Assert.Equal</c>, <c>SequenceEqual</c>) with reference identity for rune lists
  /// produced by <c>ToRuneList</c>.
  /// </summary>
  [Fact]
  public void Equals_ToRuneList ()
  {
      List<List<Rune>> single = new () { "First line.".ToRuneList () };
      List<List<Rune>> pair = new () { "First line.".ToRuneList (), "Second line.".ToRuneList () };

      List<Rune> original = single [0];
      List<Rune> sameText = pair [0];
      List<Rune> otherText = pair [1];
      List<Rune> copy = new (original);
      List<Rune> alias = original;

      // Lists built separately from the same text: equal content, different instances.
      Assert.Equal (original, sameText);
      Assert.NotSame (original, sameText);
      Assert.NotEqual (original, otherText);
      Assert.NotSame (original, otherText);

      // A copy of the list: equal content, but never the same instance as any source list.
      Assert.Equal (copy, original);
      Assert.NotSame (copy, original);
      Assert.Equal (copy, sameText);
      Assert.NotSame (copy, sameText);
      Assert.NotEqual (copy, otherText);
      Assert.NotSame (copy, otherText);

      // A second variable holding the same list: this is the only case where the references match.
      Assert.Equal (alias, original);
      Assert.Same (alias, original);
      Assert.Equal (alias, sameText);
      Assert.NotSame (alias, sameText);
      Assert.NotEqual (alias, otherText);
      Assert.NotSame (alias, otherText);

      // SequenceEqual follows content only.
      Assert.True (original.SequenceEqual (sameText));
      Assert.False (original.SequenceEqual (otherText));
      Assert.True (copy.SequenceEqual (original));
      Assert.True (copy.SequenceEqual (sameText));
      Assert.False (copy.SequenceEqual (otherText));
      Assert.True (alias.SequenceEqual (original));
      Assert.True (alias.SequenceEqual (sameText));
      Assert.False (alias.SequenceEqual (otherText));
  }
  /// <summary>
  /// Feeds a table of precomposed, decomposed, surrogate-pair and ZWJ-joined strings to
  /// <c>PrintTextElementCount</c>, in the listed order.
  /// </summary>
  [Fact]
  public void GetColumns_GetRuneCount ()
  {
      // NOTE(review): the meaning of the four integers is defined by PrintTextElementCount, which is not visible
      // here; they are passed through positionally and unchanged.
      (string Input, string Shown, int Arg3, int Arg4, int Arg5, int Arg6) [] rows =
      {
          ('\u00e1'.ToString (), "á", 1, 1, 1, 1),
          ("\u0061\u0301", "á", 1, 2, 2, 1),
          ("\u0061\u0301", "á", 1, 2, 2, 1),
          ("\u0065\u0301", "é", 1, 2, 2, 1),
          ("\U0001f469\U0001f3fd\u200d\U0001f692", "👩🏽‍🚒", 2, 4, 7, 1),
          ("\ud801\udccf", "𐓏", 1, 1, 2, 1),
          ("\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466", "👨‍👩‍👧‍👦", 2, 7, 11, 1),
          ("\U0001f469\u200d\U0001f692", "👩‍🚒", 2, 3, 5, 1),
          ("\u0068\u0069", "hi", 2, 2, 2, 2)
      };

      foreach ((string input, string shown, int arg3, int arg4, int arg5, int arg6) in rows)
      {
          PrintTextElementCount (input, shown, arg3, arg4, arg5, arg6);
      }
  }
  /// <summary>
  /// Checks that <paramref name="code"/> normalizes to <paramref name="str"/>, and verifies the UTF-16 lengths of both
  /// along with the column width reported for <paramref name="str"/>.
  /// </summary>
  /// <param name="code">The escaped input sequence.</param>
  /// <param name="str">The expected result of <c>code.Normalize ()</c>.</param>
  /// <param name="codeLength">Expected <c>code.Length</c>.</param>
  /// <param name="columns">Expected <c>str.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected <c>str.Length</c>.</param>
  [Theory]
  [InlineData ("\u2615\ufe0f", "☕️", 2, 2, 2)] // U+FE0F asks for the colorful image rendering instead of the monochrome text variant.
  [InlineData ("\u1107\u1165\u11b8", "법", 3, 2, 1)] // Three Hangul jamo that normalize to the single syllable U+BC95.
  [InlineData ("\U0001F468\u200D\U0001F469\u200D\U0001F467", "👨‍👩‍👧", 8, 2, 8)] // Man, Woman and Girl emoji.
  [InlineData ("\u0915\u093f", "कि", 2, 1, 2)] // Devanagari Letter Ka followed by Devanagari Vowel Sign I.
  [InlineData ("\u0e4d\u0e32", "ํา", 2, 1, 2)] // U+0E4D + U+0E32, the decomposition of Thai Character Sara Am (U+0E33).
  [InlineData ("\u0e33", "ำ", 1, 1, 1)] // Thai Character Sara Am (U+0E33) itself.
  public void GetColumns_String_Without_SurrogatePair (
      string code,
      string str,
      int codeLength,
      int columns,
      int stringLength
  )
  {
      string normalized = code.Normalize ();
      Assert.Equal (str, normalized);
      Assert.Equal (codeLength, code.Length);

      // The per-rune column sum check below is disabled in the source and stays disabled.
      //Assert.Equal (columns, code.EnumerateRunes ().Sum (x => x.GetColumns ()));
      Assert.Equal (columns, str.GetColumns ());
      Assert.Equal (stringLength, str.Length);
  }
  /// <summary>
  /// Builds a rune from a UTF-16 surrogate pair and verifies its text, column width, UTF-16 length and UTF-8 length.
  /// </summary>
  /// <param name="code">Two chars: the high surrogate followed by the low surrogate.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of the rune's string form.</param>
  /// <param name="utf8Length">Expected <c>rune.Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData (new [] { '\ud83e', '\ude01' }, "🨁", 1, 2, 4)] // Neutral Chess Queen
  [InlineData (new [] { '\udb43', '\udfe1' }, "\U000e0fe1", 1, 2, 4)] // Undefined character U+E0FE1
  [InlineData (new [] { '\ud83c', '\udf55' }, "🍕", 2, 2, 4)] // Slice of Pizza
  [InlineData (new [] { '\ud83e', '\udd16' }, "🤖", 2, 2, 4)] // Robot Face
  [InlineData (new [] { '\ud83e', '\udde0' }, "🧠", 2, 2, 4)] // Brain
  [InlineData (new [] { '\ud801', '\udc21' }, "𐐡", 1, 2, 4)] // Deseret Capital Letter Er
  [InlineData (new [] { '\ud83c', '\udf39' }, "🌹", 2, 2, 4)] // Rose
  [InlineData (new [] { '\uD83D', '\uDC7E' }, "👾", 2, 2, 4)] // U+1F47E alien monster (CodepointWidth::Wide)
  [InlineData (new [] { '\uD83D', '\uDD1C' }, "🔜", 2, 2, 4)] // Soon With Rightwards Arrow Above (CodepointWidth::Wide)
  public void GetColumns_Utf16_Encode (char [] code, string str, int columns, int stringLength, int utf8Length)
  {
      char high = code [0];
      char low = code [1];
      var rune = new Rune (high, low);
      string text = rune.ToString ();

      Assert.Equal (str, text);
      Assert.Equal (columns, rune.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (utf8Length, rune.Utf8SequenceLength);
      Assert.True (Rune.IsValid (rune.Value));
  }
  /// <summary>
  /// Decodes a supplementary code point from its string form twice, once with <see cref="Rune.DecodeFromUtf16"/> and
  /// once with the <c>DecodeRune</c> extension, and checks that both agree with the expected text and widths.
  /// </summary>
  /// <param name="code">The input text containing one code point.</param>
  /// <param name="str">Expected string form of the decoded rune.</param>
  /// <param name="columns">Expected <c>GetColumns ()</c> of the decoded rune.</param>
  /// <param name="stringLength">Expected length of the rune's string form.</param>
  [Theory]
  [InlineData ("\U0001fa01", "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData ("\U000e0fe1", "\U000e0fe1", 1, 2)] // Undefined character U+E0FE1
  [InlineData ("\U0001F355", "🍕", 2, 2)] // Slice of Pizza
  [InlineData ("\U0001F916", "🤖", 2, 2)] // Robot Face
  [InlineData ("\U0001f9e0", "🧠", 2, 2)] // Brain
  [InlineData ("\U00010421", "𐐡", 1, 2)] // Deseret Capital Letter Er
  [InlineData ("\U0001f339", "🌹", 2, 2)] // Rose
  // Disabled case: Combining Ligature Left Half (U+FE20) / Right Half (U+FE21), see
  // https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  //[InlineData ("\uFE20FE21", "", 1, 1)]
  public void GetColumns_Utf32_Encode (string code, string str, int columns, int stringLength)
  {
      // First path: the framework decoder.
      OperationStatus status = Rune.DecodeFromUtf16 (code, out Rune decoded, out int consumed);
      Assert.Equal (OperationStatus.Done, status);
      Assert.Equal (str, decoded.ToString ());
      Assert.Equal (columns, decoded.GetColumns ());
      Assert.Equal (stringLength, decoded.ToString ().Length);
      Assert.Equal (consumed, decoded.Utf16SequenceLength);
      Assert.True (Rune.IsValid (decoded.Value));

      // Second path: the DecodeRune extension.
      (Rune viaExtension, int size) = code.DecodeRune ();
      Assert.Equal (str, viaExtension.ToString ());
      Assert.Equal (columns, viaExtension.GetColumns ());
      Assert.Equal (stringLength, viaExtension.ToString ().Length);
      Assert.Equal (size, viaExtension.Utf8SequenceLength);

      // Every adjacent char pair must re-encode to the same rune.
      for (var index = 0; index + 1 < code.Length; index++)
      {
          char high = code [index];
          char low = code [index + 1];

          Assert.Equal (viaExtension.Value, char.ConvertToUtf32 (high, low));
          Assert.True (RuneExtensions.EncodeSurrogatePair (high, low, out Rune encoded));
          Assert.Equal (decoded, encoded);
      }

      Assert.True (Rune.IsValid (viaExtension.Value));
  }
  /// <summary>
  /// Decodes a rune from a four-byte UTF-8 sequence and verifies its text, column width and sequence length.
  /// </summary>
  /// <param name="code">The UTF-8 bytes of one code point.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of the rune's string form.</param>
  [Theory]
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa8, 0x81 }, "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData (new byte [] { 0xf3, 0xa0, 0xbf, 0xa1 }, "\U000e0fe1", 1, 2)] // Undefined character U+E0FE1
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, "🍕", 2, 2)] // Slice of Pizza
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa4, 0x96 }, "🤖", 2, 2)] // Robot Face
  [InlineData (new byte [] { 0xf0, 0x90, 0x90, 0xa1 }, "𐐡", 1, 2)] // Deseret Capital Letter Er
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8c, 0xb9 }, "🌹", 2, 2)] // Rose
  public void GetColumns_Utf8_Encode (byte [] code, string str, int columns, int stringLength)
  {
      OperationStatus status = Rune.DecodeFromUtf8 (code, out Rune decoded, out int consumed);
      string text = decoded.ToString ();

      Assert.Equal (OperationStatus.Done, status);
      Assert.Equal (str, text);
      Assert.Equal (columns, decoded.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (consumed, decoded.Utf8SequenceLength);
      Assert.True (Rune.IsValid (decoded.Value));
  }
  /// <summary>
  /// Builds a rune from a single code point and verifies its text, column width, UTF-16 length and UTF-8 length.
  /// </summary>
  /// <remarks>
  /// Expected strings that are invisible or unstable in an editor (lone combining marks, Private Use Area) are
  /// written as escapes so that re-encoding or normalizing the source file cannot silently change a case.
  /// </remarks>
  /// <param name="code">The code point to wrap in a rune.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of the rune's string form.</param>
  /// <param name="utf8Length">Expected <c>rune.Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData (0, "\0", 0, 1, 1)]
  [InlineData ('\u1dc0', "\u1dc0", 0, 1, 3)] // Combining Dotted Grave Accent
  [InlineData ('\u20D0', "\u20d0", 0, 1, 3)] // Combining Left Harpoon Above
  [InlineData (1, "\u0001", -1, 1, 1)]
  [InlineData (2, "\u0002", -1, 1, 1)]
  [InlineData (31, "\u001f", -1, 1, 1)] // non printable character - Information Separator One
  [InlineData (127, "\u007f", -1, 1, 1)] // non printable character - Delete
  [InlineData (32, " ", 1, 1, 1)] // space
  [InlineData ('a', "a", 1, 1, 1)]
  [InlineData ('b', "b", 1, 1, 1)]
  [InlineData (123, "{", 1, 1, 1)] // Left Curly Bracket
  [InlineData ('\u231c', "⌜", 1, 1, 3)] // Top Left Corner

  // BUGBUG: These are CLEARLY wide glyphs, but GetColumns() returns 1
  // However, most terminals treat these as narrow and they overlap the next cell when drawn (including Windows Terminal)
  [InlineData ('\u1161', "ᅡ", 1, 1, 3)] // Hangul Jungseong A
  [InlineData ('\u2103', "℃", 1, 1, 3)] // Degree Celsius
  [InlineData ('\u2501', "━", 1, 1, 3)] // Box Drawings Heavy Horizontal
  [InlineData ('\u25a0', "■", 1, 1, 3)] // Black Square
  [InlineData ('\u25a1', "□", 1, 1, 3)] // White Square
  [InlineData ('\u277f', "❿", 1, 1, 3)] // Dingbat Negative Circled Number Ten
  [InlineData ('\u4dc0', "䷀", 2, 1, 3)] // Hexagram For The Creative Heaven - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml

  // See https://github.com/microsoft/terminal/issues/19389
  [InlineData ('\ud7b0', "ힰ", 1, 1, 3)] // Hangul Jungseong O-Yeo
  [InlineData ('\uf61e', "\uf61e", 1, 1, 3)] // Private Use Area
  [InlineData ('\u23f0', "⏰", 2, 1, 3)] // Alarm Clock
  [InlineData ('\u1100', "ᄀ", 2, 1, 3)] // Hangul Choseong Kiyeok
  [InlineData ('\u1150', "ᅐ", 2, 1, 3)] // Hangul Choseong Ceongchieumcieuc
  [InlineData ('\u2615', "☕", 2, 1, 3)] // Hot Beverage
  [InlineData ('\u231a', "⌚", 2, 1, 3)] // Watch
  [InlineData ('\u231b', "⌛", 2, 1, 3)] // Hourglass

  // Cases taken from Windows Terminal's CodepointWidthDetector tests
  // (https://github.com/microsoft/terminal/blob/main/src/types/CodepointWidthDetector.cpp)
  [InlineData ('\u0414', "Д", 1, 1, 2)] // U+0414 cyrillic capital de (CodepointWidth::Narrow)
  [InlineData ('\u1104', "ᄄ", 2, 1, 3)] // U+1104 hangul choseong ssangtikeut (CodepointWidth::Wide)
  [InlineData (0x306A, "な", 2, 1, 3)] // U+306A hiragana na (CodepointWidth::Wide)
  [InlineData (0x30CA, "ナ", 2, 1, 3)] // U+30CA katakana na (CodepointWidth::Wide)
  [InlineData (0x72D7, "狗", 2, 1, 3)] // U+72D7 (CodepointWidth::Wide)
  public void GetColumns_With_Single_Code (int code, string str, int columns, int stringLength, int utf8Length)
  {
      var rune = new Rune (code);
      string text = rune.ToString ();

      Assert.Equal (str, text);
      Assert.Equal (columns, rune.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (utf8Length, rune.Utf8SequenceLength);
      Assert.True (Rune.IsValid (rune.Value));
  }
  /// <summary>Verifies the result of the <c>IsCombiningMark</c> extension for the given code point.</summary>
  /// <param name="codepoint">The code point to wrap in a rune.</param>
  /// <param name="expected">The expected <c>IsCombiningMark ()</c> result.</param>
  [Theory]
  [InlineData (0x0338, true)] // Combining Long Solidus Overlay (e.g. ≠)
  [InlineData (0x0300, true)] // Combining Grave Accent
  [InlineData (0x0301, true)] // Combining Acute Accent (é)
  [InlineData (0x0302, true)] // Combining Circumflex Accent
  [InlineData (0x0328, true)] // Combining Ogonek (a small hook or comma shape)
  [InlineData (0x00E9, false)] // Latin Small Letter E with Acute (é)
  [InlineData (0x0061, false)] // Latin Small Letter A
  [InlineData ('\uFE20', true)] // Combining Ligature Left Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21', true)] // Combining Ligature Right Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void IsCombiningMark (int codepoint, bool expected)
  {
      bool actual = new Rune (codepoint).IsCombiningMark ();

      Assert.Equal (expected, actual);
  }
  /// <summary>
  /// Verifies that <c>MakePrintable</c> returns combining marks unchanged, with a plain letter included for
  /// comparison.
  /// </summary>
  /// <param name="code">The code point to wrap in a rune; the result is expected to have the same value.</param>
  [Theory]
  [InlineData (0x0338)] // Combining Long Solidus Overlay (e.g. ≠)
  [InlineData (0x0300)] // Combining Grave Accent
  [InlineData (0x0301)] // Combining Acute Accent (é)
  [InlineData (0x0302)] // Combining Circumflex Accent
  [InlineData (0x0328)] // Combining Ogonek (a small hook or comma shape)
  [InlineData (0x0061)] // Latin Small Letter A - not a combining mark; previously mislabelled as the ogonek case
  [InlineData ('\uFE20')] // Combining Ligature Left Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21')] // Combining Ligature Right Half - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void MakePrintable_Combining_Character_Is_Not_Printable (int code)
  {
      var rune = new Rune (code);

      Rune actual = rune.MakePrintable ();

      Assert.Equal (code, actual.Value);
  }
  /// <summary>
  /// Verifies the value <c>MakePrintable</c> returns for each listed code point.
  /// </summary>
  /// <param name="code">The input code point.</param>
  /// <param name="expected">The expected value of the rune returned by <c>MakePrintable</c>.</param>
  [Theory]
  [InlineData (0x0000001F, 0x241F)]
  [InlineData (0x0000007F, 0x247F)]
  [InlineData (0x0000009F, 0x249F)]
  [InlineData (0x0001001A, 0x1001A)] // Expected value equals the input.
  public void MakePrintable_Converts_Control_Chars_To_Proper_Unicode (int code, int expected)
  {
      var source = (Rune)code;

      Rune printable = source.MakePrintable ();

      Assert.Equal (expected, printable.Value);
  }
  /// <summary>Verifies that <c>MakePrintable</c> returns the listed code points unchanged.</summary>
  /// <param name="code">The input code point, which is also the expected result.</param>
  [Theory]
  [InlineData (0x20)]
  [InlineData (0x7E)]
  [InlineData (0xA0)]
  [InlineData (0x010020)]
  public void MakePrintable_Does_Not_Convert_Ansi_Chars_To_Unicode (int code)
  {
      var source = (Rune)code;

      Rune printable = source.MakePrintable ();

      Assert.Equal (code, printable.Value);
  }
  /// <summary>
  /// Compares the column width reported for a whole string with the sum of the widths of its runes, for text with and
  /// without a tab and a newline.
  /// </summary>
  /// <remarks>Each 200-digit literal is written as four 50-character constant pieces to keep the lines readable.</remarks>
  /// <param name="text">The input text.</param>
  /// <param name="stringLength">Expected <c>text.Length</c> and <c>text.GetRuneCount ()</c>.</param>
  /// <param name="strCols">Expected <c>text.GetColumns ()</c>.</param>
  /// <param name="runeCols">Expected sum of <c>GetColumns ()</c> over every rune in <paramref name="text"/>.</param>
  [Theory]
  [InlineData (
      "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789",
      200,
      200,
      200
  )]
  [InlineData (
      "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "\n",
      201,
      200,
      199
  )] // has a '\n' newline
  [InlineData (
      "\t"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "01234567890123456789012345678901234567890123456789"
      + "\n",
      202,
      200,
      198
  )] // has a '\t' and a '\n' newline
  public void Rune_ColumnWidth_Versus_String_ConsoleWidth (string text, int stringLength, int strCols, int runeCols)
  {
      // All characters here are in the BMP, so char count and rune count are expected to match.
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (stringLength, text.GetRuneCount ());

      Assert.Equal (strCols, text.GetColumns ());

      var perRuneTotal = 0;

      foreach (Rune rune in text.EnumerateRunes ())
      {
          perRuneTotal += rune.GetColumns ();
      }

      Assert.Equal (runeCols, perRuneTotal);
  }
  /// <summary>
  /// Verifies that constructing a <see cref="Rune"/> from each listed integer throws
  /// <see cref="ArgumentOutOfRangeException"/>.
  /// </summary>
  /// <param name="code">An integer that is not accepted by the rune constructor.</param>
  [Theory]
  [InlineData (0x12345678)]
  [InlineData ('\ud801')]
  public void Rune_Exceptions_Integers (int code)
  {
      Action construct = () => { _ = new Rune (code); };

      Assert.Throws<ArgumentOutOfRangeException> (construct);
  }
  /// <summary>
  /// Verifies that a char pair which is not a valid surrogate pair is rejected by both
  /// <c>RuneExtensions.EncodeSurrogatePair</c> (returns false) and the two-char <see cref="Rune"/> constructor (throws).
  /// </summary>
  /// <param name="code">Two chars to be used as high and low surrogate.</param>
  [Theory]
  [InlineData (new [] { '\ud799', '\udc21' })]
  public void Rune_Exceptions_Utf16_Encode (char [] code)
  {
      char first = code [0];
      char second = code [1];

      bool encoded = RuneExtensions.EncodeSurrogatePair (first, second, out _);
      Assert.False (encoded);

      Action construct = () => { _ = new Rune (first, second); };
      Assert.Throws<ArgumentOutOfRangeException> (construct);
  }
  /// <summary>
  /// Verifies the per-rune column width of the control characters U+0000..U+001F (0 for null, -1 for the rest), and
  /// that the string form of each of them reports 0 columns.
  /// </summary>
  /// <param name="expectedColumns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="scalar">The code point under test.</param>
  [Theory]
  [InlineData (0, 0)]
  [InlineData (-1, 1)]
  [InlineData (-1, 2)]
  [InlineData (-1, 3)]
  [InlineData (-1, 4)]
  [InlineData (-1, 5)]
  [InlineData (-1, 6)]
  [InlineData (-1, 7)]
  [InlineData (-1, 8)]
  [InlineData (-1, 9)]
  [InlineData (-1, 10)]
  [InlineData (-1, 11)]
  [InlineData (-1, 12)]
  [InlineData (-1, 13)]
  [InlineData (-1, 14)]
  [InlineData (-1, 15)]
  [InlineData (-1, 16)]
  [InlineData (-1, 17)]
  [InlineData (-1, 18)]
  [InlineData (-1, 19)]
  [InlineData (-1, 20)]
  [InlineData (-1, 21)]
  [InlineData (-1, 22)]
  [InlineData (-1, 23)]
  [InlineData (-1, 24)]
  [InlineData (-1, 25)]
  [InlineData (-1, 26)]
  [InlineData (-1, 27)]
  [InlineData (-1, 28)]
  [InlineData (-1, 29)]
  [InlineData (-1, 30)]
  [InlineData (-1, 31)]
  public void Rune_GetColumns_Non_Printable (int expectedColumns, int scalar)
  {
      var rune = new Rune (scalar);

      int runeColumns = rune.GetColumns ();
      Assert.Equal (expectedColumns, runeColumns);

      // The string-level width is expected to be 0 for every one of these characters.
      int stringColumns = rune.ToString ().GetColumns ();
      Assert.Equal (0, stringColumns);
  }
  /// <summary>
  /// Sums the per-rune and per-string column widths over U+0000..U+001F and checks that they differ: -31 for the
  /// runes and 0 for the strings.
  /// </summary>
  [Fact]
  public void Rune_GetColumns_Versus_String_GetColumns_With_Non_Printable_Characters ()
  {
      var runeTotal = 0;
      var stringTotal = 0;

      for (uint scalar = 0; scalar <= 31; scalar++)
      {
          var rune = (Rune)scalar;
          runeTotal += rune.GetColumns ();
          stringTotal += rune.ToString ().GetColumns ();
      }

      Assert.Equal (-31, runeTotal);
      Assert.Equal (0, stringTotal);
  }
  /// <summary>
  /// Verifies how <see cref="char.IsHighSurrogate(char)"/> and <see cref="char.IsLowSurrogate(char)"/> classify a char.
  /// </summary>
  /// <param name="code">The char under test.</param>
  /// <param name="isHighSurrogate">
  /// <c>true</c> to expect a high surrogate, <c>false</c> to expect a low surrogate, <c>null</c> to expect neither.
  /// </param>
  [Theory]
  [InlineData ('\ud800', true)]
  [InlineData ('\udbff', true)]
  [InlineData ('\udc00', false)]
  [InlineData ('\udfff', false)]
  [InlineData ('\uefff', null)]
  public void Rune_IsHighSurrogate_IsLowSurrogate (char code, bool? isHighSurrogate)
  {
      switch (isHighSurrogate)
      {
          case true:
              Assert.True (char.IsHighSurrogate (code));

              break;

          case false:
              Assert.True (char.IsLowSurrogate (code));

              break;

          default:
              // null: the char is expected to be outside both surrogate ranges.
              Assert.False (char.IsHighSurrogate (code));
              Assert.False (char.IsLowSurrogate (code));

              break;
      }
  }
  /// <summary>
  /// Verifies that a <see cref="Rune"/> can be constructed from one char or from a surrogate pair exactly when
  /// <paramref name="valid"/> says so.
  /// </summary>
  /// <remarks>
  /// Only <see cref="ArgumentOutOfRangeException"/> counts as "invalid input"; any other exception fails the test
  /// instead of being swallowed. Both the single-char and the pair constructor go through the same check, so an
  /// invalid pair can be added as a <c>false</c> case.
  /// </remarks>
  /// <param name="valid">Whether construction is expected to succeed.</param>
  /// <param name="chars">One char, or a high/low surrogate pair.</param>
  [Theory]
  [InlineData (true, '\u1100')]
  [InlineData (true, '\ud83c', '\udf39')]
  [InlineData (true, '\udbff', '\udfff')]
  [InlineData (false, '\ud801')]
  [InlineData (false, '\ud83e')]
  public void Rune_IsValid (bool valid, params char [] chars)
  {
      Rune rune = default;
      var constructed = true;

      try
      {
          rune = chars.Length == 1 ? new Rune (chars [0]) : new Rune (chars [0], chars [1]);
      }
      catch (ArgumentOutOfRangeException)
      {
          constructed = false;
      }

      // The construction outcome must match the expectation in both directions.
      Assert.Equal (valid, constructed);

      if (valid)
      {
          Assert.NotEqual (default (Rune), rune);
          Assert.True (Rune.IsValid (rune.Value));
      }
  }
  /// <summary>
  /// Verifies that the <c>ToRunes</c> extension returns the same runes, in the same order and number, as
  /// <see cref="string.EnumerateRunes"/>.
  /// </summary>
  /// <param name="text">The text to convert.</param>
  [Theory]
  [InlineData ("First line.")]
  [InlineData ("Hello, 𝔹𝕆𝔹")]
  public void Rune_ToRunes (string text)
  {
      // Build the reference array once rather than on every loop iteration.
      Rune [] expected = text.EnumerateRunes ().ToArray ();

      Rune [] actual = text.ToRunes ();

      // Without this check a result that is too short would pass unnoticed.
      Assert.Equal (expected.Length, actual.Length);

      for (var i = 0; i < expected.Length; i++)
      {
          Assert.Equal (expected [i].Value, actual [i].Value);
      }
  }
  /// <summary>
  /// Walks every Unicode scalar value and compares the column width of the rune with the column width of its string
  /// form. They are expected to agree except where the rune reports a negative width.
  /// </summary>
  /// <remarks>
  /// Only the real surrogate range U+D800..U+DFFF is skipped. Casting the loop counter to <see cref="char"/> keeps
  /// just the low 16 bits, so a <c>char.IsSurrogate ((char)i)</c> test would also skip U+xD800..U+xDFFF in every
  /// supplementary plane.
  /// </remarks>
  [Fact]
  public void Sum_Of_Rune_GetColumns_Is_Not_Always_Equal_To_String_GetColumns ()
  {
      const int start = 0x000000;
      const int end = 0x10ffff;

      for (int i = start; i <= end; i++)
      {
          // Surrogate code points are not scalar values and cannot be turned into a Rune.
          if (i is >= 0xD800 and <= 0xDFFF)
          {
              continue;
          }

          var r = new Rune ((uint)i);
          var us = r.ToString ();
          string s = char.ConvertFromUtf32 (i);

          Assert.Equal (r.ToString (), us);
          Assert.Equal (us, s);

          if (!r.IsSurrogatePair ())
          {
              if (r.GetColumns () < 0)
              {
                  // A negative rune width is never what the string reports.
                  Assert.NotEqual (r.GetColumns (), us.GetColumns ());
                  Assert.NotEqual (s.EnumerateRunes ().Sum (c => c.GetColumns ()), us.GetColumns ());
              }
              else
              {
                  Assert.Equal (r.GetColumns (), us.GetColumns ());
                  Assert.Equal (s.EnumerateRunes ().Sum (c => c.GetColumns ()), us.GetColumns ());
              }

              Assert.Equal (us.GetRuneCount (), s.Length);
          }
          else
          {
              Assert.Equal (r.GetColumns (), us.GetColumns ());
              Assert.Equal (s.GetColumns (), us.GetColumns ());

              // One rune, because a valid surrogate pair encodes a single code point in U+10000..U+10FFFF.
              Assert.Equal (1, us.GetRuneCount ());

              // The string keeps both UTF-16 code units of the pair.
              Assert.Equal (2, s.Length);
          }
      }
  }
  /// <summary>
  /// Verifies <see cref="Rune.Utf16SequenceLength"/> and <see cref="Rune.Utf8SequenceLength"/> for the given code point.
  /// </summary>
  /// <param name="code">The code point to wrap in a rune.</param>
  /// <param name="utf16Length">Expected number of UTF-16 code units.</param>
  /// <param name="utf8Length">Expected number of UTF-8 bytes.</param>
  [Theory]
  [InlineData ('a', 1, 1)]
  [InlineData (31, 1, 1)]
  [InlineData (123, 1, 1)]
  [InlineData (127, 1, 1)]
  [InlineData ('\u1150', 1, 3)]
  [InlineData ('\u1161', 1, 3)]
  [InlineData (0x16fe0, 2, 4)]
  public void System_Text_Rune_SequenceLength (int code, int utf16Length, int utf8Length)
  {
      var rune = new Rune (code);

      int actualUtf16 = rune.Utf16SequenceLength;
      int actualUtf8 = rune.Utf8SequenceLength;

      Assert.Equal (utf16Length, actualUtf16);
      Assert.Equal (utf8Length, actualUtf8);
  }
  /// <summary>
  /// Combines every high surrogate (U+D800..U+DBFF) with every low surrogate (U+DC00..U+DFFF) and checks that the
  /// resulting rune is a supplementary code point whose string form and column width are consistent.
  /// </summary>
  /// <remarks>
  /// The rune value is used directly. The loop body runs about a million times, and formatting the value as hex
  /// and parsing it back produced the same number on every pass.
  /// </remarks>
  [Fact]
  public void Test_All_Surrogate_Pairs_Range ()
  {
      for (uint h = 0xd800; h <= 0xdbff; h++)
      {
          for (uint l = 0xdc00; l <= 0xdfff; l++)
          {
              var r = new Rune ((char)h, (char)l);
              var us = r.ToString ();
              int v = r.Value;
              string s = char.ConvertFromUtf32 (v);

              Assert.True (v >= 0x10000 && v <= RuneExtensions.MaxUnicodeCodePoint);
              Assert.Equal (r.ToString (), us);
              Assert.Equal (us, s);
              Assert.Equal (r.GetColumns (), us.GetColumns ());
              Assert.Equal (s.GetColumns (), us.GetColumns ());

              // One rune, because a valid surrogate pair encodes a single code point in U+10000..U+10FFFF.
              Assert.Equal (1, us.GetRuneCount ());

              // The string keeps both UTF-16 code units of the pair.
              Assert.Equal (2, s.Length);
          }
      }
  }
  /// <summary>
  /// Verifies <c>RuneExtensions.CanBeEncodedAsRune</c> against the UTF-16 bytes of the input text.
  /// </summary>
  /// <param name="text">
  /// Either a <see cref="string"/>, or a <c>byte []</c> that is first converted with
  /// <c>StringExtensions.ToString</c>. Any other type fails the test rather than passing without an assertion.
  /// </param>
  /// <param name="canBeEncodedAsRune">The expected result.</param>
  /// <exception cref="ArgumentException"><paramref name="text"/> is neither a string nor a byte array.</exception>
  [Theory]
  [InlineData ("���", false)]
  [InlineData ("Hello, 世界", true)]
  [InlineData (new byte [] { 0xff, 0xfe, 0xfd }, false)]
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, true)]
  public void Test_CanBeEncodedAsRune_Extension (object text, bool canBeEncodedAsRune)
  {
      string str = text switch
                   {
                       string s => s,
                       byte [] bytes => StringExtensions.ToString (bytes),
                       _ => throw new ArgumentException (
                                                         $"Unsupported test input type: {text?.GetType ()}",
                                                         nameof (text)
                                                        )
                   };

      byte [] utf16Bytes = Encoding.Unicode.GetBytes (str.ToCharArray ());

      Assert.Equal (canBeEncodedAsRune, RuneExtensions.CanBeEncodedAsRune (utf16Bytes));
  }
  /// <summary>
  /// Calls the <c>DecodeLastRune</c> extension with each index from the last rune down to 0, collects the returned
  /// runes, and checks the resulting string along with the summed sizes.
  /// </summary>
  /// <param name="text">The input text.</param>
  /// <param name="bytesLength">Expected sum of the sizes returned by <c>DecodeLastRune</c>.</param>
  /// <param name="colsLength">Expected <c>GetColumns ()</c> of the rebuilt string.</param>
  /// <param name="textLength">Expected length of the rebuilt string.</param>
  /// <param name="encoded">Expected rebuilt string.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9, "界世 ,olleH")] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13, "𝔹𝕆𝔹 ,olleH")] // With Surrogate Pairs
  public void Test_DecodeLastRune_Extension (
      string text,
      int bytesLength,
      int colsLength,
      int textLength,
      string encoded
  )
  {
      List<Rune> collected = new ();
      var totalSize = 0;
      int index = text.GetRuneCount () - 1;

      while (index >= 0)
      {
          (Rune decoded, int decodedSize) = text.DecodeLastRune (index);
          collected.Add (decoded);
          totalSize += decodedSize;
          index--;
      }

      var rebuilt = StringExtensions.ToString (collected);

      Assert.Equal (encoded, rebuilt);
      Assert.Equal (bytesLength, totalSize);
      Assert.Equal (colsLength, rebuilt.GetColumns ());
      Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  /// Calls the <c>DecodeRune</c> extension with each rune index in ascending order, collects the returned runes, and
  /// checks that they rebuild the original text along with the summed sizes.
  /// </summary>
  /// <param name="text">The input text, which is also the expected rebuilt string.</param>
  /// <param name="bytesLength">Expected sum of the sizes returned by <c>DecodeRune</c>.</param>
  /// <param name="colsLength">Expected <c>GetColumns ()</c> of the rebuilt string.</param>
  /// <param name="textLength">Expected length of the rebuilt string.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9)] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13)] // With Surrogate Pairs
  public void Test_DecodeRune_Extension (string text, int bytesLength, int colsLength, int textLength)
  {
      List<Rune> collected = new ();
      var totalSize = 0;
      int runeCount = text.GetRuneCount ();

      for (var index = 0; index < runeCount; index++)
      {
          (Rune decoded, int decodedSize) = text.DecodeRune (index);
          collected.Add (decoded);
          totalSize += decodedSize;
      }

      var rebuilt = StringExtensions.ToString (collected);

      Assert.Equal (text, rebuilt);
      Assert.Equal (bytesLength, totalSize);
      Assert.Equal (colsLength, rebuilt.GetColumns ());
      Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  /// Verifies the <c>DecodeSurrogatePair</c> extension: a supplementary rune yields its two UTF-16 code units, while a
  /// BMP rune yields <c>false</c> and a null array.
  /// </summary>
  /// <remarks>
  /// The Private Use Area expected string is written as an escape because the literal character is invisible in most
  /// editors and is easily lost.
  /// </remarks>
  /// <param name="code">The code point to wrap in a rune.</param>
  /// <param name="charsValue">Expected surrogate pair, or <c>null</c> when none is expected.</param>
  /// <param name="runeString">Expected string form of the rune.</param>
  /// <param name="isSurrogatePair">Expected return value of <c>DecodeSurrogatePair</c>.</param>
  [Theory]
  [InlineData ('\uea85', null, "\uea85", false)] // Private Use Area
  [InlineData (0x1F356, new [] { '\ud83c', '\udf56' }, "🍖", true)] // Meat On Bone
  public void Test_DecodeSurrogatePair (int code, char [] charsValue, string runeString, bool isSurrogatePair)
  {
      var rune = new Rune (code);

      bool decoded = rune.DecodeSurrogatePair (out char [] chars);

      Assert.Equal (isSurrogatePair, decoded);

      if (isSurrogatePair)
      {
          Assert.Equal (2, chars.Length);
          Assert.Equal (charsValue [0], chars [0]);
          Assert.Equal (charsValue [1], chars [1]);

          // Re-encoding the pair must give the same rune back.
          Assert.Equal (runeString, new Rune (chars [0], chars [1]).ToString ());
      }
      else
      {
          Assert.Null (chars);
          Assert.Equal (runeString, rune.ToString ());
      }

      // Expected value first, so a failure message labels the two sides correctly.
      Assert.Equal (charsValue, chars);
  }
  /// <summary>
  /// Verifies <c>RuneExtensions.EncodeSurrogatePair</c>: a valid high/low pair produces the expected rune, and any
  /// other pair returns <c>false</c> and leaves the rune at the expected value.
  /// </summary>
  /// <remarks>
  /// The pair under test always comes from the theory data. The success path previously used a hard-coded pair and
  /// ignored <paramref name="highSurrogate"/> and <paramref name="lowSurrogate"/>.
  /// </remarks>
  /// <param name="highSurrogate">Candidate high surrogate.</param>
  /// <param name="lowSurrogate">Candidate low surrogate.</param>
  /// <param name="runeValue">Expected value of the output rune.</param>
  /// <param name="runeString">Expected string form of the output rune.</param>
  /// <param name="isSurrogatePair">Expected return value of <c>EncodeSurrogatePair</c>.</param>
  [Theory]
  [InlineData (unchecked ((char)0x40D7C0), (char)0xDC20, 0, "\0", false)]
  [InlineData ((char)0x0065, (char)0x0301, 0, "\0", false)]
  [InlineData ('\ud83c', '\udf56', 0x1F356, "🍖", true)] // Meat On Bone
  public void Test_EncodeSurrogatePair (
      char highSurrogate,
      char lowSurrogate,
      int runeValue,
      string runeString,
      bool isSurrogatePair
  )
  {
      bool encoded = RuneExtensions.EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune);

      Assert.Equal (isSurrogatePair, encoded);
      Assert.Equal (runeValue, rune.Value);
      Assert.Equal (runeString, rune.ToString ());
  }
  /// <summary>
  /// Verifies <see cref="char.IsSurrogate(string, int)"/> on a one-char string built from <paramref name="code"/>.
  /// </summary>
  /// <param name="code">The char under test.</param>
  /// <param name="isSurrogate">Whether the char is expected to be a surrogate.</param>
  [Theory]
  [InlineData ('\ue0fd', false)]
  [InlineData ('\ud800', true)]
  [InlineData ('\udfff', true)]
  public void Test_IsSurrogate (char code, bool isSurrogate)
  {
      string single = code.ToString ();

      bool actual = char.IsSurrogate (single, 0);

      Assert.Equal (isSurrogate, actual);
  }
  /// <summary>
  /// Verifies that the <see cref="Rune"/> constructors throw <see cref="ArgumentOutOfRangeException"/> for an
  /// out-of-range integer and for a char pair that is not a valid surrogate pair.
  /// </summary>
  /// <param name="codes">One integer, or two integers that are cast to chars and used as a pair.</param>
  [Theory]
  [InlineData (500000000)]
  [InlineData (0xf801, 0xdfff)]
  public void Test_MaxRune (params int [] codes)
  {
      Action construct;

      if (codes.Length != 1)
      {
          construct = () => { _ = new Rune ((char)codes [0], (char)codes [1]); };
      }
      else
      {
          construct = () => { _ = new Rune (codes [0]); };
      }

      Assert.Throws<ArgumentOutOfRangeException> (construct);
  }
  /// <summary>
  ///     Checks a base character followed by a combining mark: the column width and string form of each rune,
  ///     the normalized join of the two, and the result of decoding the first rune of that join.
  /// </summary>
  /// <param name="code1">Code point of the base character.</param>
  /// <param name="code2">Code point of the combining mark.</param>
  /// <param name="code">The two code points as one (un-normalized) string.</param>
  /// <param name="rune1Length">Expected column count of the base rune.</param>
  /// <param name="rune2Length">Expected column count of the combining rune.</param>
  /// <param name="codeLength">Expected <see cref="string.Length"/> of <paramref name="code"/>.</param>
  /// <param name="code1String">Expected string form of the base rune.</param>
  /// <param name="code2String">Expected string form of the combining rune.</param>
  /// <param name="joinString">Expected normalized join of both runes.</param>
  /// <param name="joinLength">Expected column count of the join.</param>
  /// <param name="bytesLength">Expected size reported by <c>DecodeRune</c> for the join.</param>
  [Theory]
  [InlineData ('\u006f', '\u0302', "\u006f\u0302", 1, 0, 2, "o", "̂", "ô", 1, 2)]
  [InlineData ('\u0065', '\u0301', "\u0065\u0301", 1, 0, 2, "e", "́", "é", 1, 2)]
  public void Test_NonSpacingChar (
      int code1,
      int code2,
      string code,
      int rune1Length,
      int rune2Length,
      int codeLength,
      string code1String,
      string code2String,
      string joinString,
      int joinLength,
      int bytesLength
  )
  {
      var baseRune = new Rune (code1);
      var combiningRune = new Rune (code2);

      // Each rune on its own: width first, then text.
      Assert.Equal (rune1Length, baseRune.GetColumns ());
      Assert.Equal (rune2Length, combiningRune.GetColumns ());
      Assert.Equal (code1String, baseRune.ToString ());
      Assert.Equal (code2String, combiningRune.ToString ());

      // The normalized join must match both the expected literal and the normalized input string.
      string joined = $"{baseRune}{combiningRune}".Normalize ();
      Assert.Equal (joined, joinString);
      Assert.Equal (joined, code.Normalize ());
      Assert.Equal (joinLength, joined.GetColumns ());

      // Summing the per-rune widths of the un-normalized input must give the same column count.
      var summedColumns = 0;

      foreach (Rune part in code.EnumerateRunes ())
      {
          summedColumns += part.GetColumns ();
      }

      Assert.Equal (joinLength, summedColumns);
      Assert.Equal (codeLength, code.Length);

      (Rune firstRune, int consumed) = joined.DecodeRune ();
      Assert.Equal (joined.ToRunes () [0], firstRune);
      Assert.Equal (bytesLength, consumed);
  }
  /// <summary>
  ///     Walks every code point in the inclusive range and checks that the rune's string form, column width
  ///     and rune count agree with the string produced by <see cref="char.ConvertFromUtf32(int)"/>.
  /// </summary>
  /// <param name="start">First code point of the range (inclusive).</param>
  /// <param name="end">Last code point of the range (inclusive).</param>
  [Theory]
  [InlineData (0x20D0, 0x20EF)]
  [InlineData (0x2310, 0x231F)]
  [InlineData (0x1D800, 0x1D80F)]
  public void Test_Range (int start, int end)
  {
      for (int codePoint = start; codePoint <= end; codePoint++)
      {
          var rune = new Rune ((uint)codePoint);
          string runeText = rune.ToString ();

          // Round-trip the code point through its six-digit hex form before converting it to UTF-16 text.
          string hexText = codePoint.ToString ("x6");
          int parsed = int.Parse (hexText, NumberStyles.HexNumber);
          string convertedText = char.ConvertFromUtf32 (parsed);

          bool isPair = rune.IsSurrogatePair ();

          // These checks hold regardless of whether the rune needs a surrogate pair.
          Assert.Equal (rune.ToString (), runeText);
          Assert.Equal (runeText, convertedText);
          Assert.Equal (rune.GetColumns (), runeText.GetColumns ());

          if (isPair)
          {
              // A valid surrogate pair is a single rune in U+10000..U+10FFFF ...
              Assert.Equal (1, runeText.GetRuneCount ());

              // ... while the string still holds both original surrogate code units.
              Assert.Equal (2, convertedText.Length);
          }
          else
          {
              // Without surrogate pairs the rune count always equals String.Length.
              Assert.Equal (runeText.GetRuneCount (), convertedText.Length);
          }

          Assert.Equal (convertedText.GetColumns (), runeText.GetColumns ());
      }
  }
  /// <summary>
  ///     Runs both string-walking helpers over a string made of surrogate pairs (expected to succeed) and over a
  ///     lone surrogate (expected to throw <see cref="Exception"/>).
  /// </summary>
  [Fact]
  public void Test_SurrogatePair_From_String ()
  {
      const string surrogatePairs = "𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟";
      const string loneSurrogate = "\ud801";

      // char-based walk
      Assert.True (ProcessTestStringUseChar (surrogatePairs));
      Assert.Throws<Exception> (() => ProcessTestStringUseChar (loneSurrogate));

      // Rune-based walk
      Assert.True (ProcessStringUseRune (surrogatePairs));
      Assert.Throws<Exception> (() => ProcessStringUseRune (loneSurrogate));
  }
  /// <summary>
  ///     Checks the letter count reported by <c>CountLettersInString</c> for an ASCII word and for a string of
  ///     supplementary-plane letters separated by a space.
  /// </summary>
  [Fact]
  public void TestRuneIsLetter ()
  {
      int asciiLetters = CountLettersInString ("Hello");
      Assert.Equal (5, asciiLetters);

      int supplementaryLetters = CountLettersInString ("𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟");
      Assert.Equal (8, supplementaryLetters);
  }
  /// <summary>
  ///     Checks that splitting an emoji list on a space or on a comma yields three parts either way.
  /// </summary>
  [Fact]
  public void TestSplit ()
  {
      const string inputString = "🐂, 🐄, 🐆";

      int partsBySpace = inputString.Split (' ').Length;
      int partsByComma = inputString.Split (',').Length;

      Assert.Equal (3, partsBySpace);
      Assert.Equal (3, partsByComma);
  }
  /// <summary>
  ///     Checks the value returned by <c>GetEncodingLength</c> for the single rune in
  ///     <paramref name="runeStr"/> under the encoding named by <paramref name="encodingName"/>.
  /// </summary>
  /// <param name="runeStr">A string that must contain exactly one rune.</param>
  /// <param name="encodingName">Name handed to <see cref="Encoding.GetEncoding(string)"/>.</param>
  /// <param name="expectedLength">Length the extension is expected to report.</param>
  [Theory]
  [InlineData ("a", "utf-8", 1)]
  [InlineData ("a", "utf-16", 1)]
  [InlineData ("a", "utf-32", 3)]
  [InlineData ("𝔹", "utf-8", 4)]
  [InlineData ("𝔹", "utf-16", 4)]
  [InlineData ("𝔹", "utf-32", 3)]
  public void GetEncodingLength_ReturnsLengthBasedOnSelectedEncoding (string runeStr, string encodingName, int expectedLength)
  {
      // Single () also guards the test data: it throws unless the input holds exactly one rune.
      Rune onlyRune = runeStr.EnumerateRunes ().Single ();
      Encoding selected = Encoding.GetEncoding (encodingName);

      Assert.Equal (expectedLength, onlyRune.GetEncodingLength (selected));
  }
  /// <summary>Counts the runes in <paramref name="s"/> for which <see cref="Rune.IsLetter(Rune)"/> is true.</summary>
  /// <param name="s">The string whose runes are examined.</param>
  /// <returns>The number of letter runes found.</returns>
  private int CountLettersInString (string s)
  {
      return s.EnumerateRunes ().Count (Rune.IsLetter);
  }
  /// <summary>
  ///     Asserts that two strings are identical and that their column width, rune count, UTF-16 length and
  ///     text-element count match the expected values.
  /// </summary>
  /// <param name="us">First string; used for the column and rune-count checks.</param>
  /// <param name="s">Second string; used for the length and text-element checks.</param>
  /// <param name="consoleWidth">Expected column count of <paramref name="us"/>.</param>
  /// <param name="runeCount">Expected rune count of <paramref name="us"/>.</param>
  /// <param name="stringCount">Expected <see cref="string.Length"/> of <paramref name="s"/>.</param>
  /// <param name="txtElementCount">Expected number of text elements in <paramref name="s"/>.</param>
  private void PrintTextElementCount (
      string us,
      string s,
      int consoleWidth,
      int runeCount,
      int stringCount,
      int txtElementCount
  )
  {
      // Both inputs must be the same text.
      Assert.Equal (us.Length, s.Length);
      Assert.Equal (us, s);

      Assert.Equal (consoleWidth, us.GetColumns ());
      Assert.Equal (runeCount, us.GetRuneCount ());
      Assert.Equal (stringCount, s.Length);

      // For versions prior to Net5.0 the StringInfo class might handle some grapheme clusters incorrectly.
      var countedElements = 0;

      for (TextElementEnumerator elements = StringInfo.GetTextElementEnumerator (s); elements.MoveNext ();)
      {
          countedElements++;
      }

      Assert.Equal (txtElementCount, countedElements);
  }
  /// <summary>
  ///     Walks <paramref name="s"/> one UTF-16 code unit at a time, turning each unit (or surrogate pair) into a
  ///     <see cref="Rune"/> and asserting along the way, then checks that the collected runes rebuild the
  ///     original string and add up to its column width.
  /// </summary>
  /// <param name="s">The string to walk.</param>
  /// <returns><see langword="true"/> when the whole string was processed.</returns>
  /// <exception cref="Exception">A code unit is neither a valid scalar nor the start of an encodable pair.</exception>
  private bool ProcessStringUseRune (string s)
  {
      List<Rune> collected = new ();
      var rebuilt = new StringBuilder ();
      var totalColumns = 0;
      var index = 0;

      while (index < s.Length)
      {
          char current = s [index];
          Rune rune = default;

          if (Rune.IsValid (current))
          {
              // The code unit is a scalar value on its own.
              rune = new Rune (current);
              Assert.True (Rune.IsValid (rune.Value));
              collected.Add (rune);
              Assert.Equal (current, rune.Value);
              Assert.False (rune.IsSurrogatePair ());
          }
          else if (index + 1 < s.Length && RuneExtensions.EncodeSurrogatePair (current, s [index + 1], out Rune pairRune))
          {
              // The code unit and its successor encode one rune.
              Assert.Equal (0, rune.Value);
              Assert.False (Rune.IsValid (current));
              rune = pairRune;
              collected.Add (rune);
              var pairText = new string (new [] { current, s [index + 1] });
              Assert.Equal (pairText, pairRune.ToString ());
              Assert.True (pairRune.IsSurrogatePair ());
              index++; // Skip the second code unit of the pair
          }
          else
          {
              Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (current));

              throw new Exception ("String was not well-formed UTF-16.");
          }

          totalColumns += rune.GetColumns (); // Add the column width of this Rune
          rebuilt.Append (rune.ToString ());
          index++;
      }

      Assert.Equal (s.GetColumns (), totalColumns);
      Assert.Equal (s, rebuilt.ToString ());
      Assert.Equal (s, StringExtensions.ToString (collected));

      return true;
  }
  /// <summary>
  ///     Walks <paramref name="s"/> one UTF-16 code unit at a time using <see cref="char"/> APIs: a surrogate is
  ///     remembered until the next surrogate arrives, the two are then combined and checked; any other code unit
  ///     is checked as a single-unit rune.
  /// </summary>
  /// <param name="s">The string to walk.</param>
  /// <returns><see langword="true"/> when the whole string was processed.</returns>
  /// <exception cref="Exception">The final code unit is a surrogate with no pending partner.</exception>
  private bool ProcessTestStringUseChar (string s)
  {
      // NOTE(review): a pending surrogate is only consumed by the next surrogate; non-surrogate units in between
      // do not reset or reject it.
      char pending = default;

      for (var index = 0; index < s.Length; index++)
      {
          char current = s [index];

          if (!char.IsSurrogate (current))
          {
              var single = new Rune (current);
              var buffer = new byte [4];
              ((Rune)current).Encode (buffer);
              Assert.Equal ((int)current, buffer [0]);
              Assert.Equal (current, single.Value);
              Assert.True (Rune.IsValid (single.Value));
              Assert.False (single.IsSurrogatePair ());

              continue;
          }

          if (pending != '\0' && char.IsSurrogate (pending))
          {
              // Second half of a pair: combine it with the remembered surrogate.
              var combined = new Rune (pending, current);
              Assert.True (combined.IsSurrogatePair ());

              int codePoint = char.ConvertToUtf32 (pending, current);
              RuneExtensions.EncodeSurrogatePair (pending, current, out Rune encoded);
              Assert.Equal (codePoint, encoded.Value);

              var pairText = new string (new [] { pending, current });
              var fromCodePoint = (Rune)codePoint;
              Assert.Equal (pairText, fromCodePoint.ToString ());
              Assert.True (fromCodePoint.IsSurrogatePair ());

              pending = default;

              continue;
          }

          if (index >= s.Length - 1)
          {
              // A surrogate in the last position has nothing left to pair with.
              Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (current));

              throw new Exception ("String was not well-formed UTF-16.");
          }

          // First half of a pair: remember it for the next iteration.
          pending = current;
      }

      return true;
  }
  958. }