2
0

RuneTests.cs 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192
  1. using System.Buffers;
  2. using System.Globalization;
  3. using System.Text;
  4. namespace UnitTests_Parallelizable.TextTests;
  5. public class RuneTests
  6. {
  /// <summary>
  ///     Casting a supplementary code point to <see cref="char"/> keeps only its low 16 bits, so U+1D539 becomes
  ///     U+D539 and no longer renders as the original glyph.
  /// </summary>
  [Fact]
  public void Cast_To_Char_Surrogate_Pair_Return_UTF16 ()
  {
      // The unchecked cast drops everything above bit 15: 0x1d539 -> 0xd539.
      char truncated = unchecked ((char)0x1d539);

      Assert.NotEqual ("𝔹", new Rune (truncated).ToString ());
      Assert.Equal ("픹", new Rune (truncated).ToString ());
      Assert.Equal ("픹", new Rune (0xd539).ToString ());
      Assert.Equal ("𝔹", new Rune (0x1d539).ToString ());
  }
  /// <summary>
  ///     Compares rune lists by content (<c>Assert.Equal</c> / <c>SequenceEqual</c>) and by reference, using a copy
  ///     and an alias of the same list.
  /// </summary>
  [Fact]
  public void Equals_ToRuneList ()
  {
      List<List<Rune>> single = new () { "First line.".ToRuneList () };
      List<List<Rune>> pair = new () { "First line.".ToRuneList (), "Second line.".ToRuneList () };

      List<Rune> first = single [0];
      List<Rune> otherFirst = pair [0];
      List<Rune> second = pair [1];
      List<Rune> copy = new (first); // same content, new instance
      List<Rune> alias = first; // same instance

      // Independently built lists: equal content, different references.
      Assert.Equal (first, otherFirst);
      Assert.NotSame (first, otherFirst);
      Assert.NotEqual (first, second);
      Assert.NotSame (first, second);

      // A copy matches by content only.
      Assert.Equal (copy, first);
      Assert.NotSame (copy, first);
      Assert.Equal (copy, otherFirst);
      Assert.NotSame (copy, otherFirst);
      Assert.NotEqual (copy, second);
      Assert.NotSame (copy, second);

      // An alias matches by content and by reference.
      Assert.Equal (alias, first);
      Assert.Same (alias, first);
      Assert.Equal (alias, otherFirst);
      Assert.NotSame (alias, otherFirst);
      Assert.NotEqual (alias, second);
      Assert.NotSame (alias, second);

      Assert.True (first.SequenceEqual (otherFirst));
      Assert.False (first.SequenceEqual (second));
      Assert.True (copy.SequenceEqual (first));
      Assert.True (copy.SequenceEqual (otherFirst));
      Assert.False (copy.SequenceEqual (second));
      Assert.True (alias.SequenceEqual (first));
      Assert.True (alias.SequenceEqual (otherFirst));
      Assert.False (alias.SequenceEqual (second));
  }
  /// <summary>
  ///     Runs <c>PrintTextElementCount</c> over precomposed, decomposed, surrogate-pair and ZWJ-joined inputs.
  ///     The four numeric arguments are passed through unchanged; their meaning is defined by that helper.
  /// </summary>
  [Fact]
  public void GetColumns_GetRuneCount ()
  {
      PrintTextElementCount ('\u00e1'.ToString (), "á", 1, 1, 1, 1);

      // The "\u0061\u0301" case used to be listed twice verbatim; one copy is enough.
      PrintTextElementCount ("\u0061\u0301", "á", 1, 2, 2, 1);
      PrintTextElementCount ("\u0065\u0301", "é", 1, 2, 2, 1);
      PrintTextElementCount ("\U0001f469\U0001f3fd\u200d\U0001f692", "👩🏽‍🚒", 2, 4, 7, 1);
      PrintTextElementCount ("\ud801\udccf", "𐓏", 1, 1, 2, 1);
      PrintTextElementCount ("\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466", "👨‍👩‍👧‍👦", 2, 7, 11, 1);
      PrintTextElementCount ("\U0001f469\u200d\U0001f692", "👩‍🚒", 2, 3, 5, 1);
      PrintTextElementCount ("\u0068\u0069", "hi", 2, 2, 2, 2);
  }
  /// <summary>
  ///     Checks that <paramref name="code"/> normalizes to <paramref name="str"/> and verifies the UTF-16 lengths
  ///     of both plus the column width of <paramref name="str"/>.
  /// </summary>
  /// <param name="code">Escaped input sequence.</param>
  /// <param name="str">Expected result of <c>code.Normalize ()</c>.</param>
  /// <param name="codeLength">Expected <c>code.Length</c>.</param>
  /// <param name="columns">Expected <c>str.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected <c>str.Length</c>.</param>
  [Theory]
  [InlineData ("\u2615\ufe0f", "☕️", 2, 2, 2)] // \ufe0f forces it to be rendered as a colorful image as compared to a monochrome text variant.
  [InlineData ("\u1107\u1165\u11b8", "법", 3, 2, 1)] // three Hangul jamo that normalize to the single syllable U+BC95 법
  [InlineData ("\U0001F468\u200D\U0001F469\u200D\U0001F467", "👨‍👩‍👧", 8, 2, 8)] // Man, Woman and Girl emoji.

  //[InlineData ("\u0915\u093f", "कि", 2, 2, 2)] // Hindi कि with DEVANAGARI LETTER KA and DEVANAGARI VOWEL SIGN I
  [InlineData ("\u0e4d\u0e32", "ํา", 2, 1, 2)] // Decomposition: ํ (U+0E4D) - า (U+0E32) = U+0E33 ำ Thai Character Sara Am
  [InlineData ("\u0e33", "ำ", 1, 1, 1)] // Decomposition: ํ (U+0E4D) - า (U+0E32) = U+0E33 ำ Thai Character Sara Am
  public void GetColumns_String_Without_SurrogatePair (
      string code,
      string str,
      int codeLength,
      int columns,
      int stringLength
  )
  {
      string normalized = code.Normalize ();
      Assert.Equal (str, normalized);

      int actualCodeLength = code.Length;
      Assert.Equal (codeLength, actualCodeLength);

      //Assert.Equal (columns, code.EnumerateRunes ().Sum (x => x.GetColumns ()));
      int actualColumns = str.GetColumns ();
      Assert.Equal (columns, actualColumns);

      int actualStringLength = str.Length;
      Assert.Equal (stringLength, actualStringLength);
  }
  /// <summary>
  ///     Builds a rune from a high/low surrogate pair and checks its text, column width, UTF-16 length and
  ///     UTF-8 length.
  /// </summary>
  /// <param name="code">Two chars: high surrogate, then low surrogate.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of <c>rune.ToString ()</c>.</param>
  /// <param name="utf8Length">Expected <c>rune.Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData (new [] { '\ud83e', '\ude01' }, "🨁", 1, 2, 4)] // Neutral Chess Queen
  [InlineData (new [] { '\udb43', '\udfe1' }, "󠿡", 1, 2, 4)] // Undefined Character
  [InlineData (new [] { '\ud83c', '\udf55' }, "🍕", 2, 2, 4)] // 🍕 Slice of Pizza
  [InlineData (new [] { '\ud83e', '\udd16' }, "🤖", 2, 2, 4)] // 🤖 Robot Face
  [InlineData (new [] { '\ud83e', '\udde0' }, "🧠", 2, 2, 4)] // 🧠 Brain
  [InlineData (new [] { '\ud801', '\udc21' }, "𐐡", 1, 2, 4)] // 𐐡 Deseret Capital Letter Er
  [InlineData (new [] { '\ud83c', '\udf39' }, "🌹", 2, 2, 4)] // 🌹 Rose
  [InlineData (new [] { '\uD83D', '\uDC7E' }, "👾", 2, 2, 4)] // U+1F47E alien monster (CodepointWidth::Wide)
  [InlineData (new [] { '\uD83D', '\uDD1C' }, "🔜", 2, 2, 4)] // 🔜 Soon With Rightwards Arrow Above (CodepointWidth::Wide)
  public void GetColumns_Utf16_Encode (char [] code, string str, int columns, int stringLength, int utf8Length)
  {
      char high = code [0];
      char low = code [1];
      Rune combined = new (high, low);
      string text = combined.ToString ();

      Assert.Equal (str, text);
      Assert.Equal (columns, combined.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (utf8Length, combined.Utf8SequenceLength);
      Assert.True (Rune.IsValid (combined.Value));
  }
  /// <summary>
  ///     Decodes one supplementary code point from a string, first with <see cref="Rune.DecodeFromUtf16"/> and
  ///     then with the <c>DecodeRune</c> extension, and checks that both produce the expected rune.
  /// </summary>
  /// <param name="code">Escaped input holding a single code point.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of <c>rune.ToString ()</c>.</param>
  [Theory]
  [InlineData ("\U0001fa01", "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData ("\U000e0fe1", "󠿡", 1, 2)] // Undefined Character
  [InlineData ("\U0001F355", "🍕", 2, 2)] // 🍕 Slice of Pizza
  [InlineData ("\U0001F916", "🤖", 2, 2)] // 🤖 Robot Face
  [InlineData ("\U0001f9e0", "🧠", 2, 2)] // 🧠 Brain
  [InlineData ("\U00010421", "𐐡", 1, 2)] // 𐐡 Deseret Capital Letter Er
  [InlineData ("\U0001f339", "🌹", 2, 2)] // 🌹 Rose

  //[InlineData ("\uFE20FE21", "", 1, 1)] // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void GetColumns_Utf32_Encode (string code, string str, int columns, int stringLength)
  {
      // Path 1: the BCL decoder.
      OperationStatus status = Rune.DecodeFromUtf16 (code, out Rune decoded, out int consumedChars);
      Assert.Equal (OperationStatus.Done, status);

      string decodedText = decoded.ToString ();
      Assert.Equal (str, decodedText);
      Assert.Equal (columns, decoded.GetColumns ());
      Assert.Equal (stringLength, decodedText.Length);
      Assert.Equal (consumedChars, decoded.Utf16SequenceLength);
      Assert.True (Rune.IsValid (decoded.Value));

      // Path 2: the DecodeRune extension; its reported size is compared with the UTF-8 length.
      (Rune viaExtension, int reportedSize) = code.DecodeRune ();
      string extensionText = viaExtension.ToString ();
      Assert.Equal (str, extensionText);
      Assert.Equal (columns, viaExtension.GetColumns ());
      Assert.Equal (stringLength, extensionText.Length);
      Assert.Equal (reportedSize, viaExtension.Utf8SequenceLength);

      // Walk every adjacent char pair and recombine it into a code point.
      for (var next = 1; next < code.Length; next++)
      {
          char high = code [next - 1];
          char low = code [next];

          Assert.Equal (viaExtension.Value, char.ConvertToUtf32 (high, low));
          Assert.True (RuneExtensions.EncodeSurrogatePair (high, low, out Rune recombined));
          Assert.Equal (decoded, recombined);
      }

      Assert.True (Rune.IsValid (viaExtension.Value));
  }
  /// <summary>
  ///     Decodes one rune from UTF-8 bytes with <see cref="Rune.DecodeFromUtf8"/> and checks its text, column
  ///     width and lengths.
  /// </summary>
  /// <param name="code">UTF-8 bytes of a single code point.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of <c>rune.ToString ()</c>.</param>
  [Theory]
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa8, 0x81 }, "🨁", 1, 2)] // Neutral Chess Queen
  [InlineData (new byte [] { 0xf3, 0xa0, 0xbf, 0xa1 }, "󠿡", 1, 2)] // Undefined Character
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, "🍕", 2, 2)] // 🍕 Slice of Pizza
  [InlineData (new byte [] { 0xf0, 0x9f, 0xa4, 0x96 }, "🤖", 2, 2)] // 🤖 Robot Face
  [InlineData (new byte [] { 0xf0, 0x90, 0x90, 0xa1 }, "𐐡", 1, 2)] // 𐐡 Deseret Capital Letter Er
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8c, 0xb9 }, "🌹", 2, 2)] // 🌹 Rose
  public void GetColumns_Utf8_Encode (byte [] code, string str, int columns, int stringLength)
  {
      OperationStatus status = Rune.DecodeFromUtf8 (code, out Rune decoded, out int consumedBytes);
      Assert.Equal (OperationStatus.Done, status);

      string text = decoded.ToString ();
      Assert.Equal (str, text);
      Assert.Equal (columns, decoded.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (consumedBytes, decoded.Utf8SequenceLength);
      Assert.True (Rune.IsValid (decoded.Value));
  }
  /// <summary>
  ///     Builds a rune from a single BMP code point and checks its text, column width, UTF-16 length and
  ///     UTF-8 length.
  /// </summary>
  /// <param name="code">Code point passed to the <see cref="Rune"/> constructor.</param>
  /// <param name="str">Expected <c>rune.ToString ()</c>.</param>
  /// <param name="columns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="stringLength">Expected length of <c>rune.ToString ()</c>.</param>
  /// <param name="utf8Length">Expected <c>rune.Utf8SequenceLength</c>.</param>
  [Theory]
  [InlineData (0, "\0", 0, 1, 1)]
  [InlineData ('\u1dc0', "᷀", 0, 1, 3)] // ◌᷀ Combining Dotted Grave Accent
  [InlineData ('\u20D0', "⃐", 0, 1, 3)] // ◌⃐ Combining Left Harpoon Above
  [InlineData (1, "\u0001", -1, 1, 1)]
  [InlineData (2, "\u0002", -1, 1, 1)]
  [InlineData (31, "\u001f", -1, 1, 1)] // non printable character - Information Separator One
  [InlineData (127, "\u007f", -1, 1, 1)] // non printable character - Delete
  [InlineData (32, " ", 1, 1, 1)] // space
  [InlineData ('a', "a", 1, 1, 1)]
  [InlineData ('b', "b", 1, 1, 1)]
  [InlineData (123, "{", 1, 1, 1)] // { Left Curly Bracket
  [InlineData ('\u231c', "⌜", 1, 1, 3)] // ⌜ Top Left Corner

  // BUGBUG: These are CLEARLY wide glyphs, but GetColumns() returns 1
  // However, most terminals treat these as narrow and they overlap the next cell when drawn (including Windows Terminal)
  [InlineData ('\u1161', "ᅡ", 0, 1, 3)] // ᅡ Hangul Jungseong A - Unicode Hangul Jamo for join with column width equal to 0 alone.
  [InlineData ('\u2103', "℃", 1, 1, 3)] // ℃ Degree Celsius
  [InlineData ('\u2501', "━", 1, 1, 3)] // ━ Box Drawings Heavy Horizontal
  [InlineData ('\u25a0', "■", 1, 1, 3)] // ■ Black Square
  [InlineData ('\u25a1', "□", 1, 1, 3)] // □ White Square
  [InlineData ('\u277f', "❿", 1, 1, 3)] //Dingbat Negative Circled Number Ten - ❿ U+277f
  [InlineData ('\u4dc0', "䷀", 2, 1, 3)] // ䷀Hexagram For The Creative Heaven - U+4dc0 - https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml

  // See https://github.com/microsoft/terminal/issues/19389
  [InlineData ('\ud7b0', "ힰ", 0, 1, 3)] // ힰ Hangul Jungseong O-Yeo - U+d7b0

  // The Private Use Area glyph has no visible rendering, so the expected string is written as an escape.
  [InlineData ('\uf61e', "\uf61e", 1, 1, 3)] // Private Use Area
  [InlineData ('\u23f0', "⏰", 2, 1, 3)] // Alarm Clock - ⏰ U+23f0
  [InlineData ('\u1100', "ᄀ", 2, 1, 3)] // ᄀ Hangul Choseong Kiyeok
  [InlineData ('\u1150', "ᅐ", 2, 1, 3)] // ᅐ Hangul Choseong Ceongchieumcieuc
  [InlineData ('\u2615', "☕", 2, 1, 3)] // ☕ Hot Beverage
  [InlineData ('\u231a', "⌚", 2, 1, 3)] // ⌚ Watch
  [InlineData ('\u231b', "⌛", 2, 1, 3)] // ⌛ Hourglass

  // From WindowsTerminal's CodepointWidthDetector tests (https://github.com/microsoft/terminal/blob/main/src/types/CodepointWidthDetector.cpp)
  //static constexpr std::wstring_view emoji = L"\xD83E\xDD22"; // U+1F922 nauseated face
  //static constexpr std::wstring_view ambiguous = L"\x414"; // U+0414 cyrillic capital de
  //{ 0x414, L"\x414", CodepointWidth::Narrow }, // U+0414 cyrillic capital de
  [InlineData ('\u0414', "Д", 1, 1, 2)] // U+0414 cyrillic capital de

  //{ 0x1104, L"\x1104", CodepointWidth::Wide }, // U+1104 hangul choseong ssangtikeut
  [InlineData ('\u1104', "ᄄ", 2, 1, 3)]

  //{ 0x306A, L"\x306A", CodepointWidth::Wide }, // U+306A hiragana na な
  [InlineData (0x306A, "な", 2, 1, 3)]

  //{ 0x30CA, L"\x30CA", CodepointWidth::Wide }, // U+30CA katakana na ナ
  [InlineData (0x30CA, "ナ", 2, 1, 3)]

  //{ 0x72D7, L"\x72D7", CodepointWidth::Wide }, // U+72D7
  [InlineData (0x72D7, "狗", 2, 1, 3)]
  public void GetColumns_With_Single_Code (int code, string str, int columns, int stringLength, int utf8Length)
  {
      var rune = new Rune (code);
      var text = rune.ToString ();

      Assert.Equal (str, text);
      Assert.Equal (columns, rune.GetColumns ());
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (utf8Length, rune.Utf8SequenceLength);
      Assert.True (Rune.IsValid (rune.Value));
  }
  // IsCombiningMark tests
  /// <summary>Checks the result of <c>IsCombiningMark ()</c> for combining and non-combining code points.</summary>
  /// <param name="codepoint">Code point to wrap in a <see cref="Rune"/>.</param>
  /// <param name="expected">Expected result of <c>IsCombiningMark ()</c>.</param>
  [Theory]
  [InlineData (0x0338, true)] // Combining Long Solidus Overlay (U+0338) (e.g. ≠)
  [InlineData (0x0300, true)] // Combining Grave Accent
  [InlineData (0x0301, true)] // Combining acute accent (é)
  [InlineData (0x0302, true)] // Combining Circumflex Accent
  [InlineData (0x0328, true)] // Combining ogonek (a small hook or comma shape) U+0328
  [InlineData (0x00E9, false)] // Latin Small Letter E with Acute, Unicode U+00E9 é
  [InlineData (0x0061, false)] // Latin Small Letter A is U+0061.
  [InlineData ('\uFE20', true)] // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21', true)] // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void IsCombiningMark (int codepoint, bool expected)
  {
      bool actual = new Rune (codepoint).IsCombiningMark ();

      Assert.Equal (expected, actual);
  }
  /// <summary>Checks that <c>MakePrintable ()</c> returns combining marks with their code point unchanged.</summary>
  /// <param name="code">Code point of the combining mark under test.</param>
  [Theory]
  [InlineData (0x0338)] // Combining Long Solidus Overlay (U+0338) (e.g. ≠)
  [InlineData (0x0300)] // Combining Grave Accent
  [InlineData (0x0301)] // Combining acute accent (é)
  [InlineData (0x0302)] // Combining Circumflex Accent

  // This row used to pass 0x0061 (Latin 'a') while being labelled as the ogonek.
  [InlineData (0x0328)] // Combining ogonek (a small hook or comma shape)
  [InlineData ('\uFE20')] // Combining Ligature Left Half - U+fe20 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  [InlineData ('\uFE21')] // Combining Ligature Right Half - U+fe21 -https://github.com/microsoft/terminal/blob/main/src/types/unicode_width_overrides.xml
  public void MakePrintable_Combining_Character_Is_Not_Printable (int code)
  {
      var rune = new Rune (code);

      Rune actual = rune.MakePrintable ();

      Assert.Equal (code, actual.Value);
  }
  /// <summary>Checks the code point that <c>MakePrintable ()</c> returns for each input code point.</summary>
  /// <param name="code">Input code point.</param>
  /// <param name="expected">Expected code point of the result.</param>
  [Theory]
  [InlineData (0x0000001F, 0x241F)]
  [InlineData (0x0000007F, 0x247F)]
  [InlineData (0x0000009F, 0x249F)]
  [InlineData (0x0001001A, 0x1001A)]
  public void MakePrintable_Converts_Control_Chars_To_Proper_Unicode (int code, int expected)
  {
      var source = new Rune (code);

      int printableValue = source.MakePrintable ().Value;

      Assert.Equal (expected, printableValue);
  }
  /// <summary>Checks that <c>MakePrintable ()</c> leaves these code points unchanged.</summary>
  /// <param name="code">Code point expected to pass through untouched.</param>
  [Theory]
  [InlineData (0x20)]
  [InlineData (0x7E)]
  [InlineData (0xA0)]
  [InlineData (0x010020)]
  public void MakePrintable_Does_Not_Convert_Ansi_Chars_To_Unicode (int code)
  {
      var source = new Rune (code);

      int printableValue = source.MakePrintable ().Value;

      Assert.Equal (code, printableValue);
  }
  /// <summary>
  ///     Compares the column width of a whole string with the sum of the column widths of its runes, for text
  ///     with and without control characters.
  /// </summary>
  /// <param name="text">Input text.</param>
  /// <param name="stringLength">Expected <c>text.Length</c> and rune count.</param>
  /// <param name="strCols">Expected <c>text.GetColumns ()</c>.</param>
  /// <param name="runeCols">Expected sum of <c>GetColumns ()</c> over every rune in the text.</param>
  [Theory]
  [InlineData ("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", 200, 200, 200)]
  [InlineData ("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789\n", 201, 200, 199)] // has a '\n' newline
  [InlineData ("\t01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789\n", 202, 200, 198)] // has a '\t' and a '\n' newline
  public void Rune_ColumnWidth_Versus_String_ConsoleWidth (string text, int stringLength, int strCols, int runeCols)
  {
      Assert.Equal (stringLength, text.Length);
      Assert.Equal (stringLength, text.GetRuneCount ());
      Assert.Equal (strCols, text.GetColumns ());

      // Accumulate the per-rune widths by hand.
      var perRuneTotal = 0;

      foreach (Rune rune in text.EnumerateRunes ())
      {
          perRuneTotal += rune.GetColumns ();
      }

      Assert.Equal (runeCols, perRuneTotal);
  }
  /// <summary>
  ///     Checks that the <see cref="Rune"/> constructor throws <see cref="ArgumentOutOfRangeException"/> for an
  ///     out-of-range value and for a lone surrogate.
  /// </summary>
  /// <param name="code">Value that must be rejected.</param>
  [Theory]
  [InlineData (0x12345678)]
  [InlineData ('\ud801')]
  public void Rune_Exceptions_Integers (int code)
  {
      Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (code));
  }
  /// <summary>Checks the code point that <c>MakePrintable ()</c> returns for control and printable characters.</summary>
  /// <param name="inputChar">Character to wrap in a <see cref="Rune"/>.</param>
  /// <param name="expectedCodePoint">Expected code point of the result.</param>
  [Theory]

  // Control characters (should be mapped to Control Pictures)
  [InlineData ('\u0000', 0x2400)] // NULL → ␀
  [InlineData ('\u0009', 0x2409)] // TAB → ␉
  [InlineData ('\u000A', 0x240A)] // LF → ␊
  [InlineData ('\u000D', 0x240D)] // CR → ␍

  // Printable characters (should remain unchanged)
  [InlineData ('A', 'A')]
  [InlineData (' ', ' ')]
  [InlineData ('~', '~')]
  public void MakePrintable_ReturnsExpected (char inputChar, int expectedCodePoint)
  {
      int printableValue = new Rune (inputChar).MakePrintable ().Value;

      Assert.Equal (expectedCodePoint, printableValue);
  }
  /// <summary>Checks that <c>MakePrintable ()</c> returns U+1F600 with its code point unchanged.</summary>
  [Fact]
  public void MakePrintable_SupplementaryRune_RemainsUnchanged ()
  {
      // 😀 grinning face emoji: a supplementary character outside the BMP (not a control)
      const int grinningFace = 0x1F600;
      var emoji = new Rune (grinningFace);

      Rune printable = emoji.MakePrintable ();

      Assert.Equal (emoji.Value, printable.Value);
  }
  /// <summary>
  ///     Checks that an invalid high/low pair is rejected by both <c>RuneExtensions.EncodeSurrogatePair</c> and
  ///     the two-char <see cref="Rune"/> constructor.
  /// </summary>
  /// <param name="code">Two chars that do not form a valid surrogate pair.</param>
  [Theory]
  [InlineData (new [] { '\ud799', '\udc21' })]
  public void Rune_Exceptions_Utf16_Encode (char [] code)
  {
      char first = code [0];
      char second = code [1];

      Assert.False (RuneExtensions.EncodeSurrogatePair (first, second, out _));
      Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (first, second));
  }
  /// <summary>
  ///     Checks the column width of the C0 control characters U+0000..U+001F: as a rune the data expects 0 for
  ///     NUL and -1 for the rest, while the one-character string always measures 0 columns.
  /// </summary>
  /// <remarks>
  ///     NOTE(review): the previous summary contrasted Wcwidth.UnicodeCalculator with a local wcwidth port and
  ///     said the port reports -1 for NUL; the data below expects 0 for NUL — confirm which is current.
  /// </remarks>
  /// <param name="expectedColumns">Expected <c>rune.GetColumns ()</c>.</param>
  /// <param name="scalar">Code point of the control character.</param>
  [Theory]
  [InlineData (0, 0)]
  [InlineData (-1, 1)]
  [InlineData (-1, 2)]
  [InlineData (-1, 3)]
  [InlineData (-1, 4)]
  [InlineData (-1, 5)]
  [InlineData (-1, 6)]
  [InlineData (-1, 7)]
  [InlineData (-1, 8)]
  [InlineData (-1, 9)]
  [InlineData (-1, 10)]
  [InlineData (-1, 11)]
  [InlineData (-1, 12)]
  [InlineData (-1, 13)]
  [InlineData (-1, 14)]
  [InlineData (-1, 15)]
  [InlineData (-1, 16)]
  [InlineData (-1, 17)]
  [InlineData (-1, 18)]
  [InlineData (-1, 19)]
  [InlineData (-1, 20)]
  [InlineData (-1, 21)]
  [InlineData (-1, 22)]
  [InlineData (-1, 23)]
  [InlineData (-1, 24)]
  [InlineData (-1, 25)]
  [InlineData (-1, 26)]
  [InlineData (-1, 27)]
  [InlineData (-1, 28)]
  [InlineData (-1, 29)]
  [InlineData (-1, 30)]
  [InlineData (-1, 31)]
  public void Rune_GetColumns_Non_Printable (int expectedColumns, int scalar)
  {
      Rune control = new (scalar);

      int runeWidth = control.GetColumns ();
      Assert.Equal (expectedColumns, runeWidth);

      int stringWidth = control.ToString ().GetColumns ();
      Assert.Equal (0, stringWidth);
  }
  /// <summary>
  ///     Sums the column widths of U+0000..U+001F measured as runes and as one-character strings; the expected
  ///     totals are -31 and 0.
  /// </summary>
  [Fact]
  public void Rune_GetColumns_Versus_String_GetColumns_With_Non_Printable_Characters ()
  {
      var runeTotal = 0;
      var stringTotal = 0;

      for (uint scalar = 0; scalar < 32; scalar++)
      {
          var control = (Rune)scalar;

          runeTotal += control.GetColumns ();
          stringTotal += control.ToString ().GetColumns ();
      }

      Assert.Equal (-31, runeTotal);
      Assert.Equal (0, stringTotal);
  }
  /// <summary>Checks whether a char is classified as a high surrogate, a low surrogate, or neither.</summary>
  /// <param name="code">Char under test.</param>
  /// <param name="isHighSurrogate">
  ///     <see langword="true"/> for a high surrogate, <see langword="false"/> for a low surrogate,
  ///     <see langword="null"/> when the char is not a surrogate at all.
  /// </param>
  [Theory]
  [InlineData ('\ud800', true)]
  [InlineData ('\udbff', true)]
  [InlineData ('\udc00', false)]
  [InlineData ('\udfff', false)]
  [InlineData ('\uefff', null)]
  public void Rune_IsHighSurrogate_IsLowSurrogate (char code, bool? isHighSurrogate)
  {
      switch (isHighSurrogate)
      {
          case true:
              Assert.True (char.IsHighSurrogate (code));

              break;

          case false:
              Assert.True (char.IsLowSurrogate (code));

              break;

          default:
              Assert.False (char.IsHighSurrogate (code));
              Assert.False (char.IsLowSurrogate (code));

              break;
      }
  }
  /// <summary>
  ///     Checks that a rune can be built from a valid char or surrogate pair, and that the constructor throws
  ///     <see cref="ArgumentOutOfRangeException"/> for a lone surrogate.
  /// </summary>
  /// <param name="valid">Whether construction is expected to succeed.</param>
  /// <param name="chars">One char, or a high/low surrogate pair.</param>
  [Theory]
  [InlineData (true, '\u1100')]
  [InlineData (true, '\ud83c', '\udf39')]
  [InlineData (true, '\udbff', '\udfff')]
  [InlineData (false, '\ud801')]
  [InlineData (false, '\ud83e')]
  public void Rune_IsValid (bool valid, params char [] chars)
  {
      // Picks the one-char or two-char constructor from the number of chars supplied.
      Rune Create () { return chars.Length == 1 ? new Rune (chars [0]) : new Rune (chars [0], chars [1]); }

      if (!valid)
      {
          // Assert on the exact exception instead of swallowing every Exception type.
          Assert.Throws<ArgumentOutOfRangeException> (() => { Create (); });

          return;
      }

      Rune rune = Create ();

      Assert.NotEqual (default (Rune), rune);
      Assert.True (Rune.IsValid (rune.Value));
  }
  /// <summary>
  ///     Checks that the <c>ToRunes ()</c> extension returns the same runes, in the same order and number, as
  ///     <see cref="string.EnumerateRunes"/>.
  /// </summary>
  /// <param name="text">Input text, with and without surrogate pairs.</param>
  [Theory]
  [InlineData ("First line.")]
  [InlineData ("Hello, 𝔹𝕆𝔹")]
  public void Rune_ToRunes (string text)
  {
      // Build the reference array once rather than on every loop iteration.
      Rune [] expected = text.EnumerateRunes ().ToArray ();

      Rune [] runes = text.ToRunes ();

      // Without this check a shorter result would pass, because the loop is bounded by runes.Length.
      Assert.Equal (expected.Length, runes.Length);

      for (var i = 0; i < runes.Length; i++)
      {
          Assert.Equal (expected [i].Value, runes [i].Value);
      }
  }
  /// <summary>
  ///     Walks every Unicode scalar value and compares the column width of the rune with that of its string
  ///     form. They are expected to differ only when the rune reports a negative width.
  /// </summary>
  [Fact]
  public void Sum_Of_Rune_GetColumns_Is_Not_Always_Equal_To_String_GetColumns ()
  {
      const int start = 0x000000;
      const int end = 0x10ffff;

      for (int i = start; i <= end; i++)
      {
          // Skip only the surrogate code points. The earlier char.IsSurrogate ((char)i) check truncated i to
          // 16 bits and so also skipped valid supplementary code points such as U+1D800..U+1DFFF.
          if (!Rune.IsValid (i))
          {
              continue;
          }

          var r = new Rune (i);
          var us = r.ToString ();
          string s = char.ConvertFromUtf32 (i);

          Assert.Equal (us, s);

          if (!r.IsSurrogatePair ())
          {
              if (r.GetColumns () < 0)
              {
                  // A negative rune width never matches the string width.
                  Assert.NotEqual (r.GetColumns (), us.GetColumns ());
                  Assert.NotEqual (s.EnumerateRunes ().Sum (c => c.GetColumns ()), us.GetColumns ());
              }
              else
              {
                  Assert.Equal (r.GetColumns (), us.GetColumns ());
                  Assert.Equal (s.EnumerateRunes ().Sum (c => c.GetColumns ()), us.GetColumns ());
              }

              Assert.Equal (us.GetRuneCount (), s.Length);
          }
          else
          {
              Assert.Equal (r.GetColumns (), us.GetColumns ());
              Assert.Equal (s.GetColumns (), us.GetColumns ());

              // Here returns 1 because is a valid surrogate pair resulting in only rune >=U+10000..U+10FFFF
              Assert.Equal (1, us.GetRuneCount ());

              // String always preserves the originals values of each surrogate pair
              Assert.Equal (2, s.Length);
          }
      }
  }
  /// <summary>Checks <see cref="Rune.Utf16SequenceLength"/> and <see cref="Rune.Utf8SequenceLength"/>.</summary>
  /// <param name="code">Code point passed to the <see cref="Rune"/> constructor.</param>
  /// <param name="utf16Length">Expected number of UTF-16 code units.</param>
  /// <param name="utf8Length">Expected number of UTF-8 bytes.</param>
  [Theory]
  [InlineData ('a', 1, 1)]
  [InlineData (31, 1, 1)]
  [InlineData (123, 1, 1)]
  [InlineData (127, 1, 1)]
  [InlineData ('\u1150', 1, 3)]
  [InlineData ('\u1161', 1, 3)]
  [InlineData (0x16fe0, 2, 4)]
  public void System_Text_Rune_SequenceLength (int code, int utf16Length, int utf8Length)
  {
      Rune rune = new (code);

      int actualUtf16 = rune.Utf16SequenceLength;
      Assert.Equal (utf16Length, actualUtf16);

      int actualUtf8 = rune.Utf8SequenceLength;
      Assert.Equal (utf8Length, actualUtf8);
  }
  /// <summary>
  ///     Combines every high surrogate with every low surrogate and checks that the resulting rune is a
  ///     supplementary code point whose string form has matching columns, one rune and two chars.
  /// </summary>
  [Fact]
  public void Test_All_Surrogate_Pairs_Range ()
  {
      const uint firstHigh = 0xd800;
      const uint lastHigh = 0xdbff;
      const uint firstLow = 0xdc00;
      const uint lastLow = 0xdfff;

      for (uint high = firstHigh; high <= lastHigh; high++)
      {
          for (uint low = firstLow; low <= lastLow; low++)
          {
              Rune rune = new ((char)high, (char)low);
              string runeText = rune.ToString ();

              // Round-trip the value through its hexadecimal representation.
              int codePoint = int.Parse (rune.Value.ToString ("x6"), NumberStyles.HexNumber);
              string fromUtf32 = char.ConvertFromUtf32 (codePoint);

              Assert.True (codePoint >= 0x10000 && codePoint <= RuneExtensions.MaxUnicodeCodePoint);
              Assert.Equal (rune.ToString (), runeText);
              Assert.Equal (runeText, fromUtf32);
              Assert.Equal (rune.GetColumns (), runeText.GetColumns ());
              Assert.Equal (fromUtf32.GetColumns (), runeText.GetColumns ());

              // Here returns 1 because is a valid surrogate pair resulting in only rune >=U+10000..U+10FFFF
              Assert.Equal (1, runeText.GetRuneCount ());

              // String always preserves the originals values of each surrogate pair
              Assert.Equal (2, fromUtf32.Length);
          }
      }
  }
  /// <summary>
  ///     Converts the input to a string, encodes it as UTF-16 bytes, and checks the result of
  ///     <c>RuneExtensions.CanBeEncodedAsRune</c>.
  /// </summary>
  /// <param name="text">A <see cref="string"/> or a <c>byte []</c>; any other type is ignored.</param>
  /// <param name="canBeEncodedAsRune">Expected result.</param>
  [Theory]
  [InlineData ("���", false)]
  [InlineData ("Hello, 世界", true)]
  [InlineData (new byte [] { 0xff, 0xfe, 0xfd }, false)]
  [InlineData (new byte [] { 0xf0, 0x9f, 0x8d, 0x95 }, true)]
  public void Test_CanBeEncodedAsRune_Extension (object text, bool canBeEncodedAsRune)
  {
      string str;

      switch (text)
      {
          case string literal:
              str = literal;

              break;

          case byte [] bytes:
              str = StringExtensions.ToString (bytes);

              break;

          default:
              // Inputs of any other type are not checked.
              return;
      }

      // Both input kinds are checked through the same UTF-16 byte encoding.
      var encodable = RuneExtensions.CanBeEncodedAsRune (Encoding.Unicode.GetBytes (str.ToCharArray ()));

      if (canBeEncodedAsRune)
      {
          Assert.True (encodable);
      }
      else
      {
          Assert.False (encodable);
      }
  }
  /// <summary>
  ///     Calls <c>DecodeLastRune</c> with each index from the last rune down to zero and checks the collected
  ///     runes, the summed sizes, and the column width and length of the rebuilt string.
  /// </summary>
  /// <param name="text">Input text.</param>
  /// <param name="bytesLength">Expected sum of the sizes reported by <c>DecodeLastRune</c>.</param>
  /// <param name="colsLength">Expected column width of the rebuilt string.</param>
  /// <param name="textLength">Expected <c>Length</c> of the rebuilt string.</param>
  /// <param name="encoded">Expected rebuilt string.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9, "界世 ,olleH")] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13, "𝔹𝕆𝔹 ,olleH")] // With Surrogate Pairs
  public void Test_DecodeLastRune_Extension (
      string text,
      int bytesLength,
      int colsLength,
      int textLength,
      string encoded
  )
  {
      List<Rune> collected = new ();
      var totalSize = 0;
      int index = text.GetRuneCount () - 1;

      while (index >= 0)
      {
          (Rune decoded, int decodedSize) = text.DecodeLastRune (index);
          collected.Add (decoded);
          totalSize += decodedSize;
          index--;
      }

      var rebuilt = StringExtensions.ToString (collected);

      Assert.Equal (encoded, rebuilt);
      Assert.Equal (bytesLength, totalSize);
      Assert.Equal (colsLength, rebuilt.GetColumns ());
      Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  ///     Calls <c>DecodeRune</c> with each rune index from zero upwards and checks that the collected runes
  ///     rebuild the original text, along with the summed sizes, column width and length.
  /// </summary>
  /// <param name="text">Input text.</param>
  /// <param name="bytesLength">Expected sum of the sizes reported by <c>DecodeRune</c>.</param>
  /// <param name="colsLength">Expected column width of the rebuilt string.</param>
  /// <param name="textLength">Expected <c>Length</c> of the rebuilt string.</param>
  [Theory]
  [InlineData ("Hello, 世界", 13, 11, 9)] // Without Surrogate Pairs
  [InlineData ("Hello, 𝔹𝕆𝔹", 19, 10, 13)] // With Surrogate Pairs
  public void Test_DecodeRune_Extension (string text, int bytesLength, int colsLength, int textLength)
  {
      List<Rune> collected = new ();
      var totalSize = 0;
      var index = 0;

      while (index < text.GetRuneCount ())
      {
          (Rune decoded, int decodedSize) = text.DecodeRune (index);
          collected.Add (decoded);
          totalSize += decodedSize;
          index++;
      }

      var rebuilt = StringExtensions.ToString (collected);

      Assert.Equal (text, rebuilt);
      Assert.Equal (bytesLength, totalSize);
      Assert.Equal (colsLength, rebuilt.GetColumns ());
      Assert.Equal (textLength, rebuilt.Length);
  }
  /// <summary>
  ///     Checks <c>DecodeSurrogatePair</c>: a supplementary code point yields its high/low pair, a BMP code
  ///     point yields <see langword="false"/> and a <see langword="null"/> array.
  /// </summary>
  /// <param name="code">Code point passed to the <see cref="Rune"/> constructor.</param>
  /// <param name="charsValue">Expected surrogate pair, or <see langword="null"/> when none is expected.</param>
  /// <param name="runeString">Expected string form of the rune.</param>
  /// <param name="isSurrogatePair">Whether the rune is expected to decode into a surrogate pair.</param>
  [Theory]

  // The Private Use Area glyph has no visible rendering, so the expected string is written as an escape.
  [InlineData ('\uea85', null, "\uea85", false)] // Private Use Area
  [InlineData (0x1F356, new [] { '\ud83c', '\udf56' }, "🍖", true)] // 🍖 Meat On Bone
  public void Test_DecodeSurrogatePair (int code, char [] charsValue, string runeString, bool isSurrogatePair)
  {
      var rune = new Rune (code);
      char [] chars;

      if (isSurrogatePair)
      {
          Assert.True (rune.DecodeSurrogatePair (out chars));
          Assert.Equal (2, chars.Length);
          Assert.Equal (charsValue [0], chars [0]);
          Assert.Equal (charsValue [1], chars [1]);
          Assert.Equal (runeString, new Rune (chars [0], chars [1]).ToString ());
      }
      else
      {
          Assert.False (rune.DecodeSurrogatePair (out chars));
          Assert.Null (chars);
          Assert.Equal (runeString, rune.ToString ());
      }

      // Expected value first, so a failure message labels the two sides correctly.
      Assert.Equal (charsValue, chars);
  }
  /// <summary>
  ///     Checks <c>RuneExtensions.EncodeSurrogatePair</c>: a valid pair yields the combined rune, anything else
  ///     returns <see langword="false"/> and leaves the output rune at value 0.
  /// </summary>
  /// <param name="highSurrogate">Candidate high surrogate.</param>
  /// <param name="lowSurrogate">Candidate low surrogate.</param>
  /// <param name="runeValue">Expected <c>Value</c> of the output rune.</param>
  /// <param name="runeString">Expected string form of the output rune.</param>
  /// <param name="isSurrogatePair">Expected return value of <c>EncodeSurrogatePair</c>.</param>
  [Theory]
  [InlineData (unchecked ((char)0x40D7C0), (char)0xDC20, 0, "\0", false)]
  [InlineData ((char)0x0065, (char)0x0301, 0, "\0", false)]
  [InlineData ('\ud83c', '\udf56', 0x1F356, "🍖", true)] // 🍖 Meat On Bone
  public void Test_EncodeSurrogatePair (
      char highSurrogate,
      char lowSurrogate,
      int runeValue,
      string runeString,
      bool isSurrogatePair
  )
  {
      // Always encode the theory's own inputs; the success branch used to hard-code '\ud83c', '\udf56'.
      bool encoded = RuneExtensions.EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune);

      Assert.Equal (isSurrogatePair, encoded);
      Assert.Equal (runeValue, rune.Value);
      Assert.Equal (runeString, rune.ToString ());
  }
  705. [Theory]
  706. [InlineData ('\ue0fd', false)]
  707. [InlineData ('\ud800', true)]
  708. [InlineData ('\udfff', true)]
  709. public void Test_IsSurrogate (char code, bool isSurrogate)
  710. {
  711. if (isSurrogate)
  712. {
  713. Assert.True (char.IsSurrogate (code.ToString (), 0));
  714. }
  715. else
  716. {
  717. Assert.False (char.IsSurrogate (code.ToString (), 0));
  718. }
  719. }
  720. [Theory]
  721. [InlineData (500000000)]
  722. [InlineData (0xf801, 0xdfff)]
  723. public void Test_MaxRune (params int [] codes)
  724. {
  725. if (codes.Length == 1)
  726. {
  727. Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (codes [0]));
  728. }
  729. else
  730. {
  731. Assert.Throws<ArgumentOutOfRangeException> (() => new Rune ((char)codes [0], (char)codes [1]));
  732. }
  733. }
  734. [Theory]
  735. [InlineData (
  736. '\u006f',
  737. '\u0302',
  738. "\u006f\u0302",
  739. 1,
  740. 0,
  741. 2,
  742. "o",
  743. "̂",
  744. "ô",
  745. 1,
  746. 2
  747. )]
  748. [InlineData (
  749. '\u0065',
  750. '\u0301',
  751. "\u0065\u0301",
  752. 1,
  753. 0,
  754. 2,
  755. "e",
  756. "́",
  757. "é",
  758. 1,
  759. 2
  760. )]
  761. public void Test_NonSpacingChar (
  762. int code1,
  763. int code2,
  764. string code,
  765. int rune1Length,
  766. int rune2Length,
  767. int codeLength,
  768. string code1String,
  769. string code2String,
  770. string joinString,
  771. int joinLength,
  772. int bytesLength
  773. )
  774. {
  775. var rune = new Rune (code1);
  776. var nsRune = new Rune (code2);
  777. Assert.Equal (rune1Length, rune.GetColumns ());
  778. Assert.Equal (rune2Length, nsRune.GetColumns ());
  779. var ul = rune.ToString ();
  780. Assert.Equal (code1String, ul);
  781. var uns = nsRune.ToString ();
  782. Assert.Equal (code2String, uns);
  783. string f = $"{rune}{nsRune}".Normalize ();
  784. Assert.Equal (f, joinString);
  785. Assert.Equal (f, code.Normalize ());
  786. Assert.Equal (joinLength, f.GetColumns ());
  787. Assert.Equal (joinLength, code.EnumerateRunes ().Sum (c => c.GetColumns ()));
  788. Assert.Equal (codeLength, code.Length);
  789. (Rune nrune, int size) = f.DecodeRune ();
  790. Assert.Equal (f.ToRunes () [0], nrune);
  791. Assert.Equal (bytesLength, size);
  792. }
  793. [Theory]
  794. [InlineData (0x20D0, 0x20EF)]
  795. [InlineData (0x2310, 0x231F)]
  796. [InlineData (0x1D800, 0x1D80F)]
  797. public void Test_Range (int start, int end)
  798. {
  799. for (int i = start; i <= end; i++)
  800. {
  801. var r = new Rune ((uint)i);
  802. var us = r.ToString ();
  803. var hex = i.ToString ("x6");
  804. int v = int.Parse (hex, NumberStyles.HexNumber);
  805. string s = char.ConvertFromUtf32 (v);
  806. if (!r.IsSurrogatePair ())
  807. {
  808. Assert.Equal (r.ToString (), us);
  809. Assert.Equal (us, s);
  810. Assert.Equal (r.GetColumns (), us.GetColumns ());
  811. Assert.Equal (
  812. us.GetRuneCount (),
  813. s.Length
  814. ); // For not surrogate pairs string.RuneCount is always equal to String.Length
  815. }
  816. else
  817. {
  818. Assert.Equal (r.ToString (), us);
  819. Assert.Equal (us, s);
  820. Assert.Equal (r.GetColumns (), us.GetColumns ());
  821. Assert.Equal (
  822. 1,
  823. us.GetRuneCount ()
  824. ); // Here returns 1 because is a valid surrogate pair resulting in only rune >=U+10000..U+10FFFF
  825. Assert.Equal (2, s.Length); // String always preserves the originals values of each surrogate pair
  826. }
  827. Assert.Equal (s.GetColumns (), us.GetColumns ());
  828. }
  829. }
  830. [Fact]
  831. public void Test_SurrogatePair_From_String ()
  832. {
  833. Assert.True (ProcessTestStringUseChar ("𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟"));
  834. Assert.Throws<Exception> (() => ProcessTestStringUseChar ("\ud801"));
  835. Assert.True (ProcessStringUseRune ("𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟"));
  836. Assert.Throws<Exception> (() => ProcessStringUseRune ("\ud801"));
  837. }
  838. [Fact]
  839. public void TestRuneIsLetter ()
  840. {
  841. Assert.Equal (5, CountLettersInString ("Hello"));
  842. Assert.Equal (8, CountLettersInString ("𐓏𐓘𐓻𐓘𐓻𐓟 𐒻𐓟"));
  843. }
  844. [Fact]
  845. public void TestSplit ()
  846. {
  847. var inputString = "🐂, 🐄, 🐆";
  848. string [] splitOnSpace = inputString.Split (' ');
  849. string [] splitOnComma = inputString.Split (',');
  850. Assert.Equal (3, splitOnSpace.Length);
  851. Assert.Equal (3, splitOnComma.Length);
  852. }
  853. [Theory]
  854. [InlineData ("a", "utf-8", 1)]
  855. [InlineData ("a", "utf-16", 1)]
  856. [InlineData ("a", "utf-32", 3)]
  857. [InlineData ("𝔹", "utf-8", 4)]
  858. [InlineData ("𝔹", "utf-16", 4)]
  859. [InlineData ("𝔹", "utf-32", 3)]
  860. public void GetEncodingLength_ReturnsLengthBasedOnSelectedEncoding (string runeStr, string encodingName, int expectedLength)
  861. {
  862. Rune rune = runeStr.EnumerateRunes ().Single ();
  863. var encoding = Encoding.GetEncoding (encodingName);
  864. int actualLength = rune.GetEncodingLength (encoding);
  865. Assert.Equal (expectedLength, actualLength);
  866. }
  867. private int CountLettersInString (string s)
  868. {
  869. var letterCount = 0;
  870. foreach (Rune rune in s.EnumerateRunes ())
  871. {
  872. if (Rune.IsLetter (rune))
  873. {
  874. letterCount++;
  875. }
  876. }
  877. return letterCount;
  878. }
  879. private void PrintTextElementCount (
  880. string us,
  881. string s,
  882. int consoleWidth,
  883. int runeCount,
  884. int stringCount,
  885. int txtElementCount
  886. )
  887. {
  888. Assert.Equal (us.Length, s.Length);
  889. Assert.Equal (us, s);
  890. Assert.Equal (consoleWidth, us.GetColumns ());
  891. Assert.Equal (runeCount, us.GetRuneCount ());
  892. Assert.Equal (stringCount, s.Length);
  893. var textElementCount = 0;
  894. foreach (string _ in GraphemeHelper.GetGraphemes (s))
  895. {
  896. textElementCount++; // For versions prior to Net5.0 the StringInfo class might handle some grapheme clusters incorrectly.
  897. }
  898. Assert.Equal (txtElementCount, textElementCount);
  899. }
  900. private bool ProcessStringUseRune (string s)
  901. {
  902. string us = s;
  903. var rs = "";
  904. Rune codePoint;
  905. List<Rune> runes = new ();
  906. var colWidth = 0;
  907. for (var i = 0; i < s.Length; i++)
  908. {
  909. Rune rune = default;
  910. if (Rune.IsValid (s [i]))
  911. {
  912. rune = new Rune (s [i]);
  913. Assert.True (Rune.IsValid (rune.Value));
  914. runes.Add (rune);
  915. Assert.Equal (s [i], rune.Value);
  916. Assert.False (rune.IsSurrogatePair ());
  917. }
  918. else if (i + 1 < s.Length && RuneExtensions.EncodeSurrogatePair (s [i], s [i + 1], out codePoint))
  919. {
  920. Assert.Equal (0, rune.Value);
  921. Assert.False (Rune.IsValid (s [i]));
  922. rune = codePoint;
  923. runes.Add (rune);
  924. var sp = new string (new [] { s [i], s [i + 1] });
  925. Assert.Equal (sp, codePoint.ToString ());
  926. Assert.True (codePoint.IsSurrogatePair ());
  927. i++; // Increment the iterator by the number of surrogate pair
  928. }
  929. else
  930. {
  931. Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (s [i]));
  932. throw new Exception ("String was not well-formed UTF-16.");
  933. }
  934. colWidth += rune.GetColumns (); // Increment the column width of this Rune
  935. rs += rune.ToString ();
  936. }
  937. Assert.Equal (us.GetColumns (), colWidth);
  938. Assert.Equal (s, rs);
  939. Assert.Equal (s, StringExtensions.ToString (runes));
  940. return true;
  941. }
  942. private bool ProcessTestStringUseChar (string s)
  943. {
  944. char surrogateChar = default;
  945. for (var i = 0; i < s.Length; i++)
  946. {
  947. Rune r;
  948. if (char.IsSurrogate (s [i]))
  949. {
  950. if (surrogateChar != default (int) && char.IsSurrogate (surrogateChar))
  951. {
  952. r = new Rune (surrogateChar, s [i]);
  953. Assert.True (r.IsSurrogatePair ());
  954. int codePoint = char.ConvertToUtf32 (surrogateChar, s [i]);
  955. RuneExtensions.EncodeSurrogatePair (surrogateChar, s [i], out Rune rune);
  956. Assert.Equal (codePoint, rune.Value);
  957. var sp = new string (new [] { surrogateChar, s [i] });
  958. r = (Rune)codePoint;
  959. Assert.Equal (sp, r.ToString ());
  960. Assert.True (r.IsSurrogatePair ());
  961. surrogateChar = default (char);
  962. }
  963. else if (i < s.Length - 1)
  964. {
  965. surrogateChar = s [i];
  966. }
  967. else
  968. {
  969. Assert.Throws<ArgumentOutOfRangeException> (() => new Rune (s [i]));
  970. throw new Exception ("String was not well-formed UTF-16.");
  971. }
  972. }
  973. else
  974. {
  975. r = new Rune (s [i]);
  976. var buff = new byte [4];
  977. ((Rune)s [i]).Encode (buff);
  978. Assert.Equal ((int)s [i], buff [0]);
  979. Assert.Equal (s [i], r.Value);
  980. Assert.True (Rune.IsValid (r.Value));
  981. Assert.False (r.IsSurrogatePair ());
  982. }
  983. }
  984. return true;
  985. }
  986. [Theory]
  987. [InlineData (0x0041, new byte [] { 0x41 })] // 'A', ASCII
  988. [InlineData (0x00E9, new byte [] { 0xC3, 0xA9 })] // 'é', 2-byte UTF-8
  989. [InlineData (0x20AC, new byte [] { 0xE2, 0x82, 0xAC })] // '€', 3-byte UTF-8
  990. [InlineData (0x1F600, new byte [] { 0xF0, 0x9F, 0x98, 0x80 })] // 😀 emoji, 4-byte UTF-8
  991. public void Encode_WritesExpectedBytes (int codePoint, byte [] expectedBytes)
  992. {
  993. // Arrange
  994. Rune rune = new Rune (codePoint);
  995. byte [] buffer = new byte [10]; // extra space
  996. for (int i = 0; i < buffer.Length; i++)
  997. {
  998. buffer [i] = 0xFF;
  999. }
  1000. // Act
  1001. int written = rune.Encode (buffer);
  1002. // Assert
  1003. Assert.Equal (expectedBytes.Length, written);
  1004. for (int i = 0; i < written; i++)
  1005. {
  1006. Assert.Equal (expectedBytes [i], buffer [i]);
  1007. }
  1008. }
  1009. [Fact]
  1010. public void Encode_WithStartAndCount_WritesPartialBytes ()
  1011. {
  1012. // Arrange: U+1F600 😀 (4 bytes)
  1013. Rune rune = new Rune (0x1F600);
  1014. byte [] buffer = new byte [10];
  1015. for (int i = 0; i < buffer.Length; i++)
  1016. {
  1017. buffer [i] = 0xFF;
  1018. }
  1019. // Act: write starting at index 2, limit count to 2 bytes
  1020. int written = rune.Encode (buffer, start: 2, count: 2);
  1021. // Assert
  1022. Assert.Equal (2, written);
  1023. // Original UTF-8 bytes: F0 9F 98 80
  1024. Assert.Equal (0xF0, buffer [2]);
  1025. Assert.Equal (0x9F, buffer [3]);
  1026. // Remaining buffer untouched
  1027. Assert.Equal (0xFF, buffer [0]);
  1028. Assert.Equal (0xFF, buffer [1]);
  1029. Assert.Equal (0xFF, buffer [4]);
  1030. }
  1031. [Fact]
  1032. public void Encode_WithCountGreaterThanRuneBytes_WritesAllBytes ()
  1033. {
  1034. // Arrange: é → C3 A9
  1035. Rune rune = new Rune ('é');
  1036. byte [] buffer = new byte [10];
  1037. for (int i = 0; i < buffer.Length; i++)
  1038. {
  1039. buffer [i] = 0xFF;
  1040. }
  1041. // Act: count larger than needed
  1042. int written = rune.Encode (buffer, start: 1, count: 10);
  1043. // Assert
  1044. Assert.Equal (2, written);
  1045. Assert.Equal (0xC3, buffer [1]);
  1046. Assert.Equal (0xA9, buffer [2]);
  1047. Assert.Equal (0xFF, buffer [3]); // next byte untouched
  1048. }
  1049. [Fact]
  1050. public void Encode_ZeroCount_WritesNothing ()
  1051. {
  1052. Rune rune = new Rune ('A');
  1053. byte [] buffer = new byte [5];
  1054. for (int i = 0; i < buffer.Length; i++)
  1055. {
  1056. buffer [i] = 0xFF;
  1057. }
  1058. int written = rune.Encode (buffer, start: 0, count: 0);
  1059. Assert.Equal (0, written);
  1060. foreach (var b in buffer)
  1061. {
  1062. Assert.Equal (0xFF, b); // buffer untouched
  1063. }
  1064. }
  1065. }