// StringTetsLithuaniaData.cs
  namespace Jint.Tests.Runtime
  {
      /// <summary>
      /// Builds the (input, expected) string pairs used to test Lithuanian upper-casing:
      /// COMBINING DOT ABOVE (U+0307) must be dropped when it follows a Soft_Dotted character
      /// and kept otherwise.
      /// </summary>
      public class StringTetsLithuaniaData
      {
          // COMBINING DOT ABOVE, the mark whose removal is under test.
          private const string CombiningDotAbove = "\u0307";

          // From: https://github.com/tc39/test262/blob/main/test/intl402/String/prototype/toLocaleUpperCase/special_casing_Lithuanian.js
          // Paired by index with softDottedUpperCased.
          private static readonly string[] softDotted = [
              "\u0069", "\u006A", // LATIN SMALL LETTER I..LATIN SMALL LETTER J
              "\u012F", // LATIN SMALL LETTER I WITH OGONEK
              "\u0249", // LATIN SMALL LETTER J WITH STROKE
              "\u0268", // LATIN SMALL LETTER I WITH STROKE
              "\u029D", // LATIN SMALL LETTER J WITH CROSSED-TAIL
              "\u02B2", // MODIFIER LETTER SMALL J
              "\u03F3", // GREEK LETTER YOT
              "\u0456", // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
              "\u0458", // CYRILLIC SMALL LETTER JE
              "\u1D62", // LATIN SUBSCRIPT SMALL LETTER I
              "\u1D96", // LATIN SMALL LETTER I WITH RETROFLEX HOOK
              "\u1DA4", // MODIFIER LETTER SMALL I WITH STROKE
              "\u1DA8", // MODIFIER LETTER SMALL J WITH CROSSED-TAIL
              "\u1E2D", // LATIN SMALL LETTER I WITH TILDE BELOW
              "\u1ECB", // LATIN SMALL LETTER I WITH DOT BELOW
              "\u2071", // SUPERSCRIPT LATIN SMALL LETTER I
              "\u2148", "\u2149", // DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J
              "\u2C7C", // LATIN SUBSCRIPT SMALL LETTER J
              "\uD835\uDC22", "\uD835\uDC23", // MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J
              "\uD835\uDC56", "\uD835\uDC57", // MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J
              "\uD835\uDC8A", "\uD835\uDC8B", // MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J
              "\uD835\uDCBE", "\uD835\uDCBF", // MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J
              "\uD835\uDCF2", "\uD835\uDCF3", // MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J
              "\uD835\uDD26", "\uD835\uDD27", // MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J
              "\uD835\uDD5A", "\uD835\uDD5B", // MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J
              "\uD835\uDD8E", "\uD835\uDD8F", // MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J
              "\uD835\uDDC2", "\uD835\uDDC3", // MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J
              "\uD835\uDDF6", "\uD835\uDDF7", // MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J
              "\uD835\uDE2A", "\uD835\uDE2B", // MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J
              "\uD835\uDE5E", "\uD835\uDE5F", // MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J
              "\uD835\uDE92", "\uD835\uDE93", // MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J
          ];

          // Results obtained from node -v 18.12.0.
          // Entry i is the expected upper-cased form of softDotted[i].
          private static readonly string[] softDottedUpperCased = [
              "I", "J", "Į", "Ɉ", "Ɨ", "Ʝ", "ʲ", "Ϳ", "І", "Ј",
              "ᵢ", "ᶖ", "ᶤ", "ᶨ", "Ḭ", "Ị", "ⁱ", "ⅈ", "ⅉ", "ⱼ",
              "𝐢", "𝐣", "𝑖", "𝑗", "𝒊", "𝒋", "𝒾", "𝒿", "𝓲", "𝓳",
              "𝔦", "𝔧", "𝕚", "𝕛", "𝖎", "𝖏", "𝗂", "𝗃", "𝗶", "𝗷",
              "𝘪", "𝘫", "𝙞", "𝙟", "𝚒", "𝚓",
          ];

          /// <summary>
          /// Appends six (input, expected) rows to <paramref name="data"/> for one character sequence:
          /// 1.- String with character at the beginning of the string.
          /// 2.- String with double character at the beginning of the string.
          /// 3.- String with character at the middle of the string.
          /// 4.- String with double character at the middle of the string.
          /// 5.- String with character at the end of the string.
          /// 6.- String with double character at the end of the string.
          /// </summary>
          /// <param name="data">Collection that receives the generated rows.</param>
          /// <param name="nonCapChar">Sequence embedded in the non-uppercased input string.</param>
          /// <param name="toUpperChar">Sequence expected in its place after upper-casing.</param>
          private static void AddStringsForChars(TheoryData<string, string> data, string nonCapChar, string toUpperChar)
          {
              var doubledInput = nonCapChar + nonCapChar;
              var doubledExpected = toUpperChar + toUpperChar;

              data.Add($"{nonCapChar}lorem ipsum", $"{toUpperChar}LOREM IPSUM");
              data.Add($"{doubledInput}lorem ipsum", $"{doubledExpected}LOREM IPSUM");
              data.Add($"lorem{nonCapChar}ipsum", $"LOREM{toUpperChar}IPSUM");
              data.Add($"lorem{doubledInput}ipsum", $"LOREM{doubledExpected}IPSUM");
              data.Add($"lorem ipsum{nonCapChar}", $"LOREM IPSUM{toUpperChar}");
              data.Add($"lorem ipsum{doubledInput}", $"LOREM IPSUM{doubledExpected}");
          }

          /// <summary>
          /// Builds all the cases from
          /// https://github.com/tc39/test262/blob/main/test/intl402/String/prototype/toLocaleUpperCase/special_casing_Lithuanian.js
          /// </summary>
          /// <returns>
          /// A newly created collection on every call; each row holds the non-uppercased string
          /// that will be processed by the engine and the expected result.
          /// </returns>
          /// <exception cref="global::System.InvalidOperationException">
          /// The two index-paired lookup tables do not have the same length.
          /// </exception>
          public TheoryData<string, string> TestData()
          {
              // The tables are consumed pairwise by index, so fail loudly if they ever drift apart.
              if (softDotted.Length != softDottedUpperCased.Length)
              {
                  throw new global::System.InvalidOperationException(
                      $"softDotted has {softDotted.Length} entries but softDottedUpperCased has {softDottedUpperCased.Length}.");
              }

              // Built locally so that repeated calls never accumulate duplicate rows.
              var data = new TheoryData<string, string>();

              // NOTE(review): the two literals below are meant to be a capital letter followed by
              // U+0307; confirm the "I" case has not been normalized to precomposed U+0130.
              // COMBINING DOT ABOVE (U+0307) not removed when uppercasing capital I
              AddStringsForChars(data, "İ", "İ");
              // COMBINING DOT ABOVE (U+0307) not removed when uppercasing capital J
              AddStringsForChars(data, "J̇", "J̇");

              for (var index = 0; index < softDotted.Length; index++)
              {
                  var lower = softDotted[index];
                  var upper = softDottedUpperCased[index];

                  // COMBINING DOT ABOVE (U+0307) removed when preceded by Soft_Dotted.
                  // Character directly preceded by Soft_Dotted.
                  AddStringsForChars(data, lower + CombiningDotAbove, upper);

                  // COMBINING DOT ABOVE (U+0307) removed if preceded by Soft_Dotted.
                  // Character not directly preceded by Soft_Dotted.
                  // - COMBINING DOT BELOW (U+0323), combining class 220 (Below)
                  AddStringsForChars(data, lower + "\u0323" + CombiningDotAbove, upper + "\u0323");

                  // COMBINING DOT ABOVE removed if preceded by Soft_Dotted.
                  // Character not directly preceded by Soft_Dotted.
                  // - PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE (U+101FD = D800 DDFD), combining class 220 (Below)
                  AddStringsForChars(data, lower + "\uD800\uDDFD" + CombiningDotAbove, upper + "\uD800\uDDFD");
              }

              return data;
          }
      }
  }