6 жил өмнө · 776e2a84c3
--- a/netcore/System.Private.CoreLib/shared/System/Char.cs
+++ b/netcore/System.Private.CoreLib/shared/System/Char.cs
@@ -904,7 +904,14 @@ namespace System
 
				 
			
 
				         public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate)
			
 
				         {
			
 
				-            return IsHighSurrogate(highSurrogate) && IsLowSurrogate(lowSurrogate);
			
 
				+            // Since both the high and low surrogate ranges are exactly 0x400 elements
			
 
				+            // wide, and since this is a power of two, we can perform a single comparison
			
 
				+            // by baselining each value to the start of its respective range and taking
			
 
				+            // the bitwise OR of them.
			
 
				+
			
 
				+            uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
			
 
				+            uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
			
 
				+            return (highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE;
			
 
				         }
			
 
				 
			
 
				         internal const int UNICODE_PLANE00_END = 0x00ffff;
			
@@ -937,15 +944,44 @@ namespace System
 
				 
			
 
				         public static int ConvertToUtf32(char highSurrogate, char lowSurrogate)
			
 
				         {
			
 
				-            if (!IsHighSurrogate(highSurrogate))
			
 
				+            // First, extend both to 32 bits, then calculate the offset of
			
 
				+            // each candidate surrogate char from the start of its range.
			
 
				+
			
 
				+            uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
			
 
				+            uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
			
 
				+
			
 
				+            // This is a single comparison which allows us to check both for validity at once since
			
 
				+            // both the high surrogate range and the low surrogate range are the same length.
			
 
				+            // If the comparison fails, we call to a helper method to throw the correct exception message.
			
 
				+
			
 
				+            if ((highSurrogateOffset | lowSurrogateOffset) > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
			
 
				+            {
			
 
				+                ConvertToUtf32_ThrowInvalidArgs(highSurrogateOffset);
			
 
				+            }
			
 
				+
			
 
				+            // The 0x40 << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding.
			
 
				+            return ((int)highSurrogateOffset << 10) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40 << 10);
			
 
				+        }
			
 
				+
			
 
				+        [StackTraceHidden]
			
 
				+        private static void ConvertToUtf32_ThrowInvalidArgs(uint highSurrogateOffset)
			
 
				+        {
			
 
				+            // If the high surrogate is not within its expected range, throw an exception
			
 
				+            // whose message identifies it as invalid. If it's within the expected range,
			
 
				+            // change the message to read that the low surrogate was the problem.
			
 
				+
			
 
				+            if (highSurrogateOffset > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
			
 
				             {
			
 
				-                throw new ArgumentOutOfRangeException(nameof(highSurrogate), SR.ArgumentOutOfRange_InvalidHighSurrogate);
			
 
				+                throw new ArgumentOutOfRangeException(
			
 
				+                    paramName: "highSurrogate",
			
 
				+                    message: SR.ArgumentOutOfRange_InvalidHighSurrogate);
			
 
				             }
			
 
				-            if (!IsLowSurrogate(lowSurrogate))
			
 
				+            else
			
 
				             {
			
 
				-                throw new ArgumentOutOfRangeException(nameof(lowSurrogate), SR.ArgumentOutOfRange_InvalidLowSurrogate);
			
 
				+                throw new ArgumentOutOfRangeException(
			
 
				+                    paramName: "lowSurrogate",
			
 
				+                    message: SR.ArgumentOutOfRange_InvalidLowSurrogate);
			
 
				             }
			
 
				-            return (((highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START) * 0x400) + (lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + UNICODE_PLANE01_START);
			
 
				         }
			
 
				 
			
 
				         /*=============================ConvertToUtf32===================================
			
--- a/netcore/System.Private.CoreLib/shared/System/Text/Rune.cs
+++ b/netcore/System.Private.CoreLib/shared/System/Text/Rune.cs
@@ -58,6 +58,18 @@ namespace System.Text
 
				             _value = expanded;
			
 
				         }
			
 
				 
			
 
				+        /// <summary>
			
 
				+        /// Creates a <see cref="Rune"/> from the provided UTF-16 surrogate pair.
			
 
				+        /// </summary>
			
 
				+        /// <exception cref="ArgumentOutOfRangeException">
			
 
				+        /// If <paramref name="highSurrogate"/> does not represent a UTF-16 high surrogate code point
			
 
				+        /// or <paramref name="lowSurrogate"/> does not represent a UTF-16 low surrogate code point.
			
 
				+        /// </exception>
			
 
				+        public Rune(char highSurrogate, char lowSurrogate)
			
 
				+            : this((uint)char.ConvertToUtf32(highSurrogate, lowSurrogate), false)
			
 
				+        {
			
 
				+        }
			
 
				+
			
 
				         /// <summary>
			
 
				         /// Creates a <see cref="Rune"/> from the provided Unicode scalar value.
			
 
				         /// </summary>
			
@@ -364,6 +376,36 @@ namespace System.Text
 
				             }
			
 
				         }
			
 
				 
			
 
				+        /// <summary>
			
 
				+        /// Attempts to create a <see cref="Rune"/> from the provided UTF-16 surrogate pair.
			
 
				+        /// Returns <see langword="false"/> if the input values don't represent a well-formed UTF-16 surrogate pair.
			
 
				+        /// </summary>
			
 
				+        public static bool TryCreate(char highSurrogate, char lowSurrogate, out Rune result)
			
 
				+        {
			
 
				+            // First, extend both to 32 bits, then calculate the offset of
			
 
				+            // each candidate surrogate char from the start of its range.
			
 
				+
			
 
				+            uint highSurrogateOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
			
 
				+            uint lowSurrogateOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
			
 
				+
			
 
				+            // This is a single comparison which allows us to check both for validity at once since
			
 
				+            // both the high surrogate range and the low surrogate range are the same length.
			
 
				+            // If the comparison fails, we report failure to the caller (default result, false) rather than throwing.
			
 
				+
			
 
				+            if ((highSurrogateOffset | lowSurrogateOffset) <= CharUnicodeInfo.HIGH_SURROGATE_RANGE)
			
 
				+            {
			
 
				+                // The 0x40u << 10 below is to account for uuuuu = wwww + 1 in the surrogate encoding.
			
 
				+                result = UnsafeCreate((highSurrogateOffset << 10) + ((uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START) + (0x40u << 10));
			
 
				+                return true;
			
 
				+            }
			
 
				+            else
			
 
				+            {
			
 
				+                // Didn't have a high surrogate followed by a low surrogate.
			
 
				+                result = default;
			
 
				+                return false;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				         /// <summary>
			
 
				         /// Attempts to create a <see cref="Rune"/> from the provided input value.
			
 
				         /// </summary>
			
--- a/netcore/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs
+++ b/netcore/System.Private.CoreLib/shared/System/Text/UnicodeUtility.cs
@@ -169,12 +169,18 @@ namespace System.Text
 
				         [MethodImpl(MethodImplOptions.AggressiveInlining)]
			
 
				         public static bool IsValidUnicodeScalar(uint value)
			
 
				         {
			
 
				-            // By XORing the incoming value with 0xD800, surrogate code points
			
 
				-            // are moved to the range [ U+0000..U+07FF ], and all valid scalar
			
 
				-            // values are clustered into the single range [ U+0800..U+10FFFF ],
			
 
				-            // which allows performing a single fast range check.
			
 
				+            // This is an optimized check that on x86 is just three instructions: lea, xor, cmp.
			
 
				+            // 
			
 
				+            // After the subtraction operation, the input value is modified as such:
			
 
				+            // [ 00000000..0010FFFF ] -> [ FFEF0000..FFFFFFFF ]
			
 
				+            //
			
 
				+            // We now want to _exclude_ the range [ FFEFD800..FFEFDFFF ] (surrogates) from being valid.
			
 
				+            // After the xor, this particular exclusion range becomes [ FFEF0000..FFEF07FF ].
			
 
				+            //
			
 
				+            // So now the range [ FFEF0800..FFFFFFFF ] contains all valid code points,
			
 
				+            // excluding surrogates. This allows us to perform a single comparison.
			
 
				 
			
 
				-            return IsInRangeInclusive(value ^ 0xD800U, 0x800U, 0x10FFFFU);
			
 
				+            return ((value - 0x110000u) ^ 0xD800u) >= 0xFFEF0800u;
			
 
				         }
			
 
				     }
			
 
				 }