Browse Source

Add support for detecting UTF-32 little-endian and big-endian byte order marks in StreamReader

Patch from Peter Dettman ([email protected])


svn path=/trunk/mcs/; revision=73779
Miguel de Icaza 19 years ago
parent
commit
c6e197166f

+ 33 - 4
mcs/class/corlib/System.IO/StreamReader.cs

@@ -259,13 +259,15 @@ namespace System.IO {
 				if (count < 2)
 					return 0;
 
-				if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff){
-					this.encoding = Encoding.BigEndianUnicode;
+#if !NET_2_0
+				if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe){
+					this.encoding = Encoding.Unicode;
 					return 2;
 				}
+#endif
 
-				if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe){
-					this.encoding = Encoding.Unicode;
+				if (input_buffer [0] == 0xfe && input_buffer [1] == 0xff){
+					this.encoding = Encoding.BigEndianUnicode;
 					return 2;
 				}
 
@@ -276,6 +278,33 @@ namespace System.IO {
 					this.encoding = Encoding.UTF8Unmarked;
 					return 3;
 				}
+
+#if NET_2_0
+				if (count < 4) {
+					if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe && input_buffer [2] != 0) {
+						this.encoding = Encoding.Unicode;
+						return 2;
+					}
+					return 0;
+				}
+
+				if (input_buffer [0] == 0 && input_buffer [1] == 0
+					&& input_buffer [2] == 0xfe && input_buffer [3] == 0xff)
+				{
+					this.encoding = Encoding.BigEndianUTF32;
+					return 4;
+				}
+
+				if (input_buffer [0] == 0xff && input_buffer [1] == 0xfe) {
+					if (input_buffer [2] == 0 && input_buffer[3] == 0) {
+						this.encoding = Encoding.UTF32;
+						return 4;
+					}
+
+					this.encoding = Encoding.Unicode;
+					return 2;
+				}
+#endif
 			}
 
 			return 0;

+ 1 - 1
mcs/class/corlib/System.Text/Encoding.cs

@@ -1046,7 +1046,7 @@ public abstract class Encoding
 	}
 
 	// Get the standard big-endian UTF-32 encoding object.
-	private static Encoding BigEndianUTF32
+	internal static Encoding BigEndianUTF32
 	{
 		get {
 			if (bigEndianUTF32Encoding == null) {

+ 3 - 3
mcs/class/corlib/System.Text/UTF32Encoding.cs

@@ -309,10 +309,10 @@ public sealed class UTF32Encoding : Encoding
 	public override byte[] GetPreamble ()
 	{
 		if (byteOrderMark) {
-			byte[] preamble = new byte[2];
+			byte[] preamble = new byte[4];
 			if (bigEndian) {
-				preamble[0] = (byte)0xFE;
-				preamble[1] = (byte)0xFF;
+				preamble[2] = (byte)0xFE;
+				preamble[3] = (byte)0xFF;
 			} else {
 				preamble[0] = (byte)0xFF;
 				preamble[1] = (byte)0xFE;

+ 33 - 0
mcs/class/corlib/Test/System.IO/StreamReaderTest.cs

@@ -19,6 +19,7 @@ public class StreamReaderTest
 {
 	static string TempFolder = Path.Combine (Path.GetTempPath (), "MonoTests.System.IO.Tests");
 	private string _codeFileName = TempFolder + Path.DirectorySeparatorChar + "AFile.txt";
+	private const string TestString = "Hello World!";
 
 	[SetUp]
 	public void SetUp ()
@@ -716,6 +717,38 @@ public class StreamReaderTest
 		Assert.AreEqual (24, reader.Read (new char[24], 0, 24));
 	}
 
+	[Test]
+	public void EncodingDetection()
+	{
+		if (!CheckEncodingDetected(Encoding.UTF8))
+			Assert.Fail ("Failed to detect UTF8 encoded string");
+		if (!CheckEncodingDetected(Encoding.Unicode))
+			Assert.Fail ("Failed to detect UTF16LE encoded string");
+		if (!CheckEncodingDetected(Encoding.BigEndianUnicode))
+			Assert.Fail ("Failed to detect UTF16BE encoded string");
+#if NET_2_0
+		if (!CheckEncodingDetected(Encoding.UTF32))
+			Assert.Fail ("Failed to detect UTF32LE encoded string");
+		if (!CheckEncodingDetected(new UTF32Encoding(true, true)))
+			Assert.Fail ("Failed to detect UTF32BE encoded string");
+#endif
+	}
+
+	private bool CheckEncodingDetected(Encoding encoding)
+	{
+		MemoryStream outStream = new MemoryStream();
+		using (StreamWriter outWriter = new StreamWriter(outStream, encoding))
+		{
+			outWriter.Write(TestString);
+		}
+		byte[] testBytes = outStream.ToArray();
+
+		StreamReader inReader = new StreamReader(new MemoryStream(testBytes, false));
+		string decodedString = inReader.ReadToEnd();
+
+		return decodedString == TestString;
+	}
+    
 	[Test]
 	public void bug75526 ()
 	{

+ 1 - 0
mcs/class/corlib/corlib_test.dll.sources

@@ -306,6 +306,7 @@ System.Text/TestEncoding.cs
 System.Text/UnicodeEncodingTest.cs
 System.Text/UTF7EncodingTest.cs
 System.Text/UTF8EncodingTest.cs
+System.Text/UTF32EncodingTest.cs
 System.Threading/AutoResetEventTest.cs
 System.Threading/CompressedStackTest.cs
 System.Threading/ExecutionContextTest.cs