|
|
@@ -31,7 +31,6 @@ using System.Runtime.InteropServices;
|
|
|
|
|
|
[Serializable]
|
|
|
[MonoLimitation ("Serialization format not compatible with .NET")]
|
|
|
-[MonoLimitation ("EncoderFallback is not handled")]
|
|
|
[ComVisible (true)]
|
|
|
public class UTF8Encoding : Encoding
|
|
|
{
|
|
|
@@ -51,9 +50,9 @@ public class UTF8Encoding : Encoding
|
|
|
{
|
|
|
emitIdentifier = encoderShouldEmitUTF8Identifier;
|
|
|
if (throwOnInvalidBytes)
|
|
|
- SetFallbackInternal (null, DecoderFallback.ExceptionFallback);
|
|
|
+ SetFallbackInternal (EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
|
|
|
else
|
|
|
- SetFallbackInternal (null, DecoderFallback.StandardSafeFallback);
|
|
|
+ SetFallbackInternal (EncoderFallback.StandardSafeFallback, DecoderFallback.StandardSafeFallback);
|
|
|
|
|
|
web_name = body_name = header_name = "utf-8";
|
|
|
encoding_name = "Unicode (UTF-8)";
|
|
|
@@ -68,7 +67,7 @@ public class UTF8Encoding : Encoding
|
|
|
|
|
|
// Internal version of "GetByteCount" which can handle a rolling
|
|
|
// state between multiple calls to this method.
|
|
|
- private static int InternalGetByteCount (char[] chars, int index, int count, ref char leftOver, bool flush)
|
|
|
+ private static int InternalGetByteCount (char[] chars, int index, int count, EncoderFallback fallback, ref char leftOver, bool flush)
|
|
|
{
|
|
|
// Validate the parameters.
|
|
|
if (chars == null) {
|
|
|
@@ -92,15 +91,17 @@ public class UTF8Encoding : Encoding
|
|
|
|
|
|
unsafe {
|
|
|
fixed (char* cptr = chars) {
|
|
|
- return InternalGetByteCount (cptr + index, count, ref leftOver, flush);
|
|
|
+ return InternalGetByteCount (cptr + index, count, fallback, ref leftOver, flush);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private unsafe static int InternalGetByteCount (char* chars, int count, ref char leftOver, bool flush)
|
|
|
+ private unsafe static int InternalGetByteCount (char* chars, int count, EncoderFallback fallback, ref char leftOver, bool flush)
|
|
|
{
|
|
|
int length = 0;
|
|
|
char* end = chars + count;
|
|
|
+ char* start = chars;
|
|
|
+ EncoderFallbackBuffer buffer = null;
|
|
|
while (chars < end) {
|
|
|
if (leftOver == 0) {
|
|
|
for (; chars < end; chars++) {
|
|
|
@@ -126,7 +127,12 @@ public class UTF8Encoding : Encoding
|
|
|
// leading surrogate. In NET_2_0 it
|
|
|
// uses fallback. In NET_1_1 we output
|
|
|
// wrong surrogate.
|
|
|
- length += 3;
|
|
|
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
|
|
|
+ fixed (char *fb_chars = fallback_chars) {
|
|
|
+ char dummy = '\0';
|
|
|
+ length += InternalGetByteCount (fb_chars, fallback_chars.Length, fallback, ref dummy, true);
|
|
|
+ }
|
|
|
+
|
|
|
leftOver = '\0';
|
|
|
}
|
|
|
}
|
|
|
@@ -141,7 +147,11 @@ public class UTF8Encoding : Encoding
|
|
|
// invalid, but we have to do something.
|
|
|
// We write out the surrogate start and then
|
|
|
// re-visit the current character again.
|
|
|
- length += 3;
|
|
|
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
|
|
|
+ fixed (char *fb_chars = fallback_chars) {
|
|
|
+ char dummy = '\0';
|
|
|
+ length += InternalGetByteCount (fb_chars, fallback_chars.Length, fallback, ref dummy, true);
|
|
|
+ }
|
|
|
}
|
|
|
leftOver = '\0';
|
|
|
}
|
|
|
@@ -156,11 +166,27 @@ public class UTF8Encoding : Encoding
|
|
|
return length;
|
|
|
}
|
|
|
|
|
|
+ unsafe static char [] GetFallbackChars (char *chars, char *start, EncoderFallback fallback, ref EncoderFallbackBuffer buffer)
|
|
|
+ {
|
|
|
+ if (buffer == null)
|
|
|
+ buffer = fallback.CreateFallbackBuffer ();
|
|
|
+
|
|
|
+ buffer.Fallback (*chars, (int) (chars - start));
|
|
|
+
|
|
|
+ char [] fallback_chars = new char [buffer.Remaining];
|
|
|
+ for (int i = 0; i < fallback_chars.Length; i++)
|
|
|
+ fallback_chars [i] = buffer.GetNextChar ();
|
|
|
+
|
|
|
+ buffer.Reset ();
|
|
|
+
|
|
|
+ return fallback_chars;
|
|
|
+ }
|
|
|
+
|
|
|
// Get the number of bytes needed to encode a character buffer.
|
|
|
public override int GetByteCount (char[] chars, int index, int count)
|
|
|
{
|
|
|
char dummy = '\0';
|
|
|
- return InternalGetByteCount (chars, index, count, ref dummy, true);
|
|
|
+ return InternalGetByteCount (chars, index, count, EncoderFallback, ref dummy, true);
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -173,7 +199,7 @@ public class UTF8Encoding : Encoding
|
|
|
if (count == 0)
|
|
|
return 0;
|
|
|
char dummy = '\0';
|
|
|
- return InternalGetByteCount (chars, count, ref dummy, true);
|
|
|
+ return InternalGetByteCount (chars, count, EncoderFallback, ref dummy, true);
|
|
|
}
|
|
|
|
|
|
#endregion
|
|
|
@@ -184,8 +210,9 @@ public class UTF8Encoding : Encoding
|
|
|
// state between multiple calls to this method.
|
|
|
private static int InternalGetBytes (char[] chars, int charIndex,
|
|
|
int charCount, byte[] bytes,
|
|
|
- int byteIndex, ref char leftOver,
|
|
|
- bool flush)
|
|
|
+ int byteIndex,
|
|
|
+ EncoderFallback fallback, ref EncoderFallbackBuffer buffer,
|
|
|
+ ref char leftOver, bool flush)
|
|
|
{
|
|
|
// Validate the parameters.
|
|
|
if (chars == null) {
|
|
|
@@ -219,20 +246,23 @@ public class UTF8Encoding : Encoding
|
|
|
if (bytes.Length == byteIndex)
|
|
|
return InternalGetBytes (
|
|
|
cptr + charIndex, charCount,
|
|
|
- null, 0, ref leftOver, flush);
|
|
|
+ null, 0, fallback, ref buffer, ref leftOver, flush);
|
|
|
fixed (byte *bptr = bytes) {
|
|
|
return InternalGetBytes (
|
|
|
cptr + charIndex, charCount,
|
|
|
bptr + byteIndex, bytes.Length - byteIndex,
|
|
|
+ fallback, ref buffer,
|
|
|
ref leftOver, flush);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, ref char leftOver, bool flush)
|
|
|
+ private unsafe static int InternalGetBytes (char* chars, int count, byte* bytes, int bcount, EncoderFallback fallback, ref EncoderFallbackBuffer buffer, ref char leftOver, bool flush)
|
|
|
{
|
|
|
char* end = chars + count;
|
|
|
+ char* start = chars;
|
|
|
+ byte* start_bytes = bytes;
|
|
|
byte* end_bytes = bytes + bcount;
|
|
|
while (chars < end) {
|
|
|
if (leftOver == 0) {
|
|
|
@@ -265,12 +295,14 @@ public class UTF8Encoding : Encoding
|
|
|
// leading surrogate. In NET_2_0 it
|
|
|
// uses fallback. In NET_1_1 we output
|
|
|
// wrong surrogate.
|
|
|
- if (bytes + 2 >= end_bytes)
|
|
|
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
|
|
|
+ char dummy = '\0';
|
|
|
+ if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
|
|
|
goto fail_no_space;
|
|
|
- bytes [0] = (byte) (0xE0 | (ch >> 12));
|
|
|
- bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
|
|
|
- bytes [2] = (byte) (0x80 | (ch & 0x3F));
|
|
|
- bytes += 3;
|
|
|
+ fixed (char *fb_chars = fallback_chars) {
|
|
|
+ bytes += InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
|
|
|
+ }
|
|
|
+
|
|
|
leftOver = '\0';
|
|
|
}
|
|
|
}
|
|
|
@@ -292,13 +324,15 @@ public class UTF8Encoding : Encoding
|
|
|
// invalid, but we have to do something.
|
|
|
// We write out the surrogate start and then
|
|
|
// re-visit the current character again.
|
|
|
- int ch = leftOver;
|
|
|
- if (bytes + 2 >= end_bytes)
|
|
|
+ char [] fallback_chars = GetFallbackChars (chars, start, fallback, ref buffer);
|
|
|
+ char dummy = '\0';
|
|
|
+ if (bytes + InternalGetByteCount (fallback_chars, 0, fallback_chars.Length, fallback, ref dummy, true) > end_bytes)
|
|
|
goto fail_no_space;
|
|
|
- bytes [0] = (byte) (0xE0 | (ch >> 12));
|
|
|
- bytes [1] = (byte) (0x80 | ((ch >> 6) & 0x3F));
|
|
|
- bytes [2] = (byte) (0x80 | (ch & 0x3F));
|
|
|
- bytes += 3;
|
|
|
+ fixed (char *fb_chars = fallback_chars) {
|
|
|
+ InternalGetBytes (fb_chars, fallback_chars.Length, bytes, bcount - (int) (bytes - start_bytes), fallback, ref buffer, ref dummy, true);
|
|
|
+ }
|
|
|
+
|
|
|
+ leftOver = '\0';
|
|
|
}
|
|
|
leftOver = '\0';
|
|
|
}
|
|
|
@@ -328,7 +362,8 @@ fail_no_space:
|
|
|
byte[] bytes, int byteIndex)
|
|
|
{
|
|
|
char leftOver = '\0';
|
|
|
- return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref leftOver, true);
|
|
|
+ EncoderFallbackBuffer buffer = null;
|
|
|
+ return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, EncoderFallback, ref buffer, ref leftOver, true);
|
|
|
}
|
|
|
|
|
|
// Convenience wrappers for "GetBytes".
|
|
|
@@ -358,14 +393,16 @@ fail_no_space:
|
|
|
unsafe {
|
|
|
fixed (char* cptr = s) {
|
|
|
char dummy = '\0';
|
|
|
+ EncoderFallbackBuffer buffer = null;
|
|
|
if (bytes.Length == byteIndex)
|
|
|
return InternalGetBytes (
|
|
|
cptr + charIndex, charCount,
|
|
|
- null, 0, ref dummy, true);
|
|
|
+ null, 0, EncoderFallback, ref buffer, ref dummy, true);
|
|
|
fixed (byte *bptr = bytes) {
|
|
|
return InternalGetBytes (
|
|
|
cptr + charIndex, charCount,
|
|
|
bptr + byteIndex, bytes.Length - byteIndex,
|
|
|
+ EncoderFallback, ref buffer,
|
|
|
ref dummy, true);
|
|
|
}
|
|
|
}
|
|
|
@@ -389,10 +426,11 @@ fail_no_space:
|
|
|
return 0;
|
|
|
|
|
|
char dummy = '\0';
|
|
|
+ EncoderFallbackBuffer buffer = null;
|
|
|
if (byteCount == 0)
|
|
|
- return InternalGetBytes (chars, charCount, null, 0, ref dummy, true);
|
|
|
+ return InternalGetBytes (chars, charCount, null, 0, EncoderFallback, ref buffer, ref dummy, true);
|
|
|
else
|
|
|
- return InternalGetBytes (chars, charCount, bytes, byteCount, ref dummy, true);
|
|
|
+ return InternalGetBytes (chars, charCount, bytes, byteCount, EncoderFallback, ref buffer, ref dummy, true);
|
|
|
}
|
|
|
|
|
|
#endregion
|
|
|
@@ -836,7 +874,7 @@ fail_no_space:
|
|
|
// Get a UTF8-specific encoder that is attached to this instance.
|
|
|
public override Encoder GetEncoder ()
|
|
|
{
|
|
|
- return new UTF8Encoder (emitIdentifier);
|
|
|
+ return new UTF8Encoder (EncoderFallback, emitIdentifier);
|
|
|
}
|
|
|
|
|
|
// Get the UTF8 preamble.
|
|
|
@@ -924,8 +962,9 @@ fail_no_space:
|
|
|
private char leftOverForConv;
|
|
|
|
|
|
// Constructor.
|
|
|
- public UTF8Encoder (bool emitIdentifier)
|
|
|
+ public UTF8Encoder (EncoderFallback fallback, bool emitIdentifier)
|
|
|
{
|
|
|
+ Fallback = fallback;
|
|
|
// this.emitIdentifier = emitIdentifier;
|
|
|
leftOverForCount = '\0';
|
|
|
leftOverForConv = '\0';
|
|
|
@@ -935,27 +974,29 @@ fail_no_space:
|
|
|
public override int GetByteCount (char[] chars, int index,
|
|
|
int count, bool flush)
|
|
|
{
|
|
|
- return InternalGetByteCount (chars, index, count, ref leftOverForCount, flush);
|
|
|
+ return InternalGetByteCount (chars, index, count, Fallback, ref leftOverForCount, flush);
|
|
|
}
|
|
|
public override int GetBytes (char[] chars, int charIndex,
|
|
|
int charCount, byte[] bytes, int byteIndex, bool flush)
|
|
|
{
|
|
|
int result;
|
|
|
- result = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref leftOverForConv, flush);
|
|
|
+ EncoderFallbackBuffer buffer = null;
|
|
|
+ result = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, Fallback, ref buffer, ref leftOverForConv, flush);
|
|
|
// emitIdentifier = false;
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
public unsafe override int GetByteCount (char* chars, int count, bool flush)
|
|
|
{
|
|
|
- return InternalGetByteCount (chars, count, ref leftOverForCount, flush);
|
|
|
+ return InternalGetByteCount (chars, count, Fallback, ref leftOverForCount, flush);
|
|
|
}
|
|
|
|
|
|
public unsafe override int GetBytes (char* chars, int charCount,
|
|
|
byte* bytes, int byteCount, bool flush)
|
|
|
{
|
|
|
int result;
|
|
|
- result = InternalGetBytes (chars, charCount, bytes, byteCount, ref leftOverForConv, flush);
|
|
|
+ EncoderFallbackBuffer buffer = null;
|
|
|
+ result = InternalGetBytes (chars, charCount, bytes, byteCount, Fallback, ref buffer, ref leftOverForConv, flush);
|
|
|
// emitIdentifier = false;
|
|
|
return result;
|
|
|
}
|