
[M]Fix issue#1436 (#1437)

* [M]Fix code error when checking UTF-8 data

Consider a 3-byte UTF-8 sequence [0xE4, 0x8A, 0xBC]: when b = 0xE4 (1110 0100), it is treated as a 2-byte sequence.

See this part:

```java
            if (b < 0x80) {
                // good
            }
            else if ((b & 0xC0) == 0xC0) {//   (0xE4 & 0xC0) == 0xC0     =====>  true
                utf8State = UTF8_2BYTE;
            }
            else if ((b & 0xE0) == 0xE0) {//   (0xE4 & 0xE0) == 0xE0      =====>  true
                utf8State = UTF8_3BYTE_1;
            }
            else {
                utf8State = UTF8_ILLEGAL;
            }
```

A 3-byte UTF-8 sequence will therefore always be treated as a 2-byte UTF-8 sequence, because the 2-byte check comes first (see the sketch below).
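
As a minimal standalone sketch (not part of this patch), the masks below show why the check order misclassifies the lead byte: for 0xE4 the 2-byte test already succeeds, so the 3-byte test is never reached. Testing the exact lead-byte patterns (110xxxxx for 2 bytes, 1110xxxx for 3 bytes) would distinguish them:

```java
public class Utf8LeadByteDemo {
    public static void main(String[] args) {
        int b = 0xE4; // 1110 0100, the lead byte of a 3-byte sequence

        // Order of checks in the original code: the 2-byte test fires first.
        System.out.println((b & 0xC0) == 0xC0); // true  -> classified as 2-byte
        System.out.println((b & 0xE0) == 0xE0); // true  -> never reached

        // Exact lead-byte patterns would classify it correctly.
        System.out.println((b & 0xE0) == 0xC0); // false -> not a 2-byte lead (110xxxxx)
        System.out.println((b & 0xF0) == 0xE0); // true  -> a 3-byte lead (1110xxxx)
    }
}
```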

It's better to always treat String data as UTF-8 now.

See https://hub.jmonkeyengine.org/t/code-error-on-checking-utf-8-data/43909

* [M]Use StandardCharsets.UTF_8 instead of constant 'UTF8'
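
A minimal sketch of the resulting behaviour (not code from the patch): the `String(byte[], Charset)` constructor throws no checked `UnsupportedEncodingException`, and its Javadoc guarantees that malformed input is replaced with the charset's replacement string rather than rejected, so the hand-rolled UTF-8 validity scan and the ISO8859_1 fallback are no longer needed:

```java
import java.nio.charset.StandardCharsets;

public class Utf8DecodeDemo {
    public static void main(String[] args) {
        // The 3-byte sequence from the issue decodes to a single char (U+42BC).
        byte[] valid = { (byte) 0xE4, (byte) 0x8A, (byte) 0xBC };
        System.out.println(new String(valid, StandardCharsets.UTF_8).length()); // 1

        // A malformed sequence is replaced with U+FFFD instead of throwing,
        // unlike the old new String(bytes, "UTF8") call, which also forced a
        // checked UnsupportedEncodingException to be handled.
        byte[] malformed = { (byte) 0xE4, (byte) 0x41 };
        String decoded = new String(malformed, StandardCharsets.UTF_8);
        System.out.println(decoded.contains("\uFFFD")); // true
    }
}
```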

Co-authored-by: 闫茂源 <[email protected]>
Yan, 4 years ago
parent commit d4a7ad7698

+ 3 - 115
jme3-core/src/plugins/java/com/jme3/export/binary/BinaryInputCapsule.java

@@ -37,11 +37,11 @@ import com.jme3.export.SavableClassUtil;
 import com.jme3.util.BufferUtils;
 import com.jme3.util.IntMap;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.nio.FloatBuffer;
 import java.nio.IntBuffer;
 import java.nio.ShortBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.HashMap;
@@ -1013,129 +1013,17 @@ final class BinaryInputCapsule implements InputCapsule {
         return value;
     }
 
-    /*
-     * UTF-8 crash course:
-     *
-     * UTF-8 codepoints map to UTF-16 codepoints and vv, which is what Java uses for its Strings.
-     * (so a UTF-8 codepoint can contain all possible values for a Java char)
-     *
-     * A UTF-8 codepoint can be 1, 2 or 3 bytes long. How long a codepint is can be told by reading the first byte:
-     * b < 0x80, 1 byte
-     * (b & 0xC0) == 0xC0, 2 bytes
-     * (b & 0xE0) == 0xE0, 3 bytes
-     *
-     * However there is an additional restriction to UTF-8, to enable you to find the start of a UTF-8 codepoint,
-     * if you start reading at a random point in a UTF-8 byte stream. That's why UTF-8 requires for the second and third byte of
-     * a multibyte codepoint:
-     * (b & 0x80) == 0x80  (in other words, first bit must be 1)
-     */
-    private final static int UTF8_START = 0; // next byte should be the start of a new
-    private final static int UTF8_2BYTE = 2; // next byte should be the second byte of a 2 byte codepoint
-    private final static int UTF8_3BYTE_1 = 3; // next byte should be the second byte of a 3 byte codepoint
-    private final static int UTF8_3BYTE_2 = 4; // next byte should be the third byte of a 3 byte codepoint
-    private final static int UTF8_ILLEGAL = 10; // not an UTF8 string
-
-    // String
     protected String readString(byte[] content) throws IOException {
         int length = readInt(content);
         if (length == BinaryOutputCapsule.NULL_OBJECT)
             return null;
 
-        /*
-         * @see ISSUE 276
-         *
-         * We'll transfer the bytes into a separate byte array.
-         * While we do that we'll take the opportunity to check if the byte data is valid UTF-8.
-         *
-         * If it is not UTF-8 it is most likely saved with the BinaryOutputCapsule bug, that saves Strings using their native
-         * encoding. Unfortunatly there is no way to know what encoding was used, so we'll parse using the most common one in
-         * that case; latin-1 aka ISO8859_1
-         *
-         * Encoding of "low" ASCII codepoint (in plain speak: when no special characters are used) will usually look the same
-         * for UTF-8 and the other 1 byte codepoint encodings (espc true for numbers and regular letters of the alphabet). So these
-         * are valid UTF-8 and will give the same result (at most a few charakters will appear different, such as the euro sign).
-         *
-         * However, when "high" codepoints are used (any codepoint that over 0x7F, in other words where the first bit is a 1) it's
-         * a different matter and UTF-8 and the 1 byte encoding greatly will differ, as well as most 1 byte encodings relative to each
-         * other.
-         *
-         * It is impossible to detect which one-byte encoding is used. Since UTF8 and practically all 1-byte encodings share the most
-         * used characters (the "none-high" ones) parsing them will give the same result. However, not all byte sequences are legal in
-         * UTF-8 (see explantion above). If not UTF-8 encoded content is detected we therefore fall back on latin1. We also log a warning.
-         *
-         * By this method we detect all use of 1 byte encoding if they:
-         * - use a "high" codepoint after a "low" codepoint or a sequence of codepoints that is valid as UTF-8 bytes, that starts with 1000
-         * - use a "low" codepoint after a "high" codepoint
-         * - use a "low" codepoint after "high" codepoint, after a "high" codepoint that starts with 1110
-         *
-         *  In practise this means that unless 2 or 3 "high" codepoints are used after each other in proper order, we'll detect the string
-         *  was not originally UTF-8 encoded.
-         *
-         */
         byte[] bytes = new byte[length];
-        int utf8State = UTF8_START;
-        int b;
         for (int x = 0; x < length; x++) {
             bytes[x] =  content[index++];
-            b = (int) bytes[x] & 0xFF; // unsign our byte
-
-            switch (utf8State) {
-            case UTF8_START:
-                if (b < 0x80) {
-                    // good
-                }
-                else if ((b & 0xC0) == 0xC0) {
-                    utf8State = UTF8_2BYTE;
-                }
-                else if ((b & 0xE0) == 0xE0) {
-                    utf8State = UTF8_3BYTE_1;
-                }
-                else {
-                    utf8State = UTF8_ILLEGAL;
-                }
-                break;
-            case UTF8_3BYTE_1:
-            case UTF8_3BYTE_2:
-            case UTF8_2BYTE:
-                 if ((b & 0x80) == 0x80)
-                    utf8State = utf8State == UTF8_3BYTE_1 ? UTF8_3BYTE_2 : UTF8_START;
-                 else
-                    utf8State = UTF8_ILLEGAL;
-                break;
-            }
         }
 
-        try {
-            // even though so far the parsing might have been a legal UTF-8 sequence, only if a codepoint is fully given is it correct UTF-8
-            if (utf8State == UTF8_START) {
-                // Java misspells UTF-8 as UTF8 for official use in java.lang
-                return new String(bytes, "UTF8");
-            }
-            else {
-                logger.log(
-                        Level.WARNING,
-                        "Your export has been saved with an incorrect encoding for its String fields which means it might not load correctly " +
-                        "due to encoding issues. You should probably re-export your work. See ISSUE 276 in the jME issue tracker."
-                );
-                // We use ISO8859_1 to be consistent across platforms. We could default to native encoding, but this would lead to inconsistent
-                // behaviour across platforms!
-                // Developers that have previously saved their exports using the old exporter (which uses native encoding), can temporarly
-                // remove the ""ISO8859_1" parameter, and change the above if statement to "if (false)".
-                // They should then import and re-export their models using the same environment they were originally created in.
-                return new String(bytes, "ISO8859_1");
-            }
-        } catch (UnsupportedEncodingException uee) {
-            // as a last resort fall back to platform native.
-            // JavaDoc is vague about what happens when a decoding a String that contains un undecodable sequence
-            // it also doesn't specify which encodings have to be supported (though UTF-8 and ISO8859 have been in the SUN JRE since at least 1.1)
-            logger.log(
-                    Level.SEVERE,
-                    "Your export has been saved with an incorrect encoding or your version of Java is unable to decode the stored string. " +
-                    "While your export may load correctly by falling back, using it on different platforms or java versions might lead to "+
-                    "very strange inconsitenties. You should probably re-export your work. See ISSUE 276 in the jME issue tracker."
-            );
-            return new String(bytes);
-        }
+        return new String(bytes, StandardCharsets.UTF_8);
     }
 
     protected String[] readStringArray(byte[] content) throws IOException {
@@ -1418,4 +1306,4 @@ final class BinaryInputCapsule implements InputCapsule {
             return null;
         }
     }
-}
+}

+ 2 - 2
jme3-core/src/plugins/java/com/jme3/export/binary/BinaryOutputCapsule.java

@@ -41,6 +41,7 @@ import java.nio.ByteBuffer;
 import java.nio.FloatBuffer;
 import java.nio.IntBuffer;
 import java.nio.ShortBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
@@ -684,8 +685,7 @@ final class BinaryOutputCapsule implements OutputCapsule {
             write(NULL_OBJECT);
             return;
         }
-        // write our output as UTF-8. Java misspells UTF-8 as UTF8 for official use in java.lang
-        byte[] bytes = value.getBytes("UTF8");
+        byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
         write(bytes.length);
         baos.write(bytes);
     }