Przeglądaj źródła

Merge pull request #54625 from bruvzg/surrogates

Rémi Verschelde 3 lat temu
rodzic
commit
f9174d1c28

+ 36 - 6
core/ustring.cpp

@@ -1480,10 +1480,15 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 					skip = 2;
 				} else if ((c & 0xF8) == 0xF0) {
 					skip = 3;
+					if (sizeof(wchar_t) == 2) {
+						str_size++; // encode as surrogate pair.
+					}
 				} else if ((c & 0xFC) == 0xF8) {
 					skip = 4;
+					// invalid character, too long to encode as surrogates.
 				} else if ((c & 0xFE) == 0xFC) {
 					skip = 5;
+					// invalid character, too long to encode as surrogates.
 				} else {
 					_UNICERROR("invalid skip");
 					return true; //invalid utf8
@@ -1575,12 +1580,14 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 			}
 		}
 
-		//printf("char %i, len %i\n",unichar,len);
-		if (sizeof(wchar_t) == 2 && unichar > 0xFFFF) {
-			unichar = ' '; //too long for windows
+		if (sizeof(wchar_t) == 2 && unichar > 0x10FFFF) {
+			unichar = ' '; // invalid character, too long to encode as surrogates.
+		} else if (sizeof(wchar_t) == 2 && unichar > 0xFFFF) {
+			*(dst++) = uint32_t((unichar >> 10) + 0xD7C0); // lead surrogate.
+			*(dst++) = uint32_t((unichar & 0x3FF) | 0xDC00); // trail surrogate.
+		} else {
+			*(dst++) = unichar;
 		}
-
-		*(dst++) = unichar;
 		cstr_size -= len;
 		p_utf8 += len;
 	}
@@ -1598,6 +1605,18 @@ CharString String::utf8() const {
 	int fl = 0;
 	for (int i = 0; i < l; i++) {
 		uint32_t c = d[i];
+		if ((c & 0xfffffc00) == 0xd800) { // decode surrogate pair.
+			if ((i < l - 1) && (d[i + 1] & 0xfffffc00) == 0xdc00) {
+				c = (c << 10UL) + d[i + 1] - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+				i++; // skip trail surrogate.
+			} else {
+				fl += 1;
+				continue;
+			}
+		} else if ((c & 0xfffffc00) == 0xdc00) {
+			fl += 1;
+			continue;
+		}
 		if (c <= 0x7f) { // 7 bits.
 			fl += 1;
 		} else if (c <= 0x7ff) { // 11 bits
@@ -1606,7 +1625,6 @@ CharString String::utf8() const {
 			fl += 3;
 		} else if (c <= 0x001fffff) { // 21 bits
 			fl += 4;
-
 		} else if (c <= 0x03ffffff) { // 26 bits
 			fl += 5;
 		} else if (c <= 0x7fffffff) { // 31 bits
@@ -1626,6 +1644,18 @@ CharString String::utf8() const {
 
 	for (int i = 0; i < l; i++) {
 		uint32_t c = d[i];
+		if ((c & 0xfffffc00) == 0xd800) { // decode surrogate pair.
+			if ((i < l - 1) && (d[i + 1] & 0xfffffc00) == 0xdc00) {
+				c = (c << 10UL) + d[i + 1] - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+				i++; // skip trail surrogate.
+			} else {
+				APPEND_CHAR(' ');
+				continue;
+			}
+		} else if ((c & 0xfffffc00) == 0xdc00) {
+			APPEND_CHAR(' ');
+			continue;
+		}
 
 		if (c <= 0x7f) { // 7 bits.
 			APPEND_CHAR(c);

+ 36 - 11
scene/resources/dynamic_font.cpp

@@ -208,7 +208,7 @@ float DynamicFontAtSize::get_descent() const {
 	return descent;
 }
 
-const Pair<const DynamicFontAtSize::Character *, DynamicFontAtSize *> DynamicFontAtSize::_find_char_with_font(CharType p_char, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const {
+const Pair<const DynamicFontAtSize::Character *, DynamicFontAtSize *> DynamicFontAtSize::_find_char_with_font(int32_t p_char, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const {
 	const Character *chr = char_map.getptr(p_char);
 	ERR_FAIL_COND_V(!chr, (Pair<const Character *, DynamicFontAtSize *>(NULL, NULL)));
 
@@ -240,7 +240,7 @@ const Pair<const DynamicFontAtSize::Character *, DynamicFontAtSize *> DynamicFon
 	return Pair<const Character *, DynamicFontAtSize *>(chr, const_cast<DynamicFontAtSize *>(this));
 }
 
-float DynamicFontAtSize::_get_kerning_advance(const DynamicFontAtSize *font, CharType p_char, CharType p_next) const {
+float DynamicFontAtSize::_get_kerning_advance(const DynamicFontAtSize *font, int32_t p_char, int32_t p_next) const {
 	float advance = 0.0;
 
 	if (p_next) {
@@ -256,9 +256,20 @@ Size2 DynamicFontAtSize::get_char_size(CharType p_char, CharType p_next, const V
 	if (!valid) {
 		return Size2(1, 1);
 	}
-	const_cast<DynamicFontAtSize *>(this)->_update_char(p_char);
 
-	Pair<const Character *, DynamicFontAtSize *> char_pair_with_font = _find_char_with_font(p_char, p_fallbacks);
+	int32_t c = p_char;
+	bool skip_kerning = false;
+	if (((p_char & 0xfffffc00) == 0xd800) && (p_next & 0xfffffc00) == 0xdc00) { // decode surrogate pair.
+		c = (p_char << 10UL) + p_next - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+		skip_kerning = true;
+	}
+	if ((p_char & 0xfffffc00) == 0xdc00) { // skip trail surrogate.
+		return Size2();
+	}
+
+	const_cast<DynamicFontAtSize *>(this)->_update_char(c);
+
+	Pair<const Character *, DynamicFontAtSize *> char_pair_with_font = _find_char_with_font(c, p_fallbacks);
 	const Character *ch = char_pair_with_font.first;
 	DynamicFontAtSize *font = char_pair_with_font.second;
 	ERR_FAIL_COND_V(!ch, Size2());
@@ -268,7 +279,9 @@ Size2 DynamicFontAtSize::get_char_size(CharType p_char, CharType p_next, const V
 	if (ch->found) {
 		ret.x = ch->advance;
 	}
-	ret.x += _get_kerning_advance(font, p_char, p_next);
+	if (!skip_kerning) {
+		ret.x += _get_kerning_advance(font, p_char, p_next);
+	}
 
 	return ret;
 }
@@ -307,9 +320,19 @@ float DynamicFontAtSize::draw_char(RID p_canvas_item, const Point2 &p_pos, CharT
 		return 0;
 	}
 
-	const_cast<DynamicFontAtSize *>(this)->_update_char(p_char);
+	int32_t c = p_char;
+	bool skip_kerning = false;
+	if (((p_char & 0xfffffc00) == 0xd800) && (p_next & 0xfffffc00) == 0xdc00) { // decode surrogate pair.
+		c = (p_char << 10UL) + p_next - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+		skip_kerning = true;
+	}
+	if ((p_char & 0xfffffc00) == 0xdc00) { // skip trail surrogate.
+		return 0;
+	}
 
-	Pair<const Character *, DynamicFontAtSize *> char_pair_with_font = _find_char_with_font(p_char, p_fallbacks);
+	const_cast<DynamicFontAtSize *>(this)->_update_char(c);
+
+	Pair<const Character *, DynamicFontAtSize *> char_pair_with_font = _find_char_with_font(c, p_fallbacks);
 	const Character *ch = char_pair_with_font.first;
 	DynamicFontAtSize *font = char_pair_with_font.second;
 
@@ -320,7 +343,7 @@ float DynamicFontAtSize::draw_char(RID p_canvas_item, const Point2 &p_pos, CharT
 	// use normal character size if there's no outline character
 	if (p_outline && !ch->found) {
 		FT_GlyphSlot slot = face->glyph;
-		int error = FT_Load_Char(face, p_char, FT_HAS_COLOR(face) ? FT_LOAD_COLOR : FT_LOAD_DEFAULT);
+		int error = FT_Load_Char(face, c, FT_HAS_COLOR(face) ? FT_LOAD_COLOR : FT_LOAD_DEFAULT);
 		if (!error) {
 			error = FT_Render_Glyph(face->glyph, FT_RENDER_MODE_NORMAL);
 			if (!error) {
@@ -350,7 +373,9 @@ float DynamicFontAtSize::draw_char(RID p_canvas_item, const Point2 &p_pos, CharT
 		advance = ch->advance;
 	}
 
-	advance += _get_kerning_advance(font, p_char, p_next);
+	if (!skip_kerning) {
+		advance += _get_kerning_advance(font, p_char, p_next);
+	}
 
 	return advance;
 }
@@ -552,7 +577,7 @@ DynamicFontAtSize::Character DynamicFontAtSize::_bitmap_to_character(FT_Bitmap b
 	return chr;
 }
 
-DynamicFontAtSize::Character DynamicFontAtSize::_make_outline_char(CharType p_char) {
+DynamicFontAtSize::Character DynamicFontAtSize::_make_outline_char(int32_t p_char) {
 	Character ret = Character::not_found();
 
 	if (FT_Load_Char(face, p_char, FT_LOAD_NO_BITMAP | (font->force_autohinter ? FT_LOAD_FORCE_AUTOHINT : 0)) != 0) {
@@ -588,7 +613,7 @@ cleanup_stroker:
 	return ret;
 }
 
-void DynamicFontAtSize::_update_char(CharType p_char) {
+void DynamicFontAtSize::_update_char(int32_t p_char) {
 	if (char_map.has(p_char)) {
 		return;
 	}

+ 5 - 5
scene/resources/dynamic_font.h

@@ -162,15 +162,15 @@ class DynamicFontAtSize : public Reference {
 		int y;
 	};
 
-	const Pair<const Character *, DynamicFontAtSize *> _find_char_with_font(CharType p_char, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const;
-	Character _make_outline_char(CharType p_char);
-	float _get_kerning_advance(const DynamicFontAtSize *font, CharType p_char, CharType p_next) const;
+	const Pair<const Character *, DynamicFontAtSize *> _find_char_with_font(int32_t p_char, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const;
+	Character _make_outline_char(int32_t p_char);
+	float _get_kerning_advance(const DynamicFontAtSize *font, int32_t p_char, int32_t p_next) const;
 	TexturePosition _find_texture_pos_for_glyph(int p_color_size, Image::Format p_image_format, int p_width, int p_height);
 	Character _bitmap_to_character(FT_Bitmap bitmap, int yofs, int xofs, float advance);
 
-	HashMap<CharType, Character> char_map;
+	HashMap<int32_t, Character> char_map;
 
-	_FORCE_INLINE_ void _update_char(CharType p_char);
+	_FORCE_INLINE_ void _update_char(int32_t p_char);
 
 	friend class DynamicFontData;
 	Ref<DynamicFontData> font;

+ 39 - 19
scene/resources/font.cpp

@@ -125,7 +125,7 @@ void BitmapFont::_set_chars(const PoolVector<int> &p_chars) {
 PoolVector<int> BitmapFont::_get_chars() const {
 	PoolVector<int> chars;
 
-	const CharType *key = nullptr;
+	const int32_t *key = nullptr;
 
 	while ((key = char_map.next(key))) {
 		const Character *c = char_map.getptr(*key);
@@ -272,7 +272,7 @@ Error BitmapFont::create_from_fnt(const String &p_file) {
 				}
 			}
 		} else if (type == "char") {
-			CharType idx = 0;
+			int32_t idx = 0;
 			if (keys.has("id")) {
 				idx = keys["id"].to_int();
 			}
@@ -313,7 +313,7 @@ Error BitmapFont::create_from_fnt(const String &p_file) {
 			add_char(idx, texture, rect, ofs, advance);
 
 		} else if (type == "kerning") {
-			CharType first = 0, second = 0;
+			int32_t first = 0, second = 0;
 			int k = 0;
 
 			if (keys.has("first")) {
@@ -374,10 +374,10 @@ int BitmapFont::get_character_count() const {
 	return char_map.size();
 };
 
-Vector<CharType> BitmapFont::get_char_keys() const {
-	Vector<CharType> chars;
+Vector<int32_t> BitmapFont::get_char_keys() const {
+	Vector<int32_t> chars;
 	chars.resize(char_map.size());
-	const CharType *ct = nullptr;
+	const int32_t *ct = nullptr;
 	int count = 0;
 	while ((ct = char_map.next(ct))) {
 		chars.write[count++] = *ct;
@@ -386,7 +386,7 @@ Vector<CharType> BitmapFont::get_char_keys() const {
 	return chars;
 };
 
-BitmapFont::Character BitmapFont::get_character(CharType p_char) const {
+BitmapFont::Character BitmapFont::get_character(int32_t p_char) const {
 	if (!char_map.has(p_char)) {
 		ERR_FAIL_V(Character());
 	};
@@ -394,7 +394,7 @@ BitmapFont::Character BitmapFont::get_character(CharType p_char) const {
 	return char_map[p_char];
 };
 
-void BitmapFont::add_char(CharType p_char, int p_texture_idx, const Rect2 &p_rect, const Size2 &p_align, float p_advance) {
+void BitmapFont::add_char(int32_t p_char, int p_texture_idx, const Rect2 &p_rect, const Size2 &p_align, float p_advance) {
 	if (p_advance < 0) {
 		p_advance = p_rect.size.width;
 	}
@@ -409,7 +409,7 @@ void BitmapFont::add_char(CharType p_char, int p_texture_idx, const Rect2 &p_rec
 	char_map[p_char] = c;
 }
 
-void BitmapFont::add_kerning_pair(CharType p_A, CharType p_B, int p_kerning) {
+void BitmapFont::add_kerning_pair(int32_t p_A, int32_t p_B, int p_kerning) {
 	KerningPairKey kpk;
 	kpk.A = p_A;
 	kpk.B = p_B;
@@ -433,7 +433,7 @@ Vector<BitmapFont::KerningPairKey> BitmapFont::get_kerning_pair_keys() const {
 	return ret;
 }
 
-int BitmapFont::get_kerning_pair(CharType p_A, CharType p_B) const {
+int BitmapFont::get_kerning_pair(int32_t p_A, int32_t p_B) const {
 	KerningPairKey kpk;
 	kpk.A = p_A;
 	kpk.B = p_B;
@@ -524,7 +524,15 @@ Ref<BitmapFont> BitmapFont::get_fallback() const {
 }
 
 float BitmapFont::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, bool p_outline) const {
-	const Character *c = char_map.getptr(p_char);
+	int32_t ch = p_char;
+	if (((p_char & 0xfffffc00) == 0xd800) && (p_next & 0xfffffc00) == 0xdc00) { // decode surrogate pair.
+		ch = (p_char << 10UL) + p_next - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+	}
+	if ((p_char & 0xfffffc00) == 0xdc00) { // skip trail surrogate.
+		return 0;
+	}
+
+	const Character *c = char_map.getptr(ch);
 
 	if (!c) {
 		if (fallback.is_valid()) {
@@ -546,7 +554,17 @@ float BitmapFont::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_c
 }
 
 Size2 BitmapFont::get_char_size(CharType p_char, CharType p_next) const {
-	const Character *c = char_map.getptr(p_char);
+	int32_t ch = p_char;
+	bool skip_kerning = false;
+	if (((p_char & 0xfffffc00) == 0xd800) && (p_next & 0xfffffc00) == 0xdc00) { // decode surrogate pair.
+		ch = (p_char << 10UL) + p_next - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+		skip_kerning = true;
+	}
+	if ((p_char & 0xfffffc00) == 0xdc00) { // skip trail surrogate.
+		return Size2();
+	}
+
+	const Character *c = char_map.getptr(ch);
 
 	if (!c) {
 		if (fallback.is_valid()) {
@@ -557,14 +575,16 @@ Size2 BitmapFont::get_char_size(CharType p_char, CharType p_next) const {
 
 	Size2 ret(c->advance, c->rect.size.y);
 
-	if (p_next) {
-		KerningPairKey kpk;
-		kpk.A = p_char;
-		kpk.B = p_next;
+	if (!skip_kerning) {
+		if (p_next) {
+			KerningPairKey kpk;
+			kpk.A = p_char;
+			kpk.B = p_next;
 
-		const Map<KerningPairKey, int>::Element *E = kerning_map.find(kpk);
-		if (E) {
-			ret.width -= E->get();
+			const Map<KerningPairKey, int>::Element *E = kerning_map.find(kpk);
+			if (E) {
+				ret.width -= E->get();
+			}
 		}
 	}
 

+ 6 - 6
scene/resources/font.h

@@ -135,7 +135,7 @@ public:
 	};
 
 private:
-	HashMap<CharType, Character> char_map;
+	HashMap<int32_t, Character> char_map;
 	Map<KerningPairKey, int> kerning_map;
 
 	float height;
@@ -165,17 +165,17 @@ public:
 	float get_descent() const;
 
 	void add_texture(const Ref<Texture> &p_texture);
-	void add_char(CharType p_char, int p_texture_idx, const Rect2 &p_rect, const Size2 &p_align, float p_advance = -1);
+	void add_char(int32_t p_char, int p_texture_idx, const Rect2 &p_rect, const Size2 &p_align, float p_advance = -1);
 
 	int get_character_count() const;
-	Vector<CharType> get_char_keys() const;
-	Character get_character(CharType p_char) const;
+	Vector<int32_t> get_char_keys() const;
+	Character get_character(int32_t p_char) const;
 
 	int get_texture_count() const;
 	Ref<Texture> get_texture(int p_idx) const;
 
-	void add_kerning_pair(CharType p_A, CharType p_B, int p_kerning);
-	int get_kerning_pair(CharType p_A, CharType p_B) const;
+	void add_kerning_pair(int32_t p_A, int32_t p_B, int p_kerning);
+	int get_kerning_pair(int32_t p_A, int32_t p_B) const;
 	Vector<KerningPairKey> get_kerning_pair_keys() const;
 
 	Size2 get_char_size(CharType p_char, CharType p_next = 0) const;