瀏覽代碼

HarfBuzz font engine: Fix unsupported glyph cluster rendering and emoji rendering (#837)

Hannaford Schäfer 2 月之前
父節點
當前提交
9363865c6b

+ 3 - 0
Include/RmlUi/Core/StringUtilities.h

@@ -112,6 +112,9 @@ namespace StringUtilities {
 	// Decode the first code point in a zero-terminated UTF-8 string.
 	// Decode the first code point in a zero-terminated UTF-8 string.
 	RMLUICORE_API Character ToCharacter(const char* p, const char* p_end);
 	RMLUICORE_API Character ToCharacter(const char* p, const char* p_end);
 
 
+	/// Returns the number of bytes needed to encode the given character (code point) as UTF-8.
+	RMLUICORE_API size_t BytesUTF8(Character character);
+
 	// Encode a single code point as a UTF-8 string.
 	// Encode a single code point as a UTF-8 string.
 	RMLUICORE_API String ToUTF8(Character character);
 	RMLUICORE_API String ToUTF8(Character character);
 
 

+ 269 - 47
Samples/basic/harfbuzz/src/FontFaceHandleHarfBuzz.cpp

@@ -37,10 +37,11 @@
 #include <hb-ft.h>
 #include <hb-ft.h>
 #include <hb.h>
 #include <hb.h>
 #include <numeric>
 #include <numeric>
+#include <utility>
 
 
-static bool IsASCIIControlCharacter(Character c)
+static bool IsControlCharacter(Character c)
 {
 {
-	return (char32_t)c < ' ';
+	return (char32_t)c < U' ' || ((char32_t)c >= U'\x7F' && (char32_t)c <= U'\x9F');
 }
 }
 
 
 FontFaceHandleHarfBuzz::FontFaceHandleHarfBuzz()
 FontFaceHandleHarfBuzz::FontFaceHandleHarfBuzz()
@@ -95,6 +96,11 @@ const FallbackFontGlyphMap& FontFaceHandleHarfBuzz::GetFallbackGlyphs() const
 	return fallback_glyphs;
 	return fallback_glyphs;
 }
 }
 
 
+const FallbackFontClusterGlyphsMap& FontFaceHandleHarfBuzz::GetFallbackClusterGlyphs() const
+{ // Read-only accessor for this handle's map of fallback cluster glyphs.
+	return fallback_cluster_glyphs; // Returned by const reference; the map is not copied.
+}
+
 int FontFaceHandleHarfBuzz::GetStringWidth(StringView string, const TextShapingContext& text_shaping_context,
 int FontFaceHandleHarfBuzz::GetStringWidth(StringView string, const TextShapingContext& text_shaping_context,
 	const LanguageDataMap& registered_languages, Character /*prior_character*/)
 	const LanguageDataMap& registered_languages, Character /*prior_character*/)
 {
 {
@@ -103,7 +109,7 @@ int FontFaceHandleHarfBuzz::GetStringWidth(StringView string, const TextShapingC
 	// Apply text shaping.
 	// Apply text shaping.
 	hb_buffer_t* shaping_buffer = hb_buffer_create();
 	hb_buffer_t* shaping_buffer = hb_buffer_create();
 	RMLUI_ASSERT(shaping_buffer != nullptr);
 	RMLUI_ASSERT(shaping_buffer != nullptr);
-	ConfigureTextShapingBuffer(shaping_buffer, string, text_shaping_context, registered_languages);
+	ConfigureTextShapingBuffer(shaping_buffer, string, text_shaping_context, registered_languages, nullptr);
 	hb_buffer_add_utf8(shaping_buffer, string.begin(), (int)string.size(), 0, (int)string.size());
 	hb_buffer_add_utf8(shaping_buffer, string.begin(), (int)string.size(), 0, (int)string.size());
 	hb_shape(hb_font, shaping_buffer, nullptr, 0);
 	hb_shape(hb_font, shaping_buffer, nullptr, 0);
 
 
@@ -111,26 +117,64 @@ int FontFaceHandleHarfBuzz::GetStringWidth(StringView string, const TextShapingC
 	hb_glyph_info_t* glyph_info = hb_buffer_get_glyph_infos(shaping_buffer, &glyph_count);
 	hb_glyph_info_t* glyph_info = hb_buffer_get_glyph_infos(shaping_buffer, &glyph_count);
 	hb_glyph_position_t* glyph_positions = hb_buffer_get_glyph_positions(shaping_buffer, &glyph_count);
 	hb_glyph_position_t* glyph_positions = hb_buffer_get_glyph_positions(shaping_buffer, &glyph_count);
 
 
+	Queue<Pair<FontGlyphIndex, const FontGlyph*>> glyph_queue;
+
 	for (int g = 0; g < (int)glyph_count; ++g)
 	for (int g = 0; g < (int)glyph_count; ++g)
 	{
 	{
-		// Don't render control characters.
 		Character character = Rml::StringUtilities::ToCharacter(string.begin() + glyph_info[g].cluster, string.end());
 		Character character = Rml::StringUtilities::ToCharacter(string.begin() + glyph_info[g].cluster, string.end());
-		if (IsASCIIControlCharacter(character))
-			continue;
 
 
-		FontGlyphIndex glyph_index = glyph_info[g].codepoint;
-		const FontGlyph* glyph = GetOrAppendGlyph(glyph_index, character);
-		if (!glyph)
+		// Don't render control characters.
+		if (IsControlCharacter(character))
 			continue;
 			continue;
 
 
-		// Adjust the cursor for this character's advance.
-		if (glyph_index != 0)
-			width += glyph_positions[g].x_advance >> 6;
-		else
-			// Use the unshaped advance for unsupported characters.
-			width += glyph->advance;
+		const FontGlyphIndex glyph_index = glyph_info[g].codepoint;
+		int extra_glyph_index_offset = 0;
+
+		if (glyph_index == 0)
+		{
+			// Check to see if the glyph is the start of an unsupported multi-character cluster.
+			int cluster_codepoint_count = 0;
+			StringView cluster_string = GetCurrentClusterString(glyph_info, glyph_count, g, character, string, cluster_codepoint_count);
+
+			if (cluster_codepoint_count > 1)
+			{
+				// Unsupported cluster detected; use fallback cluster glyph if one is available.
+				const Vector<FontClusterGlyphData>* cluster_glyphs =
+					GetOrAppendFallbackClusterGlyphs(cluster_string, text_shaping_context, registered_languages);
+
+				if (cluster_glyphs)
+				{
+					extra_glyph_index_offset = cluster_codepoint_count - 1;
+					for (const auto& cluster_glyph : *cluster_glyphs)
+						glyph_queue.emplace(cluster_glyph.glyph_index, &cluster_glyph.glyph_data.bitmap);
+				}
+			}
+		}
 
 
-		width += (int)text_shaping_context.letter_spacing;
+		if (glyph_queue.empty())
+		{
+			const FontGlyph* glyph = GetOrAppendGlyph(glyph_index, character);
+			if (glyph)
+				glyph_queue.emplace(glyph_index, glyph);
+		}
+
+		while (!glyph_queue.empty())
+		{
+			auto& glyph_pair = glyph_queue.front();
+
+			// Adjust the cursor for this character's advance.
+			if (glyph_info[g].codepoint != 0)
+				width += glyph_positions[g].x_advance >> 6;
+			else
+				// Use the unshaped advance for unsupported characters.
+				width += glyph_pair.second->advance;
+
+			width += (int)text_shaping_context.letter_spacing;
+
+			glyph_queue.pop();
+		}
+		
+		g += extra_glyph_index_offset;
 	}
 	}
 
 
 	hb_buffer_destroy(shaping_buffer);
 	hb_buffer_destroy(shaping_buffer);
@@ -218,7 +262,8 @@ bool FontFaceHandleHarfBuzz::GenerateLayerTexture(Vector<byte>& texture_data, Ve
 		return false;
 		return false;
 	}
 	}
 
 
-	return it->layer->GenerateTexture(texture_data, texture_dimensions, texture_id, glyphs, fallback_glyphs);
+	return it->layer->GenerateTexture(texture_data, texture_dimensions, texture_id,
+		FontGlyphMaps{&glyphs, &fallback_glyphs, &fallback_cluster_glyphs_lookup});
 }
 }
 
 
 int FontFaceHandleHarfBuzz::GenerateString(RenderManager& render_manager, TexturedMeshList& mesh_list, StringView string, const Vector2f position,
 int FontFaceHandleHarfBuzz::GenerateString(RenderManager& render_manager, TexturedMeshList& mesh_list, StringView string, const Vector2f position,
@@ -269,7 +314,7 @@ int FontFaceHandleHarfBuzz::GenerateString(RenderManager& render_manager, Textur
 
 
 		// Set up and apply text shaping.
 		// Set up and apply text shaping.
 		hb_buffer_clear_contents(shaping_buffer);
 		hb_buffer_clear_contents(shaping_buffer);
-		ConfigureTextShapingBuffer(shaping_buffer, string, text_shaping_context, registered_languages);
+		ConfigureTextShapingBuffer(shaping_buffer, string, text_shaping_context, registered_languages, nullptr);
 		hb_buffer_add_utf8(shaping_buffer, string.begin(), (int)string.size(), 0, (int)string.size());
 		hb_buffer_add_utf8(shaping_buffer, string.begin(), (int)string.size(), 0, (int)string.size());
 		hb_shape(hb_font, shaping_buffer, nullptr, 0);
 		hb_shape(hb_font, shaping_buffer, nullptr, 0);
 
 
@@ -280,36 +325,76 @@ int FontFaceHandleHarfBuzz::GenerateString(RenderManager& render_manager, Textur
 		mesh_list[geometry_index].mesh.indices.reserve(string.size() * 6);
 		mesh_list[geometry_index].mesh.indices.reserve(string.size() * 6);
 		mesh_list[geometry_index].mesh.vertices.reserve(string.size() * 4);
 		mesh_list[geometry_index].mesh.vertices.reserve(string.size() * 4);
 
 
+		Queue<Pair<FontGlyphIndex, FontGlyphReference>> glyph_queue;
+
 		for (int g = 0; g < (int)glyph_count; ++g)
 		for (int g = 0; g < (int)glyph_count; ++g)
 		{
 		{
 			Character character = Rml::StringUtilities::ToCharacter(string.begin() + glyph_info[g].cluster, string.end());
 			Character character = Rml::StringUtilities::ToCharacter(string.begin() + glyph_info[g].cluster, string.end());
 
 
 			// Don't render control characters.
 			// Don't render control characters.
-			if (IsASCIIControlCharacter(character))
+			if (IsControlCharacter(character))
 				continue;
 				continue;
 
 
-			FontGlyphIndex glyph_index = glyph_info[g].codepoint;
-			const FontGlyph* glyph = GetOrAppendGlyph(glyph_index, character);
-			if (!glyph)
-				continue;
+			const FontGlyphIndex glyph_index = glyph_info[g].codepoint;
+			int extra_glyph_index_offset = 0;
+			bool is_cluster = false;
 
 
-			ColourbPremultiplied glyph_color = layer_colour;
-			// Use white vertex colors on RGB glyphs.
-			if (layer == base_layer && glyph->color_format == ColorFormat::RGBA8)
-				glyph_color = ColourbPremultiplied(layer_colour.alpha, layer_colour.alpha);
+			if (glyph_index == 0)
+			{
+				// Check to see if the glyph is the start of an unsupported multi-character cluster.
+				int cluster_codepoint_count = 0;
+				StringView cluster_string = GetCurrentClusterString(glyph_info, glyph_count, g, character, string, cluster_codepoint_count);
+
+				if (cluster_codepoint_count > 1)
+				{
+					// Unsupported cluster detected; use fallback cluster glyph if one is available.
+					const Vector<FontClusterGlyphData>* cluster_glyphs = GetOrAppendFallbackClusterGlyphs(cluster_string, text_shaping_context, registered_languages);
+					
+					if (cluster_glyphs)
+					{
+						extra_glyph_index_offset = cluster_codepoint_count - 1;
+						is_cluster = true;
+
+						for (const auto& cluster_glyph : *cluster_glyphs)
+							glyph_queue.emplace(cluster_glyph.glyph_index, FontGlyphReference{&cluster_glyph.glyph_data.bitmap, cluster_glyph.glyph_data.character});
+					}
+				}
+			}
 
 
-			Vector2f glyph_offset = {float(glyph_positions[g].x_offset >> 6), float(glyph_positions[g].y_offset >> 6)};
-			Vector2f glyph_geometry_position = Vector2f{position.x + line_width, position.y} + glyph_offset;
-			layer->GenerateGeometry(&mesh_list[geometry_index], glyph_index, character, glyph_geometry_position, glyph_color);
+			if (glyph_queue.empty())
+			{
+				const FontGlyph* glyph = GetOrAppendGlyph(glyph_index, character);
+				if (glyph)
+					glyph_queue.emplace(glyph_index, FontGlyphReference{glyph, character});
+			}
 
 
-			// Adjust the cursor for this character's advance.
-			if (glyph_index != 0)
-				line_width += glyph_positions[g].x_advance >> 6;
-			else
-				// Use the unshaped advance for unsupported characters.
-				line_width += glyph->advance;
+			while (!glyph_queue.empty())
+			{
+				auto& glyph_pair = glyph_queue.front();
 
 
-			line_width += (int)text_shaping_context.letter_spacing;
+				ColourbPremultiplied glyph_color = layer_colour;
+				// Use white vertex colors on RGB glyphs.
+				if (layer == base_layer && glyph_pair.second.bitmap->color_format == ColorFormat::RGBA8)
+					glyph_color = ColourbPremultiplied(layer_colour.alpha, layer_colour.alpha);
+
+				Vector2f glyph_offset = {0.0f, 0.0f};
+				glyph_offset += Vector2f{float(glyph_positions[g].x_offset >> 6), float(glyph_positions[g].y_offset >> 6)};
+				Vector2f glyph_geometry_position = Vector2f{position.x + line_width, position.y} + glyph_offset;
+				layer->GenerateGeometry(&mesh_list[geometry_index], glyph_pair.first, glyph_pair.second.character, is_cluster, glyph_geometry_position, glyph_color);
+
+				// Adjust the cursor for this character's advance.
+				if (glyph_info[g].codepoint != 0)
+					line_width += glyph_positions[g].x_advance >> 6;
+				else
+					// Use the unshaped advance for unsupported characters.
+					line_width += glyph_pair.second.bitmap->advance;
+
+				line_width += (int)text_shaping_context.letter_spacing;
+
+				glyph_queue.pop();
+			}
+
+			g += extra_glyph_index_offset;
 		}
 		}
 
 
 		geometry_index += num_textures;
 		geometry_index += num_textures;
@@ -355,7 +440,7 @@ bool FontFaceHandleHarfBuzz::AppendGlyph(FontGlyphIndex glyph_index, Character c
 	return result;
 	return result;
 }
 }
 
 
-bool FontFaceHandleHarfBuzz::AppendFallbackGlyph(Character character)
+bool FontFaceHandleHarfBuzz::AppendFallbackGlyph(Character& character)
 {
 {
 	const int num_fallback_faces = FontProvider::CountFallbackFontFaces();
 	const int num_fallback_faces = FontProvider::CountFallbackFontFaces();
 	for (int i = 0; i < num_fallback_faces; i++)
 	for (int i = 0; i < num_fallback_faces; i++)
@@ -416,12 +501,14 @@ const FontGlyph* FontFaceHandleHarfBuzz::GetOrAppendGlyph(FontGlyphIndex glyph_i
 
 
 	if (glyph_index == 0)
 	if (glyph_index == 0)
 		character = Character::Replacement;
 		character = Character::Replacement;
+	else if (character != glyph_location->second.character)
+		character = glyph_location->second.character;
 
 
 	const FontGlyph* glyph = &glyph_location->second.bitmap;
 	const FontGlyph* glyph = &glyph_location->second.bitmap;
 	return glyph;
 	return glyph;
 }
 }
 
 
-const FontGlyph* FontFaceHandleHarfBuzz::GetOrAppendFallbackGlyph(Character character)
+const FontGlyph* FontFaceHandleHarfBuzz::GetOrAppendFallbackGlyph(Character& character)
 {
 {
 	auto fallback_glyph_location = fallback_glyphs.find(character);
 	auto fallback_glyph_location = fallback_glyphs.find(character);
 	if (fallback_glyph_location != fallback_glyphs.cend())
 	if (fallback_glyph_location != fallback_glyphs.cend())
@@ -447,6 +534,115 @@ const FontGlyph* FontFaceHandleHarfBuzz::GetOrAppendFallbackGlyph(Character char
 	return fallback_glyph;
 	return fallback_glyph;
 }
 }
 
 
+/// Shapes 'cluster' with each fallback font face in turn and stores the glyphs produced by the first face
+/// that yields at least one bitmap glyph and at least one non-zero glyph index.
+/// @param[in] cluster  The UTF-8 text of the unsupported cluster.
+/// @param[in] text_shaping_context  Extra parameters that provide context for text shaping.
+/// @param[in] registered_languages  A list of languages registered in the font engine interface.
+/// @return True if a fallback face produced usable glyphs for the cluster, false if none did.
+bool FontFaceHandleHarfBuzz::AppendFallbackClusterGlyphs(StringView cluster, const TextShapingContext& text_shaping_context,
+	const LanguageDataMap& registered_languages)
+{
+	hb_buffer_t* shaping_buffer = hb_buffer_create();
+	RMLUI_ASSERT(shaping_buffer != nullptr);
+
+	TextFlowDirection text_direction = TextFlowDirection::LeftToRight;
+	bool appended = false;
+
+	// Iterate through all available fallback font faces.
+	const int num_fallback_faces = FontProvider::CountFallbackFontFaces();
+	for (int i = 0; i < num_fallback_faces; i++)
+	{
+		FontFaceHandleHarfBuzz* fallback_face = FontProvider::GetFallbackFontFace(i, metrics.size);
+		if (!fallback_face || fallback_face == this)
+			continue;
+
+		// Insert the cluster into a shaping buffer and perform text shaping.
+		hb_buffer_clear_contents(shaping_buffer);
+		ConfigureTextShapingBuffer(shaping_buffer, cluster, text_shaping_context, registered_languages, &text_direction);
+		hb_buffer_add_utf8(shaping_buffer, cluster.begin(), (int)cluster.size(), 0, (int)cluster.size());
+		hb_shape(fallback_face->hb_font, shaping_buffer, nullptr, 0);
+
+		unsigned int glyph_count = 0;
+		hb_glyph_info_t* glyph_info = hb_buffer_get_glyph_infos(shaping_buffer, &glyph_count);
+		if (glyph_count == 0)
+			continue;
+
+		Vector<FontClusterGlyphData> cluster_glyphs;
+		cluster_glyphs.reserve((size_t)glyph_count);
+
+		// For right-to-left text the offset starts at the last glyph and shrinks by two per step,
+		// so that 'g + offset' walks the shaped glyphs in reverse order.
+		int glyph_info_index_offset = text_direction == TextFlowDirection::RightToLeft ? (int)glyph_count - 1 : 0;
+		int cluster_string_offset = 0;
+		bool has_nonzero_codepoint = false;
+
+		// Create the cluster glyphs.
+		for (int g = 0; g < (int)glyph_count; ++g)
+		{
+			int glyph_info_index = g + glyph_info_index_offset;
+			RMLUI_ASSERT(glyph_info_index < (int)glyph_count);
+
+			// Reverse the order of the glyphs in right-to-left text.
+			if (text_direction == TextFlowDirection::RightToLeft)
+				glyph_info_index_offset -= 2;
+
+			if (!has_nonzero_codepoint && glyph_info[glyph_info_index].codepoint != 0)
+				has_nonzero_codepoint = true;
+
+			Character character = Rml::StringUtilities::ToCharacter(cluster.begin() + cluster_string_offset, cluster.end());
+			const FontGlyph* glyph = fallback_face->GetOrAppendGlyph(glyph_info[glyph_info_index].codepoint, character, false);
+			if (glyph && glyph->bitmap_data)
+				cluster_glyphs.push_back(FontClusterGlyphData{glyph_info[glyph_info_index].codepoint, FontGlyphData{glyph->WeakCopy(), character}});
+
+			cluster_string_offset += (int)Rml::StringUtilities::BytesUTF8(character);
+			RMLUI_ASSERT(cluster_string_offset <= (int)cluster.size());
+		}
+
+		// Skip this face if it produced no bitmaps or only missing-glyph indices.
+		if (cluster_glyphs.empty() || !has_nonzero_codepoint)
+			continue;
+
+		// Insert the cluster glyphs into our own set of fallback cluster glyphs.
+		auto pair = fallback_cluster_glyphs.emplace(cluster, std::move(cluster_glyphs));
+		if (pair.second)
+		{
+			is_layers_dirty = true;
+
+			// Populate quick-lookup glyph map to reduce glyph search times during rendering.
+			for (const auto& cluster_glyph : pair.first->second)
+			{
+				uint64_t cluster_glyph_id = GetFallbackFontClusterGlyphLookupID(cluster_glyph.glyph_index, cluster_glyph.glyph_data.character);
+				fallback_cluster_glyphs_lookup.emplace(cluster_glyph_id, &cluster_glyph.glyph_data.bitmap);
+			}
+		}
+
+		appended = true;
+		break;
+	}
+
+	// Release the shaping buffer on every exit path; it was previously leaked on each call.
+	hb_buffer_destroy(shaping_buffer);
+
+	return appended;
+}
+
+/// Looks up the fallback glyphs for 'cluster', generating them from the fallback font faces on a cache miss.
+/// @param[in] cluster  The cluster.
+/// @param[in] text_shaping_context  Extra parameters that provide context for text shaping.
+/// @param[in] registered_languages  A list of languages registered in the font engine interface.
+/// @return The fallback glyphs of the cluster, or nullptr if they could not be generated.
+const Vector<FontClusterGlyphData>* FontFaceHandleHarfBuzz::GetOrAppendFallbackClusterGlyphs(StringView cluster,
+	const TextShapingContext& text_shaping_context, const LanguageDataMap& registered_languages)
+{
+	String lookup_key(cluster);
+
+	// Fast path: the cluster has already been generated.
+	auto location = fallback_cluster_glyphs.find(lookup_key);
+	if (location != fallback_cluster_glyphs.cend())
+		return &location->second;
+
+	// Cache miss: try to build the cluster glyphs from the fallback faces.
+	if (!AppendFallbackClusterGlyphs(cluster, text_shaping_context, registered_languages))
+		return nullptr;
+
+	// A successful append must have made the cluster available in the map.
+	location = fallback_cluster_glyphs.find(lookup_key);
+	if (location == fallback_cluster_glyphs.cend())
+	{
+		RMLUI_ERROR;
+		return nullptr;
+	}
+
+	is_layers_dirty = true;
+	return &location->second;
+}
+
 FontFaceLayer* FontFaceHandleHarfBuzz::GetOrCreateLayer(const SharedPtr<const FontEffect>& font_effect)
 FontFaceLayer* FontFaceHandleHarfBuzz::GetOrCreateLayer(const SharedPtr<const FontEffect>& font_effect)
 {
 {
 	// Search for the font effect layer first, it may have been instanced before as part of a different configuration.
 	// Search for the font effect layer first, it may have been instanced before as part of a different configuration.
@@ -509,7 +705,7 @@ bool FontFaceHandleHarfBuzz::GenerateLayer(FontFaceLayer* layer)
 }
 }
 
 
 void FontFaceHandleHarfBuzz::ConfigureTextShapingBuffer(hb_buffer_t* shaping_buffer, StringView string,
 void FontFaceHandleHarfBuzz::ConfigureTextShapingBuffer(hb_buffer_t* shaping_buffer, StringView string,
-	const TextShapingContext& text_shaping_context, const LanguageDataMap& registered_languages)
+	const TextShapingContext& text_shaping_context, const LanguageDataMap& registered_languages, TextFlowDirection* determined_text_direction) const
 {
 {
 	// Set the buffer's language based on the value of the element's 'lang' attribute.
 	// Set the buffer's language based on the value of the element's 'lang' attribute.
 	hb_buffer_set_language(shaping_buffer, hb_language_from_string(text_shaping_context.language.c_str(), -1));
 	hb_buffer_set_language(shaping_buffer, hb_language_from_string(text_shaping_context.language.c_str(), -1));
@@ -534,6 +730,7 @@ void FontFaceHandleHarfBuzz::ConfigureTextShapingBuffer(hb_buffer_t* shaping_buf
 	hb_buffer_set_script(shaping_buffer, script);
 	hb_buffer_set_script(shaping_buffer, script);
 
 
 	// Set the buffer's text-flow direction based on the value of the element's 'dir' attribute.
 	// Set the buffer's text-flow direction based on the value of the element's 'dir' attribute.
+	hb_direction_t text_direction = HB_DIRECTION_LTR;
 	switch (text_shaping_context.text_direction)
 	switch (text_shaping_context.text_direction)
 	{
 	{
 	case Rml::Style::Direction::Auto:
 	case Rml::Style::Direction::Auto:
@@ -541,25 +738,28 @@ void FontFaceHandleHarfBuzz::ConfigureTextShapingBuffer(hb_buffer_t* shaping_buf
 			// Automatically determine the text-flow direction from the registered language.
 			// Automatically determine the text-flow direction from the registered language.
 			switch (registered_language_location->second.text_flow_direction)
 			switch (registered_language_location->second.text_flow_direction)
 			{
 			{
-			case TextFlowDirection::LeftToRight: hb_buffer_set_direction(shaping_buffer, HB_DIRECTION_LTR); break;
-			case TextFlowDirection::RightToLeft: hb_buffer_set_direction(shaping_buffer, HB_DIRECTION_RTL); break;
+			case TextFlowDirection::LeftToRight: text_direction = HB_DIRECTION_LTR; break;
+			case TextFlowDirection::RightToLeft: text_direction = HB_DIRECTION_RTL; break;
 			}
 			}
 		else
 		else
 		{
 		{
 			// Language not registered; determine best text-flow direction based on script.
 			// Language not registered; determine best text-flow direction based on script.
-			hb_direction_t text_direction = hb_script_get_horizontal_direction(script);
+			text_direction = hb_script_get_horizontal_direction(script);
 			if (text_direction == HB_DIRECTION_INVALID)
 			if (text_direction == HB_DIRECTION_INVALID)
 				// Some scripts support both horizontal directions of text flow; default to left-to-right.
 				// Some scripts support both horizontal directions of text flow; default to left-to-right.
 				text_direction = HB_DIRECTION_LTR;
 				text_direction = HB_DIRECTION_LTR;
-
-			hb_buffer_set_direction(shaping_buffer, text_direction);
 		}
 		}
 		break;
 		break;
 
 
-	case Rml::Style::Direction::Ltr: hb_buffer_set_direction(shaping_buffer, HB_DIRECTION_LTR); break;
-	case Rml::Style::Direction::Rtl: hb_buffer_set_direction(shaping_buffer, HB_DIRECTION_RTL); break;
+	case Rml::Style::Direction::Ltr: text_direction = HB_DIRECTION_LTR; break;
+	case Rml::Style::Direction::Rtl: text_direction = HB_DIRECTION_RTL; break;
 	}
 	}
 
 
+	RMLUI_ASSERT(text_direction == HB_DIRECTION_LTR || text_direction == HB_DIRECTION_RTL);
+	hb_buffer_set_direction(shaping_buffer, text_direction);
+	if (determined_text_direction)
+		*determined_text_direction = text_direction == HB_DIRECTION_LTR ? TextFlowDirection::LeftToRight : TextFlowDirection::RightToLeft;
+
 	// Set buffer flags for additional text-shaping configuration.
 	// Set buffer flags for additional text-shaping configuration.
 	int buffer_flags = HB_BUFFER_FLAG_DEFAULT | HB_BUFFER_FLAG_BOT | HB_BUFFER_FLAG_EOT;
 	int buffer_flags = HB_BUFFER_FLAG_DEFAULT | HB_BUFFER_FLAG_BOT | HB_BUFFER_FLAG_EOT;
 
 
@@ -573,3 +773,25 @@ void FontFaceHandleHarfBuzz::ConfigureTextShapingBuffer(hb_buffer_t* shaping_buf
 
 
 	hb_buffer_set_flags(shaping_buffer, (hb_buffer_flags_t)buffer_flags);
 	hb_buffer_set_flags(shaping_buffer, (hb_buffer_flags_t)buffer_flags);
 }
 }
+
+/// Returns the portion of 'string' covered by the glyph cluster that starts at 'glyph_index'.
+/// Each following glyph that reports the same cluster value is counted as one more codepoint, and the
+/// returned view is extended by that codepoint's UTF-8 byte length.
+/// @param[out] cluster_codepoint_count  Set to the number of codepoints counted in the cluster (at least 1).
+StringView FontFaceHandleHarfBuzz::GetCurrentClusterString(const hb_glyph_info_t* glyph_info, int glyph_count, int glyph_index,
+	Character first_character, StringView string, int& cluster_codepoint_count) const
+{
+	const unsigned int cluster_start = glyph_info[glyph_index].cluster;
+	const auto cluster_begin = string.begin() + cluster_start;
+
+	// The first character is always part of the cluster.
+	int byte_length = (int)Rml::StringUtilities::BytesUTF8(first_character);
+	cluster_codepoint_count = 1;
+
+	// Walk the following glyphs for as long as they belong to the same cluster.
+	for (int next = glyph_index + 1; next < glyph_count && glyph_info[next].cluster == cluster_start; ++next)
+	{
+		const Character following_character = Rml::StringUtilities::ToCharacter(cluster_begin + byte_length, string.end());
+		byte_length += (int)Rml::StringUtilities::BytesUTF8(following_character);
+		++cluster_codepoint_count;
+	}
+
+	return StringView(cluster_begin, cluster_begin + byte_length);
+}

+ 40 - 6
Samples/basic/harfbuzz/src/FontFaceHandleHarfBuzz.h

@@ -43,6 +43,8 @@ using Rml::FontEffectList;
 using Rml::FontFaceHandleFreetype;
 using Rml::FontFaceHandleFreetype;
 using Rml::FontGlyph;
 using Rml::FontGlyph;
 using Rml::FontMetrics;
 using Rml::FontMetrics;
+using Rml::Pair;
+using Rml::Queue;
 using Rml::RenderManager;
 using Rml::RenderManager;
 using Rml::SharedPtr;
 using Rml::SharedPtr;
 using Rml::SmallUnorderedMap;
 using Rml::SmallUnorderedMap;
@@ -71,6 +73,7 @@ public:
 
 
 	const FontGlyphMap& GetGlyphs() const;
 	const FontGlyphMap& GetGlyphs() const;
 	const FallbackFontGlyphMap& GetFallbackGlyphs() const;
 	const FallbackFontGlyphMap& GetFallbackGlyphs() const;
+	const FallbackFontClusterGlyphsMap& GetFallbackClusterGlyphs() const;
 
 
 	/// Returns the width a string will take up if rendered with this handle.
 	/// Returns the width a string will take up if rendered with this handle.
 	/// @param[in] string The string to measure.
 	/// @param[in] string The string to measure.
@@ -117,19 +120,31 @@ private:
 	bool AppendGlyph(FontGlyphIndex glyph_index, Character character);
 	bool AppendGlyph(FontGlyphIndex glyph_index, Character character);
 
 
 	// Build and append fallback glyph to 'fallback_glyphs'.
 	// Build and append fallback glyph to 'fallback_glyphs'.
-	bool AppendFallbackGlyph(Character character);
+	bool AppendFallbackGlyph(Character& character);
 
 
 	/// Retrieve a glyph from the given code index, building and appending a new glyph if not already built.
 	/// Retrieve a glyph from the given code index, building and appending a new glyph if not already built.
 	/// @param[in] glyph_index  The glyph index.
 	/// @param[in] glyph_index  The glyph index.
-	/// @param[in-out] character  The character codepoint, can be changed e.g. to the replacement character if no glyph is found..
+	/// @param[in-out] character  The character codepoint, can be changed e.g. to the replacement character if no glyph is found.
 	/// @param[in] look_in_fallback_fonts  Look for the glyph in fallback fonts if not found locally, adding it to our fallback glyph map.
 	/// @param[in] look_in_fallback_fonts  Look for the glyph in fallback fonts if not found locally, adding it to our fallback glyph map.
 	/// @return The font glyph for the returned glyph index.
 	/// @return The font glyph for the returned glyph index.
 	const FontGlyph* GetOrAppendGlyph(FontGlyphIndex glyph_index, Character& character, bool look_in_fallback_fonts = true);
 	const FontGlyph* GetOrAppendGlyph(FontGlyphIndex glyph_index, Character& character, bool look_in_fallback_fonts = true);
 
 
 	/// Retrieve a fallback glyph from the given character, building and appending a new fallback glyph if not already built.
 	/// Retrieve a fallback glyph from the given character, building and appending a new fallback glyph if not already built.
-	/// @param[in] character  The character codepoint.
+	/// @param[in-out] character  The character codepoint, can be changed e.g. to the replacement character if no glyph is found.
 	/// @return The fallback font glyph for character.
 	/// @return The fallback font glyph for character.
-	const FontGlyph* GetOrAppendFallbackGlyph(Character character);
+	const FontGlyph* GetOrAppendFallbackGlyph(Character& character);
+
+	// Build and append fallback cluster glyph to 'fallback_cluster_glyphs'.
+	bool AppendFallbackClusterGlyphs(StringView cluster, const TextShapingContext& text_shaping_context,
+		const LanguageDataMap& registered_languages);
+
+	/// Retrieve a fallback cluster glyph from the given cluster and text-shaping/language data, building and appending a new fallback cluster glyph if not already built.
+	/// @param[in] cluster  The cluster.
+	/// @param[in] text_shaping_context  Extra parameters that provide context for text shaping.
+	/// @param[in] registered_languages  A list of languages registered in the font engine interface.
+	/// @return The fallback glyphs of the cluster.
+	const Vector<FontClusterGlyphData>* GetOrAppendFallbackClusterGlyphs(StringView cluster, const TextShapingContext& text_shaping_context,
+		const LanguageDataMap& registered_languages);
 
 
 	// Regenerate layers if dirty, such as after adding new glyphs.
 	// Regenerate layers if dirty, such as after adding new glyphs.
 	bool UpdateLayersOnDirty();
 	bool UpdateLayersOnDirty();
@@ -140,13 +155,32 @@ private:
 	// (Re-)generate a layer in this font face handle.
 	// (Re-)generate a layer in this font face handle.
 	bool GenerateLayer(FontFaceLayer* layer);
 	bool GenerateLayer(FontFaceLayer* layer);
 
 
-	// Configure internal text shaping buffer values with context.
+	/// Configure internal text shaping buffer values with context.
+	/// @param[in] shaping_buffer  The shaping buffer to be configured.
+	/// @param[in] string  The string currently being measured/rendered.
+	/// @param[in] text_shaping_context  Extra parameters that provide context for text shaping.
+	/// @param[in] registered_languages  A list of languages registered in the font engine interface.
+	/// @param[out] determined_text_direction  The text direction that was used to shape the buffer.
 	void ConfigureTextShapingBuffer(struct hb_buffer_t* shaping_buffer, StringView string, const TextShapingContext& text_shaping_context,
 	void ConfigureTextShapingBuffer(struct hb_buffer_t* shaping_buffer, StringView string, const TextShapingContext& text_shaping_context,
-		const LanguageDataMap& registered_languages);
+		const LanguageDataMap& registered_languages, TextFlowDirection* determined_text_direction) const;
+
+	/// Creates a cluster string from shaped glyph info and index.
+	/// @param[in] glyph_info  The shaped glyph info list (supplied by HarfBuzz).
+	/// @param[in] glyph_count  The number of shaped glyphs in glyph_info.
+	/// @param[in] glyph_index  The current glyph index.
+	/// @param[in] first_character  The first character of the cluster.
+	/// @param[in] string  The string currently being measured/rendered.
+	/// @param[out] cluster_codepoint_count  The number of codepoints in the cluster (which may differ from the length of the returned string).
+	/// @return A view into 'string' (UTF-8) spanning all codepoints in the current glyph cluster.
+	StringView GetCurrentClusterString(const struct hb_glyph_info_t* glyph_info, int glyph_count, int glyph_index, Character first_character,
+		StringView string, int& cluster_codepoint_count) const;
 
 
 	FontGlyphMap glyphs;
 	FontGlyphMap glyphs;
 	FallbackFontGlyphMap fallback_glyphs;
 	FallbackFontGlyphMap fallback_glyphs;
 
 
+	FallbackFontClusterGlyphsMap fallback_cluster_glyphs;
+	FallbackFontClusterGlyphLookupMap fallback_cluster_glyphs_lookup;
+
 	struct EffectLayerPair {
 	struct EffectLayerPair {
 		const FontEffect* font_effect;
 		const FontEffect* font_effect;
 		UniquePtr<FontFaceLayer> layer;
 		UniquePtr<FontFaceLayer> layer;

+ 83 - 25
Samples/basic/harfbuzz/src/FontFaceLayer.cpp

@@ -54,6 +54,7 @@ bool FontFaceLayer::Generate(const FontFaceHandleHarfBuzz* handle, const FontFac
 
 
 	const FontGlyphMap& glyphs = handle->GetGlyphs();
 	const FontGlyphMap& glyphs = handle->GetGlyphs();
 	const FallbackFontGlyphMap& fallback_glyphs = handle->GetFallbackGlyphs();
 	const FallbackFontGlyphMap& fallback_glyphs = handle->GetFallbackGlyphs();
+	const FallbackFontClusterGlyphsMap& fallback_cluster_glyphs = handle->GetFallbackClusterGlyphs();
 
 
 	// Generate the new layout.
 	// Generate the new layout.
 	if (clone)
 	if (clone)
@@ -73,7 +74,7 @@ bool FontFaceLayer::Generate(const FontFaceHandleHarfBuzz* handle, const FontFac
 				const FontGlyph& glyph = pair.second.bitmap;
 				const FontGlyph& glyph = pair.second.bitmap;
 				const Character glyph_character = pair.second.character;
 				const Character glyph_character = pair.second.character;
 
 
-				CloneTextureBox(glyph, glyph_index, glyph_character);
+				CloneTextureBox(glyph, glyph_index, glyph_character, false);
 			}
 			}
 
 
 			for (auto& pair : fallback_glyphs)
 			for (auto& pair : fallback_glyphs)
@@ -81,21 +82,30 @@ bool FontFaceLayer::Generate(const FontFaceHandleHarfBuzz* handle, const FontFac
 				const Character glyph_character = pair.first;
 				const Character glyph_character = pair.first;
 				const FontGlyph& glyph = pair.second;
 				const FontGlyph& glyph = pair.second;
 
 
-				CloneTextureBox(glyph, 0, glyph_character);
+				CloneTextureBox(glyph, 0, glyph_character, false);
 			}
 			}
+
+			for (auto& pair : fallback_cluster_glyphs)
+				for (auto& cluster_glyph : pair.second)
+				{
+					const Character glyph_character = cluster_glyph.glyph_data.character;
+					const FontGlyph& glyph = cluster_glyph.glyph_data.bitmap;
+
+					CloneTextureBox(glyph, cluster_glyph.glyph_index, glyph_character, true);
+				}
 		}
 		}
 	}
 	}
 	else
 	else
 	{
 	{
 		// Initialise the texture layout for the glyphs.
 		// Initialise the texture layout for the glyphs.
-		character_boxes.reserve(glyphs.size() + fallback_glyphs.size());
+		character_boxes.reserve(glyphs.size() + fallback_glyphs.size() + fallback_cluster_glyphs.size());
 		for (auto& pair : glyphs)
 		for (auto& pair : glyphs)
 		{
 		{
 			FontGlyphIndex glyph_index = pair.first;
 			FontGlyphIndex glyph_index = pair.first;
 			const FontGlyph& glyph = pair.second.bitmap;
 			const FontGlyph& glyph = pair.second.bitmap;
 			Character glyph_character = pair.second.character;
 			Character glyph_character = pair.second.character;
 
 
-			CreateTextureLayout(glyph, glyph_index, glyph_character);
+			CreateTextureLayout(glyph, glyph_index, glyph_character, false);
 		}
 		}
 
 
 		for (auto& pair : fallback_glyphs)
 		for (auto& pair : fallback_glyphs)
@@ -103,9 +113,18 @@ bool FontFaceLayer::Generate(const FontFaceHandleHarfBuzz* handle, const FontFac
 			Character glyph_character = pair.first;
 			Character glyph_character = pair.first;
 			const FontGlyph& glyph = pair.second;
 			const FontGlyph& glyph = pair.second;
 
 
-			CreateTextureLayout(glyph, 0, glyph_character);
+			CreateTextureLayout(glyph, 0, glyph_character, false);
 		}
 		}
 
 
+		for (auto& pair : fallback_cluster_glyphs)
+			for (auto& cluster_glyph : pair.second)
+			{
+				const Character glyph_character = cluster_glyph.glyph_data.character;
+				const FontGlyph& glyph = cluster_glyph.glyph_data.bitmap;
+
+				CreateTextureLayout(glyph, cluster_glyph.glyph_index, glyph_character, true);
+			}
+
 		constexpr int max_texture_dimensions = 1024;
 		constexpr int max_texture_dimensions = 1024;
 
 
 		// Generate the texture layout; this will position the glyph rectangles efficiently and
 		// Generate the texture layout; this will position the glyph rectangles efficiently and
@@ -162,8 +181,7 @@ bool FontFaceLayer::Generate(const FontFaceHandleHarfBuzz* handle, const FontFac
 	return true;
 	return true;
 }
 }
 
 
-bool FontFaceLayer::GenerateTexture(Vector<byte>& texture_data, Vector2i& texture_dimensions, int texture_id, const FontGlyphMap& glyphs,
-	const FallbackFontGlyphMap& fallback_glyphs)
+bool FontFaceLayer::GenerateTexture(Vector<byte>& texture_data, Vector2i& texture_dimensions, int texture_id, const FontGlyphMaps& glyph_maps)
 {
 {
 	if (texture_id < 0 || texture_id > texture_layout.GetNumTextures())
 	if (texture_id < 0 || texture_id > texture_layout.GetNumTextures())
 		return false;
 		return false;
@@ -186,23 +204,40 @@ bool FontFaceLayer::GenerateTexture(Vector<byte>& texture_data, Vector2i& textur
 		const FontGlyph* glyph = nullptr;
 		const FontGlyph* glyph = nullptr;
 		FontGlyphIndex glyph_index = GetFontGlyphIndexFromID(font_glyph_id);
 		FontGlyphIndex glyph_index = GetFontGlyphIndexFromID(font_glyph_id);
 		Rml::Character glyph_character = GetCharacterCodepointFromID(font_glyph_id);
 		Rml::Character glyph_character = GetCharacterCodepointFromID(font_glyph_id);
+		bool is_cluster = IsFontGlyphIDPartOfCluster(font_glyph_id);
 
 
 		// Get the glyph bitmap by looking it up with the glyph index.
 		// Get the glyph bitmap by looking it up with the glyph index.
-		auto it = glyphs.find(glyph_index);
-		if (it == glyphs.end() || glyph_index == 0)
+		RMLUI_ASSERT(glyph_maps.glyphs != nullptr);
+		auto it = glyph_maps.glyphs->find(is_cluster ? 0 : glyph_index);
+		if (it == glyph_maps.glyphs->end() || glyph_index == 0 || is_cluster)
 		{
 		{
-			// Glyph was not found; attempt to find it in the fallback glyphs.
-			auto fallback_it = fallback_glyphs.find(glyph_character);
-			if (fallback_it == fallback_glyphs.end())
-				if (it != glyphs.end())
+			// Glyph was not found; attempt to find it in the fallback cluster glyphs.
+			if (is_cluster && glyph_maps.fallback_cluster_glyphs)
+			{
+				uint64_t cluster_glyph_lookup_id = GetFallbackFontClusterGlyphLookupID(glyph_index, glyph_character);
+				auto cluster_glyph_it = glyph_maps.fallback_cluster_glyphs->find(cluster_glyph_lookup_id);
+				if (cluster_glyph_it != glyph_maps.fallback_cluster_glyphs->end())
+					glyph = cluster_glyph_it->second;
+			}
+
+			// Glyph was still not found; attempt to find it in the fallback glyphs.
+			if (!glyph && !is_cluster && glyph_maps.fallback_glyphs)
+			{
+				auto fallback_it = glyph_maps.fallback_glyphs->find(glyph_character);
+				if (fallback_it != glyph_maps.fallback_glyphs->end())
+					// Fallback glyph was found.
+					glyph = &fallback_it->second;
+			}
+
+			if (!glyph)
+			{
+				if (it != glyph_maps.glyphs->end())
 					// Fallback glyph was not found, but replacement glyph bitmap exists, so use it instead.
 					// Fallback glyph was not found, but replacement glyph bitmap exists, so use it instead.
 					glyph = &it->second.bitmap;
 					glyph = &it->second.bitmap;
 				else
 				else
 					// No fallback glyph nor replacement glyph bitmap was found; ignore this glyph.
 					// No fallback glyph nor replacement glyph bitmap was found; ignore this glyph.
 					continue;
 					continue;
-			else
-				// Fallback glyph was found.
-				glyph = &fallback_it->second;
+			}
 		}
 		}
 		else
 		else
 			// Glyph was found.
 			// Glyph was found.
@@ -242,9 +277,7 @@ bool FontFaceLayer::GenerateTexture(Vector<byte>& texture_data, Vector2i& textur
 			}
 			}
 		}
 		}
 		else
 		else
-		{
 			effect->GenerateGlyphTexture(rectangle.GetTextureData(), Vector2i(box.dimensions), rectangle.GetTextureStride(), *glyph);
 			effect->GenerateGlyphTexture(rectangle.GetTextureData(), Vector2i(box.dimensions), rectangle.GetTextureStride(), *glyph);
-		}
 	}
 	}
 
 
 	return true;
 	return true;
@@ -273,9 +306,26 @@ ColourbPremultiplied FontFaceLayer::GetColour(float opacity) const
 	return colour.ToPremultiplied(opacity);
 	return colour.ToPremultiplied(opacity);
 }
 }
 
 
-uint64_t FontFaceLayer::CreateFontGlyphID(const FontGlyphIndex glyph_index, const Character character_code) const
+uint64_t FontFaceLayer::CreateFontGlyphID(const FontGlyphIndex glyph_index, const Character character_code, bool is_cluster) const
 {
 {
-	return (static_cast<uint64_t>(glyph_index) << (sizeof(Character) * 8)) | static_cast<uint64_t>(std::underlying_type_t<Character>(character_code));
+	// Font glyph ID details:
+	// 64                  48                  32                  16
+	// 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
+	// | <---------- glyph_index ----------> | ^| <-------- character_code --------> |
+	//                                         |
+	//                                     is_cluster
+	// 
+	// The maximum valid Unicode codepoint is U+10FFFF (slightly larger than 2^20),
+	// so dedicating the 32nd bit of 'character_code' to 'is_cluster' shouldn't cause any issues.
+
+	uint64_t font_glyph_id = (static_cast<uint64_t>(glyph_index) << (sizeof(Character) * 8)) | static_cast<uint64_t>(character_code);
+
+	if (is_cluster)
+		font_glyph_id |= font_glyph_id_cluster_bit_mask;
+	else
+		font_glyph_id &= ~font_glyph_id_cluster_bit_mask;
+
+	return font_glyph_id;
 }
 }
 
 
 FontGlyphIndex FontFaceLayer::GetFontGlyphIndexFromID(const uint64_t glyph_id) const
 FontGlyphIndex FontFaceLayer::GetFontGlyphIndexFromID(const uint64_t glyph_id) const
@@ -285,10 +335,18 @@ FontGlyphIndex FontFaceLayer::GetFontGlyphIndexFromID(const uint64_t glyph_id) c
 
 
 Character FontFaceLayer::GetCharacterCodepointFromID(const uint64_t glyph_id) const
 Character FontFaceLayer::GetCharacterCodepointFromID(const uint64_t glyph_id) const
 {
 {
-	return static_cast<Character>(glyph_id & static_cast<std::underlying_type_t<Character>>(-1));
+	uint64_t character_codepoint = glyph_id & static_cast<std::underlying_type_t<Character>>(-1);
+	character_codepoint &= ~font_glyph_id_cluster_bit_mask;
+
+	return static_cast<Character>(character_codepoint);
+}
+
+bool FontFaceLayer::IsFontGlyphIDPartOfCluster(const uint64_t glyph_id) const
+{
+	return glyph_id & font_glyph_id_cluster_bit_mask;
 }
 }
 
 
-void FontFaceLayer::CreateTextureLayout(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character)
+void FontFaceLayer::CreateTextureLayout(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character, bool is_cluster)
 {
 {
 	Vector2i glyph_origin(0, 0);
 	Vector2i glyph_origin(0, 0);
 	Vector2i glyph_dimensions = glyph.bitmap_dimensions;
 	Vector2i glyph_dimensions = glyph.bitmap_dimensions;
@@ -306,16 +364,16 @@ void FontFaceLayer::CreateTextureLayout(const FontGlyph& glyph, FontGlyphIndex g
 
 
 	RMLUI_ASSERT(box.dimensions.x >= 0 && box.dimensions.y >= 0);
 	RMLUI_ASSERT(box.dimensions.x >= 0 && box.dimensions.y >= 0);
 
 
-	uint64_t font_glyph_id = CreateFontGlyphID(glyph_index, glyph_character);
+	uint64_t font_glyph_id = CreateFontGlyphID(glyph_index, glyph_character, is_cluster);
 	character_boxes[font_glyph_id] = box;
 	character_boxes[font_glyph_id] = box;
 
 
 	// Add the character's dimensions into the texture layout engine.
 	// Add the character's dimensions into the texture layout engine.
 	texture_layout.AddRectangle(font_glyph_id, glyph_dimensions);
 	texture_layout.AddRectangle(font_glyph_id, glyph_dimensions);
 }
 }
 
 
-void FontFaceLayer::CloneTextureBox(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character)
+void FontFaceLayer::CloneTextureBox(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character, bool is_cluster)
 {
 {
-	auto it = character_boxes.find(CreateFontGlyphID(glyph_index, glyph_character));
+	auto it = character_boxes.find(CreateFontGlyphID(glyph_index, glyph_character, is_cluster));
 	if (it == character_boxes.end())
 	if (it == character_boxes.end())
 	{
 	{
 		// This can happen if the layers have been dirtied in FontHandleDefault. We will
 		// This can happen if the layers have been dirtied in FontHandleDefault. We will

+ 14 - 8
Samples/basic/harfbuzz/src/FontFaceLayer.h

@@ -82,18 +82,19 @@ public:
 	/// @param[out] texture_data The generated texture data.
 	/// @param[out] texture_data The generated texture data.
 	/// @param[out] texture_dimensions The dimensions of the texture.
 	/// @param[out] texture_dimensions The dimensions of the texture.
 	/// @param[in] texture_id The index of the texture within the layer to generate.
 	/// @param[in] texture_id The index of the texture within the layer to generate.
-	/// @param[in] glyphs The glyphs required by the font face handle.
-	bool GenerateTexture(Vector<byte>& texture_data, Vector2i& texture_dimensions, int texture_id, const FontGlyphMap& glyphs, const FallbackFontGlyphMap& fallback_glyphs);
+	/// @param[in] glyph_maps The glyph maps required by the font face handle.
+	bool GenerateTexture(Vector<byte>& texture_data, Vector2i& texture_dimensions, int texture_id, const FontGlyphMaps& glyph_maps);
 
 
 	/// Generates the geometry required to render a single character.
 	/// Generates the geometry required to render a single character.
 	/// @param[out] mesh_list An array of meshes this layer will write to. It must be at least as big as the number of textures in this layer.
 	/// @param[out] mesh_list An array of meshes this layer will write to. It must be at least as big as the number of textures in this layer.
 	/// @param[in] character_code The character to generate geometry for.
 	/// @param[in] character_code The character to generate geometry for.
+	/// @param[in] is_cluster Whether the glyph is part of a cluster or not.
 	/// @param[in] position The position of the baseline.
 	/// @param[in] position The position of the baseline.
 	/// @param[in] colour The colour of the string.
 	/// @param[in] colour The colour of the string.
-	inline void GenerateGeometry(TexturedMesh* mesh_list, const FontGlyphIndex glyph_index, const Character character_code, const Vector2f position,
-		const ColourbPremultiplied colour) const
+	inline void GenerateGeometry(TexturedMesh* mesh_list, const FontGlyphIndex glyph_index, const Character character_code, bool is_cluster,
+		const Vector2f position, const ColourbPremultiplied colour) const
 	{
 	{
-		auto it = character_boxes.find(CreateFontGlyphID(glyph_index, character_code));
+		auto it = character_boxes.find(CreateFontGlyphID(glyph_index, character_code, is_cluster));
 		if (it == character_boxes.end())
 		if (it == character_boxes.end())
 			return;
 			return;
 
 
@@ -120,7 +121,7 @@ public:
 
 
 private:
 private:
 	/// Creates an ID for a font glyph from a glyph index and character codepoint.
 	/// Creates an ID for a font glyph from a glyph index and character codepoint.
-	uint64_t CreateFontGlyphID(const FontGlyphIndex glyph_index, const Character character_code) const;
+	uint64_t CreateFontGlyphID(const FontGlyphIndex glyph_index, const Character character_code, bool is_cluster) const;
 
 
 	/// Retrieves the font glyph index from a font glyph ID.
 	/// Retrieves the font glyph index from a font glyph ID.
 	FontGlyphIndex GetFontGlyphIndexFromID(const uint64_t glyph_id) const;
 	FontGlyphIndex GetFontGlyphIndexFromID(const uint64_t glyph_id) const;
@@ -128,11 +129,14 @@ private:
 	/// Retrieves the character from a font glyph ID.
 	/// Retrieves the character from a font glyph ID.
 	Character GetCharacterCodepointFromID(const uint64_t glyph_id) const;
 	Character GetCharacterCodepointFromID(const uint64_t glyph_id) const;
 
 
+	/// Determines if a font glyph ID is part of a cluster instead of a single glyph.
+	bool IsFontGlyphIDPartOfCluster(const uint64_t glyph_id) const;
+
 	/// Creates a texture layout from the given glyph bitmap and data.
 	/// Creates a texture layout from the given glyph bitmap and data.
-	void CreateTextureLayout(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character);
+	void CreateTextureLayout(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character, bool is_cluster);
 
 
 	/// Clones the given glyph bitmap and data into a texture box.
 	/// Clones the given glyph bitmap and data into a texture box.
-	void CloneTextureBox(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character);
+	void CloneTextureBox(const FontGlyph& glyph, FontGlyphIndex glyph_index, Character glyph_character, bool is_cluster);
 
 
 	struct TextureBox {
 	struct TextureBox {
 		// The offset, in pixels, of the baseline from the start of this character's geometry.
 		// The offset, in pixels, of the baseline from the start of this character's geometry.
@@ -149,6 +153,8 @@ private:
 	using CharacterMap = UnorderedMap<uint64_t, TextureBox>;
 	using CharacterMap = UnorderedMap<uint64_t, TextureBox>;
 	using TextureList = Vector<CallbackTextureSource>;
 	using TextureList = Vector<CallbackTextureSource>;
 
 
+	static constexpr uint64_t font_glyph_id_cluster_bit_mask = 1ull << 31ull;
+
 	SharedPtr<const FontEffect> effect;
 	SharedPtr<const FontEffect> effect;
 
 
 	TextureList textures_owned;
 	TextureList textures_owned;

+ 28 - 1
Samples/basic/harfbuzz/src/FontGlyph.h

@@ -31,14 +31,41 @@
 
 
 #include <RmlUi/Core.h>
 #include <RmlUi/Core.h>
 
 
+using FontGlyphIndex = uint32_t;
+
 struct FontGlyphData
 struct FontGlyphData
 {
 {
 	Rml::FontGlyph bitmap;
 	Rml::FontGlyph bitmap;
 	Rml::Character character;
 	Rml::Character character;
 };
 };
 
 
-using FontGlyphIndex = uint32_t;
+struct FontGlyphReference
+{
+	const Rml::FontGlyph* bitmap;
+	Rml::Character character;
+};
+
+struct FontClusterGlyphData
+{
+	FontGlyphIndex glyph_index;
+	FontGlyphData glyph_data;
+};
+
 using FontGlyphMap = Rml::UnorderedMap<FontGlyphIndex, FontGlyphData>;
 using FontGlyphMap = Rml::UnorderedMap<FontGlyphIndex, FontGlyphData>;
 using FallbackFontGlyphMap = Rml::UnorderedMap<Rml::Character, Rml::FontGlyph>;
 using FallbackFontGlyphMap = Rml::UnorderedMap<Rml::Character, Rml::FontGlyph>;
+using FallbackFontClusterGlyphsMap = Rml::UnorderedMap<Rml::String, Rml::Vector<FontClusterGlyphData>>;
+using FallbackFontClusterGlyphLookupMap = Rml::UnorderedMap<uint64_t, const Rml::FontGlyph*>;
+
+struct FontGlyphMaps {
+	const FontGlyphMap* glyphs;
+	const FallbackFontGlyphMap* fallback_glyphs;
+	const FallbackFontClusterGlyphLookupMap* fallback_cluster_glyphs;
+};
+
+inline uint64_t GetFallbackFontClusterGlyphLookupID(FontGlyphIndex glyph_index, Rml::Character character)
+{
+	// Combine 32-bit glyph index and 32-bit character into a single 64-bit integer.
+	return (static_cast<uint64_t>(glyph_index) << (sizeof(Rml::Character) * 8)) | static_cast<uint64_t>(character);
+}
 
 
 #endif
 #endif

+ 17 - 0
Source/Core/StringUtilities.cpp

@@ -494,6 +494,23 @@ Character StringUtilities::ToCharacter(const char* p, const char* p_end)
 	return static_cast<Character>(code);
 	return static_cast<Character>(code);
 }
 }
 
 
+size_t StringUtilities::BytesUTF8(Character character)
+{
+	char32_t c = (char32_t)character;
+
+	if (c < 0x80)
+		return 1;
+	else if (c < 0x800)
+		return 2;
+	else if (c < 0x10000)
+		return 3;
+	else if (c <= 0x10FFFF)
+		return 4;
+	else
+		// Invalid character.
+		return 0;
+}
+
 String StringUtilities::ToUTF8(Character character)
 String StringUtilities::ToUTF8(Character character)
 {
 {
 	return ToUTF8(&character, 1);
 	return ToUTF8(&character, 1);