2
0
Эх сурвалжийг харах

Fix CPU/GPUParticles2D bugs on Compatibility Rendering (GLES3) on Adreno 3XX devices.

joined72 1 жил өмнө
parent
commit
4df39dc140

+ 11 - 5
drivers/gles3/shaders/canvas.glsl

@@ -160,15 +160,18 @@ void main() {
 	if (gl_VertexID % 3 == 0) {
 	if (gl_VertexID % 3 == 0) {
 		vertex = read_draw_data_point_a;
 		vertex = read_draw_data_point_a;
 		uv = read_draw_data_uv_a;
 		uv = read_draw_data_uv_a;
-		color = vec4(unpackHalf2x16(read_draw_data_color_a_rg), unpackHalf2x16(read_draw_data_color_a_ba));
+		color.xy = unpackHalf2x16(read_draw_data_color_a_rg);
+		color.zw = unpackHalf2x16(read_draw_data_color_a_ba);
 	} else if (gl_VertexID % 3 == 1) {
 	} else if (gl_VertexID % 3 == 1) {
 		vertex = read_draw_data_point_b;
 		vertex = read_draw_data_point_b;
 		uv = read_draw_data_uv_b;
 		uv = read_draw_data_uv_b;
-		color = vec4(unpackHalf2x16(read_draw_data_color_b_rg), unpackHalf2x16(read_draw_data_color_b_ba));
+		color.xy = unpackHalf2x16(read_draw_data_color_b_rg);
+		color.zw = unpackHalf2x16(read_draw_data_color_b_ba);
 	} else {
 	} else {
 		vertex = read_draw_data_point_c;
 		vertex = read_draw_data_point_c;
 		uv = read_draw_data_uv_c;
 		uv = read_draw_data_uv_c;
-		color = vec4(unpackHalf2x16(read_draw_data_color_c_rg), unpackHalf2x16(read_draw_data_color_c_ba));
+		color.xy = unpackHalf2x16(read_draw_data_color_c_rg);
+		color.zw = unpackHalf2x16(read_draw_data_color_c_ba);
 	}
 	}
 
 
 #elif defined(USE_ATTRIBUTES)
 #elif defined(USE_ATTRIBUTES)
@@ -178,11 +181,14 @@ void main() {
 
 
 #ifdef USE_INSTANCING
 #ifdef USE_INSTANCING
 	if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_COLORS)) {
 	if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_COLORS)) {
-		vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y));
+		vec4 instance_color;
+		instance_color.xy = unpackHalf2x16(uint(instance_color_custom_data.x));
+		instance_color.zw = unpackHalf2x16(uint(instance_color_custom_data.y));
 		color *= instance_color;
 		color *= instance_color;
 	}
 	}
 	if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
 	if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) {
-		instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w));
+		instance_custom.xy = unpackHalf2x16(instance_color_custom_data.z);
+		instance_custom.zw = unpackHalf2x16(instance_color_custom_data.w);
 	}
 	}
 #endif // !USE_INSTANCING
 #endif // !USE_INSTANCING
 
 

+ 2 - 1
drivers/gles3/shaders/particles.glsl

@@ -321,7 +321,8 @@ void main() {
 				amount = max(0.0, 1.0 - d);
 				amount = max(0.0, 1.0 - d);
 			} else if (attractors[i].type == ATTRACTOR_TYPE_VECTOR_FIELD) {
 			} else if (attractors[i].type == ATTRACTOR_TYPE_VECTOR_FIELD) {
 			}
 			}
-			amount = pow(amount, attractors[i].attenuation);
+			mediump float attractor_attenuation = attractors[i].attenuation;
+			amount = pow(amount, attractor_attenuation);
 			dir = safe_normalize(mix(dir, attractors[i].transform[2].xyz, attractors[i].directionality));
 			dir = safe_normalize(mix(dir, attractors[i].transform[2].xyz, attractors[i].directionality));
 			attractor_force -= amount * dir * attractors[i].strength;
 			attractor_force -= amount * dir * attractors[i].strength;
 		}
 		}

+ 37 - 40
drivers/gles3/shaders/particles_copy.glsl

@@ -57,45 +57,39 @@ void main() {
 		txform = transpose(mat4(xform_1, xform_2, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)));
 		txform = transpose(mat4(xform_1, xform_2, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)));
 #endif
 #endif
 
 
-		switch (align_mode) {
-			case TRANSFORM_ALIGN_DISABLED: {
-			} break; //nothing
-			case TRANSFORM_ALIGN_Z_BILLBOARD: {
-				mat3 local = mat3(normalize(cross(align_up, sort_direction)), align_up, sort_direction);
-				local = local * mat3(txform);
-				txform[0].xyz = local[0];
-				txform[1].xyz = local[1];
-				txform[2].xyz = local[2];
-
-			} break;
-			case TRANSFORM_ALIGN_Y_TO_VELOCITY: {
-				vec3 v = velocity_flags.xyz;
-				float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0;
-				if (length(v) > 0.0) {
-					txform[1].xyz = normalize(v);
-				} else {
-					txform[1].xyz = normalize(txform[1].xyz);
-				}
-
-				txform[0].xyz = normalize(cross(txform[1].xyz, txform[2].xyz));
-				txform[2].xyz = vec3(0.0, 0.0, 1.0) * s;
-				txform[0].xyz *= s;
-				txform[1].xyz *= s;
-			} break;
-			case TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY: {
-				vec3 sv = velocity_flags.xyz - sort_direction * dot(sort_direction, velocity_flags.xyz); //screen velocity
-
-				if (length(sv) == 0.0) {
-					sv = align_up;
-				}
-
-				sv = normalize(sv);
-
-				txform[0].xyz = normalize(cross(sv, sort_direction)) * length(txform[0]);
-				txform[1].xyz = sv * length(txform[1]);
-				txform[2].xyz = sort_direction * length(txform[2]);
-
-			} break;
+		if (align_mode == TRANSFORM_ALIGN_DISABLED) {
+			// nothing
+		} else if (align_mode == TRANSFORM_ALIGN_Z_BILLBOARD) {
+			mat3 local = mat3(normalize(cross(align_up, sort_direction)), align_up, sort_direction);
+			local = local * mat3(txform);
+			txform[0].xyz = local[0];
+			txform[1].xyz = local[1];
+			txform[2].xyz = local[2];
+		} else if (align_mode == TRANSFORM_ALIGN_Y_TO_VELOCITY) {
+			vec3 v = velocity_flags.xyz;
+			float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0;
+			if (length(v) > 0.0) {
+				txform[1].xyz = normalize(v);
+			} else {
+				txform[1].xyz = normalize(txform[1].xyz);
+			}
+
+			txform[0].xyz = normalize(cross(txform[1].xyz, txform[2].xyz));
+			txform[2].xyz = vec3(0.0, 0.0, 1.0) * s;
+			txform[0].xyz *= s;
+			txform[1].xyz *= s;
+		} else if (align_mode == TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY) {
+			vec3 sv = velocity_flags.xyz - sort_direction * dot(sort_direction, velocity_flags.xyz); //screen velocity
+
+			if (length(sv) == 0.0) {
+				sv = align_up;
+			}
+
+			sv = normalize(sv);
+
+			txform[0].xyz = normalize(cross(sv, sort_direction)) * length(txform[0]);
+			txform[1].xyz = sv * length(txform[1]);
+			txform[2].xyz = sort_direction * length(txform[2]);
 		}
 		}
 
 
 		txform[3].xyz += velocity_flags.xyz * frame_remainder;
 		txform[3].xyz += velocity_flags.xyz * frame_remainder;
@@ -108,7 +102,10 @@ void main() {
 	}
 	}
 	txform = transpose(txform);
 	txform = transpose(txform);
 
 
-	instance_color_custom_data = uvec4(packHalf2x16(color.xy), packHalf2x16(color.zw), packHalf2x16(custom.xy), packHalf2x16(custom.zw));
+	instance_color_custom_data.x = packHalf2x16(color.xy);
+	instance_color_custom_data.y = packHalf2x16(color.zw);
+	instance_color_custom_data.z = packHalf2x16(custom.xy);
+	instance_color_custom_data.w = packHalf2x16(custom.zw);
 	out_xform_1 = txform[0];
 	out_xform_1 = txform[0];
 	out_xform_2 = txform[1];
 	out_xform_2 = txform[1];
 #ifdef MODE_3D
 #ifdef MODE_3D

+ 10 - 4
drivers/gles3/shaders/scene.glsl

@@ -366,7 +366,9 @@ void main() {
 #if defined(COLOR_USED)
 #if defined(COLOR_USED)
 	color_interp = color_attrib;
 	color_interp = color_attrib;
 #ifdef USE_INSTANCING
 #ifdef USE_INSTANCING
-	vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y));
+	vec4 instance_color;
+	instance_color.xy = unpackHalf2x16(instance_color_custom_data.x);
+	instance_color.zw = unpackHalf2x16(instance_color_custom_data.y);
 	color_interp *= instance_color;
 	color_interp *= instance_color;
 #endif
 #endif
 #endif
 #endif
@@ -403,7 +405,9 @@ void main() {
 #endif //USE_MULTIVIEW
 #endif //USE_MULTIVIEW
 
 
 #ifdef USE_INSTANCING
 #ifdef USE_INSTANCING
-	vec4 instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w));
+	vec4 instance_custom;
+	instance_custom.xy = unpackHalf2x16(instance_color_custom_data.z);
+	instance_custom.zw = unpackHalf2x16(instance_color_custom_data.w);
 #else
 #else
 	vec4 instance_custom = vec4(0.0);
 	vec4 instance_custom = vec4(0.0);
 #endif
 #endif
@@ -1749,7 +1753,8 @@ void main() {
 #endif //!MODE_UNSHADED
 #endif //!MODE_UNSHADED
 
 
 #ifndef FOG_DISABLED
 #ifndef FOG_DISABLED
-	fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba));
+	fog.xy = unpackHalf2x16(fog_rg);
+	fog.zw = unpackHalf2x16(fog_ba);
 
 
 #ifndef DISABLE_FOG
 #ifndef DISABLE_FOG
 	if (scene_data.fog_enabled) {
 	if (scene_data.fog_enabled) {
@@ -1966,7 +1971,8 @@ void main() {
 	vec3 additive_light_color = diffuse_light + specular_light;
 	vec3 additive_light_color = diffuse_light + specular_light;
 
 
 #ifndef FOG_DISABLED
 #ifndef FOG_DISABLED
-	fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba));
+	fog.xy = unpackHalf2x16(fog_rg);
+	fog.zw = unpackHalf2x16(fog_ba);
 
 
 #ifndef DISABLE_FOG
 #ifndef DISABLE_FOG
 	if (scene_data.fog_enabled) {
 	if (scene_data.fog_enabled) {

+ 20 - 13
drivers/gles3/shaders/stdlib_inc.glsl

@@ -1,5 +1,12 @@
 
 
-#ifdef USE_GLES_OVER_GL
+// Compatibility renames. These are exposed with the "godot_" prefix
+// to work around two distinct Adreno bugs:
+// 1. Some Adreno devices expose ES310 functions in ES300 shaders.
+//    Internally, we must use the "godot_" prefix, but user shaders
+//    will be mapped automatically.
+// 2. Adreno 3XX devices have poor implementations of the other packing
+//    functions, so we just use our own everywhere to keep it simple.
+
 // Floating point pack/unpack functions are part of the GLSL ES 300 specification used by web and mobile.
 // Floating point pack/unpack functions are part of the GLSL ES 300 specification used by web and mobile.
 uint float2half(uint f) {
 uint float2half(uint f) {
 	uint e = f & uint(0x7f800000);
 	uint e = f & uint(0x7f800000);
@@ -17,40 +24,34 @@ uint half2float(uint h) {
 	return ((h & uint(0x8000)) << uint(16)) | uint((h_e >> uint(10)) != uint(0)) * (((h_e + uint(0x1c000)) << uint(13)) | ((h & uint(0x03ff)) << uint(13)));
 	return ((h & uint(0x8000)) << uint(16)) | uint((h_e >> uint(10)) != uint(0)) * (((h_e + uint(0x1c000)) << uint(13)) | ((h & uint(0x03ff)) << uint(13)));
 }
 }
 
 
-uint packHalf2x16(vec2 v) {
+uint godot_packHalf2x16(vec2 v) {
 	return float2half(floatBitsToUint(v.x)) | float2half(floatBitsToUint(v.y)) << uint(16);
 	return float2half(floatBitsToUint(v.x)) | float2half(floatBitsToUint(v.y)) << uint(16);
 }
 }
 
 
-vec2 unpackHalf2x16(uint v) {
+vec2 godot_unpackHalf2x16(uint v) {
 	return vec2(uintBitsToFloat(half2float(v & uint(0xffff))),
 	return vec2(uintBitsToFloat(half2float(v & uint(0xffff))),
 			uintBitsToFloat(half2float(v >> uint(16))));
 			uintBitsToFloat(half2float(v >> uint(16))));
 }
 }
 
 
-uint packUnorm2x16(vec2 v) {
+uint godot_packUnorm2x16(vec2 v) {
 	uvec2 uv = uvec2(round(clamp(v, vec2(0.0), vec2(1.0)) * 65535.0));
 	uvec2 uv = uvec2(round(clamp(v, vec2(0.0), vec2(1.0)) * 65535.0));
 	return uv.x | uv.y << uint(16);
 	return uv.x | uv.y << uint(16);
 }
 }
 
 
-vec2 unpackUnorm2x16(uint p) {
+vec2 godot_unpackUnorm2x16(uint p) {
 	return vec2(float(p & uint(0xffff)), float(p >> uint(16))) * 0.000015259021; // 1.0 / 65535.0 optimization
 	return vec2(float(p & uint(0xffff)), float(p >> uint(16))) * 0.000015259021; // 1.0 / 65535.0 optimization
 }
 }
 
 
-uint packSnorm2x16(vec2 v) {
+uint godot_packSnorm2x16(vec2 v) {
 	uvec2 uv = uvec2(round(clamp(v, vec2(-1.0), vec2(1.0)) * 32767.0) + 32767.0);
 	uvec2 uv = uvec2(round(clamp(v, vec2(-1.0), vec2(1.0)) * 32767.0) + 32767.0);
 	return uv.x | uv.y << uint(16);
 	return uv.x | uv.y << uint(16);
 }
 }
 
 
-vec2 unpackSnorm2x16(uint p) {
+vec2 godot_unpackSnorm2x16(uint p) {
 	vec2 v = vec2(float(p & uint(0xffff)), float(p >> uint(16)));
 	vec2 v = vec2(float(p & uint(0xffff)), float(p >> uint(16)));
 	return clamp((v - 32767.0) * vec2(0.00003051851), vec2(-1.0), vec2(1.0));
 	return clamp((v - 32767.0) * vec2(0.00003051851), vec2(-1.0), vec2(1.0));
 }
 }
 
 
-#endif
-
-// Compatibility renames. These are exposed with the "godot_" prefix
-// to work around an Adreno bug which was exposing these ES310 functions
-// in ES300 shaders. Internally, we must use the "godot_" prefix, but user shaders
-// will be mapped automatically.
 uint godot_packUnorm4x8(vec4 v) {
 uint godot_packUnorm4x8(vec4 v) {
 	uvec4 uv = uvec4(round(clamp(v, vec4(0.0), vec4(1.0)) * 255.0));
 	uvec4 uv = uvec4(round(clamp(v, vec4(0.0), vec4(1.0)) * 255.0));
 	return uv.x | (uv.y << uint(8)) | (uv.z << uint(16)) | (uv.w << uint(24));
 	return uv.x | (uv.y << uint(8)) | (uv.z << uint(16)) | (uv.w << uint(24));
@@ -74,3 +75,9 @@ vec4 godot_unpackSnorm4x8(uint p) {
 #define unpackUnorm4x8 godot_unpackUnorm4x8
 #define unpackUnorm4x8 godot_unpackUnorm4x8
 #define packSnorm4x8 godot_packSnorm4x8
 #define packSnorm4x8 godot_packSnorm4x8
 #define unpackSnorm4x8 godot_unpackSnorm4x8
 #define unpackSnorm4x8 godot_unpackSnorm4x8
+#define packHalf2x16 godot_packHalf2x16
+#define unpackHalf2x16 godot_unpackHalf2x16
+#define packUnorm2x16 godot_packUnorm2x16
+#define unpackUnorm2x16 godot_unpackUnorm2x16
+#define packSnorm2x16 godot_packSnorm2x16
+#define unpackSnorm2x16 godot_unpackSnorm2x16

+ 8 - 0
drivers/gles3/storage/particles_storage.cpp

@@ -31,6 +31,8 @@
 #ifdef GLES3_ENABLED
 #ifdef GLES3_ENABLED
 
 
 #include "particles_storage.h"
 #include "particles_storage.h"
+
+#include "config.h"
 #include "material_storage.h"
 #include "material_storage.h"
 #include "mesh_storage.h"
 #include "mesh_storage.h"
 #include "texture_storage.h"
 #include "texture_storage.h"
@@ -120,6 +122,8 @@ void ParticlesStorage::particles_set_mode(RID p_particles, RS::ParticlesMode p_m
 }
 }
 
 
 void ParticlesStorage::particles_set_emitting(RID p_particles, bool p_emitting) {
 void ParticlesStorage::particles_set_emitting(RID p_particles, bool p_emitting) {
+	ERR_FAIL_COND_MSG(GLES3::Config::get_singleton()->adreno_3xx_compatibility, "Due to driver bugs, GPUParticles are not supported on Adreno 3XX devices. Please use CPUParticles instead.");
+
 	Particles *particles = particles_owner.get_or_null(p_particles);
 	Particles *particles = particles_owner.get_or_null(p_particles);
 	ERR_FAIL_NULL(particles);
 	ERR_FAIL_NULL(particles);
 
 
@@ -127,6 +131,10 @@ void ParticlesStorage::particles_set_emitting(RID p_particles, bool p_emitting)
 }
 }
 
 
 bool ParticlesStorage::particles_get_emitting(RID p_particles) {
 bool ParticlesStorage::particles_get_emitting(RID p_particles) {
+	if (GLES3::Config::get_singleton()->adreno_3xx_compatibility) {
+		return false;
+	}
+
 	ERR_FAIL_COND_V_MSG(RSG::threaded, false, "This function should never be used with threaded rendering, as it stalls the renderer.");
 	ERR_FAIL_COND_V_MSG(RSG::threaded, false, "This function should never be used with threaded rendering, as it stalls the renderer.");
 	Particles *particles = particles_owner.get_or_null(p_particles);
 	Particles *particles = particles_owner.get_or_null(p_particles);
 	ERR_FAIL_NULL_V(particles, false);
 	ERR_FAIL_NULL_V(particles, false);