Browse Source

Align Vertex Buffer to 4 Bytes

With octahedral compression, some vertex attributes were only 2 bytes in size,
which potentially caused performance regressions on iOS/Mac.

Now add padding to the normal/tangent buffer

With octahedral compression, the normal will always be oct32 encoded,
UNLESS a tangent exists and is also compressed;
in that case both will be oct16 encoded and packed into a single
vec4<GL_BYTE> attribute.
Omar El Sheikh 4 years ago
parent
commit
f0de7ec2b6

+ 21 - 26
drivers/gles2/rasterizer_storage_gles2.cpp

@@ -2099,13 +2099,8 @@ static PoolVector<uint8_t> _unpack_half_floats(const PoolVector<uint8_t> &array,
 			} break;
 			case VS::ARRAY_NORMAL: {
 				if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_format & VS::ARRAY_COMPRESS_NORMAL) {
-						src_size[i] = 2;
-						dst_size[i] = 2;
-					} else {
-						src_size[i] = 4;
-						dst_size[i] = 4;
-					}
+					src_size[i] = 4;
+					dst_size[i] = 4;
 				} else {
 					if (p_format & VS::ARRAY_COMPRESS_NORMAL) {
 						src_size[i] = 4;
@@ -2119,13 +2114,12 @@ static PoolVector<uint8_t> _unpack_half_floats(const PoolVector<uint8_t> &array,
 			} break;
 			case VS::ARRAY_TANGENT: {
 				if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_format & VS::ARRAY_COMPRESS_TANGENT) {
-						src_size[i] = 2;
-						dst_size[i] = 2;
-					} else {
-						src_size[i] = 4;
-						dst_size[i] = 4;
+					if (!(p_format & VS::ARRAY_COMPRESS_TANGENT)) {
+						src_size[VS::ARRAY_NORMAL] = 8;
+						dst_size[VS::ARRAY_NORMAL] = 8;
 					}
+					src_size[i] = 0;
+					dst_size[i] = 0;
 				} else {
 					if (p_format & VS::ARRAY_COMPRESS_TANGENT) {
 						src_size[i] = 4;
@@ -2309,15 +2303,15 @@ void RasterizerStorageGLES2::mesh_add_surface(RID p_mesh, uint32_t p_format, VS:
 			} break;
 			case VS::ARRAY_NORMAL: {
 				if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
+					// Always pack normal and tangent into vec4
+					// normal will be xy tangent will be zw
+					// normal will always be oct32 encoded
+					// UNLESS tangent exists and is also compressed
+					// then it will be oct16 encoded along with tangent
 					attribs[i].normalized = GL_TRUE;
-					attribs[i].size = 2;
-					if (p_format & VS::ARRAY_COMPRESS_NORMAL) {
-						attribs[i].type = GL_BYTE;
-						attributes_stride += 2;
-					} else {
-						attribs[i].type = GL_SHORT;
-						attributes_stride += 4;
-					}
+					attribs[i].size = 4;
+					attribs[i].type = GL_SHORT;
+					attributes_stride += 4;
 				} else {
 					attribs[i].size = 3;
 
@@ -2335,13 +2329,14 @@ void RasterizerStorageGLES2::mesh_add_surface(RID p_mesh, uint32_t p_format, VS:
 			} break;
 			case VS::ARRAY_TANGENT: {
 				if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					attribs[i].normalized = GL_TRUE;
-					attribs[i].size = 2;
+					attribs[i].enabled = false;
 					if (p_format & VS::ARRAY_COMPRESS_TANGENT) {
-						attribs[i].type = GL_BYTE;
-						attributes_stride += 2;
+						// normal and tangent will each be oct16 (2 bytes each)
+						// pack into single vec4<GL_BYTE> for memory bandwidth
+						// savings while keeping 4 byte alignment
+						attribs[VS::ARRAY_NORMAL].type = GL_BYTE;
 					} else {
-						attribs[i].type = GL_SHORT;
+						// normal and tangent will each be oct32 (4 bytes each)
 						attributes_stride += 4;
 					}
 				} else {

+ 5 - 5
drivers/gles2/shaders/scene.glsl

@@ -32,14 +32,14 @@ precision highp int;
 attribute highp vec4 vertex_attrib; // attrib:0
 /* clang-format on */
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-attribute vec2 normal_attrib; // attrib:1
+attribute vec4 normal_tangent_attrib; // attrib:1
 #else
 attribute vec3 normal_attrib; // attrib:1
 #endif
 
 #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP)
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-attribute vec2 tangent_attrib; // attrib:2
+// tangent is packed into the zw components of normal_tangent_attrib
 #else
 attribute vec4 tangent_attrib; // attrib:2
 #endif
@@ -359,15 +359,15 @@ void main() {
 #endif
 
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-	vec3 normal = oct_to_vec3(normal_attrib);
+	vec3 normal = oct_to_vec3(normal_tangent_attrib.xy);
 #else
 	vec3 normal = normal_attrib;
 #endif
 
 #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP)
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-	vec3 tangent = oct_to_vec3(vec2(tangent_attrib.x, abs(tangent_attrib.y) * 2.0 - 1.0));
-	float binormalf = sign(tangent_attrib.y);
+	vec3 tangent = oct_to_vec3(vec2(normal_tangent_attrib.z, abs(normal_tangent_attrib.w) * 2.0 - 1.0));
+	float binormalf = sign(normal_tangent_attrib.w);
 #else
 	vec3 tangent = tangent_attrib.xyz;
 	float binormalf = tangent_attrib.a;

+ 14 - 13
drivers/gles3/rasterizer_storage_gles3.cpp

@@ -3397,15 +3397,15 @@ void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh, uint32_t p_format, VS:
 			} break;
 			case VS::ARRAY_NORMAL: {
 				if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
+					// Always pack normal and tangent into vec4
+					// normal will be xy tangent will be zw
+					// normal will always be oct32 (4 byte) encoded
+					// UNLESS tangent exists and is also compressed
+					// then it will be oct16 encoded along with tangent
 					attribs[i].normalized = GL_TRUE;
-					attribs[i].size = 2;
-					if (p_format & VS::ARRAY_COMPRESS_NORMAL) {
-						attribs[i].type = GL_BYTE;
-						attributes_stride += 2;
-					} else {
-						attribs[i].type = GL_SHORT;
-						attributes_stride += 4;
-					}
+					attribs[i].size = 4;
+					attribs[i].type = GL_SHORT;
+					attributes_stride += 4;
 				} else {
 					attribs[i].size = 3;
 
@@ -3423,13 +3423,14 @@ void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh, uint32_t p_format, VS:
 			} break;
 			case VS::ARRAY_TANGENT: {
 				if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					attribs[i].normalized = GL_TRUE;
-					attribs[i].size = 2;
+					attribs[i].enabled = false;
 					if (p_format & VS::ARRAY_COMPRESS_TANGENT) {
-						attribs[i].type = GL_BYTE;
-						attributes_stride += 2;
+						// normal and tangent will each be oct16 (2 bytes each)
+						// pack into single vec4<GL_BYTE> for memory bandwidth
+						// savings while keeping 4 byte alignment
+						attribs[VS::ARRAY_NORMAL].type = GL_BYTE;
 					} else {
-						attribs[i].type = GL_SHORT;
+						// normal and tangent will each be oct32 (4 bytes each)
 						attributes_stride += 4;
 					}
 				} else {

+ 5 - 7
drivers/gles3/shaders/scene.glsl

@@ -26,13 +26,11 @@ ARRAY_INDEX=8,
 layout(location = 0) in highp vec4 vertex_attrib;
 /* clang-format on */
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-layout(location = 1) in vec2 normal_attrib;
-#else
-layout(location = 1) in vec3 normal_attrib;
+layout(location = 1) in vec4 normal_tangent_attrib;
 #endif
 #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY)
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-layout(location = 2) in vec2 tangent_attrib;
+// tangent is packed into the zw components of normal_tangent_attrib
 #else
 layout(location = 2) in vec4 tangent_attrib;
 #endif
@@ -340,15 +338,15 @@ void main() {
 #endif
 
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-	vec3 normal = oct_to_vec3(normal_attrib);
+	vec3 normal = oct_to_vec3(normal_tangent_attrib.xy);
 #else
 	vec3 normal = normal_attrib;
 #endif
 
 #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY)
 #ifdef ENABLE_OCTAHEDRAL_COMPRESSION
-	vec3 tangent = oct_to_vec3(vec2(tangent_attrib.x, abs(tangent_attrib.y) * 2.0 - 1.0));
-	float binormalf = sign(tangent_attrib.y);
+	vec3 tangent = oct_to_vec3(vec2(normal_tangent_attrib.z, abs(normal_tangent_attrib.w) * 2.0 - 1.0));
+	float binormalf = sign(normal_tangent_attrib.w);
 #else
 	vec3 tangent = tangent_attrib.xyz;
 	float binormalf = tangent_attrib.a;

+ 14 - 5
servers/visual_server.cpp

@@ -491,7 +491,7 @@ Error VisualServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint32_
 				// setting vertices means regenerating the AABB
 
 				if (p_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_format & ARRAY_COMPRESS_NORMAL) {
+					if ((p_format & ARRAY_COMPRESS_NORMAL) && (p_format & ARRAY_FORMAT_TANGENT) && (p_format & ARRAY_COMPRESS_TANGENT)) {
 						for (int i = 0; i < p_vertex_array_len; i++) {
 							Vector2 res = norm_to_oct(src[i]);
 							int8_t vector[2] = {
@@ -878,7 +878,10 @@ uint32_t VisualServer::mesh_surface_make_offsets_from_format(uint32_t p_format,
 			} break;
 			case VS::ARRAY_NORMAL: {
 				if (p_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_format & ARRAY_COMPRESS_NORMAL) {
+					// normal will always be oct32 (4 byte) encoded
+					// UNLESS tangent exists and is also compressed
+					// then it will be oct16 encoded along with tangent
+					if ((p_format & ARRAY_COMPRESS_NORMAL) && (p_format & ARRAY_FORMAT_TANGENT) && (p_format & ARRAY_COMPRESS_TANGENT)) {
 						elem_size = sizeof(uint8_t) * 2;
 					} else {
 						elem_size = sizeof(uint16_t) * 2;
@@ -1083,7 +1086,10 @@ void VisualServer::mesh_add_surface_from_arrays(RID p_mesh, PrimitiveType p_prim
 			} break;
 			case VS::ARRAY_NORMAL: {
 				if (p_compress_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_compress_format & ARRAY_COMPRESS_NORMAL) {
+					// normal will always be oct32 (4 byte) encoded
+					// UNLESS tangent exists and is also compressed
+					// then it will be oct16 encoded along with tangent
+					if ((p_compress_format & ARRAY_COMPRESS_NORMAL) && (format & ARRAY_FORMAT_TANGENT) && (p_compress_format & ARRAY_COMPRESS_TANGENT)) {
 						elem_size = sizeof(uint8_t) * 2;
 					} else {
 						elem_size = sizeof(uint16_t) * 2;
@@ -1286,7 +1292,10 @@ Array VisualServer::_get_array_from_surface(uint32_t p_format, PoolVector<uint8_
 			} break;
 			case VS::ARRAY_NORMAL: {
 				if (p_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_format & ARRAY_COMPRESS_NORMAL) {
+					// normal will always be oct32 (4 byte) encoded
+					// UNLESS tangent exists and is also compressed
+					// then it will be oct16 encoded along with tangent
+					if ((p_format & ARRAY_COMPRESS_NORMAL) && (p_format & ARRAY_FORMAT_TANGENT) && (p_format & ARRAY_COMPRESS_TANGENT)) {
 						elem_size = sizeof(uint8_t) * 2;
 					} else {
 						elem_size = sizeof(uint16_t) * 2;
@@ -1443,7 +1452,7 @@ Array VisualServer::_get_array_from_surface(uint32_t p_format, PoolVector<uint8_
 				arr.resize(p_vertex_len);
 
 				if (p_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) {
-					if (p_format & ARRAY_COMPRESS_NORMAL) {
+					if (p_format & ARRAY_COMPRESS_NORMAL && (p_format & ARRAY_FORMAT_TANGENT) && (p_format & ARRAY_COMPRESS_TANGENT)) {
 						PoolVector<Vector3>::Write w = arr.write();
 
 						for (int j = 0; j < p_vertex_len; j++) {