Browse Source

Making the VAOs a bit less BW hungry. Normals, tangents and texture coordinates are fp16 now

Panagiotis Christopoulos Charitos 12 years ago
parent
commit
554b8465db

+ 16 - 19
bench/Main.cpp

@@ -37,7 +37,7 @@ struct LogFile
 
 	void handler(const Logger::Info& info)
 	{
-		const char* x;
+		const char* x = nullptr;
 		switch(info.type)
 		{
 		case Logger::LMT_NORMAL:
@@ -164,7 +164,7 @@ void initScene()
 
 	PerspectiveCamera* cam = nullptr;
 	scene.newSceneNode(
-		cam, "main_camera", nullptr, (U32)Movable::MF_NONE);
+		cam, "main_camera");
 
 	const F32 ang = 45.0;
 	cam->setAll(
@@ -193,9 +193,9 @@ void initScene()
 		Vec3 lightPos = vaseLightPos[i];
 
 		PointLight* point;
-		scene.newSceneNode(point, ("vase_plight" + std::to_string(i)).c_str(),
-			nullptr, Movable::MF_NONE, 
-			(i != 100) ? "textures/lens_flare/flares0.ankitex" : nullptr);
+		scene.newSceneNode(point, ("vase_plight" + std::to_string(i)).c_str());
+
+		point->loadLensFlare("textures/lens_flare/flares0.ankitex");
 		point->setRadius(2.0);
 		point->setLocalOrigin(lightPos);
 		point->setDiffuseColor(Vec4(3.0, 0.2, 0.0, 0.0));
@@ -211,36 +211,33 @@ void initScene()
 		scene.getEventManager().newEvent(event, 0.0, 0.8, point, eventData);
 		event->enableBits(Event::EF_REANIMATE);
 
-		MovableEventData moveData;
+		MoveEventData moveData;
 		moveData.posMin = Vec3(-0.5, 0.0, -0.5);
 		moveData.posMax = Vec3(0.5, 0.0, 0.5);
-		MovableEvent* mevent;
+		MoveEvent* mevent;
 		scene.getEventManager().newEvent(mevent, 0.0, 2.0, point, moveData);
 		mevent->enableBits(Event::EF_REANIMATE);
 
 		ParticleEmitter* pe;
 		scene.newSceneNode(pe,
-			("pe" + std::to_string(i)).c_str(), nullptr,
-			Movable::MF_NONE, "particles/smoke.ankipart");
+			("pe" + std::to_string(i)).c_str(), "particles/smoke.ankipart");
 		pe->setLocalOrigin(lightPos);
 
 		scene.newSceneNode(pe,
-			("pef" + std::to_string(i)).c_str(), nullptr,
-			Movable::MF_NONE, "particles/fire.ankipart");
+			("pef" + std::to_string(i)).c_str(), "particles/fire.ankipart");
 		pe->setLocalOrigin(lightPos);
 	}
 #endif
 
 	// horse
 	ModelNode* horse;
-	scene.newSceneNode(horse, "horse", nullptr, 
-		Movable::MF_NONE, "models/horse/horse.ankimdl");
+	scene.newSceneNode(horse, "horse", "models/horse/horse.ankimdl");
 	horse->setLocalTransform(Transform(Vec3(-2, 0, 0), Mat3::getIdentity(),
 		0.7));
 
 	// Light
 	SpotLight* spot;
-	scene.newSceneNode(spot, "spot0", nullptr, Movable::MF_NONE);
+	scene.newSceneNode(spot, "spot0");
 	spot->setOuterAngle(toRad(45.0));
 	spot->setInnerAngle(toRad(15.0));
 	spot->setLocalTransform(Transform(Vec3(8.27936, 5.86285, 1.85526),
@@ -254,19 +251,19 @@ void initScene()
 	scene.load("maps/sponza/master.ankiscene");
 
 	PointLight* pl;
-	scene.newSceneNode(pl, "pl0", nullptr, Movable::MF_NONE);
+	scene.newSceneNode(pl, "pl0");
 	pl->setRadius(12.5);
 	pl->setDiffuseColor(Vec4(0.5, 0.3, 0.2, 1.0));
 	pl->setSpecularColor(Vec4(0.1, 0.0, 0.0, 1.0));
 	pl->setLocalOrigin(Vec3(10, 2.0, -0.8));
 
-	scene.newSceneNode(pl, "pl1", nullptr, Movable::MF_NONE);
+	scene.newSceneNode(pl, "pl1");
 	pl->setRadius(12.5);
 	pl->setDiffuseColor(Vec4(0.5, 0.3, 0.2, 1.0));
 	pl->setSpecularColor(Vec4(0.1, 0.0, 0.0, 1.0));
 	pl->setLocalOrigin(Vec3(0, 2.0, -0.8));
 
-	scene.newSceneNode(pl, "pl2", nullptr, Movable::MF_NONE);
+	scene.newSceneNode(pl, "pl2");
 	pl->setRadius(12.5);
 	pl->setDiffuseColor(Vec4(0.5, 0.3, 0.2, 1.0));
 	pl->setSpecularColor(Vec4(0.1, 0.0, 0.0, 1.0));
@@ -303,12 +300,12 @@ static Bool mainLoopExtra()
 {
 	const F32 dist = 0.2;
 	const F32 ang = toRad(3.0);
-	const F32 scale = 0.01;
 	const F32 mouseSensivity = 9.0;
 
 	Input& in = InputSingleton::get();
 
-	Movable* mover = SceneGraphSingleton::get().getActiveCamera().getMovable();
+	MoveComponent* mover = 
+		SceneGraphSingleton::get().getActiveCamera().getMoveComponent();
 
 	if(in.getKey(KC_UP)) mover->rotateLocalX(ang);
 	if(in.getKey(KC_DOWN)) mover->rotateLocalX(-ang);

+ 1 - 1
include/anki/Event.h

@@ -5,7 +5,7 @@
 
 #include "anki/event/SceneAmbientColorEvent.h"
 #include "anki/event/LightEvent.h"
-#include "anki/event/MovableEvent.h"
+#include "anki/event/MoveEvent.h"
 #include "anki/event/AnimationEvent.h"
 
 #endif

+ 3 - 0
include/anki/math/Vec2.h

@@ -314,6 +314,9 @@ TVec2<T> operator/(const T f, const TVec2<T>& v2)
 typedef TVec2<F32> Vec2;
 static_assert(sizeof(Vec2) == sizeof(F32) * 2, "Incorrect size");
 
+/// Half float 2D vector
+typedef TVec2<F16> HVec2;
+
 /// 32bit signed integer 2D vector 
 typedef TVec2<I32> IVec2;
 

+ 3 - 0
include/anki/math/Vec3.h

@@ -479,6 +479,9 @@ TVec3<T> operator/(const T f, const TVec3<T>& v)
 typedef TVec3<F32> Vec3;
 static_assert(sizeof(Vec3) == sizeof(F32) * 3, "Incorrect size");
 
+/// Half float 3D vector
+typedef TVec3<F16> HVec3;
+
 /// 32bit signed integer 3D vector
 typedef TVec3<I32> IVec3;
 

+ 3 - 0
include/anki/math/Vec4.h

@@ -508,6 +508,9 @@ void TVec4<F32>::normalize();
 typedef TVec4<F32> Vec4;
 static_assert(sizeof(Vec4) == sizeof(F32) * 4, "Incorrect size");
 
+/// Half float 4D vector
+typedef TVec4<F16> HVec4;
+
 /// 32bit signed integer 4D vector
 typedef TVec4<I32> IVec4;
 

+ 1 - 1
include/anki/renderer/Hdr.h

@@ -47,7 +47,7 @@ public:
 		blurringIterationsCount = x;
 	}
 
-	const Texture& getFai() const
+	Texture& getFai()
 	{
 		return vblurFai;
 	}

+ 8 - 0
include/anki/resource/MaterialShaderProgramCreator.h

@@ -57,6 +57,12 @@ public:
 		return tessellation;
 	}
 
+	/// True if the fragment shader needs vInstanceId to index an argument
+	Bool usesInstanceIdInFragmentShader() const
+	{
+		return instanceIdInFragmentShader;
+	}
+
 private:
 	/// The lines of the shader program source
 	StringList srcLines;
@@ -71,6 +77,8 @@ private:
 
 	Bool tessellation = false;
 
+	Bool instanceIdInFragmentShader = false;
+
 	/// Parse what is within the
 	/// @code <shaderProgram></shaderProgram> @endcode
 	void parseShaderProgramTag(const XmlElement& el);

+ 29 - 56
include/anki/resource/MeshLoader.h

@@ -53,10 +53,9 @@ public:
 		/// shader
 		static const U32 MAX_BONES_PER_VERT = 4;
 
-		/// @todo change the vals to U32 when change drivers
-		F32 bonesNum;
-		Array<F32, MAX_BONES_PER_VERT> boneIds;
-		Array<F32, MAX_BONES_PER_VERT> weights;
+		U16 bonesNum;
+		Array<U16, MAX_BONES_PER_VERT> boneIds;
+		Array<F16, MAX_BONES_PER_VERT> weights;
 	};
 
 	/// Triangle
@@ -80,22 +79,22 @@ public:
 	/// @{
 	const Vector<Vec3>& getPositions() const
 	{
-		return vertCoords;
+		return positions;
 	}
 
-	const Vector<Vec3>& getNormals() const
+	const Vector<HVec3>& getNormals() const
 	{
-		return vertNormals;
+		return normalsF16;
 	}
 
-	const Vector<Vec4>& getTangents() const
+	const Vector<HVec4>& getTangents() const
 	{
-		return vertTangents;
+		return tangentsF16;
 	}
 
-	const Vector<Vec2>& getTextureCoordinates(const U32 channel) const
+	const Vector<HVec2>& getTextureCoordinates(const U32 channel) const
 	{
-		return texCoords;
+		return texCoordsF16;
 	}
 	U getTextureChannelsCount() const
 	{
@@ -104,7 +103,7 @@ public:
 
 	const Vector<VertexWeight>& getWeights() const
 	{
-		return vertWeights;
+		return weights;
 	}
 
 	const Vector<U16>& getIndices() const
@@ -113,46 +112,8 @@ public:
 	}
 	/// @}
 
-	/// @name BucketMesh methods
-	/// @{
-	void appendPositions(const Vector<Vec3>& positions)
-	{
-		vertCoords.insert(
-			vertCoords.end(), positions.begin(), positions.end());
-	}
-
-	void appendNormals(const Vector<Vec3>& normals)
-	{
-		vertNormals.insert(
-			vertNormals.end(), normals.begin(), normals.end());
-	}
-
-	void appendTangents(const Vector<Vec4>& tangents)
-	{
-		vertTangents.insert(
-			vertTangents.end(), tangents.begin(), tangents.end());
-	}
-
-	void appendTextureCoordinates(const Vector<Vec2>& coords, U32 channel)
-	{
-		ANKI_ASSERT(channel == 0 && "Currently only one channel is supported");
-		texCoords.insert(texCoords.end(), coords.begin(), coords.end());
-	}
-
-	void appendWeights(const Vector<VertexWeight>& weights)
-	{
-		vertWeights.insert(vertWeights.end(), weights.begin(), weights.end());
-	}
-
-	/// This will adjust the indices bias
-	void appendIndices(const Vector<U16>& indices, U16 bias)
-	{
-		for(U16 index : indices)
-		{
-			vertIndices.push_back(bias + index);
-		}
-	}
-	/// @}
+	/// Append data from another mesh loader. BucketMesh method
+	void append(const MeshLoader& other);
 
 	/// Load the mesh data from a binary file
 	/// @exception Exception
@@ -161,13 +122,22 @@ public:
 private:
 	/// @name Data
 	/// @{
-	Vector<Vec3> vertCoords; ///< Loaded from file
-	Vector<Vec3> vertNormals; ///< Generated
-	Vector<Vec4> vertTangents; ///< Generated
+	Vector<Vec3> positions; ///< Loaded from file
+
+	Vector<Vec3> normals; ///< Generated
+	Vector<HVec3> normalsF16;
+
+	Vector<Vec4> tangents; ///< Generated
+	Vector<HVec4> tangentsF16;
+
 	/// Optional. One for every vert so we can use vertex arrays & VBOs
 	Vector<Vec2> texCoords;
-	Vector<VertexWeight> vertWeights; ///< Optional
+	Vector<HVec2> texCoordsF16;
+
+	Vector<VertexWeight> weights; ///< Optional
+
 	Vector<Triangle> tris; ///< Required
+
 	/// Generated. Used for vertex arrays & VBOs
 	Vector<U16> vertIndices;
 	/// @}
@@ -189,6 +159,9 @@ private:
 
 	/// It iterates all verts and fixes the normals on seams
 	void fixNormals();
+
+	/// Compress some buffers for increased BW performance
+	void compressBuffers();
 };
 
 } // end namespace anki

+ 1 - 1
shaders/BsCommonFrag.glsl

@@ -8,7 +8,7 @@
 #define vTexCoord_DEFINED
 in highp vec2 vTexCoord;
 #define vInstanceId_DEFINED
-flat in highp uint vInstanceId;
+flat in highp int vInstanceId;
 
 #if defined(PASS_COLOR)
 layout(location = 0) out vec4 fColor;

+ 2 - 2
shaders/BsCommonVert.glsl

@@ -8,7 +8,7 @@ layout(location = 3) in vec2 texCoord;
 /// @name Varyings
 /// @{
 out vec2 vTexCoord;
-flat out uint vInstanceId;
+flat out int vInstanceId;
 /// @}
 
 //==============================================================================
@@ -38,5 +38,5 @@ void particle(in mat4 mvp)
 {
 	vTexCoord = texCoord;
 	gl_Position = mvp * vec4(position, 1);
-	vInstanceId = uint(gl_InstanceID);
+	vInstanceId = gl_InstanceID;
 }

+ 1 - 10
shaders/GaussianBlurGeneric.glsl

@@ -9,16 +9,7 @@
 /// This is an optimized version. See the clean one at r213
 
 #pragma anki start vertexShader
-
-layout(location = 0) in vec2 position;
-
-out vec2 vTexCoords;
-
-void main()
-{
-	vTexCoords = position;
-	gl_Position = vec4(position * 2.0 - 1.0, 0.0, 1.0);
-}
+#pragma anki include "shaders/SimpleVert.glsl"
 
 #pragma anki start fragmentShader
 

+ 3 - 3
shaders/MsCommonFrag.glsl

@@ -11,7 +11,7 @@
 /// Input
 #if TESSELLATION
 
-in highp vec2 teTexCoords;
+in mediump vec2 teTexCoords;
 #if PASS_COLOR
 in mediump vec3 teNormal;
 in mediump vec4 teTangent;
@@ -20,11 +20,11 @@ in mediump vec3 teVertPosViewSpace;
 
 #else // no TESSELLATION
 
-#if INSTANCING
+#if INSTANCE_ID_FRAGMENT_SHADER
 flat in mediump uint vInstanceId;
 #endif
 
-in highp vec2 vTexCoords;
+in mediump vec2 vTexCoords;
 #if PASS_COLOR
 in mediump vec3 vNormal;
 in mediump vec4 vTangent;

+ 3 - 3
shaders/MsCommonVert.glsl

@@ -3,7 +3,7 @@
 /// @name Attributes
 /// @{
 layout(location = 0) in highp vec3 position;
-layout(location = 3) in highp vec2 texCoord;
+layout(location = 3) in mediump vec2 texCoord;
 
 #if PASS_COLOR || TESSELLATION
 layout(location = 1) in mediump vec3 normal;
@@ -16,9 +16,9 @@ layout(location = 2) in mediump vec4 tangent;
 
 /// @name Varyings
 /// @{
-out highp vec2 vTexCoords;
+out mediump vec2 vTexCoords;
 
-#if INSTANCING
+#if INSTANCE_ID_FRAGMENT_SHADER
 flat out uint vInstanceId;
 #endif
 

+ 0 - 2
shaders/PpsLfPseudoPass.glsl

@@ -42,8 +42,6 @@ void main()
 {
 	vec2 texcoord = vec2(1.0) - vTexCoords;
 
-	vec2 imgSize = vec2(textureSize(tex, 0));
-
 	vec2 ghostVec = (vec2(0.5) - texcoord) * GHOST_DISPERSAL;
 
 	const vec2 texelSize = 1.0 / vec2(TEX_DIMENSIONS);

+ 1 - 1
shaders/SimpleVert.glsl

@@ -1,4 +1,4 @@
-/// Simple vertex shader for IS and PPS stages. It is used for rendering a quad 
+/// Simple vertex shader for quad rendering. It is used for rendering a quad 
 /// in the screen. Notice that it does not use ftransform(). We dont need it 
 /// because we can get the Normalized Display Coordinates ([-1,1]) simply by 
 /// looking in the vertex position. The vertex positions of the quad are from 

+ 5 - 4
src/gl/ShaderProgram.cpp

@@ -567,10 +567,11 @@ GLuint ShaderProgram::createAndCompileShader(const char* sourceCode,
 			ANKI_ASSERT(0);
 		}
 
-		File file(
-			(AppSingleton::get().getCachePath() + "/" 
-			+ std::to_string(shader) + ext).c_str(), 
-			File::OF_WRITE);
+		std::stringstream fname;
+		fname << AppSingleton::get().getCachePath() << "/" 
+			<< std::setfill('0') << std::setw(4) << (U32)shader << ext;
+
+		File file(fname.str().c_str(), File::OF_WRITE);
 
 		file.writeText("%s", fullSrc.c_str());
 	}

+ 5 - 0
src/renderer/Drawer.cpp

@@ -329,7 +329,9 @@ void RenderableDrawer::render(SceneNode& frsn, RenderingStage stage,
 
 	gl.enable(GL_BLEND, blending);
 
+#if ANKI_GL == ANKI_GL_DESKTOP
 	gl.setPolygonMode(mtl.getWireframe() ? GL_LINE : GL_FILL);
+#endif
 
 	// Calculate the LOD
 	Vec3 camPos = fr.getFrustumOrigin();
@@ -389,12 +391,14 @@ void RenderableDrawer::render(SceneNode& frsn, RenderingStage stage,
 	// Draw call
 	Drawcall dc;
 
+#if ANKI_GL == ANKI_GL_DESKTOP
 	if(mtl.getTessellation())
 	{
 		glPatchParameteri(GL_PATCH_VERTICES, 3);
 		dc.primitiveType = GL_PATCHES;
 	}
 	else
+#endif
 	{
 		dc.primitiveType = GL_TRIANGLES;
 	}
@@ -406,6 +410,7 @@ void RenderableDrawer::render(SceneNode& frsn, RenderingStage stage,
 	dc.primCount = primCount;
 
 	dc.enque();
+
 	ANKI_COUNTER_INC(C_RENDERER_DRAWCALLS_COUNT, (U64)1);
 	ANKI_COUNTER_INC(C_RENDERER_VERTICES_COUNT, 
 		countVerts(indicesCountArray, (I)primCount));

+ 2 - 0
src/resource/Material.cpp

@@ -286,6 +286,8 @@ void Material::parseMaterialTag(const XmlElement& materialEl)
 			std::stringstream src;
 
 			src << "#define INSTANCING " << (U)mspc.usesInstancing() << "\n";
+			src << "#define INSTANCE_ID_FRAGMENT_SHADER " 
+				<< (U)mspc.usesInstanceIdInFragmentShader() << "\n";
 			src << "#define TESSELLATION " << (U)tessellation << "\n";
 
 			src << "#define LOD " << level << "\n";

+ 1 - 0
src/resource/MaterialShaderProgramCreator.cpp

@@ -374,6 +374,7 @@ void MaterialShaderProgramCreator::parseOperationTag(
 				{
 					argsList.push_back(std::string(argEl.getText()) 
 						+ "[vInstanceId]");
+					instanceIdInFragmentShader = true;
 				}
 				else
 				{

+ 33 - 43
src/resource/Mesh.cpp

@@ -52,12 +52,14 @@ void Mesh::load(const char* filename)
 //==============================================================================
 U32 Mesh::calcVertexSize() const
 {
-	U32 a = sizeof(Vec3) + sizeof(Vec3) + sizeof(Vec4) 
-		+ meshProtected.texChannelsCount * sizeof(Vec2);
+	U32 a = sizeof(Vec3) + sizeof(HVec3) + sizeof(HVec4) 
+		+ meshProtected.texChannelsCount * sizeof(HVec2);
 	if(meshProtected.weights)
 	{
 		a += sizeof(MeshLoader::VertexWeight);
 	}
+
+	alignRoundUp(sizeof(F32), a);
 	return a;
 }
 
@@ -75,24 +77,25 @@ void Mesh::createVbos(const MeshLoader& loader)
 	// Create a temp buffer and populate it
 	Vector<U8> buff(vbosize, 0);
 
-	U8* ptr = &buff[0];
+	U8* ptra = &buff[0];
 	for(U i = 0; i < meshProtected.vertsCount; i++)
 	{
+		U8* ptr = ptra;
 		ANKI_ASSERT(ptr + vertexsize <= &buff[0] + vbosize);
 
 		memcpy(ptr, &loader.getPositions()[i], sizeof(Vec3));
 		ptr += sizeof(Vec3);
 
-		memcpy(ptr, &loader.getNormals()[i], sizeof(Vec3));
-		ptr += sizeof(Vec3);
+		memcpy(ptr, &loader.getNormals()[i], sizeof(HVec3));
+		ptr += sizeof(HVec3);
 
-		memcpy(ptr, &loader.getTangents()[i], sizeof(Vec4));
-		ptr += sizeof(Vec4);
+		memcpy(ptr, &loader.getTangents()[i], sizeof(HVec4));
+		ptr += sizeof(HVec4);
 
 		for(U j = 0; j < meshProtected.texChannelsCount; j++)
 		{
-			memcpy(ptr, &loader.getTextureCoordinates(j)[i], sizeof(Vec2));
-			ptr += sizeof(Vec2);
+			memcpy(ptr, &loader.getTextureCoordinates(j)[i], sizeof(HVec2));
+			ptr += sizeof(HVec2);
 		}
 
 		if(meshProtected.weights)
@@ -101,6 +104,8 @@ void Mesh::createVbos(const MeshLoader& loader)
 				sizeof(MeshLoader::VertexWeight));
 			ptr += sizeof(MeshLoader::VertexWeight);
 		}
+
+		ptra += vertexsize;
 	}
 
 	// Create VBO
@@ -141,22 +146,22 @@ void Mesh::getVboInfo(const VertexAttribute attrib, const Vbo*& v, U32& size,
 	case VA_NORMAL:
 		v = &vbo;
 		size = 3;
-		type = GL_FLOAT;
+		type = GL_HALF_FLOAT;
 		offset = sizeof(Vec3);
 		break;
 	case VA_TANGENT:
 		v = &vbo;
 		size = 4;
-		type = GL_FLOAT;
-		offset = sizeof(Vec3) * 2;
+		type = GL_HALF_FLOAT;
+		offset = sizeof(Vec3) + sizeof(HVec3);
 		break;
 	case VA_TEXTURE_COORD:
 		if(meshProtected.texChannelsCount > 0)
 		{
 			v = &vbo;
 			size = 2;
-			type = GL_FLOAT;
-			offset = sizeof(Vec3) * 2 + sizeof(Vec4);
+			type = GL_HALF_FLOAT;
+			offset = sizeof(Vec3) + sizeof(HVec3) + sizeof(HVec4);
 		}
 		break;
 	case VA_TEXTURE_COORD_1:
@@ -164,8 +169,9 @@ void Mesh::getVboInfo(const VertexAttribute attrib, const Vbo*& v, U32& size,
 		{
 			v = &vbo;
 			size = 2;
-			type = GL_FLOAT;
-			offset = sizeof(Vec3) * 2 + sizeof(Vec4) + sizeof(Vec2);
+			type = GL_HALF_FLOAT;
+			offset = sizeof(Vec3) + sizeof(HVec3) + sizeof(HVec4) 
+				+ sizeof(HVec2);
 		}
 		break;
 	case VA_BONE_COUNT:
@@ -173,9 +179,9 @@ void Mesh::getVboInfo(const VertexAttribute attrib, const Vbo*& v, U32& size,
 		{
 			v = &vbo;
 			size = 1;
-			type = GL_UNSIGNED_INT;
-			offset = sizeof(Vec3) * 2 + sizeof(Vec4) 
-				+ meshProtected.texChannelsCount * sizeof(Vec2);
+			type = GL_UNSIGNED_SHORT;
+			offset = sizeof(Vec3) + sizeof(HVec3) + sizeof(HVec4) 
+				+ meshProtected.texChannelsCount * sizeof(HVec2);
 		}
 		break;
 	case VA_BONE_IDS:
@@ -183,9 +189,9 @@ void Mesh::getVboInfo(const VertexAttribute attrib, const Vbo*& v, U32& size,
 		{
 			v = &vbo;
 			size = 4;
-			type = GL_UNSIGNED_INT;
-			offset = sizeof(Vec3) * 2 + sizeof(Vec4) 
-				+ meshProtected.texChannelsCount * sizeof(Vec2) + sizeof(U32);
+			type = GL_UNSIGNED_SHORT;
+			offset = sizeof(Vec3) + sizeof(HVec3) + sizeof(HVec4) 
+				+ meshProtected.texChannelsCount * sizeof(HVec2) + sizeof(U16);
 		}
 		break;
 	case VA_BONE_WEIGHTS:
@@ -193,10 +199,10 @@ void Mesh::getVboInfo(const VertexAttribute attrib, const Vbo*& v, U32& size,
 		{
 			v = &vbo;
 			size = 4;
-			type = GL_FLOAT;
-			offset = sizeof(Vec3) * 2 + sizeof(Vec4) 
-				+ meshProtected.texChannelsCount * sizeof(Vec2) + sizeof(U32) 
-				+ sizeof(U32) * 4;
+			type = GL_HALF_FLOAT;
+			offset = sizeof(Vec3) + sizeof(HVec3) + sizeof(HVec4) 
+				+ meshProtected.texChannelsCount * sizeof(HVec2) + sizeof(U16) 
+				+ sizeof(U16) * 4;
 		}
 	case VA_INDICES:
 		v = &indicesVbo;
@@ -264,23 +270,7 @@ void BucketMesh::load(const char* filename)
 				}
 
 				// Append
-				fullLoader.appendPositions(subLoader.getPositions());
-				fullLoader.appendNormals(subLoader.getNormals());
-				fullLoader.appendTangents(subLoader.getTangents());
-
-				for(U j = 0; j < meshProtected.texChannelsCount; j++)
-				{
-					fullLoader.appendTextureCoordinates(
-						subLoader.getTextureCoordinates(j), j);
-				}
-
-				if(meshProtected.weights)
-				{
-					fullLoader.appendWeights(subLoader.getWeights());
-				}
-
-				fullLoader.appendIndices(loader->getIndices(), 
-					meshProtected.vertsCount);
+				fullLoader.append(subLoader);
 			}
 			else
 			{

+ 110 - 35
src/resource/MeshLoader.cpp

@@ -60,10 +60,10 @@ void MeshLoader::load(const char* filename)
 
 		// Verts num
 		U vertsNum = file.readU32();
-		vertCoords.resize(vertsNum);
+		positions.resize(vertsNum);
 
 		// Vert coords
-		for(Vec3& vertCoord : vertCoords)
+		for(Vec3& vertCoord : positions)
 		{
 			for(U j = 0; j < 3; j++)
 			{
@@ -83,7 +83,7 @@ void MeshLoader::load(const char* filename)
 				tri.vertIds[j] = file.readU32();
 
 				// a sanity check
-				if(tri.vertIds[j] >= vertCoords.size())
+				if(tri.vertIds[j] >= positions.size())
 				{
 					throw ANKI_EXCEPTION("Vert index out of bounds");
 				}
@@ -104,11 +104,11 @@ void MeshLoader::load(const char* filename)
 		}
 
 		// Vert weights num
-		U vertWeightsNum = file.readU32();
-		vertWeights.resize(vertWeightsNum);
+		U weightsNum = file.readU32();
+		weights.resize(weightsNum);
 
 		// Vert weights
-		for(VertexWeight& vw : vertWeights)
+		for(VertexWeight& vw : weights)
 		{
 			// get the bone connections num
 			U32 boneConnections = file.readU32();
@@ -154,17 +154,17 @@ void MeshLoader::load(const char* filename)
 void MeshLoader::doPostLoad()
 {
 	// Sanity checks
-	if(vertCoords.size() < 1 || tris.size() < 1)
+	if(positions.size() < 1 || tris.size() < 1)
 	{
 		throw ANKI_EXCEPTION("Vert coords and tris must be filled");
 	}
-	if(texCoords.size() != 0 && texCoords.size() != vertCoords.size())
+	if(texCoords.size() != 0 && texCoords.size() != positions.size())
 	{
 		throw ANKI_EXCEPTION("Tex coords num must be "
 			"zero or equal to the vertex "
 			"coords num");
 	}
-	if(vertWeights.size() != 0 && vertWeights.size() != vertCoords.size())
+	if(weights.size() != 0 && weights.size() != positions.size())
 	{
 		throw ANKI_EXCEPTION("Vert weights num must be zero or equal to the "
 			"vertex coords num");
@@ -177,6 +177,7 @@ void MeshLoader::doPostLoad()
 		createVertTangents();
 	}
 	createVertIndeces();
+	compressBuffers();
 }
 
 //==============================================================================
@@ -199,9 +200,9 @@ void MeshLoader::createFaceNormals()
 {
 	for(Triangle& tri : tris)
 	{
-		const Vec3& v0 = vertCoords[tri.vertIds[0]];
-		const Vec3& v1 = vertCoords[tri.vertIds[1]];
-		const Vec3& v2 = vertCoords[tri.vertIds[2]];
+		const Vec3& v0 = positions[tri.vertIds[0]];
+		const Vec3& v1 = positions[tri.vertIds[1]];
+		const Vec3& v2 = positions[tri.vertIds[2]];
 
 		tri.normal = (v1 - v0).cross(v2 - v0);
 
@@ -219,21 +220,21 @@ void MeshLoader::createFaceNormals()
 //==============================================================================
 void MeshLoader::createVertNormals()
 {
-	vertNormals.resize(vertCoords.size());
+	normals.resize(positions.size());
 
-	for(Vec3& vertNormal : vertNormals)
+	for(Vec3& vertNormal : normals)
 	{
 		vertNormal = Vec3(0.0, 0.0, 0.0);
 	}
 
 	for(Triangle& tri : tris)
 	{
-		vertNormals[tri.vertIds[0]] += tri.normal;
-		vertNormals[tri.vertIds[1]] += tri.normal;
-		vertNormals[tri.vertIds[2]] += tri.normal;
+		normals[tri.vertIds[0]] += tri.normal;
+		normals[tri.vertIds[1]] += tri.normal;
+		normals[tri.vertIds[2]] += tri.normal;
 	}
 
-	for(Vec3& vertNormal : vertNormals)
+	for(Vec3& vertNormal : normals)
 	{
 		vertNormal.normalize();
 	}
@@ -242,8 +243,8 @@ void MeshLoader::createVertNormals()
 //==============================================================================
 void MeshLoader::createVertTangents()
 {
-	vertTangents.resize(vertCoords.size(), Vec4(0.0)); // alloc
-	Vector<Vec3> bitagents(vertCoords.size(), Vec3(0.0));
+	tangents.resize(positions.size(), Vec4(0.0)); // alloc
+	Vector<Vec3> bitagents(positions.size(), Vec3(0.0));
 
 	for(U32 i = 0; i < tris.size(); i++)
 	{
@@ -251,9 +252,9 @@ void MeshLoader::createVertTangents()
 		const I i0 = tri.vertIds[0];
 		const I i1 = tri.vertIds[1];
 		const I i2 = tri.vertIds[2];
-		const Vec3& v0 = vertCoords[i0];
-		const Vec3& v1 = vertCoords[i1];
-		const Vec3& v2 = vertCoords[i2];
+		const Vec3& v0 = positions[i0];
+		const Vec3& v1 = positions[i1];
+		const Vec3& v2 = positions[i2];
 		Vec3 edge01 = v1 - v0;
 		Vec3 edge02 = v2 - v0;
 		Vec2 uvedge01 = texCoords[i1] - texCoords[i0];
@@ -280,19 +281,19 @@ void MeshLoader::createVertTangents()
 		//t.normalize();
 		//b.normalize();
 
-		vertTangents[i0] += Vec4(t, 1.0);
-		vertTangents[i1] += Vec4(t, 1.0);
-		vertTangents[i2] += Vec4(t, 1.0);
+		tangents[i0] += Vec4(t, 1.0);
+		tangents[i1] += Vec4(t, 1.0);
+		tangents[i2] += Vec4(t, 1.0);
 
 		bitagents[i0] += b;
 		bitagents[i1] += b;
 		bitagents[i2] += b;
 	}
 
-	for(U i = 0; i < vertTangents.size(); i++)
+	for(U i = 0; i < tangents.size(); i++)
 	{
-		Vec3 t = vertTangents[i].xyz();
-		const Vec3& n = vertNormals[i];
+		Vec3 t = tangents[i].xyz();
+		const Vec3& n = normals[i];
 		Vec3& b = bitagents[i];
 
 		t = t - n * n.dot(t);
@@ -302,7 +303,7 @@ void MeshLoader::createVertTangents()
 
 		F32 w = ((n.cross(t)).dot(b) < 0.0) ? 1.0 : -1.0;
 
-		vertTangents[i] = Vec4(t, w);
+		tangents[i] = Vec4(t, w);
 	}
 }
 
@@ -312,10 +313,10 @@ void MeshLoader::fixNormals()
 	FixNormalsMap map;
 
 	// For all verts
-	for(U i = 1; i < vertCoords.size(); i++)
+	for(U i = 1; i < positions.size(); i++)
 	{
-		const Vec3& pos = vertCoords[i];
-		Vec3& norm = vertNormals[i];
+		const Vec3& pos = positions[i];
+		Vec3& norm = normals[i];
 
 		// Find pos
 		FixNormalsMap::iterator it = map.find(pos);
@@ -340,8 +341,8 @@ void MeshLoader::fixNormals()
 			// Search the verts with the same position
 			for(U j = 0; j < mapVal.indicesCount; j++)
 			{
-				const Vec3& posB = vertCoords[mapVal.indices[j]];
-				Vec3& normB = vertNormals[mapVal.indices[j]];
+				const Vec3& posB = positions[mapVal.indices[j]];
+				Vec3& normB = normals[mapVal.indices[j]];
 
 				ANKI_ASSERT(posB == pos);
 				(void)posB;
@@ -365,4 +366,78 @@ void MeshLoader::fixNormals()
 	}
 }
 
+//==============================================================================
+void MeshLoader::append(const MeshLoader& other)
+{
+	// Merge the vertex data and the indices of "other" into this loader.
+	// The indices of "other" are relative to its own first vertex, so the
+	// bias has to be our vertex count BEFORE its positions are appended;
+	// taken afterwards it would overshoot by the size of "other"
+	const U16 bias = positions.size();
+
+	positions.insert(
+		positions.end(), other.positions.begin(), other.positions.end());
+
+	// Only the compressed (F16) normals, tangents and texture coordinates
+	// are merged, their F32 counterparts are left as they are
+	normalsF16.insert(
+		normalsF16.end(), other.normalsF16.begin(), other.normalsF16.end());
+
+	tangentsF16.insert(
+		tangentsF16.end(), other.tangentsF16.begin(), other.tangentsF16.end());
+
+	texCoordsF16.insert(
+		texCoordsF16.end(), other.texCoordsF16.begin(),
+		other.texCoordsF16.end());
+
+	weights.insert(
+		weights.end(), other.weights.begin(), other.weights.end());
+
+	// Rebase the appended indices onto the merged vertex buffers. Indices
+	// are 16bit so the merged mesh has to stay within the U16 range
+	for(U16 index : other.vertIndices)
+	{
+		vertIndices.push_back(bias + index);
+	}
+}
+
+//==============================================================================
+void MeshLoader::compressBuffers()
+{
+	// Fill the F16 buffers by narrowing the F32 normals, tangents and
+	// texture coordinates one component at a time. The F32 sources are
+	// not modified
+	ANKI_ASSERT(positions.size() > 0);
+
+	normalsF16.resize(normals.size());
+	for(U vert = 0; vert < normalsF16.size(); vert++)
+	{
+		HVec3& packed = normalsF16[vert];
+		for(U comp = 0; comp < 3; comp++)
+		{
+			packed[comp] = F16(normals[vert][comp]);
+		}
+	}
+
+	tangentsF16.resize(tangents.size());
+	for(U vert = 0; vert < tangentsF16.size(); vert++)
+	{
+		HVec4& packed = tangentsF16[vert];
+		for(U comp = 0; comp < 4; comp++)
+		{
+			packed[comp] = F16(tangents[vert][comp]);
+		}
+	}
+
+	texCoordsF16.resize(texCoords.size());
+	for(U vert = 0; vert < texCoordsF16.size(); vert++)
+	{
+		HVec2& packed = texCoordsF16[vert];
+		for(U comp = 0; comp < 2; comp++)
+		{
+			packed[comp] = F16(texCoords[vert][comp]);
+		}
+	}
+}
+
 } // end namespace anki

+ 18 - 1
src/util/Memory.cpp

@@ -165,7 +165,8 @@ void StackMemoryPool::reset()
 //==============================================================================
 void* mallocAligned(PtrSize size, PtrSize alignmentBytes)
 {
-#if ANKI_POSIX
+#if ANKI_POSIX 
+#	if ANKI_OS != ANKI_OS_ANDROID
 	void* out;
 	int err = posix_memalign(
 		&out, getAlignedRoundUp(alignmentBytes, sizeof(void*)), size);
@@ -181,6 +182,22 @@ void* mallocAligned(PtrSize size, PtrSize alignmentBytes)
 		throw ANKI_EXCEPTION("mallocAligned() failed");
 		return nullptr;
 	}
+#	else
+	void* out = memalign(
+		getAlignedRoundUp(alignmentBytes, sizeof(void*)), size);
+
+	if(out)
+	{
+		// Make sure it's aligned
+		ANKI_ASSERT(isAligned(alignmentBytes, out));
+		return out;
+	}
+	else
+	{
+		throw ANKI_EXCEPTION("mallocAligned() failed");
+		return nullptr;
+	}
+#	endif
 #else
 #	error "Unimplemented"
 #endif

+ 2 - 2
testapp/Main.cpp

@@ -478,7 +478,7 @@ void mainLoop()
 
 		// Sleep
 		//
-#if 1
+#if 0
 		timer.stop();
 		if(timer.getElapsedTime() < AppSingleton::get().getTimerTick())
 		{
@@ -494,7 +494,7 @@ void mainLoop()
 		increaseGlobTimestamp();
 	}
 
-#if 0
+#if 1
 	MainRendererSingleton::get().takeScreenshot("screenshot.tga");
 #endif