Browse Source

Merge pull request #51 from dariomanesku/master

Updates on 14-shadowvolumes.
Branimir Karadžić 12 years ago
parent
commit
aadd8aa8d3

+ 17 - 26
examples/14-shadowvolumes/platform.obj

@@ -1,20 +1,16 @@
 # Blender v2.66 (sub 1) OBJ File: ''
 # Blender v2.66 (sub 1) OBJ File: ''
 # www.blender.org
 # www.blender.org
-g Plane
+g Plane.002
 v -1.000000 -0.000000 1.000000
 v -1.000000 -0.000000 1.000000
 v -1.000000 0.000000 -1.000000
 v -1.000000 0.000000 -1.000000
 v 1.000000 0.000000 -1.000000
 v 1.000000 0.000000 -1.000000
 v 1.000000 -0.000000 1.000000
 v 1.000000 -0.000000 1.000000
 v 1.000000 0.081440 -1.000000
 v 1.000000 0.081440 -1.000000
 v -1.000000 0.081440 -1.000000
 v -1.000000 0.081440 -1.000000
-v -1.000000 0.081440 -1.000000
-v 1.000000 0.081440 -1.000000
 v 1.000000 0.081440 1.000000
 v 1.000000 0.081440 1.000000
 v -1.000000 0.081440 1.000000
 v -1.000000 0.081440 1.000000
 v -0.934893 0.081440 -0.934893
 v -0.934893 0.081440 -0.934893
 v 0.934893 0.081440 -0.934893
 v 0.934893 0.081440 -0.934893
-v 1.000000 0.081440 1.000000
-v -1.000000 0.081440 1.000000
 v -0.934893 0.081440 0.934893
 v -0.934893 0.081440 0.934893
 v 0.934893 0.081440 0.934893
 v 0.934893 0.081440 0.934893
 v 0.934893 0.140616 0.934893
 v 0.934893 0.140616 0.934893
@@ -26,19 +22,18 @@ vt 0.011681 0.988318
 vt 0.988318 0.988318
 vt 0.988318 0.988318
 vt 0.988318 0.011681
 vt 0.988318 0.011681
 vt 1.000000 1.000000
 vt 1.000000 1.000000
-vt 0.000000 1.000000
 vt 1.000000 0.000000
 vt 1.000000 0.000000
 vt 0.000000 0.000000
 vt 0.000000 0.000000
-vt 0.032554 0.967446
-vt 0.967446 0.967446
+vt 0.000000 1.000000
 vt 0.032554 0.032553
 vt 0.032554 0.032553
 vt 0.967446 0.032553
 vt 0.967446 0.032553
 vt 0.975715 0.024285
 vt 0.975715 0.024285
 vt 0.024285 0.024285
 vt 0.024285 0.024285
 vt 0.975715 0.975715
 vt 0.975715 0.975715
 vt 0.024285 0.975715
 vt 0.024285 0.975715
+vt 0.967446 0.967446
+vt 0.032554 0.967446
 vn 0.000000 -1.000000 -0.000000
 vn 0.000000 -1.000000 -0.000000
-vn 0.000000 0.000000 0.000000
 vn 1.000000 0.000000 0.000000
 vn 1.000000 0.000000 0.000000
 vn -1.000000 -0.000000 0.000000
 vn -1.000000 -0.000000 0.000000
 vn 0.000000 0.000000 1.000000
 vn 0.000000 0.000000 1.000000
@@ -46,20 +41,16 @@ vn 0.000000 -0.000000 -1.000000
 vn 0.000000 1.000000 0.000000
 vn 0.000000 1.000000 0.000000
 s off
 s off
 f 1/1/1 2/2/1 3/3/1 4/4/1
 f 1/1/1 2/2/1 3/3/1 4/4/1
-f 5/5/2 6/6/2 7/6/2 8/5/2
-f 4/4/3 3/3/3 5/5/3 9/7/3
-f 2/2/4 1/1/4 10/8/4 6/6/4
-f 1/1/5 4/4/5 9/7/5 10/8/5
-f 3/3/6 2/2/6 6/6/6 5/5/6
-f 8/5/7 7/6/7 11/9/7 12/10/7
-f 9/7/2 5/5/2 8/5/2 13/7/2
-f 6/6/2 10/8/2 14/8/2 7/6/2
-f 10/8/2 9/7/2 13/7/2 14/8/2
-f 15/11/5 16/12/5 17/13/5 18/14/5
-f 13/7/7 8/5/7 12/10/7 16/12/7
-f 7/6/7 14/8/7 15/11/7 11/9/7
-f 14/8/7 13/7/7 16/12/7 15/11/7
-f 18/14/7 17/13/7 19/15/7 20/16/7
-f 12/10/6 11/9/6 20/16/6 19/15/6
-f 16/12/3 12/10/3 19/15/3 17/13/3
-f 11/9/4 15/11/4 18/14/4 20/16/4
+f 4/4/2 3/3/2 5/5/2 7/6/2
+f 2/2/3 1/1/3 8/7/3 6/8/3
+f 1/1/4 4/4/4 7/6/4 8/7/4
+f 3/3/5 2/2/5 6/8/5 5/5/5
+f 11/9/4 12/10/4 13/11/4 14/12/4
+f 14/12/6 13/11/6 15/13/6 16/14/6
+f 10/15/5 9/16/5 16/14/5 15/13/5
+f 12/10/2 10/15/2 15/13/2 13/11/2
+f 9/16/3 11/9/3 14/12/3 16/14/3
+f 5/5/6 6/8/6 9/16/6 10/15/6
+f 7/6/6 5/5/6 10/15/6 12/10/6
+f 6/8/6 8/7/6 11/9/6 9/16/6
+f 8/7/6 7/6/6 12/10/6 11/9/6

+ 259 - 145
examples/14-shadowvolumes/shadowvolumes.cpp

@@ -11,6 +11,8 @@ using namespace std::tr1;
 #include <bgfx.h>
 #include <bgfx.h>
 #include <bx/timer.h>
 #include <bx/timer.h>
 #include <bx/readerwriter.h>
 #include <bx/readerwriter.h>
+#include <bx/allocator.h>
+#include <bx/float4_t.h>
 #include "entry/entry.h"
 #include "entry/entry.h"
 #include "fpumath.h"
 #include "fpumath.h"
 #include "imgui/imgui.h"
 #include "imgui/imgui.h"
@@ -23,10 +25,11 @@ using namespace std::tr1;
 #include <unordered_map>
 #include <unordered_map>
 #include <map>
 #include <map>
 
 
+#define SV_USE_SIMD 1
 #define MAX_INSTANCE_COUNT 25
 #define MAX_INSTANCE_COUNT 25
 #define MAX_LIGHTS_COUNT 5
 #define MAX_LIGHTS_COUNT 5
 
 
-#define VIEWID_RANGE1_PASS0     1 
+#define VIEWID_RANGE1_PASS0     1
 #define VIEWID_RANGE1_RT_PASS1  2
 #define VIEWID_RANGE1_RT_PASS1  2
 #define VIEWID_RANGE15_PASS2    3
 #define VIEWID_RANGE15_PASS2    3
 #define VIEWID_RANGE1_PASS3    20
 #define VIEWID_RANGE1_PASS3    20
@@ -372,7 +375,7 @@ struct Uniforms
 		float m_alpha;
 		float m_alpha;
 		float m_lightCount;
 		float m_lightCount;
 	};
 	};
-	
+
 	struct SvParams
 	struct SvParams
 	{
 	{
 		float m_useStencilTex;
 		float m_useStencilTex;
@@ -527,13 +530,13 @@ static RenderState s_renderStates[RenderState::Count]  =
 		| BGFX_STATE_MSAA
 		| BGFX_STATE_MSAA
 		, UINT32_MAX
 		, UINT32_MAX
 		, BGFX_STENCIL_TEST_ALWAYS
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_KEEP
 		| BGFX_STENCIL_OP_PASS_Z_DECR
 		| BGFX_STENCIL_OP_PASS_Z_DECR
 		, BGFX_STENCIL_TEST_ALWAYS
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_KEEP
@@ -544,13 +547,13 @@ static RenderState s_renderStates[RenderState::Count]  =
 		| BGFX_STATE_MSAA
 		| BGFX_STATE_MSAA
 		, UINT32_MAX
 		, UINT32_MAX
 		, BGFX_STENCIL_TEST_ALWAYS
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_INCR
 		| BGFX_STENCIL_OP_FAIL_Z_INCR
 		| BGFX_STENCIL_OP_PASS_Z_KEEP
 		| BGFX_STENCIL_OP_PASS_Z_KEEP
 		, BGFX_STENCIL_TEST_ALWAYS
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_DECR
 		| BGFX_STENCIL_OP_FAIL_Z_DECR
@@ -695,31 +698,15 @@ typedef std::vector<Face> FaceArray;
 
 
 struct Edge
 struct Edge
 {
 {
-	struct Plane
-	{
-		float m_plane[4];
-		bool m_reverseVertexOrder;
-	};
-
-	Edge(const float* _v0, const float* _v1)
-		: m_faceIndex(0)
-	{
-		memcpy(m_v0, _v0, 3*sizeof(float) );
-		memcpy(m_v1, _v1, 3*sizeof(float) );
-	}               
-
-	Plane& nextFace()
-	{
-		BX_CHECK(m_faceIndex < FACE_NUM, "Error! 2-manifold meshes must be used!");
-		return m_faces[(m_faceIndex++)%FACE_NUM];
-	}
-
-	float m_v0[3], m_v1[3];    
-	static const uint8_t FACE_NUM = 2;
-	Plane m_faces[FACE_NUM];
+	bool m_faceReverseOrder[2];
 	uint8_t m_faceIndex;
 	uint8_t m_faceIndex;
+	float m_v0[3], m_v1[3];
+};
+
+struct Plane
+{
+	float m_plane[4];
 };
 };
-typedef std::vector<Edge> EdgeArray;
 
 
 struct HalfEdge
 struct HalfEdge
 {
 {
@@ -844,24 +831,70 @@ struct Group
 		m_vertices = NULL;
 		m_vertices = NULL;
 		m_numIndices = 0;
 		m_numIndices = 0;
 		m_indices = NULL;
 		m_indices = NULL;
+		m_numEdges = 0;
+		m_edges = NULL;
+		m_edgePlanesUnalignedPtr = NULL;
 		m_prims.clear();
 		m_prims.clear();
 	}
 	}
 
 
+	typedef struct { float f[6]; } f6_t;
+
+	struct EdgeComparator
+	{
+		bool operator()(const f6_t& _a, const f6_t& _b)
+		{
+			const uint8_t t0 = 0
+				| ( (_a.f[0] < _b.f[0]) << 5)
+				| ( (_a.f[1] < _b.f[1]) << 4)
+				| ( (_a.f[2] < _b.f[2]) << 3)
+				| ( (_a.f[3] < _b.f[3]) << 2)
+				| ( (_a.f[4] < _b.f[4]) << 1)
+				| ( (_a.f[5] < _b.f[5]) << 0)
+				;
+
+			const uint8_t t1 = 0
+				| ( (_a.f[0] > _b.f[0]) << 5)
+				| ( (_a.f[1] > _b.f[1]) << 4)
+				| ( (_a.f[2] > _b.f[2]) << 3)
+				| ( (_a.f[3] > _b.f[3]) << 2)
+				| ( (_a.f[4] > _b.f[4]) << 1)
+				| ( (_a.f[5] > _b.f[5]) << 0)
+				;
+
+			return t0 > t1;
+		}
+	};
+
 	void fillStructures(uint16_t _stride)
 	void fillStructures(uint16_t _stride)
 	{
 	{
 		m_faces.clear();
 		m_faces.clear();
-		m_edges.clear();
 		m_halfEdges.destroy();
 		m_halfEdges.destroy();
 
 
 		//init halfedges
 		//init halfedges
 		m_halfEdges.init(m_indices, m_numIndices);
 		m_halfEdges.init(m_indices, m_numIndices);
 
 
 		//init faces and edges
 		//init faces and edges
-		m_faces.reserve(m_numIndices/3); //1 face = 3 indices 
-		m_edges.reserve(m_numIndices);   //1 triangle = 3 indices = 3 edges.
+		m_faces.reserve(m_numIndices/3); //1 face = 3 indices
+		m_edges = (Edge*)malloc(m_numIndices * sizeof(Edge)); //1 triangle = 3 indices = 3 edges.
+		m_edgePlanesUnalignedPtr = (Plane*)malloc(m_numIndices * sizeof(Plane) + 15);
+		m_edgePlanes = (Plane*)bx::alignPtr(m_edgePlanesUnalignedPtr, 0, 16);
+
+		struct EdgeAndPlane
+		{
+			EdgeAndPlane(const float* _v0, const float* _v1)
+				: m_faceIndex(0)
+			{
+				memcpy(m_v0, _v0, 3*sizeof(float) );
+				memcpy(m_v1, _v1, 3*sizeof(float) );
+			}
 
 
-		typedef std::map<std::pair<uint16_t, uint16_t>, uint32_t> EdgeIndexMap;
-		EdgeIndexMap edgeIndexMap;
+			bool m_faceReverseOrder[2];
+			uint8_t m_faceIndex;
+			float m_v0[3], m_v1[3];
+			Plane m_plane[2];
+		};
+		typedef std::map<f6_t, EdgeAndPlane, EdgeComparator> EdgeMap;
+		EdgeMap edgeMap;
 
 
 		for (uint32_t ii = 0, size = m_numIndices/3; ii < size; ++ii)
 		for (uint32_t ii = 0, size = m_numIndices/3; ii < size; ++ii)
 		{
 		{
@@ -883,13 +916,6 @@ struct Group
 			memcpy(face.m_plane, plane, 4*sizeof(float) );
 			memcpy(face.m_plane, plane, 4*sizeof(float) );
 			m_faces.push_back(face);
 			m_faces.push_back(face);
 
 
-			uint16_t triangleI[3][2] =
-			{
-				{i0, i1},
-				{i1, i2},
-				{i2, i0},
-			};
-
 			const float* triangleV[3][2] =
 			const float* triangleV[3][2] =
 			{
 			{
 				{v0, v1},
 				{v0, v1},
@@ -897,43 +923,46 @@ struct Group
 				{v2, v0},
 				{v2, v0},
 			};
 			};
 
 
-			typedef std::vector<uint8_t> TriangleIndex;
-			TriangleIndex triangleIndex;
-
 			for (uint8_t jj = 0; jj < 3; ++jj)
 			for (uint8_t jj = 0; jj < 3; ++jj)
-			{   
-				EdgeIndexMap::iterator iter = edgeIndexMap.find(std::make_pair(triangleI[jj][1], triangleI[jj][0]) );
-				if (edgeIndexMap.end() != iter)
+			{
+				const float* v0 = triangleV[jj][0];
+				const float* v1 = triangleV[jj][1];
+				f6_t key;
+				f6_t keyInv;
+				memcpy(&key.f[0], v0, 3*sizeof(float) );
+				memcpy(&key.f[3], v1, 3*sizeof(float) );
+				memcpy(&keyInv.f[0], v1, 3*sizeof(float) );
+				memcpy(&keyInv.f[3], v0, 3*sizeof(float) );
+
+				EdgeMap::iterator iter = edgeMap.find(keyInv);
+				if (iter != edgeMap.end())
 				{
 				{
-					const uint32_t index = iter->second;
-					Edge* edge = &m_edges[index];
-
-					Edge::Plane& face = edge->nextFace();
-					memcpy(face.m_plane, plane, 4*sizeof(float) );
-					face.m_reverseVertexOrder = true;
+					EdgeAndPlane& ep = iter->second;
+					memcpy(ep.m_plane[ep.m_faceIndex].m_plane, plane, 4*sizeof(float) );
+					ep.m_faceReverseOrder[ep.m_faceIndex] = true;
 				}
 				}
 				else
 				else
 				{
 				{
-					triangleIndex.push_back(jj);
+					std::pair<EdgeMap::iterator, bool> result = edgeMap.insert(std::make_pair(key, EdgeAndPlane(v0, v1)) );
+					EdgeAndPlane& ep = result.first->second;
+					memcpy(ep.m_plane[ep.m_faceIndex].m_plane, plane, 4*sizeof(float) );
+					ep.m_faceReverseOrder[ep.m_faceIndex] = false;
+					ep.m_faceIndex++;
 				}
 				}
 			}
 			}
+		}
 
 
-			for (TriangleIndex::const_iterator iter = triangleIndex.begin(), end = triangleIndex.end(); iter != end; ++iter)
-			{
-				const uint8_t index = *iter;
-				const uint16_t i0 = triangleI[index][0];
-				const uint16_t i1 = triangleI[index][1];
-				const float* v0 = triangleV[index][0];
-				const float* v1 = triangleV[index][1];
-
-				Edge edge(v0, v1);
-				Edge::Plane& face = edge.nextFace();
-				memcpy(face.m_plane, plane, 4*sizeof(float) );
-				face.m_reverseVertexOrder = false;
-				m_edges.push_back(edge);
-
-				edgeIndexMap.insert(std::make_pair(std::make_pair(i0, i1), (uint32_t)m_edges.size()-1) );
-			}
+		uint32_t index = 0;
+		for (EdgeMap::const_iterator iter = edgeMap.begin(), end = edgeMap.end(); iter != end; ++iter)
+		{
+			Edge* edge = &m_edges[m_numEdges];
+			Plane* plane = &m_edgePlanes[index];
+
+			memcpy(edge, iter->second.m_faceReverseOrder, sizeof(Edge));
+			memcpy(plane, iter->second.m_plane, 2 * sizeof(Plane));
+
+			m_numEdges++;
+			index += 2;
 		}
 		}
 	}
 	}
 
 
@@ -948,6 +977,10 @@ struct Group
 		m_vertices = NULL;
 		m_vertices = NULL;
 		free(m_indices);
 		free(m_indices);
 		m_indices = NULL;
 		m_indices = NULL;
+		free(m_edges);
+		m_edges = NULL;
+		free(m_edgePlanesUnalignedPtr);
+		m_edgePlanesUnalignedPtr = NULL;
 		m_halfEdges.destroy();
 		m_halfEdges.destroy();
 	}
 	}
 
 
@@ -961,7 +994,10 @@ struct Group
 	Aabb m_aabb;
 	Aabb m_aabb;
 	Obb m_obb;
 	Obb m_obb;
 	PrimitiveArray m_prims;
 	PrimitiveArray m_prims;
-	EdgeArray m_edges;
+	uint32_t m_numEdges;
+	Edge* m_edges;
+	Plane* m_edgePlanesUnalignedPtr;
+	Plane* m_edgePlanes;
 	FaceArray m_faces;
 	FaceArray m_faces;
 	HalfEdges m_halfEdges;
 	HalfEdges m_halfEdges;
 };
 };
@@ -1114,7 +1150,7 @@ struct Mesh
 struct Model
 struct Model
 {
 {
 	Model()
 	Model()
-	{ 
+	{
 		m_program.idx = bgfx::invalidHandle;
 		m_program.idx = bgfx::invalidHandle;
 		m_texture.idx = bgfx::invalidHandle;
 		m_texture.idx = bgfx::invalidHandle;
 	}
 	}
@@ -1178,7 +1214,7 @@ struct Instance
 {
 {
 	Instance()
 	Instance()
 		: m_svExtrusionDistance(150.0f)
 		: m_svExtrusionDistance(150.0f)
-	{ 
+	{
 		m_color[0] = 1.0f;
 		m_color[0] = 1.0f;
 		m_color[1] = 1.0f;
 		m_color[1] = 1.0f;
 		m_color[2] = 1.0f;
 		m_color[2] = 1.0f;
@@ -1341,10 +1377,12 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 					  , bool _textureAsStencil = false
 					  , bool _textureAsStencil = false
 					  )
 					  )
 {
 {
-	const uint8_t*    vertices  = _group.m_vertices;
-	const FaceArray&  faces     = _group.m_faces;
-	const EdgeArray&  edges     = _group.m_edges;
-	HalfEdges&        halfEdges = _group.m_halfEdges;
+	const uint8_t*    vertices   = _group.m_vertices;
+	const FaceArray&  faces      = _group.m_faces;
+	const Edge*       edges      = _group.m_edges;
+	const Plane*      edgePlanes = _group.m_edgePlanes;
+	const uint32_t    numEdges   = _group.m_numEdges;
+	HalfEdges&        halfEdges  = _group.m_halfEdges;
 
 
 	struct VertexData
 	struct VertexData
 	{
 	{
@@ -1366,8 +1404,8 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 
 
 	bool cap = (ShadowVolumeImpl::DepthFail == _impl);
 	bool cap = (ShadowVolumeImpl::DepthFail == _impl);
 
 
-	VertexData* verticesSide    = (VertexData*) s_svAllocator.alloc (20000 * sizeof(VertexData) );
-	uint16_t*   indicesSide     = (uint16_t*)   s_svAllocator.alloc (20000 * 3*sizeof(uint16_t) );
+	VertexData* verticesSide    = (VertexData*) s_svAllocator.alloc(20000 * sizeof(VertexData) );
+	uint16_t*   indicesSide     = (uint16_t*)   s_svAllocator.alloc(20000 * 3*sizeof(uint16_t) );
 	uint16_t*   indicesFrontCap = 0;
 	uint16_t*   indicesFrontCap = 0;
 	uint16_t*   indicesBackCap  = 0;
 	uint16_t*   indicesBackCap  = 0;
 
 
@@ -1395,7 +1433,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 			if (f > 0.0f)
 			if (f > 0.0f)
 			{
 			{
 				frontFacing = true;
 				frontFacing = true;
-				uint16_t triangleEdges[3][2] = 
+				uint16_t triangleEdges[3][2] =
 				{
 				{
 					{ face.m_i[0], face.m_i[1] },
 					{ face.m_i[0], face.m_i[1] },
 					{ face.m_i[1], face.m_i[2] },
 					{ face.m_i[1], face.m_i[2] },
@@ -1482,49 +1520,125 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 	}
 	}
 	else // ShadowVolumeAlgorithm::EdgeBased:
 	else // ShadowVolumeAlgorithm::EdgeBased:
 	{
 	{
-		for (EdgeArray::const_iterator iter = edges.begin(), end = edges.end(); iter != end; ++iter)
-		{
-			const Edge& edge = *iter;
-			const float* v0 = edge.m_v0;
-			const float* v1 = edge.m_v1;
+		uint32_t ii = 0;
 
 
-			int16_t k = 0;
-			for (uint8_t ii = 0; ii < edge.m_faceIndex; ++ii)
-			{
-				const Edge::Plane& face = edge.m_faces[ii];
+#if SV_USE_SIMD
+		uint32_t numEdgesRounded = numEdges & (~0x1);
 
 
-				int16_t s = (int16_t)fsign(vec3Dot(face.m_plane, _light) + face.m_plane[3]);
-				if (face.m_reverseVertexOrder)
+		using namespace bx;
+
+		const float4_t lx = float4_splat(_light[0]);
+		const float4_t ly = float4_splat(_light[1]);
+		const float4_t lz = float4_splat(_light[2]);
+
+		for (; ii < numEdgesRounded; ii+=2)
+		{
+			const Edge& edge0 = edges[ii];
+			const Edge& edge1 = edges[ii+1];
+			const Plane* edgePlane0 = &edgePlanes[ii*2];
+			const Plane* edgePlane1 = &edgePlanes[ii*2 + 2];
+
+			const float4_t reverse = float4_ild(edge0.m_faceReverseOrder[0]
+						 , edge1.m_faceReverseOrder[0]
+						 , edge0.m_faceReverseOrder[1]
+						 , edge1.m_faceReverseOrder[1]
+						 );
+
+			const float4_t v0 = float4_ld(edgePlane0[0].m_plane);
+			const float4_t v1 = float4_ld(edgePlane1[0].m_plane);
+			const float4_t v2 = float4_ld(edgePlane0[1].m_plane);
+			const float4_t v3 = float4_ld(edgePlane1[1].m_plane);
+
+			const float4_t xxyy0 = float4_shuf_xAyB(v0, v2);
+			const float4_t zzww0 = float4_shuf_zCwD(v0, v2);
+			const float4_t xxyy1 = float4_shuf_xAyB(v1, v3);
+			const float4_t zzww1 = float4_shuf_zCwD(v1, v3);
+
+			const float4_t vX = float4_shuf_xAyB(xxyy0, xxyy1);
+			const float4_t vY = float4_shuf_zCwD(xxyy0, xxyy1);
+			const float4_t vZ = float4_shuf_xAyB(zzww0, zzww1);
+			const float4_t vW = float4_shuf_zCwD(zzww0, zzww1);
+
+			const float4_t r0 = float4_mul(vX, lx);
+			const float4_t r1 = float4_mul(vY, ly);
+			const float4_t r2 = float4_mul(vZ, lz);
+
+			const float4_t dot = float4_add(r0, float4_add(r1, r2));
+			const float4_t f = float4_add(dot, vW);
+
+			const float4_t zero = float4_zero();
+			const float4_t mask = float4_cmpgt(f, zero);
+			const float4_t onef = float4_splat(1.0f);
+			const float4_t tmp0 = float4_and(mask, onef);
+			const float4_t tmp1 = float4_ftoi(tmp0);
+			const float4_t tmp2 = float4_xor(tmp1, reverse);
+			const float4_t tmp3 = float4_sll(tmp2, 1);
+			const float4_t onei = float4_isplat(1);
+			const float4_t tmp4 = float4_isub(tmp3, onei);
+
+			BX_ALIGN_STRUCT_16(int32_t res[4]);
+			float4_st(&res, tmp4);
+
+			for (uint16_t jj = 0; jj < 2; ++jj)
+			{
+				int16_t k = res[jj] + res[jj+2];
+				if (k != 0)
 				{
 				{
-					s = -s;
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v0, 0.0f, float(k));
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v0, 1.0f, float(k));
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v1, 0.0f, float(k));
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v1, 1.0f, float(k));
+
+					k = _textureAsStencil ? 1 : k;
+					uint16_t winding = uint16_t(k > 0);
+					for (uint8_t ii = 0, end = abs(k); ii < end; ++ii)
+					{
+						indicesSide[sideI++] = indexSide;
+						indicesSide[sideI++] = indexSide + 2 - winding;
+						indicesSide[sideI++] = indexSide + 1 + winding;
+
+						indicesSide[sideI++] = indexSide + 2;
+						indicesSide[sideI++] = indexSide + 3 - winding*2;
+						indicesSide[sideI++] = indexSide + 1 + winding*2;
+					}
+
+					indexSide += 4;
 				}
 				}
-				k += s;
 			}
 			}
+		}
+#endif
 
 
-			if (k == 0)
-			{
-				continue;
-			}
+		for (; ii < numEdges; ++ii)
+		{
+			const Edge& edge = edges[ii];
+			const Plane* edgePlane = &edgePlanes[ii*2];
 
 
-			verticesSide[vsideI++] = VertexData(v0, 0.0f, k);
-			verticesSide[vsideI++] = VertexData(v0, 1.0f, k);
-			verticesSide[vsideI++] = VertexData(v1, 0.0f, k);
-			verticesSide[vsideI++] = VertexData(v1, 1.0f, k);
+			int16_t s0 = ( (vec3Dot(edgePlane[0].m_plane, _light) + edgePlane[0].m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[0];
+			int16_t s1 = ( (vec3Dot(edgePlane[1].m_plane, _light) + edgePlane[1].m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[1];
+			int16_t k = ( (s0 + s1) << 1) - 2;
 
 
-			k = _textureAsStencil ? 1 : k;
-			uint16_t winding = uint16_t(k > 0);
-			for (uint8_t ii = 0, end = abs(k); ii < end; ++ii)
+			if (k != 0)
 			{
 			{
-				indicesSide[sideI++] = indexSide;
-				indicesSide[sideI++] = indexSide + 2 - winding;
-				indicesSide[sideI++] = indexSide + 1 + winding;
+				verticesSide[vsideI++] = VertexData(edge.m_v0, 0.0f, k);
+				verticesSide[vsideI++] = VertexData(edge.m_v0, 1.0f, k);
+				verticesSide[vsideI++] = VertexData(edge.m_v1, 0.0f, k);
+				verticesSide[vsideI++] = VertexData(edge.m_v1, 1.0f, k);
+
+				k = _textureAsStencil ? 1 : k;
+				uint16_t winding = uint16_t(k > 0);
+				for (uint8_t ii = 0, end = abs(k); ii < end; ++ii)
+				{
+					indicesSide[sideI++] = indexSide;
+					indicesSide[sideI++] = indexSide + 2 - winding;
+					indicesSide[sideI++] = indexSide + 1 + winding;
 
 
-				indicesSide[sideI++] = indexSide + 2;
-				indicesSide[sideI++] = indexSide + 3 - winding*2;
-				indicesSide[sideI++] = indexSide + 1 + winding*2;
-			}
+					indicesSide[sideI++] = indexSide + 2;
+					indicesSide[sideI++] = indexSide + 3 - winding*2;
+					indicesSide[sideI++] = indexSide + 1 + winding*2;
+				}
 
 
-			indexSide += 4;
+				indexSide += 4;
+			}
 		}
 		}
 
 
 		if (cap)
 		if (cap)
@@ -1535,7 +1649,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 				const Face& face = *iter;
 				const Face& face = *iter;
 
 
 				float f = vec3Dot(face.m_plane, _light) + face.m_plane[3];
 				float f = vec3Dot(face.m_plane, _light) + face.m_plane[3];
-				bool frontFacing = (f > 0.0f); 
+				bool frontFacing = (f > 0.0f);
 
 
 				for (uint8_t ii = 0, end = 1 + uint8_t(!_textureAsStencil); ii < end; ++ii)
 				for (uint8_t ii = 0, end = 1 + uint8_t(!_textureAsStencil); ii < end; ++ii)
 				{
 				{
@@ -1684,10 +1798,10 @@ void createNearClipVolume(float* __restrict _outPlanes24f
 
 
 	float nearPlaneV[4] =
 	float nearPlaneV[4] =
 	{
 	{
-		0.0f * lightSide,  
-		0.0f * lightSide,  
-		1.0f * lightSide,  
-		_near * lightSide, 
+		0.0f * lightSide,
+		0.0f * lightSide,
+		1.0f * lightSide,
+		_near * lightSide,
 	};
 	};
 	vec4MulMtx(volumePlanes[4], nearPlaneV, mtxViewTrans);
 	vec4MulMtx(volumePlanes[4], nearPlaneV, mtxViewTrans);
 
 
@@ -1735,9 +1849,9 @@ bool clipTest(const float* _planes, uint8_t _planeNum, const Mesh& _mesh, const
 			}
 			}
 		}
 		}
 
 
-		if (isInside) 
+		if (isInside)
 		{
 		{
-			return true; 
+			return true;
 		}
 		}
 	}
 	}
 
 
@@ -1825,19 +1939,19 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	bgfx::ProgramHandle programColorTexture     = loadProgram("vs_shadowvolume_color_texture",     "fs_shadowvolume_color_texture"    );
 	bgfx::ProgramHandle programColorTexture     = loadProgram("vs_shadowvolume_color_texture",     "fs_shadowvolume_color_texture"    );
 	bgfx::ProgramHandle programTexture          = loadProgram("vs_shadowvolume_texture",           "fs_shadowvolume_texture"          );
 	bgfx::ProgramHandle programTexture          = loadProgram("vs_shadowvolume_texture",           "fs_shadowvolume_texture"          );
 
 
-	bgfx::ProgramHandle programBackBlank        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackblank" ); 
-	bgfx::ProgramHandle programSideBlank        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsideblank" ); 
-	bgfx::ProgramHandle programFrontBlank       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontblank"); 
+	bgfx::ProgramHandle programBackBlank        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackblank" );
+	bgfx::ProgramHandle programSideBlank        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsideblank" );
+	bgfx::ProgramHandle programFrontBlank       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontblank");
 
 
-	bgfx::ProgramHandle programBackColor        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackcolor" ); 
-	bgfx::ProgramHandle programSideColor        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidecolor" ); 
-	bgfx::ProgramHandle programFrontColor       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontcolor"); 
+	bgfx::ProgramHandle programBackColor        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackcolor" );
+	bgfx::ProgramHandle programSideColor        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidecolor" );
+	bgfx::ProgramHandle programFrontColor       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontcolor");
 
 
-	bgfx::ProgramHandle programSideTex          = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidetex"   ); 
-	bgfx::ProgramHandle programBackTex1         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex1"  ); 
-	bgfx::ProgramHandle programBackTex2         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex2"  ); 
-	bgfx::ProgramHandle programFrontTex1        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex1" ); 
-	bgfx::ProgramHandle programFrontTex2        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex2" ); 
+	bgfx::ProgramHandle programSideTex          = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidetex"   );
+	bgfx::ProgramHandle programBackTex1         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex1"  );
+	bgfx::ProgramHandle programBackTex2         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex2"  );
+	bgfx::ProgramHandle programFrontTex1        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex1" );
+	bgfx::ProgramHandle programFrontTex2        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex2" );
 
 
 	struct ShadowVolumeProgramType
 	struct ShadowVolumeProgramType
 	{
 	{
@@ -1865,7 +1979,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	};
 	};
 
 
 	bgfx::ProgramHandle svProgs[ShadowVolumeProgramType::Count][ShadowVolumePart::Count] =
 	bgfx::ProgramHandle svProgs[ShadowVolumeProgramType::Count][ShadowVolumePart::Count] =
-	{ 
+	{
 		{ programBackBlank, programSideBlank, programFrontBlank } // Blank
 		{ programBackBlank, programSideBlank, programFrontBlank } // Blank
 		,{ programBackColor, programSideColor, programFrontColor } // Color
 		,{ programBackColor, programSideColor, programFrontColor } // Color
 		,{ programBackTex1,  programSideTex,   programFrontTex1  } // Tex1
 		,{ programBackTex1,  programSideTex,   programFrontTex1  } // Tex1
@@ -1933,7 +2047,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	int64_t profTime = 0;
 	int64_t profTime = 0;
 	int64_t timeOffset = bx::getHPCounter();
 	int64_t timeOffset = bx::getHPCounter();
 
 
-	uint32_t numShadowVolumeVertices = 0; 
+	uint32_t numShadowVolumeVertices = 0;
 	uint32_t numShadowVolumeIndices  = 0;
 	uint32_t numShadowVolumeIndices  = 0;
 
 
 	uint32_t oldWidth = 0;
 	uint32_t oldWidth = 0;
@@ -1944,12 +2058,12 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	bool settings_updateLights       = true;
 	bool settings_updateLights       = true;
 	bool settings_updateScene        = true;
 	bool settings_updateScene        = true;
 	bool settings_mixedSvImpl        = true;
 	bool settings_mixedSvImpl        = true;
-	bool settings_useStencilTexture  = false;
+	bool settings_useStencilTexture  = true;
 	bool settings_drawShadowVolumes  = false;
 	bool settings_drawShadowVolumes  = false;
 	float settings_numLights         = 1.0f;
 	float settings_numLights         = 1.0f;
 	float settings_instanceCount     = 9.0f;
 	float settings_instanceCount     = 9.0f;
-	ShadowVolumeImpl::Enum      settings_shadowVolumeImpl      = ShadowVolumeImpl::DepthFail; 
-	ShadowVolumeAlgorithm::Enum settings_shadowVolumeAlgorithm = ShadowVolumeAlgorithm::FaceBased;
+	ShadowVolumeImpl::Enum      settings_shadowVolumeImpl      = ShadowVolumeImpl::DepthFail;
+	ShadowVolumeAlgorithm::Enum settings_shadowVolumeAlgorithm = ShadowVolumeAlgorithm::EdgeBased;
 	int32_t scrollAreaRight = 0;
 	int32_t scrollAreaRight = 0;
 
 
 	const char* titles[2] =
 	const char* titles[2] =
@@ -2111,8 +2225,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		imguiLabel("CPU Time: %7.1f [ms]", double(profTime)*toMs);
 		imguiLabel("CPU Time: %7.1f [ms]", double(profTime)*toMs);
 		imguiLabel("Volume Vertices: %5.uk", numShadowVolumeVertices/1000);
 		imguiLabel("Volume Vertices: %5.uk", numShadowVolumeVertices/1000);
 		imguiLabel("Volume Indices: %6.uk", numShadowVolumeIndices/1000);
 		imguiLabel("Volume Indices: %6.uk", numShadowVolumeIndices/1000);
-		numShadowVolumeVertices = 0; 
-		numShadowVolumeIndices = 0;  
+		numShadowVolumeVertices = 0;
+		numShadowVolumeIndices = 0;
 
 
 		imguiSeparatorLine();
 		imguiSeparatorLine();
 		settings_drawShadowVolumes = imguiCheck("Draw Shadow Volumes", settings_drawShadowVolumes)
 		settings_drawShadowVolumes = imguiCheck("Draw Shadow Volumes", settings_drawShadowVolumes)
@@ -2204,7 +2318,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			bgfx::dbgTextPrintf(3, row++, 0x0f, "Stencil:");
 			bgfx::dbgTextPrintf(3, row++, 0x0f, "Stencil:");
 			bgfx::dbgTextPrintf(8, row++, 0x0f, "Stencil buffer     - Faster, but capable only of +1 incr.");
 			bgfx::dbgTextPrintf(8, row++, 0x0f, "Stencil buffer     - Faster, but capable only of +1 incr.");
 			bgfx::dbgTextPrintf(8, row++, 0x0f, "Texture as stencil - Slower, but capable of +2 incr.");
 			bgfx::dbgTextPrintf(8, row++, 0x0f, "Texture as stencil - Slower, but capable of +2 incr.");
-		}                       
+		}
 
 
 		// Setup instances
 		// Setup instances
 		Instance shadowCasters[SceneCount][60];
 		Instance shadowCasters[SceneCount][60];
@@ -2456,7 +2570,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			bgfx::setViewClear(VIEWID_RANGE1_RT_PASS1, BGFX_CLEAR_DEPTH_BIT, 0x00000000, 1.0f, 0);
 			bgfx::setViewClear(VIEWID_RANGE1_RT_PASS1, BGFX_CLEAR_DEPTH_BIT, 0x00000000, 1.0f, 0);
 			bgfx::setViewRenderTarget(VIEWID_RANGE1_RT_PASS1, s_stencilRt);
 			bgfx::setViewRenderTarget(VIEWID_RANGE1_RT_PASS1, s_stencilRt);
 
 
-			const RenderState& renderState = s_renderStates[RenderState::ShadowVolume_UsingStencilTexture_BuildDepth]; 
+			const RenderState& renderState = s_renderStates[RenderState::ShadowVolume_UsingStencilTexture_BuildDepth];
 
 
 			for (uint8_t ii = 0; ii < shadowCastersCount[currentScene]; ++ii)
 			for (uint8_t ii = 0; ii < shadowCastersCount[currentScene]; ++ii)
 			{
 			{
@@ -2472,7 +2586,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		profTime = bx::getHPCounter();
 		profTime = bx::getHPCounter();
 
 
 		/**
 		/**
-		 * For each light: 
+		 * For each light:
 		 * 1. Compute and draw shadow volume to stencil buffer
 		 * 1. Compute and draw shadow volume to stencil buffer
 		 * 2. Draw diffuse with stencil test
 		 * 2. Draw diffuse with stencil test
 		 */
 		 */
@@ -2525,14 +2639,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				const Instance& instance = shadowCasters[currentScene][jj];
 				const Instance& instance = shadowCasters[currentScene][jj];
 				Model* model = instance.m_model;
 				Model* model = instance.m_model;
 
 
-				ShadowVolumeImpl::Enum shadowVolumeImpl = settings_shadowVolumeImpl; 
+				ShadowVolumeImpl::Enum shadowVolumeImpl = settings_shadowVolumeImpl;
 				if (settings_mixedSvImpl)
 				if (settings_mixedSvImpl)
-				{                                                     
+				{
 					// If instance is inside near clip volume, depth fail must be used, else depth pass is fine.
 					// If instance is inside near clip volume, depth fail must be used, else depth pass is fine.
 					bool isInsideVolume = clipTest(nearClipVolume, 6, model->m_mesh, instance.m_scale, instance.m_pos);
 					bool isInsideVolume = clipTest(nearClipVolume, 6, model->m_mesh, instance.m_scale, instance.m_pos);
 					shadowVolumeImpl = (isInsideVolume ? ShadowVolumeImpl::DepthFail : ShadowVolumeImpl::DepthPass);
 					shadowVolumeImpl = (isInsideVolume ? ShadowVolumeImpl::DepthFail : ShadowVolumeImpl::DepthPass);
 				}
 				}
-				s_uniforms.m_svparams.m_dfail = float(ShadowVolumeImpl::DepthFail == shadowVolumeImpl); 
+				s_uniforms.m_svparams.m_dfail = float(ShadowVolumeImpl::DepthFail == shadowVolumeImpl);
 
 
 				// Compute virtual light position for shadow volume generation.
 				// Compute virtual light position for shadow volume generation.
 				float transformedLightPos[3];
 				float transformedLightPos[3];
@@ -2591,7 +2705,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 							: RenderState::ShadowVolume_UsingStencilTexture_CraftStencil_DepthPass
 							: RenderState::ShadowVolume_UsingStencilTexture_CraftStencil_DepthPass
 							;
 							;
 
 
-						programIndex = ShadowVolumeAlgorithm::FaceBased == settings_shadowVolumeAlgorithm 
+						programIndex = ShadowVolumeAlgorithm::FaceBased == settings_shadowVolumeAlgorithm
 							? ShadowVolumeProgramType::Tex1
 							? ShadowVolumeProgramType::Tex1
 							: ShadowVolumeProgramType::Tex2
 							: ShadowVolumeProgramType::Tex2
 							;
 							;
@@ -2691,7 +2805,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			}
 			}
 		}
 		}
 
 
-		profTime = bx::getHPCounter() - profTime; 
+		profTime = bx::getHPCounter() - profTime;
 
 
 		// Lights.
 		// Lights.
 		const float lightScale[3] = { 1.5f, 1.5f, 1.5f };
 		const float lightScale[3] = { 1.5f, 1.5f, 1.5f };

BIN
examples/runtime/meshes/platform.bin