Parcourir la source

Merge pull request #51 from dariomanesku/master

Updates on 14-shadowvolumes.
Branimir Karadžić il y a 12 ans
Parent
commit
aadd8aa8d3

+ 17 - 26
examples/14-shadowvolumes/platform.obj

@@ -1,20 +1,16 @@
 # Blender v2.66 (sub 1) OBJ File: ''
 # www.blender.org
-g Plane
+g Plane.002
 v -1.000000 -0.000000 1.000000
 v -1.000000 0.000000 -1.000000
 v 1.000000 0.000000 -1.000000
 v 1.000000 -0.000000 1.000000
 v 1.000000 0.081440 -1.000000
 v -1.000000 0.081440 -1.000000
-v -1.000000 0.081440 -1.000000
-v 1.000000 0.081440 -1.000000
 v 1.000000 0.081440 1.000000
 v -1.000000 0.081440 1.000000
 v -0.934893 0.081440 -0.934893
 v 0.934893 0.081440 -0.934893
-v 1.000000 0.081440 1.000000
-v -1.000000 0.081440 1.000000
 v -0.934893 0.081440 0.934893
 v 0.934893 0.081440 0.934893
 v 0.934893 0.140616 0.934893
@@ -26,19 +22,18 @@ vt 0.011681 0.988318
 vt 0.988318 0.988318
 vt 0.988318 0.011681
 vt 1.000000 1.000000
-vt 0.000000 1.000000
 vt 1.000000 0.000000
 vt 0.000000 0.000000
-vt 0.032554 0.967446
-vt 0.967446 0.967446
+vt 0.000000 1.000000
 vt 0.032554 0.032553
 vt 0.967446 0.032553
 vt 0.975715 0.024285
 vt 0.024285 0.024285
 vt 0.975715 0.975715
 vt 0.024285 0.975715
+vt 0.967446 0.967446
+vt 0.032554 0.967446
 vn 0.000000 -1.000000 -0.000000
-vn 0.000000 0.000000 0.000000
 vn 1.000000 0.000000 0.000000
 vn -1.000000 -0.000000 0.000000
 vn 0.000000 0.000000 1.000000
@@ -46,20 +41,16 @@ vn 0.000000 -0.000000 -1.000000
 vn 0.000000 1.000000 0.000000
 s off
 f 1/1/1 2/2/1 3/3/1 4/4/1
-f 5/5/2 6/6/2 7/6/2 8/5/2
-f 4/4/3 3/3/3 5/5/3 9/7/3
-f 2/2/4 1/1/4 10/8/4 6/6/4
-f 1/1/5 4/4/5 9/7/5 10/8/5
-f 3/3/6 2/2/6 6/6/6 5/5/6
-f 8/5/7 7/6/7 11/9/7 12/10/7
-f 9/7/2 5/5/2 8/5/2 13/7/2
-f 6/6/2 10/8/2 14/8/2 7/6/2
-f 10/8/2 9/7/2 13/7/2 14/8/2
-f 15/11/5 16/12/5 17/13/5 18/14/5
-f 13/7/7 8/5/7 12/10/7 16/12/7
-f 7/6/7 14/8/7 15/11/7 11/9/7
-f 14/8/7 13/7/7 16/12/7 15/11/7
-f 18/14/7 17/13/7 19/15/7 20/16/7
-f 12/10/6 11/9/6 20/16/6 19/15/6
-f 16/12/3 12/10/3 19/15/3 17/13/3
-f 11/9/4 15/11/4 18/14/4 20/16/4
+f 4/4/2 3/3/2 5/5/2 7/6/2
+f 2/2/3 1/1/3 8/7/3 6/8/3
+f 1/1/4 4/4/4 7/6/4 8/7/4
+f 3/3/5 2/2/5 6/8/5 5/5/5
+f 11/9/4 12/10/4 13/11/4 14/12/4
+f 14/12/6 13/11/6 15/13/6 16/14/6
+f 10/15/5 9/16/5 16/14/5 15/13/5
+f 12/10/2 10/15/2 15/13/2 13/11/2
+f 9/16/3 11/9/3 14/12/3 16/14/3
+f 5/5/6 6/8/6 9/16/6 10/15/6
+f 7/6/6 5/5/6 10/15/6 12/10/6
+f 6/8/6 8/7/6 11/9/6 9/16/6
+f 8/7/6 7/6/6 12/10/6 11/9/6

+ 259 - 145
examples/14-shadowvolumes/shadowvolumes.cpp

@@ -11,6 +11,8 @@ using namespace std::tr1;
 #include <bgfx.h>
 #include <bx/timer.h>
 #include <bx/readerwriter.h>
+#include <bx/allocator.h>
+#include <bx/float4_t.h>
 #include "entry/entry.h"
 #include "fpumath.h"
 #include "imgui/imgui.h"
@@ -23,10 +25,11 @@ using namespace std::tr1;
 #include <unordered_map>
 #include <map>
 
+#define SV_USE_SIMD 1
 #define MAX_INSTANCE_COUNT 25
 #define MAX_LIGHTS_COUNT 5
 
-#define VIEWID_RANGE1_PASS0     1 
+#define VIEWID_RANGE1_PASS0     1
 #define VIEWID_RANGE1_RT_PASS1  2
 #define VIEWID_RANGE15_PASS2    3
 #define VIEWID_RANGE1_PASS3    20
@@ -372,7 +375,7 @@ struct Uniforms
 		float m_alpha;
 		float m_lightCount;
 	};
-	
+
 	struct SvParams
 	{
 		float m_useStencilTex;
@@ -527,13 +530,13 @@ static RenderState s_renderStates[RenderState::Count]  =
 		| BGFX_STATE_MSAA
 		, UINT32_MAX
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_KEEP
 		| BGFX_STENCIL_OP_PASS_Z_DECR
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_KEEP
@@ -544,13 +547,13 @@ static RenderState s_renderStates[RenderState::Count]  =
 		| BGFX_STATE_MSAA
 		, UINT32_MAX
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_INCR
 		| BGFX_STENCIL_OP_PASS_Z_KEEP
 		, BGFX_STENCIL_TEST_ALWAYS
-		| BGFX_STENCIL_FUNC_REF(1)        
+		| BGFX_STENCIL_FUNC_REF(1)
 		| BGFX_STENCIL_FUNC_RMASK(0xff)
 		| BGFX_STENCIL_OP_FAIL_S_KEEP
 		| BGFX_STENCIL_OP_FAIL_Z_DECR
@@ -695,31 +698,15 @@ typedef std::vector<Face> FaceArray;
 
 struct Edge
 {
-	struct Plane
-	{
-		float m_plane[4];
-		bool m_reverseVertexOrder;
-	};
-
-	Edge(const float* _v0, const float* _v1)
-		: m_faceIndex(0)
-	{
-		memcpy(m_v0, _v0, 3*sizeof(float) );
-		memcpy(m_v1, _v1, 3*sizeof(float) );
-	}               
-
-	Plane& nextFace()
-	{
-		BX_CHECK(m_faceIndex < FACE_NUM, "Error! 2-manifold meshes must be used!");
-		return m_faces[(m_faceIndex++)%FACE_NUM];
-	}
-
-	float m_v0[3], m_v1[3];    
-	static const uint8_t FACE_NUM = 2;
-	Plane m_faces[FACE_NUM];
+	bool m_faceReverseOrder[2];
 	uint8_t m_faceIndex;
+	float m_v0[3], m_v1[3];
+};
+
+struct Plane
+{
+	float m_plane[4];
 };
-typedef std::vector<Edge> EdgeArray;
 
 struct HalfEdge
 {
@@ -844,24 +831,70 @@ struct Group
 		m_vertices = NULL;
 		m_numIndices = 0;
 		m_indices = NULL;
+		m_numEdges = 0;
+		m_edges = NULL;
+		m_edgePlanesUnalignedPtr = NULL;
 		m_prims.clear();
 	}
 
+	typedef struct { float f[6]; } f6_t;
+
+	struct EdgeComparator
+	{
+		bool operator()(const f6_t& _a, const f6_t& _b)
+		{
+			const uint8_t t0 = 0
+				| ( (_a.f[0] < _b.f[0]) << 5)
+				| ( (_a.f[1] < _b.f[1]) << 4)
+				| ( (_a.f[2] < _b.f[2]) << 3)
+				| ( (_a.f[3] < _b.f[3]) << 2)
+				| ( (_a.f[4] < _b.f[4]) << 1)
+				| ( (_a.f[5] < _b.f[5]) << 0)
+				;
+
+			const uint8_t t1 = 0
+				| ( (_a.f[0] > _b.f[0]) << 5)
+				| ( (_a.f[1] > _b.f[1]) << 4)
+				| ( (_a.f[2] > _b.f[2]) << 3)
+				| ( (_a.f[3] > _b.f[3]) << 2)
+				| ( (_a.f[4] > _b.f[4]) << 1)
+				| ( (_a.f[5] > _b.f[5]) << 0)
+				;
+
+			return t0 > t1;
+		}
+	};
+
 	void fillStructures(uint16_t _stride)
 	{
 		m_faces.clear();
-		m_edges.clear();
 		m_halfEdges.destroy();
 
 		//init halfedges
 		m_halfEdges.init(m_indices, m_numIndices);
 
 		//init faces and edges
-		m_faces.reserve(m_numIndices/3); //1 face = 3 indices 
-		m_edges.reserve(m_numIndices);   //1 triangle = 3 indices = 3 edges.
+		m_faces.reserve(m_numIndices/3); //1 face = 3 indices
+		m_edges = (Edge*)malloc(m_numIndices * sizeof(Edge)); //1 triangle = 3 indices = 3 edges.
+		m_edgePlanesUnalignedPtr = (Plane*)malloc(m_numIndices * sizeof(Plane) + 15);
+		m_edgePlanes = (Plane*)bx::alignPtr(m_edgePlanesUnalignedPtr, 0, 16);
+
+		struct EdgeAndPlane
+		{
+			EdgeAndPlane(const float* _v0, const float* _v1)
+				: m_faceIndex(0)
+			{
+				memcpy(m_v0, _v0, 3*sizeof(float) );
+				memcpy(m_v1, _v1, 3*sizeof(float) );
+			}
 
-		typedef std::map<std::pair<uint16_t, uint16_t>, uint32_t> EdgeIndexMap;
-		EdgeIndexMap edgeIndexMap;
+			bool m_faceReverseOrder[2];
+			uint8_t m_faceIndex;
+			float m_v0[3], m_v1[3];
+			Plane m_plane[2];
+		};
+		typedef std::map<f6_t, EdgeAndPlane, EdgeComparator> EdgeMap;
+		EdgeMap edgeMap;
 
 		for (uint32_t ii = 0, size = m_numIndices/3; ii < size; ++ii)
 		{
@@ -883,13 +916,6 @@ struct Group
 			memcpy(face.m_plane, plane, 4*sizeof(float) );
 			m_faces.push_back(face);
 
-			uint16_t triangleI[3][2] =
-			{
-				{i0, i1},
-				{i1, i2},
-				{i2, i0},
-			};
-
 			const float* triangleV[3][2] =
 			{
 				{v0, v1},
@@ -897,43 +923,46 @@ struct Group
 				{v2, v0},
 			};
 
-			typedef std::vector<uint8_t> TriangleIndex;
-			TriangleIndex triangleIndex;
-
 			for (uint8_t jj = 0; jj < 3; ++jj)
-			{   
-				EdgeIndexMap::iterator iter = edgeIndexMap.find(std::make_pair(triangleI[jj][1], triangleI[jj][0]) );
-				if (edgeIndexMap.end() != iter)
+			{
+				const float* v0 = triangleV[jj][0];
+				const float* v1 = triangleV[jj][1];
+				f6_t key;
+				f6_t keyInv;
+				memcpy(&key.f[0], v0, 3*sizeof(float) );
+				memcpy(&key.f[3], v1, 3*sizeof(float) );
+				memcpy(&keyInv.f[0], v1, 3*sizeof(float) );
+				memcpy(&keyInv.f[3], v0, 3*sizeof(float) );
+
+				EdgeMap::iterator iter = edgeMap.find(keyInv);
+				if (iter != edgeMap.end())
 				{
-					const uint32_t index = iter->second;
-					Edge* edge = &m_edges[index];
-
-					Edge::Plane& face = edge->nextFace();
-					memcpy(face.m_plane, plane, 4*sizeof(float) );
-					face.m_reverseVertexOrder = true;
+					EdgeAndPlane& ep = iter->second;
+					memcpy(ep.m_plane[ep.m_faceIndex].m_plane, plane, 4*sizeof(float) );
+					ep.m_faceReverseOrder[ep.m_faceIndex] = true;
 				}
 				else
 				{
-					triangleIndex.push_back(jj);
+					std::pair<EdgeMap::iterator, bool> result = edgeMap.insert(std::make_pair(key, EdgeAndPlane(v0, v1)) );
+					EdgeAndPlane& ep = result.first->second;
+					memcpy(ep.m_plane[ep.m_faceIndex].m_plane, plane, 4*sizeof(float) );
+					ep.m_faceReverseOrder[ep.m_faceIndex] = false;
+					ep.m_faceIndex++;
 				}
 			}
+		}
 
-			for (TriangleIndex::const_iterator iter = triangleIndex.begin(), end = triangleIndex.end(); iter != end; ++iter)
-			{
-				const uint8_t index = *iter;
-				const uint16_t i0 = triangleI[index][0];
-				const uint16_t i1 = triangleI[index][1];
-				const float* v0 = triangleV[index][0];
-				const float* v1 = triangleV[index][1];
-
-				Edge edge(v0, v1);
-				Edge::Plane& face = edge.nextFace();
-				memcpy(face.m_plane, plane, 4*sizeof(float) );
-				face.m_reverseVertexOrder = false;
-				m_edges.push_back(edge);
-
-				edgeIndexMap.insert(std::make_pair(std::make_pair(i0, i1), (uint32_t)m_edges.size()-1) );
-			}
+		uint32_t index = 0;
+		for (EdgeMap::const_iterator iter = edgeMap.begin(), end = edgeMap.end(); iter != end; ++iter)
+		{
+			Edge* edge = &m_edges[m_numEdges];
+			Plane* plane = &m_edgePlanes[index];
+
+			memcpy(edge, iter->second.m_faceReverseOrder, sizeof(Edge));
+			memcpy(plane, iter->second.m_plane, 2 * sizeof(Plane));
+
+			m_numEdges++;
+			index += 2;
 		}
 	}
 
@@ -948,6 +977,10 @@ struct Group
 		m_vertices = NULL;
 		free(m_indices);
 		m_indices = NULL;
+		free(m_edges);
+		m_edges = NULL;
+		free(m_edgePlanesUnalignedPtr);
+		m_edgePlanesUnalignedPtr = NULL;
 		m_halfEdges.destroy();
 	}
 
@@ -961,7 +994,10 @@ struct Group
 	Aabb m_aabb;
 	Obb m_obb;
 	PrimitiveArray m_prims;
-	EdgeArray m_edges;
+	uint32_t m_numEdges;
+	Edge* m_edges;
+	Plane* m_edgePlanesUnalignedPtr;
+	Plane* m_edgePlanes;
 	FaceArray m_faces;
 	HalfEdges m_halfEdges;
 };
@@ -1114,7 +1150,7 @@ struct Mesh
 struct Model
 {
 	Model()
-	{ 
+	{
 		m_program.idx = bgfx::invalidHandle;
 		m_texture.idx = bgfx::invalidHandle;
 	}
@@ -1178,7 +1214,7 @@ struct Instance
 {
 	Instance()
 		: m_svExtrusionDistance(150.0f)
-	{ 
+	{
 		m_color[0] = 1.0f;
 		m_color[1] = 1.0f;
 		m_color[2] = 1.0f;
@@ -1341,10 +1377,12 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 					  , bool _textureAsStencil = false
 					  )
 {
-	const uint8_t*    vertices  = _group.m_vertices;
-	const FaceArray&  faces     = _group.m_faces;
-	const EdgeArray&  edges     = _group.m_edges;
-	HalfEdges&        halfEdges = _group.m_halfEdges;
+	const uint8_t*    vertices   = _group.m_vertices;
+	const FaceArray&  faces      = _group.m_faces;
+	const Edge*       edges      = _group.m_edges;
+	const Plane*      edgePlanes = _group.m_edgePlanes;
+	const uint32_t    numEdges   = _group.m_numEdges;
+	HalfEdges&        halfEdges  = _group.m_halfEdges;
 
 	struct VertexData
 	{
@@ -1366,8 +1404,8 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 
 	bool cap = (ShadowVolumeImpl::DepthFail == _impl);
 
-	VertexData* verticesSide    = (VertexData*) s_svAllocator.alloc (20000 * sizeof(VertexData) );
-	uint16_t*   indicesSide     = (uint16_t*)   s_svAllocator.alloc (20000 * 3*sizeof(uint16_t) );
+	VertexData* verticesSide    = (VertexData*) s_svAllocator.alloc(20000 * sizeof(VertexData) );
+	uint16_t*   indicesSide     = (uint16_t*)   s_svAllocator.alloc(20000 * 3*sizeof(uint16_t) );
 	uint16_t*   indicesFrontCap = 0;
 	uint16_t*   indicesBackCap  = 0;
 
@@ -1395,7 +1433,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 			if (f > 0.0f)
 			{
 				frontFacing = true;
-				uint16_t triangleEdges[3][2] = 
+				uint16_t triangleEdges[3][2] =
 				{
 					{ face.m_i[0], face.m_i[1] },
 					{ face.m_i[1], face.m_i[2] },
@@ -1482,49 +1520,125 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 	}
 	else // ShadowVolumeAlgorithm::EdgeBased:
 	{
-		for (EdgeArray::const_iterator iter = edges.begin(), end = edges.end(); iter != end; ++iter)
-		{
-			const Edge& edge = *iter;
-			const float* v0 = edge.m_v0;
-			const float* v1 = edge.m_v1;
+		uint32_t ii = 0;
 
-			int16_t k = 0;
-			for (uint8_t ii = 0; ii < edge.m_faceIndex; ++ii)
-			{
-				const Edge::Plane& face = edge.m_faces[ii];
+#if SV_USE_SIMD
+		uint32_t numEdgesRounded = numEdges & (~0x1);
 
-				int16_t s = (int16_t)fsign(vec3Dot(face.m_plane, _light) + face.m_plane[3]);
-				if (face.m_reverseVertexOrder)
+		using namespace bx;
+
+		const float4_t lx = float4_splat(_light[0]);
+		const float4_t ly = float4_splat(_light[1]);
+		const float4_t lz = float4_splat(_light[2]);
+
+		for (; ii < numEdgesRounded; ii+=2)
+		{
+			const Edge& edge0 = edges[ii];
+			const Edge& edge1 = edges[ii+1];
+			const Plane* edgePlane0 = &edgePlanes[ii*2];
+			const Plane* edgePlane1 = &edgePlanes[ii*2 + 2];
+
+			const float4_t reverse = float4_ild(edge0.m_faceReverseOrder[0]
+						 , edge1.m_faceReverseOrder[0]
+						 , edge0.m_faceReverseOrder[1]
+						 , edge1.m_faceReverseOrder[1]
+						 );
+
+			const float4_t v0 = float4_ld(edgePlane0[0].m_plane);
+			const float4_t v1 = float4_ld(edgePlane1[0].m_plane);
+			const float4_t v2 = float4_ld(edgePlane0[1].m_plane);
+			const float4_t v3 = float4_ld(edgePlane1[1].m_plane);
+
+			const float4_t xxyy0 = float4_shuf_xAyB(v0, v2);
+			const float4_t zzww0 = float4_shuf_zCwD(v0, v2);
+			const float4_t xxyy1 = float4_shuf_xAyB(v1, v3);
+			const float4_t zzww1 = float4_shuf_zCwD(v1, v3);
+
+			const float4_t vX = float4_shuf_xAyB(xxyy0, xxyy1);
+			const float4_t vY = float4_shuf_zCwD(xxyy0, xxyy1);
+			const float4_t vZ = float4_shuf_xAyB(zzww0, zzww1);
+			const float4_t vW = float4_shuf_zCwD(zzww0, zzww1);
+
+			const float4_t r0 = float4_mul(vX, lx);
+			const float4_t r1 = float4_mul(vY, ly);
+			const float4_t r2 = float4_mul(vZ, lz);
+
+			const float4_t dot = float4_add(r0, float4_add(r1, r2));
+			const float4_t f = float4_add(dot, vW);
+
+			const float4_t zero = float4_zero();
+			const float4_t mask = float4_cmpgt(f, zero);
+			const float4_t onef = float4_splat(1.0f);
+			const float4_t tmp0 = float4_and(mask, onef);
+			const float4_t tmp1 = float4_ftoi(tmp0);
+			const float4_t tmp2 = float4_xor(tmp1, reverse);
+			const float4_t tmp3 = float4_sll(tmp2, 1);
+			const float4_t onei = float4_isplat(1);
+			const float4_t tmp4 = float4_isub(tmp3, onei);
+
+			BX_ALIGN_STRUCT_16(int32_t res[4]);
+			float4_st(&res, tmp4);
+
+			for (uint16_t jj = 0; jj < 2; ++jj)
+			{
+				int16_t k = res[jj] + res[jj+2];
+				if (k != 0)
 				{
-					s = -s;
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v0, 0.0f, float(k));
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v0, 1.0f, float(k));
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v1, 0.0f, float(k));
+					verticesSide[vsideI++] = VertexData(edges[ii+jj].m_v1, 1.0f, float(k));
+
+					k = _textureAsStencil ? 1 : k;
+					uint16_t winding = uint16_t(k > 0);
+					for (uint8_t ii = 0, end = abs(k); ii < end; ++ii)
+					{
+						indicesSide[sideI++] = indexSide;
+						indicesSide[sideI++] = indexSide + 2 - winding;
+						indicesSide[sideI++] = indexSide + 1 + winding;
+
+						indicesSide[sideI++] = indexSide + 2;
+						indicesSide[sideI++] = indexSide + 3 - winding*2;
+						indicesSide[sideI++] = indexSide + 1 + winding*2;
+					}
+
+					indexSide += 4;
 				}
-				k += s;
 			}
+		}
+#endif
 
-			if (k == 0)
-			{
-				continue;
-			}
+		for (; ii < numEdges; ++ii)
+		{
+			const Edge& edge = edges[ii];
+			const Plane* edgePlane = &edgePlanes[ii*2];
 
-			verticesSide[vsideI++] = VertexData(v0, 0.0f, k);
-			verticesSide[vsideI++] = VertexData(v0, 1.0f, k);
-			verticesSide[vsideI++] = VertexData(v1, 0.0f, k);
-			verticesSide[vsideI++] = VertexData(v1, 1.0f, k);
+			int16_t s0 = ( (vec3Dot(edgePlane[0].m_plane, _light) + edgePlane[0].m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[0];
+			int16_t s1 = ( (vec3Dot(edgePlane[1].m_plane, _light) + edgePlane[1].m_plane[3]) > 0.0f) ^ edge.m_faceReverseOrder[1];
+			int16_t k = ( (s0 + s1) << 1) - 2;
 
-			k = _textureAsStencil ? 1 : k;
-			uint16_t winding = uint16_t(k > 0);
-			for (uint8_t ii = 0, end = abs(k); ii < end; ++ii)
+			if (k != 0)
 			{
-				indicesSide[sideI++] = indexSide;
-				indicesSide[sideI++] = indexSide + 2 - winding;
-				indicesSide[sideI++] = indexSide + 1 + winding;
+				verticesSide[vsideI++] = VertexData(edge.m_v0, 0.0f, k);
+				verticesSide[vsideI++] = VertexData(edge.m_v0, 1.0f, k);
+				verticesSide[vsideI++] = VertexData(edge.m_v1, 0.0f, k);
+				verticesSide[vsideI++] = VertexData(edge.m_v1, 1.0f, k);
+
+				k = _textureAsStencil ? 1 : k;
+				uint16_t winding = uint16_t(k > 0);
+				for (uint8_t ii = 0, end = abs(k); ii < end; ++ii)
+				{
+					indicesSide[sideI++] = indexSide;
+					indicesSide[sideI++] = indexSide + 2 - winding;
+					indicesSide[sideI++] = indexSide + 1 + winding;
 
-				indicesSide[sideI++] = indexSide + 2;
-				indicesSide[sideI++] = indexSide + 3 - winding*2;
-				indicesSide[sideI++] = indexSide + 1 + winding*2;
-			}
+					indicesSide[sideI++] = indexSide + 2;
+					indicesSide[sideI++] = indexSide + 3 - winding*2;
+					indicesSide[sideI++] = indexSide + 1 + winding*2;
+				}
 
-			indexSide += 4;
+				indexSide += 4;
+			}
 		}
 
 		if (cap)
@@ -1535,7 +1649,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 				const Face& face = *iter;
 
 				float f = vec3Dot(face.m_plane, _light) + face.m_plane[3];
-				bool frontFacing = (f > 0.0f); 
+				bool frontFacing = (f > 0.0f);
 
 				for (uint8_t ii = 0, end = 1 + uint8_t(!_textureAsStencil); ii < end; ++ii)
 				{
@@ -1684,10 +1798,10 @@ void createNearClipVolume(float* __restrict _outPlanes24f
 
 	float nearPlaneV[4] =
 	{
-		0.0f * lightSide,  
-		0.0f * lightSide,  
-		1.0f * lightSide,  
-		_near * lightSide, 
+		0.0f * lightSide,
+		0.0f * lightSide,
+		1.0f * lightSide,
+		_near * lightSide,
 	};
 	vec4MulMtx(volumePlanes[4], nearPlaneV, mtxViewTrans);
 
@@ -1735,9 +1849,9 @@ bool clipTest(const float* _planes, uint8_t _planeNum, const Mesh& _mesh, const
 			}
 		}
 
-		if (isInside) 
+		if (isInside)
 		{
-			return true; 
+			return true;
 		}
 	}
 
@@ -1825,19 +1939,19 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	bgfx::ProgramHandle programColorTexture     = loadProgram("vs_shadowvolume_color_texture",     "fs_shadowvolume_color_texture"    );
 	bgfx::ProgramHandle programTexture          = loadProgram("vs_shadowvolume_texture",           "fs_shadowvolume_texture"          );
 
-	bgfx::ProgramHandle programBackBlank        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackblank" ); 
-	bgfx::ProgramHandle programSideBlank        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsideblank" ); 
-	bgfx::ProgramHandle programFrontBlank       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontblank"); 
+	bgfx::ProgramHandle programBackBlank        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackblank" );
+	bgfx::ProgramHandle programSideBlank        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsideblank" );
+	bgfx::ProgramHandle programFrontBlank       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontblank");
 
-	bgfx::ProgramHandle programBackColor        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackcolor" ); 
-	bgfx::ProgramHandle programSideColor        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidecolor" ); 
-	bgfx::ProgramHandle programFrontColor       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontcolor"); 
+	bgfx::ProgramHandle programBackColor        = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbackcolor" );
+	bgfx::ProgramHandle programSideColor        = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidecolor" );
+	bgfx::ProgramHandle programFrontColor       = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfrontcolor");
 
-	bgfx::ProgramHandle programSideTex          = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidetex"   ); 
-	bgfx::ProgramHandle programBackTex1         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex1"  ); 
-	bgfx::ProgramHandle programBackTex2         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex2"  ); 
-	bgfx::ProgramHandle programFrontTex1        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex1" ); 
-	bgfx::ProgramHandle programFrontTex2        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex2" ); 
+	bgfx::ProgramHandle programSideTex          = loadProgram("vs_shadowvolume_svside",  "fs_shadowvolume_svsidetex"   );
+	bgfx::ProgramHandle programBackTex1         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex1"  );
+	bgfx::ProgramHandle programBackTex2         = loadProgram("vs_shadowvolume_svback",  "fs_shadowvolume_svbacktex2"  );
+	bgfx::ProgramHandle programFrontTex1        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex1" );
+	bgfx::ProgramHandle programFrontTex2        = loadProgram("vs_shadowvolume_svfront", "fs_shadowvolume_svfronttex2" );
 
 	struct ShadowVolumeProgramType
 	{
@@ -1865,7 +1979,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	};
 
 	bgfx::ProgramHandle svProgs[ShadowVolumeProgramType::Count][ShadowVolumePart::Count] =
-	{ 
+	{
 		{ programBackBlank, programSideBlank, programFrontBlank } // Blank
 		,{ programBackColor, programSideColor, programFrontColor } // Color
 		,{ programBackTex1,  programSideTex,   programFrontTex1  } // Tex1
@@ -1933,7 +2047,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	int64_t profTime = 0;
 	int64_t timeOffset = bx::getHPCounter();
 
-	uint32_t numShadowVolumeVertices = 0; 
+	uint32_t numShadowVolumeVertices = 0;
 	uint32_t numShadowVolumeIndices  = 0;
 
 	uint32_t oldWidth = 0;
@@ -1944,12 +2058,12 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	bool settings_updateLights       = true;
 	bool settings_updateScene        = true;
 	bool settings_mixedSvImpl        = true;
-	bool settings_useStencilTexture  = false;
+	bool settings_useStencilTexture  = true;
 	bool settings_drawShadowVolumes  = false;
 	float settings_numLights         = 1.0f;
 	float settings_instanceCount     = 9.0f;
-	ShadowVolumeImpl::Enum      settings_shadowVolumeImpl      = ShadowVolumeImpl::DepthFail; 
-	ShadowVolumeAlgorithm::Enum settings_shadowVolumeAlgorithm = ShadowVolumeAlgorithm::FaceBased;
+	ShadowVolumeImpl::Enum      settings_shadowVolumeImpl      = ShadowVolumeImpl::DepthFail;
+	ShadowVolumeAlgorithm::Enum settings_shadowVolumeAlgorithm = ShadowVolumeAlgorithm::EdgeBased;
 	int32_t scrollAreaRight = 0;
 
 	const char* titles[2] =
@@ -2111,8 +2225,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		imguiLabel("CPU Time: %7.1f [ms]", double(profTime)*toMs);
 		imguiLabel("Volume Vertices: %5.uk", numShadowVolumeVertices/1000);
 		imguiLabel("Volume Indices: %6.uk", numShadowVolumeIndices/1000);
-		numShadowVolumeVertices = 0; 
-		numShadowVolumeIndices = 0;  
+		numShadowVolumeVertices = 0;
+		numShadowVolumeIndices = 0;
 
 		imguiSeparatorLine();
 		settings_drawShadowVolumes = imguiCheck("Draw Shadow Volumes", settings_drawShadowVolumes)
@@ -2204,7 +2318,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			bgfx::dbgTextPrintf(3, row++, 0x0f, "Stencil:");
 			bgfx::dbgTextPrintf(8, row++, 0x0f, "Stencil buffer     - Faster, but capable only of +1 incr.");
 			bgfx::dbgTextPrintf(8, row++, 0x0f, "Texture as stencil - Slower, but capable of +2 incr.");
-		}                       
+		}
 
 		// Setup instances
 		Instance shadowCasters[SceneCount][60];
@@ -2456,7 +2570,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			bgfx::setViewClear(VIEWID_RANGE1_RT_PASS1, BGFX_CLEAR_DEPTH_BIT, 0x00000000, 1.0f, 0);
 			bgfx::setViewRenderTarget(VIEWID_RANGE1_RT_PASS1, s_stencilRt);
 
-			const RenderState& renderState = s_renderStates[RenderState::ShadowVolume_UsingStencilTexture_BuildDepth]; 
+			const RenderState& renderState = s_renderStates[RenderState::ShadowVolume_UsingStencilTexture_BuildDepth];
 
 			for (uint8_t ii = 0; ii < shadowCastersCount[currentScene]; ++ii)
 			{
@@ -2472,7 +2586,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		profTime = bx::getHPCounter();
 
 		/**
-		 * For each light: 
+		 * For each light:
 		 * 1. Compute and draw shadow volume to stencil buffer
 		 * 2. Draw diffuse with stencil test
 		 */
@@ -2525,14 +2639,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				const Instance& instance = shadowCasters[currentScene][jj];
 				Model* model = instance.m_model;
 
-				ShadowVolumeImpl::Enum shadowVolumeImpl = settings_shadowVolumeImpl; 
+				ShadowVolumeImpl::Enum shadowVolumeImpl = settings_shadowVolumeImpl;
 				if (settings_mixedSvImpl)
-				{                                                     
+				{
 					// If instance is inside near clip volume, depth fail must be used, else depth pass is fine.
 					bool isInsideVolume = clipTest(nearClipVolume, 6, model->m_mesh, instance.m_scale, instance.m_pos);
 					shadowVolumeImpl = (isInsideVolume ? ShadowVolumeImpl::DepthFail : ShadowVolumeImpl::DepthPass);
 				}
-				s_uniforms.m_svparams.m_dfail = float(ShadowVolumeImpl::DepthFail == shadowVolumeImpl); 
+				s_uniforms.m_svparams.m_dfail = float(ShadowVolumeImpl::DepthFail == shadowVolumeImpl);
 
 				// Compute virtual light position for shadow volume generation.
 				float transformedLightPos[3];
@@ -2591,7 +2705,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 							: RenderState::ShadowVolume_UsingStencilTexture_CraftStencil_DepthPass
 							;
 
-						programIndex = ShadowVolumeAlgorithm::FaceBased == settings_shadowVolumeAlgorithm 
+						programIndex = ShadowVolumeAlgorithm::FaceBased == settings_shadowVolumeAlgorithm
 							? ShadowVolumeProgramType::Tex1
 							: ShadowVolumeProgramType::Tex2
 							;
@@ -2691,7 +2805,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			}
 		}
 
-		profTime = bx::getHPCounter() - profTime; 
+		profTime = bx::getHPCounter() - profTime;
 
 		// Lights.
 		const float lightScale[3] = { 1.5f, 1.5f, 1.5f };

BIN
examples/runtime/meshes/platform.bin