Przeglądaj źródła

example 41-tess (#1821)

* new example

new tess example

* renaming

Rename clock to counter...

* clean

Remove comments

* clean up

removed vector and const char* path

* ups

Removing compiled shaders...
DanielGavin 6 lat temu
rodzic
commit
caccb56707

+ 209 - 0
examples/41-tess/constants.h

@@ -0,0 +1,209 @@
+#pragma once
+
+#include "common.h"
+
+
+// Labels shown in the "Shading" combo box. The order has to match the draw
+// program enum in tess.cpp (PROGRAM_TERRAIN_NORMAL, PROGRAM_TERRAIN), because
+// the selected combo index is used directly to pick the draw program.
+// `static` and the const pointers give the table internal linkage, so this
+// header can be included from more than one translation unit without
+// producing duplicate definitions.
+static const char* const shader_options[] = {
+	"Normal",
+	"Diffuse"
+};
+
+////Instanced patch geometry at various subdiv levels////
+
+// gpuSubd == 0: the unsubdivided patch, a single triangle with corners
+// (0,0), (1,0) and (0,1). Vertices are stored as consecutive pairs of floats.
+const float verticesL0[] = {
+    0.0f, 0.0f,
+    1.0f, 0.0f,
+    0.0f, 1.0f
+};
+
+// 3 indices == 1 triangle (the compute shaders use an index count of 3 for this level).
+const uint32_t indexesL0[] = { 0u, 1u, 2u };
+
+// gpuSubd == 1: the same unit triangle split into 4 triangles that all share
+// vertex 1 (0.5, 0.5), the midpoint of the longest edge.
+const float verticesL1[] = {
+    0.0f, 1.0f,
+    0.5f, 0.5f,
+    0.0f, 0.5f,
+    0.0f, 0.0f,
+    0.5f, 0.0f,
+    1.0f, 0.0f
+};
+// 12 indices == 4 triangles, i.e. 6 << (2 * 1 - 1).
+const uint32_t indexesL1[] = {
+    1u, 0u, 2u,
+    1u, 2u, 3u,
+    1u, 3u, 4u,
+    1u, 4u, 5u
+};
+
+// gpuSubd == 2: the unit triangle split into 16 triangles, arranged as four
+// fans of four triangles each (fan centres are vertices 0, 6, 6 and 12).
+const float verticesL2[] = {
+    0.25f, 0.75f,
+    0.0f, 1.0f,
+    0.0f, 0.75f,
+    0.0f, 0.5f,
+    0.25f, 0.5f,
+    0.5f, 0.5f,
+
+    0.25f, 0.25f,
+    0.0f, 0.25f,
+    0.0f, 0.0f,
+    0.25f, 0.0f,
+    0.5f, 0.0f,
+    0.5f, 0.25f,
+    0.75f, 0.25f,
+    0.75f, 0.0f,
+    1.0f, 0.0f        //14 (index of the last vertex)
+};
+// 48 indices == 16 triangles, i.e. 6 << (2 * 2 - 1).
+const uint32_t indexesL2[] = {
+    0u, 1u, 2u,
+    0u, 2u, 3u,
+    0u, 3u, 4u,
+    0u, 4u, 5u,
+
+    6u, 5u, 4u,
+    6u, 4u, 3u,
+    6u, 3u, 7u,
+    6u, 7u, 8u,
+
+    6u, 8u, 9u,
+    6u, 9u, 10u,
+    6u, 10u, 11u,
+    6u, 11u, 5u,
+
+    12u, 5u, 11u,
+    12u, 11u, 10u,
+    12u, 10u, 13u,
+    12u, 13u, 14u
+};
+
+// gpuSubd == 3: the unit triangle split into 64 triangles.
+// The first 15 vertices are the gpuSubd == 2 vertices scaled by 0.5 and
+// shifted by +0.5 along the second coordinate; the remaining vertices are
+// listed explicitly. Trailing "//N" comments give the index of the vertex
+// on that line.
+const float verticesL3[] = {
+    0.25f*0.5f, 0.75f*0.5f + 0.5f,
+    0.0f*0.5f, 1.0f*0.5f + 0.5f,
+    0.0f*0.5f, 0.75f*0.5f + 0.5f,
+    0.0f*0.5f , 0.5f*0.5f + 0.5f,
+    0.25f*0.5f, 0.5f*0.5f + 0.5f,
+    0.5f*0.5f, 0.5f*0.5f + 0.5f,
+    0.25f*0.5f, 0.25f*0.5f + 0.5f,
+    0.0f*0.5f, 0.25f*0.5f + 0.5f,
+    0.0f*0.5f, 0.0f*0.5f + 0.5f,
+    0.25f*0.5f, 0.0f*0.5f + 0.5f,
+    0.5f*0.5f, 0.0f*0.5f + 0.5f,
+    0.5f*0.5f, 0.25f*0.5f + 0.5f,
+    0.75f*0.5f, 0.25f*0.5f + 0.5f,
+    0.75f*0.5f, 0.0f*0.5f + 0.5f,
+    1.0f*0.5f, 0.0f*0.5f + 0.5f,        //14
+
+    0.375f, 0.375f,
+    0.25f, 0.375f,
+    0.25f, 0.25f,
+    0.375f, 0.25f,
+    0.5f, 0.25f,
+    0.5f, 0.375f,    //20
+
+    0.125f, 0.375f,
+    0.0f, 0.375f,
+    0.0f, 0.25f,
+    0.125f, 0.25f,    //24
+
+    0.125f, 0.125f,
+    0.0f, 0.125f,
+    0.0f, 0.0f,
+    0.125f, 0.0f,
+    0.25f, 0.0f,
+    0.25f, 0.125f,    //30
+
+    0.375f, 0.125f,
+    0.375f, 0.0f,
+    0.5f, 0.0f,
+    0.5f, 0.125f,    //34
+
+    0.625f, 0.375f,
+    0.625f, 0.25f,
+    0.75f, 0.25f,    //37
+
+    0.625f, 0.125f,
+    0.625f, 0.0f,
+    0.75f, 0.0f,
+    0.75f, 0.125f,    //41
+
+    0.875f, 0.125f,
+    0.875f, 0.0f,
+    1.0f, 0.0f    //44
+};
+// 192 indices == 64 triangles, i.e. 6 << (2 * 3 - 1).
+// Each group below is a triangle fan around the first index of its rows.
+const uint32_t indexesL3[] = {
+    0u, 1u, 2u,
+    0u, 2u, 3u,
+    0u, 3u, 4u,
+    0u, 4u, 5u,
+
+    6u, 5u, 4u,
+    6u, 4u, 3u,
+    6u, 3u, 7u,
+    6u, 7u, 8u,
+
+    6u, 8u, 9u,
+    6u, 9u, 10u,
+    6u, 10u, 11u,
+    6u, 11u, 5u,
+
+    12u, 5u, 11u,
+    12u, 11u, 10u,
+    12u, 10u, 13u,
+    12u, 13u, 14u,        //End of first big triangle
+
+    15u, 14u, 13u,
+    15u, 13u, 10u,
+    15u, 10u, 16u,
+    15u, 16u, 17u,
+    15u, 17u, 18u,
+    15u, 18u, 19u,
+    15u, 19u, 20u,
+    15u, 20u, 14u,
+
+    21u, 10u, 9u,
+    21u, 9u, 8u,
+    21u, 8u, 22u,
+    21u, 22u, 23u,
+    21u, 23u, 24u,
+    21u, 24u, 17u,
+    21u, 17u, 16u,
+    21u, 16u, 10u,
+
+    25u, 17u, 24u,
+    25u, 24u, 23u,
+    25u, 23u, 26u,
+    25u, 26u, 27u,
+    25u, 27u, 28u,
+    25u, 28u, 29u,
+    25u, 29u, 30u,
+    25u, 30u, 17u,
+
+    31u, 19u, 18u,
+    31u, 18u, 17u,
+    31u, 17u, 30u,
+    31u, 30u, 29u,
+    31u, 29u, 32u,
+    31u, 32u, 33u,
+    31u, 33u, 34u,
+    31u, 34u, 19u,
+
+    35u, 14u, 20u,
+    35u, 20u, 19u,
+    35u, 19u, 36u,
+    35u, 36u, 37u,
+
+    38u, 37u, 36u,
+    38u, 36u, 19u,
+    38u, 19u, 34u,
+    38u, 34u, 33u,
+    38u, 33u, 39u,
+    38u, 39u, 40u,
+    38u, 40u, 41u,
+    38u, 41u, 37u,
+
+    42u, 37u, 41u,
+    42u, 41u, 40u,
+    42u, 40u, 43u,
+    42u, 43u, 44u
+};

+ 49 - 0
examples/41-tess/cs_terrain_init.sc

@@ -0,0 +1,49 @@
+#include "bgfx_compute.sh"
+
+#include "uniforms.sh"
+
+// Buffer bindings. The last macro argument is the slot number; tess.cpp binds
+// the matching resources with bgfx::setBuffer(1/2/3/4/8, ...) before
+// dispatching this program.
+
+// Subdivision buffer that the next LOD pass will write to.
+BUFFER_WR(u_SubdBufferOut, uint, 1);
+
+// Keys that survived culling (consumed by the draw).
+BUFFER_RW(u_CulledSubdBuffer, uint, 2);
+
+// Indirect draw / dispatch arguments.
+BUFFER_RW(indirectBuffer, uvec4, 3);
+
+// Three uint counters, see main() below for the initial values.
+BUFFER_RW(atomicCounterBuffer, uint, 4);
+
+// Subdivision buffer that the next LOD pass will read from; it is written
+// here only to seed it.
+BUFFER_WR(u_SubdBufferIn, uint, 8);
+
+
+// Resets the indirect arguments, all three subdivision buffers and the atomic
+// counters. Runs as a single thread; tess.cpp dispatches it whenever the
+// subdivision buffers have been rebuilt.
+NUM_THREADS(1u, 1u, 1u)
+void main()
+{
+	// Index count of one instanced patch: 3 for the unsubdivided triangle,
+	// otherwise 6 << (2 * u_gpu_subd - 1). The shift is only evaluated when
+	// its amount is positive, because shifting by a negative amount is
+	// undefined.
+	uint subd = 3u;
+
+	if ((2 * u_gpu_subd - 1) > 0) {
+		subd = 6 << (2 * u_gpu_subd - 1);
+	}
+
+	// Entry 0: draw arguments (zero instances until the first LOD pass ran).
+	// Entry 1: dispatch arguments for the LOD compute pass.
+	drawIndexedIndirect(indirectBuffer, 0u, subd, 0u, 0u, 0u, 0u);
+	dispatchIndirect(indirectBuffer, 1u, 2u / UPDATE_INDIRECT_VALUE_DIVIDE + 1u, 1u, 1u);
+
+	// Seed every buffer with two (primID, key) pairs: (0, 1) and (1, 1),
+	// i.e. the root key (key == 1) of primitive 0 and of primitive 1.
+	u_SubdBufferOut[0] = 0;
+	u_SubdBufferOut[1] = 1;
+	u_SubdBufferOut[2] = 1;
+	u_SubdBufferOut[3] = 1;
+
+	u_CulledSubdBuffer[0] = 0;
+	u_CulledSubdBuffer[1] = 1;
+	u_CulledSubdBuffer[2] = 1;
+	u_CulledSubdBuffer[3] = 1;
+
+	u_SubdBufferIn[0] = 0;
+	u_SubdBufferIn[1] = 1;
+	u_SubdBufferIn[2] = 1;
+	u_SubdBufferIn[3] = 1;
+
+	uint tmp;
+
+	// Counters 0 and 1 (uints written by the LOD pass) start at zero;
+	// counter 2 (number of keys the LOD pass has to process) starts at 2,
+	// matching the two seeded pairs.
+	atomicFetchAndExchange(atomicCounterBuffer[0], 0, tmp);
+	atomicFetchAndExchange(atomicCounterBuffer[1], 0, tmp);
+	atomicFetchAndExchange(atomicCounterBuffer[2], 2, tmp);
+}
+

+ 86 - 0
examples/41-tess/cs_terrain_lod.sc

@@ -0,0 +1,86 @@
+
+////////////////////////////////////////////////////////////////////////////////
+// Implicit Subdivision Shader for Terrain Rendering
+//
+
+#include "terrain_common.sh"
+#include "fcull.sh"
+
+BUFFER_RO(u_SubdBufferIn, uint, 8);
+
+BUFFER_RW(u_CulledSubdBuffer, uint, 2);
+
+BUFFER_RO(u_VertexBuffer, vec4, 6);
+
+BUFFER_RO(u_IndexBuffer, uint, 7);
+
+
+// -----------------------------------------------------------------------------
+/**
+ * Compute LoD Shader
+ *
+ * One thread per (primID, key) pair of u_SubdBufferIn. Each thread
+ *  - rebuilds the sub-triangle described by its key (and its parent),
+ *  - decides via updateSubdBuffer() whether the key is split, kept or merged
+ *    and writes the result to the output subdivision buffer,
+ *  - appends the key to u_CulledSubdBuffer if it passes the frustum test
+ *    (or unconditionally when u_cull is 0).
+ */
+
+NUM_THREADS(COMPUTE_THREAD_COUNT, 1u, 1u)
+void main()
+{
+    // get threadID (each key is associated to a thread)
+    uint threadID = gl_GlobalInvocationID.x;
+
+
+	// counter 2 holds the number of keys to process; surplus threads exit
+	if (threadID >= u_AtomicCounterBuffer[2])
+		return;
+
+    // get coarse triangle associated to the key
+    // (the input buffer stores pairs: primID at 2*i, key at 2*i+1)
+    uint primID = u_SubdBufferIn[threadID*2];
+	
+    vec4 v_in[3];
+	v_in[0] = u_VertexBuffer[u_IndexBuffer[primID * 3    ]];
+	v_in[1] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 1]];
+	v_in[2] = u_VertexBuffer[u_IndexBuffer[primID * 3 + 2]];
+
+    // compute distance-based LOD
+    uint key = u_SubdBufferIn[threadID*2+1];
+	
+    // v: corners of the sub-triangle for this key, vp: corners of its parent
+    vec4 v[3];
+	vec4 vp[3]; 
+	
+	subd(key, v_in, v, vp);
+	
+	uint targetLod; uint parentLod;
+	
+	if(u_freeze == 0) {
+		targetLod = uint(computeLod(v));
+		parentLod = uint(computeLod(vp));
+	}
+	
+	else {
+		// frozen: use the key's own level for both, so updateSubdBuffer()
+		// takes its "keep" branch and the key is written back unchanged
+		targetLod = parentLod = findMSB(key);
+	}
+	
+    updateSubdBuffer(primID, key, targetLod, parentLod);
+
+    // Cull invisible nodes
+    mat4 mvp = u_modelViewProj;
+    vec4 bmin = min(min(v[0], v[1]), v[2]);
+    vec4 bmax = max(max(v[0], v[1]), v[2]);
+
+    // account for displacement in bound computations
+    bmin.z = 0;
+    bmax.z = u_DmapFactor;
+	
+	
+    // update CulledSubdBuffer
+    if (u_cull == 0 || frustumCullingTest(mvp, bmin.xyz, bmax.xyz)) {
+        // write key
+        // (counter 1 is advanced by 2 because each entry is a primID/key pair)
+		uint idx = 0;
+		atomicFetchAndAdd(u_AtomicCounterBuffer[1], 2, idx);
+        u_CulledSubdBuffer[idx] = primID;
+		u_CulledSubdBuffer[idx+1] = key;
+    }
+	
+	
+
+}

+ 20 - 0
examples/41-tess/cs_terrain_update_draw.sc

@@ -0,0 +1,20 @@
+#include "bgfx_compute.sh"
+#include "uniforms.sh"
+
+BUFFER_RW(indirectBuffer, uvec4, 3);
+BUFFER_RW(atomicCounterBuffer, uint, 4);
+
+// Writes the indexed indirect draw arguments for the terrain draw call:
+// one instance per (primID, key) pair that the LOD pass appended to the
+// culled buffer. Runs as a single thread.
+NUM_THREADS(1u, 1u, 1u)
+void main()
+{
+	// Counter 1 counts uints written to the culled buffer (two per key).
+	uint counter = atomicCounterBuffer[1];
+
+	// Index count of one instanced patch: 3 for the unsubdivided triangle,
+	// otherwise 6 << (2 * u_gpu_subd - 1). The shift is only evaluated when
+	// its amount is positive, because shifting by a negative amount is
+	// undefined.
+	uint subd = 3u;
+
+	if ((2 * u_gpu_subd - 1) > 0) {
+		subd = 6 << (2 * u_gpu_subd - 1);
+	}
+
+	drawIndexedIndirect(indirectBuffer, 0, subd, counter / 2, 0u, 0u, 0u);
+}
+

+ 24 - 0
examples/41-tess/cs_terrain_update_indirect.sc

@@ -0,0 +1,24 @@
+#include "bgfx_compute.sh"
+#include "uniforms.sh"
+
+BUFFER_RW(indirectBuffer, uvec4, 3);
+BUFFER_RW(atomicCounterBuffer, uint, 4);
+
+// Prepares the next LOD pass: resets the two write counters, publishes the
+// number of keys produced by the previous pass and sizes the indirect
+// dispatch accordingly. Runs as a single thread.
+NUM_THREADS(1u, 1u, 1u)
+void main()
+{
+	uint subdWritten;   // uints written to the subdivision buffer last pass
+	uint culledWritten; // uints written to the culled buffer (value unused)
+
+	// read both counters and reset them to zero in one step
+	atomicFetchAndExchange(atomicCounterBuffer[0], 0u, subdWritten);
+	atomicFetchAndExchange(atomicCounterBuffer[1], 0u, culledWritten);
+
+	// two uints per key; always dispatch at least one group
+	uint groupCount = (subdWritten / 2u) / UPDATE_INDIRECT_VALUE_DIVIDE + 1u;
+
+	// counter 2 = number of keys the LOD pass has to process
+	uint previous;
+	atomicFetchAndExchange(atomicCounterBuffer[2], (subdWritten / 2), previous);
+
+	dispatchIndirect(indirectBuffer, 1u, groupCount, 1u, 1u);
+}
+

+ 78 - 0
examples/41-tess/fcull.sh

@@ -0,0 +1,78 @@
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Frustum Culling API
+//
+
+bool frustumCullingTest(mat4 mvp, vec3 bmin, vec3 bmax);
+
+//
+//
+//// end header file /////////////////////////////////////////////////////
+
+
+// *****************************************************************************
+// Frustum Implementation
+
+// Six frustum planes, one vec4 per plane: xyz is the plane normal and w the
+// plane offset, so dot(vec4(p, 1), plane) is the signed plane equation at p.
+struct Frustum {
+	vec4 planes[6];
+};
+
+/**
+ * Extract Frustum Planes from MVP Matrix
+ *
+ * Based on "Fast Extraction of Viewing Frustum Planes from the World-
+ * View-Projection Matrix", by Gil Gribb and Klaus Hartmann.
+ * Plane i*2+j is element (column c, row 3) plus (j == 0) or minus (j == 1)
+ * element (column c, row i) for each column c, and is then normalized so
+ * that its xyz part has unit length.
+ */
+void loadFrustum(out Frustum f, mat4 mvp)
+{
+	for (int i = 0; i < 3; ++i)
+	for (int j = 0; j < 2; ++j) {
+		vec4 plane;
+		plane.x = mtxGetElement(mvp, 0, 3) + (j == 0 ? mtxGetElement(mvp, 0, i) : -mtxGetElement(mvp, 0, i));
+		plane.y = mtxGetElement(mvp, 1, 3) + (j == 0 ? mtxGetElement(mvp, 1, i) : -mtxGetElement(mvp, 1, i));
+		plane.z = mtxGetElement(mvp, 2, 3) + (j == 0 ? mtxGetElement(mvp, 2, i) : -mtxGetElement(mvp, 2, i));
+		plane.w = mtxGetElement(mvp, 3, 3) + (j == 0 ? mtxGetElement(mvp, 3, i) : -mtxGetElement(mvp, 3, i));
+
+		// Normalizing means dividing by the normal's length (the previous
+		// code multiplied by it). A positive scale never flips the sign of
+		// the plane test, so culling results are unchanged. A degenerate
+		// plane (zero normal) is left as is to avoid a division by zero.
+		float len = length(plane.xyz);
+		if (len > 0.0) {
+			plane /= len;
+		}
+
+		f.planes[i*2+j] = plane;
+	}
+}
+
+/**
+ * Corner of an AABB selected by a plane normal
+ *
+ * For every axis this returns the bmax component where the normal component
+ * is greater than zero and the bmin component otherwise, i.e. the box corner
+ * that lies furthest along n.
+ * NOTE(review): despite the function name, this matches what the referenced
+ * tutorial calls the "positive vertex" - confirm before renaming.
+ * See the View Frustum Culling tutorial @ LightHouse3D.com
+ * http://www.lighthouse3d.com/tutorials/view-frustum-culling/geometric-approach-testing-boxes-ii/
+ */
+vec3 negativeVertex(vec3 bmin, vec3 bmax, vec3 n)
+{
+	bvec3 b = greaterThan(n, vec3(0.0, 0.0, 0.0));
+	return mix(bmin, bmax, b);
+}
+
+/**
+ * Frustum-AABB Culling Test
+ *
+ * Returns true when the box [bmin, bmax] is inside or intersects the frustum
+ * described by mvp, and false as soon as the box corner furthest along one
+ * plane normal lies behind that plane.
+ * The test is based on the View Frustum Culling tutorial @ LightHouse3D.com
+ * http://www.lighthouse3d.com/tutorials/view-frustum-culling/geometric-approach-testing-boxes-ii/
+ */
+bool frustumCullingTest(mat4 mvp, vec3 bmin, vec3 bmax)
+{
+	Frustum frustum;
+	loadFrustum(frustum, mvp);
+
+	float signedDist = 1.0f;
+
+	for (int plane = 0; plane < 6; ++plane) {
+		vec3 corner = negativeVertex(bmin, bmax, frustum.planes[plane].xyz);
+		signedDist = dot(vec4(corner, 1.0f), frustum.planes[plane]);
+
+		// stop at the first plane that rejects the box
+		if (!(signedDist >= 0.0f)) {
+			break;
+		}
+	}
+
+	return (signedDist >= 0.0);
+}
+
+

+ 13 - 0
examples/41-tess/fs_terrain_render.sc

@@ -0,0 +1,13 @@
+$input v_texcoord0
+
+#include "terrain_common.sh"
+
+// Diffuse shading: builds a normal from the slope map and outputs
+// clamp(n.z, 0, 1) / pi as a grey value.
+void main()
+{
+	vec2 slope = texture2D(u_SmapSampler, v_texcoord0).rg * u_DmapFactor;
+	vec3 normal = normalize(vec3(-slope, 1));
+	float diffuse = clamp(normal.z, 0.0, 1.0) / 3.14159;
+	gl_FragColor = vec4(vec3(diffuse, diffuse, diffuse), 1);
+}
+

+ 11 - 0
examples/41-tess/fs_terrain_render_normal.sc

@@ -0,0 +1,11 @@
+$input v_texcoord0
+
+#include "terrain_common.sh"
+
+// Normal visualisation: builds a normal from the slope map and outputs its
+// absolute value as the colour.
+void main()
+{
+	vec2 slope = texture2D(u_SmapSampler, v_texcoord0).rg * u_DmapFactor;
+	vec3 normal = normalize(vec3(-slope, 1));
+	vec3 colour = abs(normal);
+	gl_FragColor = vec4(colour, 1);
+}
+

+ 128 - 0
examples/41-tess/isubd.sh

@@ -0,0 +1,128 @@
+
+// Returns the bit index (0..31) of the most significant set bit of x, or
+// 0xFFFFFFFF when x is zero.
+// All literals are explicitly unsigned: the previous `0x80000000 >> i` used a
+// signed literal, whose right shift may sign-extend, and `uint res = -1`
+// relied on an implicit signed-to-unsigned conversion. Results are unchanged.
+uint findMSB(uint x)
+{
+	uint res = 0xFFFFFFFFu;
+
+	// scan from bit 31 downwards and stop at the first set bit
+	for (uint i = 0u; i < 32u; i++) {
+		uint mask = 0x80000000u >> i;
+
+		if ((x & mask) != 0u) {
+			res = 31u - i;
+			break;
+		}
+	}
+
+	return res;
+}
+
+
+// Key of the parent node: the key with its lowest bit dropped.
+uint parentKey(in uint key)
+{
+    uint parent = key >> 1u;
+    return parent;
+}
+
+// Keys of the two children: the key shifted left by one bit, with the new
+// lowest bit set to 0 (children[0]) or 1 (children[1]).
+void childrenKeys(in uint key, out uint children[2])
+{
+    uint shifted = key << 1u;
+
+    children[0] = shifted | 0u;
+    children[1] = shifted | 1u;
+}
+
+// True for the root key, which is the value 1.
+bool isRootKey(in uint key)
+{
+    bool isRoot = (key == 1u);
+    return isRoot;
+}
+
+// True when the key's highest set bit is bit 31, i.e. the key cannot be
+// shifted left any further to form children.
+bool isLeafKey(in uint key)
+{
+    uint level = findMSB(key);
+    return level == 31;
+}
+
+// True when the key's lowest bit is 0, i.e. the key is children[0] of its parent.
+bool isChildZeroKey(in uint key)
+{
+    uint lowBit = key & 1u;
+    return (lowBit == 0u);
+}
+
+// Barycentric interpolation: v[0] plus u.x along edge v[0]->v[1] plus u.y
+// along edge v[0]->v[2] (vec3 and vec4 overloads).
+vec3 berp(in vec3 v[3], in vec2 u)
+{
+    vec3 edge01 = v[1] - v[0];
+    vec3 edge02 = v[2] - v[0];
+
+    return v[0] + u.x * edge01 + u.y * edge02;
+}
+vec4 berp(in vec4 v[3], in vec2 u)
+{
+    vec4 edge01 = v[1] - v[0];
+    vec4 edge02 = v[2] - v[0];
+
+    return v[0] + u.x * edge01 + u.y * edge02;
+}
+
+// Builds the 3x3 transform for a single subdivision step. `bit` is the key
+// bit for that step (0 or 1) and selects which of the two children is taken.
+mat3 bitToXform(in uint bit)
+{
+    float one  = float(bit);
+    float zero = 1.0f - one;   // complement of the bit
+
+    vec3 col0 = vec3(0.0f, zero, one );
+    vec3 col1 = vec3(0.5f, one , 0.0f);
+    vec3 col2 = vec3(0.5f, 0.0f, zero);
+
+    return mtxFromCols(col0, col1, col2);
+}
+
+// Accumulates the transform for a whole key: starting from the identity, one
+// bitToXform() matrix is multiplied in per key bit, lowest bit first, until
+// only the leading 1 (the root) is left.
+mat3 keyToXform(in uint key)
+{
+    mat3 xf = mtxFromCols(
+          vec3(1.0f, 0.0f, 0.0f)
+        , vec3(0.0f, 1.0f, 0.0f)
+        , vec3(0.0f, 0.0f, 1.0f)
+        );
+
+    for (; key > 1u; key = key >> 1u) {
+        xf = mul(xf, bitToXform(key & 1u));
+    }
+
+    return xf;
+}
+
+// Returns the transform for `key` and stores the transform of its parent
+// key in `xfp`.
+mat3 keyToXform(in uint key, out mat3 xfp)
+{
+    uint parent = parentKey(key);
+    xfp = keyToXform(parent);
+
+    mat3 xf = keyToXform(key);
+    return xf;
+}
+
+// Subdivision routine (vertex position only): applies the transform of `key`
+// to the three input vertices and returns the three resulting vertices.
+void subd(in uint key, in vec4 v_in[3], out vec4 v_out[3])
+{
+    // one input vertex per matrix row
+    mat4x3 coarse = mtxFromRows(v_in[0], v_in[1], v_in[2]);
+    mat4x3 fine = mul(keyToXform(key), coarse);
+
+    v_out[0] = mtxGetRow(fine, 0);
+    v_out[1] = mtxGetRow(fine, 1);
+    v_out[2] = mtxGetRow(fine, 2);
+}
+
+// Subdivision routine (vertex position only) that also returns the parent:
+// v_out receives the vertices for `key`, v_out_p those for its parent key.
+void subd(in uint key, in vec4 v_in[3], out vec4 v_out[3], out vec4 v_out_p[3])
+{
+    mat3 parentXf;
+    mat3 keyXf = keyToXform(key, parentXf);
+
+    // one input vertex per matrix row
+    mat4x3 coarse = mtxFromRows(v_in[0], v_in[1], v_in[2]);
+
+    mat4x3 fine = mul(keyXf, coarse);
+    mat4x3 fineParent = mul(parentXf, coarse);
+
+    v_out[0] = mtxGetRow(fine, 0);
+    v_out[1] = mtxGetRow(fine, 1);
+    v_out[2] = mtxGetRow(fine, 2);
+
+    v_out_p[0] = mtxGetRow(fineParent, 0);
+    v_out_p[1] = mtxGetRow(fineParent, 1);
+    v_out_p[2] = mtxGetRow(fineParent, 2);
+}

+ 10 - 0
examples/41-tess/makefile

@@ -0,0 +1,10 @@
+#
+# Copyright 2011-2019 Branimir Karadzic. All rights reserved.
+# License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+#
+
+BGFX_DIR=../..
+RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
+BUILD_DIR=../../.build
+
+include $(BGFX_DIR)/scripts/shader.mk

+ 68 - 0
examples/41-tess/matrices.sh

@@ -0,0 +1,68 @@
+
+
+// The non-square matrix definitions are kept local to this example, since it is not yet clear how non-square matrices should be treated in bgfx (Daniel Gavin).
+
+#ifndef MATRICES_H_HEADER_GUARD
+#define MATRICES_H_HEADER_GUARD
+
+#ifndef __cplusplus
+
+#if BGFX_SHADER_LANGUAGE_HLSL || BGFX_SHADER_LANGUAGE_PSSL || BGFX_SHADER_LANGUAGE_SPIRV || BGFX_SHADER_LANGUAGE_METAL
+
+#   define mat3x4 float4x3
+#   define mat4x3 float3x4
+
+#else
+
+
+#endif // BGFX_SHADER_LANGUAGE_*
+
+// Builds a matrix whose three rows are the given vec4s.
+// On GLSL the constructor takes columns, so the vectors are used as the
+// columns of a mat3x4 which is then transposed; on the other shader
+// languages the (remapped) mat4x3 constructor is called directly.
+mat4x3 mtxFromRows(vec4 _0, vec4 _1, vec4 _2)
+{
+#if BGFX_SHADER_LANGUAGE_GLSL
+    return transpose(mat3x4(_0, _1, _2) );
+#else
+	return mat4x3(_0, _1, _2);
+#endif // BGFX_SHADER_LANGUAGE_GLSL
+}
+
+// Returns row `row` of a mat4x3 as a vec4.
+// On GLSL the first index selects the column, so the row is gathered from
+// the four columns; elsewhere the first index selects the row directly.
+vec4 mtxGetRow(mat4x3 _0, uint row)
+{
+#if BGFX_SHADER_LANGUAGE_GLSL
+    return vec4(_0[0][row], _0[1][row], _0[2][row], _0[3][row]);
+#else
+    return vec4(_0[row]);
+#endif // BGFX_SHADER_LANGUAGE_GLSL
+}
+
+// Returns row `row` of a mat4 as a vec4 (same indexing rules as the mat4x3
+// overload: gathered per column on GLSL, direct row access elsewhere).
+vec4 mtxGetRow(mat4 _0, uint row)
+{
+#if BGFX_SHADER_LANGUAGE_GLSL
+    return vec4(_0[0][row], _0[1][row], _0[2][row], _0[3][row]);
+#else
+    return vec4(_0[row]);
+#endif // BGFX_SHADER_LANGUAGE_GLSL
+}
+
+// Returns column `column` of a mat4 as a vec4: direct column access on GLSL,
+// gathered from the four rows on the other shader languages.
+vec4 mtxGetColumn(mat4 _0, uint column)
+{
+#if BGFX_SHADER_LANGUAGE_GLSL
+    return vec4(_0[column]);
+#else
+    return vec4(_0[0][column], _0[1][column], _0[2][column], _0[3][column]);
+#endif // BGFX_SHADER_LANGUAGE_GLSL
+}
+
+// Returns the element at (column, row) of a mat4, hiding the index order
+// difference between GLSL ([column][row]) and the other shader languages
+// ([row][column]).
+float mtxGetElement(mat4 _0, uint column, uint row)
+{
+#if BGFX_SHADER_LANGUAGE_GLSL
+    return _0[column][row];
+#else
+    return _0[row][column];
+#endif // BGFX_SHADER_LANGUAGE_GLSL
+}
+
+
+#endif // __cplusplus
+
+#endif // MATRICES_H_HEADER_GUARD

+ 101 - 0
examples/41-tess/terrain_common.sh

@@ -0,0 +1,101 @@
+#include "bgfx_compute.sh"
+
+#include "matrices.sh"
+
+#include "isubd.sh"
+
+#include "uniforms.sh"
+
+BUFFER_RW(u_AtomicCounterBuffer, uint, 4);
+
+BUFFER_RW(u_SubdBufferOut, uint, 1);
+
+
+SAMPLER2D(u_DmapSampler, 0); // displacement map
+SAMPLER2D(u_SmapSampler, 1); // slope map
+
+// Samples the displacement map (mip level 0) at `pos`, remapped from
+// [-1, 1] to [0, 1] texture coordinates, and scales it by u_DmapFactor.
+float dmap(vec2 pos)
+{
+    vec2 uv = pos * 0.5 + 0.5;
+    float height = (texture2DLod(u_DmapSampler, uv, 0).x);
+
+    return height * u_DmapFactor;
+}
+
+// Converts a distance into a subdivision level: -2 * log2 of the distance
+// scaled by lodFactor and clamped to [0, 1].
+float distanceToLod(float z, float lodFactor)
+{
+    float scaled = clamp(z * lodFactor, 0.0f, 1.0f);
+
+    // The factor two is there because the triangles' edge lengths
+    // decrease by half every two subdivision steps.
+    return -2.0 * log2(scaled);
+}
+
+// Level of detail for a point c: the point is raised by the displacement
+// sampled at the xy of column 3 of u_invView (presumably the camera
+// position - confirm), transformed by u_modelView, and its distance from
+// the origin of that space is converted with distanceToLod().
+float computeLod(vec3 c)
+{
+    vec2 sampleXY = mtxGetColumn(u_invView, 3).xy;
+    c.z += dmap(sampleXY);
+
+    vec3 viewPos = mul(u_modelView, vec4(c.x, c.y, c.z, 1)).xyz;
+
+    return distanceToLod(length(viewPos), u_LodFactor);
+}
+
+// Level of detail for a triangle: evaluated at the midpoint of the edge
+// between v[1] and v[2] (vec4 and vec3 overloads).
+float computeLod(in vec4 v[3])
+{
+    vec3 edgeSum = v[1].xyz + v[2].xyz;
+    return computeLod(edgeSum / 2.0);
+}
+float computeLod(in vec3 v[3])
+{
+    vec3 edgeSum = v[1].xyz + v[2].xyz;
+    return computeLod(edgeSum / 2.0);
+}
+
+// Appends one (primID, key) pair to the output subdivision buffer.
+// Counter 0 is advanced by two uints; its previous value is the write slot.
+void writeKey(uint primID, uint key)
+{
+    uint slot = 0;
+
+    atomicFetchAndAdd(u_AtomicCounterBuffer[0], 2, slot);
+
+    u_SubdBufferOut[slot] = primID;
+    u_SubdBufferOut[slot+1] = key;
+}
+
+
+// Decides what happens to one key and writes the outcome with writeKey():
+//  - subdivide: both children are written,
+//  - keep:      the key itself is written,
+//  - merge:     the root is written unchanged; otherwise only child zero
+//               writes the parent key (child one writes nothing).
+void updateSubdBuffer(
+    uint primID,
+    uint key,
+    uint targetLod,
+    uint parentLod,
+    bool isVisible
+) {
+    // subdivision level associated to the key
+    uint keyLod = findMSB(key);
+
+    bool subdivide = keyLod < targetLod && !isLeafKey(key) && isVisible;
+    bool keep      = keyLod < (parentLod + 1) && isVisible;
+
+    if (subdivide) {
+        uint children[2];
+        childrenKeys(key, children);
+
+        writeKey(primID, children[0]);
+        writeKey(primID, children[1]);
+    }
+    else if (keep) {
+        writeKey(primID, key);
+    }
+    else if (isRootKey(key)) {
+        // merge requested on the root: nothing above it, write it back
+        writeKey(primID, key);
+    }
+    else if (isChildZeroKey(key)) {
+        // merge: child zero emits the parent on behalf of both children
+        writeKey(primID, parentKey(key));
+    }
+}
+
+// Convenience overload: same as the five-argument version with isVisible
+// set to true.
+void updateSubdBuffer(uint primID, uint key, uint targetLod, uint parentLod)
+{
+    updateSubdBuffer(primID, key, targetLod, parentLod, true);
+}
+

+ 676 - 0
examples/41-tess/tess.cpp

@@ -0,0 +1,676 @@
+/*
+ * Copyright 2019 Daniel Gavin. All rights reserved.
+ * License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
+ */
+
+ /*
+  * Reference(s):
+  * - Adaptive GPU Tessellation with Compute Shaders by Jad Khoury, Jonathan Dupuy, and Christophe Riccio
+  *  http://onrendering.com/data/papers/isubd/isubd.pdf
+  * - Based on Demo
+  *  https://github.com/jdupuy/opengl-framework/tree/master/demo-isubd-terrain#implicit-subdivision-on-the-gpu
+  */
+
+#include "common.h"
+#include "bgfx_utils.h"
+#include "imgui/imgui.h"
+#include "camera.h"
+#include "bounds.h"
+#include <bx/allocator.h>
+#include <bx/debug.h>
+#include <bx/math.h>
+#include <bx/file.h>
+#include <vector>
+
+#include "constants.h"
+
+namespace
+{
+	// Draw programs; also the index into m_programsDraw and the value of the
+	// "Shading" combo box.
+	enum {
+		PROGRAM_TERRAIN_NORMAL,
+		PROGRAM_TERRAIN,
+		SHADING_COUNT };
+
+	// Base index of the two ping-pong subdivision buffers in m_bufferSubd.
+	enum {
+		BUFFER_SUBD
+	};
+
+	// Compute programs; index into m_programsCompute.
+	enum {
+		PROGRAM_SUBD_CS_LOD,    
+		PROGRAM_UPDATE_INDIRECT,  
+		PROGRAM_INIT_INDIRECT,
+		PROGRAM_UPDATE_DRAW,
+		PROGRAM_COUNT
+	};
+
+	// Sampler uniforms; index into m_samplers.
+	enum {
+		TERRAIN_DMAP_SAMPLER,
+		TERRAIN_SMAP_SAMPLER,
+		SAMPLER_COUNT
+	};
+
+	// Textures (displacement map, slope map); index into m_textures.
+	enum {
+		TEXTURE_DMAP,
+		TEXTURE_SMAP,
+		TEXTURE_COUNT
+	};
+
+	/**
+	 * CPU-side copy of the `u_params` uniform array (NumVec4 vec4 values).
+	 * The named members alias the entries of `params` through the union.
+	 */
+	struct Uniforms
+	{
+		enum { NumVec4 = 2 };
+
+		/// Creates the uniform handle and gives every parameter a defined value.
+		void init()
+		{
+			u_params = bgfx::createUniform("u_params", bgfx::UniformType::Vec4, NumVec4);
+
+			// submit() uploads the whole array, including the padding and
+			// the factors that are only filled in later, so start from zero
+			// instead of leaving those floats uninitialized.
+			for (int ii = 0; ii < NumVec4 * 4; ++ii)
+			{
+				params[ii] = 0.0f;
+			}
+
+			cull = 1;
+			freeze = 0;
+			gpuSubd = 3;
+		}
+
+		/// Uploads all NumVec4 vec4 values for the next draw / dispatch call.
+		void submit()
+		{
+			bgfx::setUniform(u_params, params, NumVec4);
+		}
+
+		/// Destroys the uniform handle created by init().
+		void destroy()
+		{
+			bgfx::destroy(u_params);
+		}
+
+		union
+		{
+			struct
+			{
+				// first vec4
+				float dmapFactor; float lodFactor; float cull; float freeze;
+				// second vec4
+				float gpuSubd; float padding[3];
+			};
+
+			float params[NumVec4 * 4];
+		};
+
+		bgfx::UniformHandle u_params;
+	};
+	class ExampleTessellation : public entry::AppI
+	{
+	public:
+		/// Forwards the example name and description to entry::AppI.
+		ExampleTessellation(const char* _name, const char* _description)
+			: entry::AppI(_name, _description)
+		{
+		}
+
+		/**
+		 * Initializes bgfx, the example settings, ImGui, the camera and all
+		 * GPU resources (programs, buffers, textures, counters, indirect buffer).
+		 */
+		void init(int32_t _argc, const char* const* _argv, uint32_t _width, uint32_t _height) override
+		{
+			Args args(_argc, _argv);
+
+			m_width = _width;
+			m_height = _height;
+			m_debug = BGFX_DEBUG_NONE;
+			m_reset = BGFX_RESET_NONE;
+
+			bgfx::Init init;
+			init.type = args.m_type;
+			init.vendorId = args.m_pciId;
+			init.resolution.width = m_width;
+			init.resolution.height = m_height;
+			init.resolution.reset = m_reset;
+			bgfx::init(init);
+
+			// Displacement map file and its scale (copied to the dmapFactor uniform).
+			m_dmap = { "textures/dmap.png", 0.45f };
+			m_computeThreadCount = 5;
+			m_shading = PROGRAM_TERRAIN;
+			m_primitivePixelLengthTarget = 7.0f;
+			m_fovy = 60.0f;
+			m_pingPong = 0;
+			// Makes the first update() rebuild the subdivision buffers and
+			// run the init compute program.
+			m_reset_gpu = true;
+
+			// Enable m_debug text.
+			bgfx::setDebug(m_debug);
+
+			// Set view 0 clear state.
+			bgfx::setViewClear(0
+				, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
+				, 0x303030ff
+				, 1.0f
+				, 0
+			);
+
+			// Set view 1 clear state (same values as view 0).
+			bgfx::setViewClear(1
+				, BGFX_CLEAR_COLOR | BGFX_CLEAR_DEPTH
+				, 0x303030ff
+				, 1.0f
+				, 0
+			);
+
+			// Imgui.
+			imguiCreate();
+
+			m_timeOffset = bx::getHPCounter();
+
+			m_oldWidth = 0;
+			m_oldHeight = 0;
+			m_oldReset = m_reset;
+
+			cameraCreate();
+			cameraSetPosition({ 0.0f, 0.5f, 0.0f });
+			cameraSetVerticalAngle(0);
+
+			// Initial state of the three check boxes in the settings window.
+			is_wireframe = false;
+			is_frozen = false;
+			is_culled = true;
+
+			loadPrograms();
+			loadBuffers();
+			loadTextures();
+
+			createAtomicCounters();
+
+			// Two entries: entry 0 holds the draw arguments, entry 1 the
+			// dispatch arguments (see the compute shaders).
+			m_dispatchIndirect = bgfx::createIndirectBuffer(2);
+		}
+
+		/**
+		 * Destroys the camera, ImGui and every bgfx resource created by this
+		 * example, then shuts bgfx down. Always returns 0.
+		 */
+		virtual int shutdown() override
+		{
+			// Cleanup.
+			cameraDestroy();
+			imguiDestroy();
+
+			m_uniforms.destroy();
+
+			// NOTE(review): the image loaded into `dmap` by loadDmapTexture()
+			// is not released here - confirm whether it is freed elsewhere.
+			bgfx::destroy(m_bufferCounter);
+			bgfx::destroy(m_bufferCulledSubd);
+			bgfx::destroy(m_bufferSubd[0]);
+			bgfx::destroy(m_bufferSubd[1]);
+			bgfx::destroy(m_dispatchIndirect);
+			bgfx::destroy(m_geometryIndices);
+			bgfx::destroy(m_geometryVertices);
+			bgfx::destroy(m_instancedGeometryIndices);
+			bgfx::destroy(m_instancedGeometryVertices);
+
+			for (uint32_t i = 0; i < PROGRAM_COUNT; ++i) {
+				bgfx::destroy(m_programsCompute[i]);
+			}
+
+			for (uint32_t i = 0; i < SHADING_COUNT; ++i) {
+				bgfx::destroy(m_programsDraw[i]);
+			}
+
+			for (uint32_t i = 0; i < SAMPLER_COUNT; ++i) {
+				bgfx::destroy(m_samplers[i]);
+			}
+
+			for (uint32_t i = 0; i < TEXTURE_COUNT; ++i) {
+				bgfx::destroy(m_textures[i]);
+			}
+
+			// Shutdown bgfx.
+			bgfx::shutdown();
+
+			return 0;
+		}
+
+		/**
+		 * Runs one frame: UI, camera, then the GPU passes
+		 *   1. init (after a rebuild) or update-indirect compute pass,
+		 *   2. LOD compute pass (indirect dispatch),
+		 *   3. update-draw compute pass,
+		 *   4. indirect instanced terrain draw.
+		 * Returns false once entry::processEvents() reports that the
+		 * application should exit, true otherwise.
+		 */
+		bool update() override
+		{
+			if (!entry::processEvents(m_width, m_height, m_debug, m_reset, &m_mouseState))
+			{
+				// Time since the previous frame: counter ticks divided by the
+				// counter frequency.
+				int64_t now = bx::getHPCounter();
+				static int64_t last = now;
+				const int64_t frameTime = now - last;
+				last = now;
+				const double freq = double(bx::getHPFrequency());
+				const float deltaTime = float(frameTime / freq);
+
+				imguiBeginFrame(m_mouseState.m_mx
+					, m_mouseState.m_my
+					, (m_mouseState.m_buttons[entry::MouseButton::Left] ? IMGUI_MBUT_LEFT : 0)
+					| (m_mouseState.m_buttons[entry::MouseButton::Right] ? IMGUI_MBUT_RIGHT : 0)
+					| (m_mouseState.m_buttons[entry::MouseButton::Middle] ? IMGUI_MBUT_MIDDLE : 0)
+					, m_mouseState.m_mz
+					, uint16_t(m_width)
+					, uint16_t(m_height)
+				);
+
+				showExampleDialog(this);
+
+				ImGui::SetNextWindowPos(
+					ImVec2(m_width - m_width / 5.0f - 10.0f, 10.0f)
+					, ImGuiCond_FirstUseEver
+				);
+				ImGui::SetNextWindowSize(
+					ImVec2(m_width / 5.0f, m_height / 3.0f)
+					, ImGuiCond_FirstUseEver
+				);
+				ImGui::Begin("Settings"
+					, NULL
+					, 0
+				);
+
+				if (ImGui::Checkbox("Debug wireframe", &is_wireframe)) {
+					if (is_wireframe) {
+						bgfx::setDebug(BGFX_DEBUG_WIREFRAME);
+					}
+					else {
+						bgfx::setDebug(BGFX_DEBUG_NONE);
+					}
+				}
+
+				ImGui::SameLine();
+
+				// The check boxes are mirrored into float uniforms (1.0 / 0.0).
+				if (ImGui::Checkbox("Cull", &is_culled)) {
+					if (is_culled) {
+						m_uniforms.cull = 1.0;
+					}
+					else {
+						m_uniforms.cull = 0.0;
+					}
+				}
+
+				ImGui::SameLine();
+
+				if (ImGui::Checkbox("Freeze subdividing", &is_frozen)) {
+					if (is_frozen) {
+						m_uniforms.freeze = 1.0;
+					}
+					else {
+						m_uniforms.freeze = 0.0;
+					}
+				}
+
+
+				ImGui::SliderFloat("Pixels per edge", &m_primitivePixelLengthTarget, 1, 20);
+
+				int gpuSlider = (int)m_uniforms.gpuSubd;
+
+				// Changing the patch level needs different instanced
+				// geometry, so it triggers a full rebuild below.
+				if (ImGui::SliderInt("Triangle Patch level", &gpuSlider, 0, 3)) {
+					m_reset_gpu = true;
+					m_uniforms.gpuSubd = (float)gpuSlider;
+				}
+
+				ImGui::Combo("Shading", &m_shading, shader_options, 2);
+
+				ImGui::Text("Some variables require rebuilding the subdivide buffers and causes a stutter.");
+
+
+				ImGui::End();
+				
+				if (!ImGui::MouseOverArea())
+				{
+					// Update camera.
+					cameraUpdate(deltaTime, m_mouseState);
+
+					// NOTE(review): empty branch, no action on left click.
+					if (!!m_mouseState.m_buttons[entry::MouseButton::Left])
+					{
+					}
+				}
+
+				bgfx::touch(0);
+				bgfx::touch(1);
+
+				configureUniforms();
+
+				cameraGetViewMtx(m_viewMtx);
+
+				float model[16];
+
+				bx::mtxRotateX(model, bx::toRad(90));
+
+				bx::mtxProj(m_projMtx, m_fovy, float(m_width) / float(m_height), 0.0001f, 2000.0f, bgfx::getCaps()->homogeneousDepth);
+
+				// Set view 0
+				bgfx::setViewTransform(0, m_viewMtx, m_projMtx);
+
+				// Set view 1
+				bgfx::setViewRect(1, 0, 0, uint16_t(m_width), uint16_t(m_height));
+				bgfx::setViewTransform(1, m_viewMtx, m_projMtx);
+
+				// NOTE(review): the uniforms are re-submitted before every
+				// dispatch / submit below - presumably because bgfx discards
+				// per-call state after each call; confirm.
+				m_uniforms.submit();
+
+				// update the subd buffers
+				if (m_reset_gpu) {
+					
+					m_pingPong = 1;
+
+					// Recreate the instanced patch geometry and all three
+					// subdivision buffers.
+					bgfx::destroy(m_instancedGeometryVertices);
+					bgfx::destroy(m_instancedGeometryIndices);
+
+					bgfx::destroy(m_bufferSubd[BUFFER_SUBD]);
+					bgfx::destroy(m_bufferSubd[BUFFER_SUBD + 1]);
+					bgfx::destroy(m_bufferCulledSubd);
+
+					loadInstancedGeometryBuffers();
+					loadSubdivisionBuffers();
+
+					//init indirect
+					// (slots match the buffer declarations in cs_terrain_init.sc)
+					bgfx::setBuffer(1, m_bufferSubd[m_pingPong], bgfx::Access::ReadWrite);
+					bgfx::setBuffer(2, m_bufferCulledSubd, bgfx::Access::ReadWrite);
+					bgfx::setBuffer(3, m_dispatchIndirect, bgfx::Access::ReadWrite);
+					bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
+					bgfx::setBuffer(8, m_bufferSubd[1 - m_pingPong], bgfx::Access::ReadWrite);
+					bgfx::dispatch(0, m_programsCompute[PROGRAM_INIT_INDIRECT], 1, 1, 1);
+
+
+					m_reset_gpu = false;
+				}
+
+				else {
+					// update batch
+					bgfx::setBuffer(3, m_dispatchIndirect, bgfx::Access::ReadWrite);
+					bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
+					bgfx::dispatch(0, m_programsCompute[PROGRAM_UPDATE_INDIRECT], 1, 1, 1);
+				}
+
+				// LOD pass bindings: slot 1 is the subdivision buffer written
+				// this frame, slot 8 the one written last frame (read only).
+				bgfx::setBuffer(1, m_bufferSubd[m_pingPong], bgfx::Access::ReadWrite);
+				bgfx::setBuffer(2, m_bufferCulledSubd, bgfx::Access::ReadWrite);
+				bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
+				bgfx::setBuffer(6, m_geometryVertices, bgfx::Access::Read);
+				bgfx::setBuffer(7, m_geometryIndices, bgfx::Access::Read);
+				bgfx::setBuffer(8, m_bufferSubd[1 - m_pingPong], bgfx::Access::Read);
+				bgfx::setTransform(model);
+
+				bgfx::setTexture(0, m_samplers[TERRAIN_DMAP_SAMPLER], m_textures[TEXTURE_DMAP], BGFX_SAMPLER_U_CLAMP | BGFX_SAMPLER_V_CLAMP);
+
+				m_uniforms.submit();
+
+				// update the subd buffer
+				// (indirect dispatch, arguments taken from entry 1 of m_dispatchIndirect)
+				bgfx::dispatch(0, m_programsCompute[PROGRAM_SUBD_CS_LOD], m_dispatchIndirect, 1);
+
+				// update draw
+				bgfx::setBuffer(3, m_dispatchIndirect, bgfx::Access::ReadWrite);
+				bgfx::setBuffer(4, m_bufferCounter, bgfx::Access::ReadWrite);
+
+				m_uniforms.submit();
+
+				bgfx::dispatch(1, m_programsCompute[PROGRAM_UPDATE_DRAW], 1, 1, 1);
+
+				// render the terrain
+				bgfx::setTexture(0, m_samplers[TERRAIN_DMAP_SAMPLER], m_textures[TEXTURE_DMAP], BGFX_SAMPLER_U_CLAMP | BGFX_SAMPLER_V_CLAMP);
+				bgfx::setTexture(1, m_samplers[TERRAIN_SMAP_SAMPLER], m_textures[TEXTURE_SMAP], BGFX_SAMPLER_MIN_ANISOTROPIC | BGFX_SAMPLER_MAG_ANISOTROPIC);
+
+				bgfx::setTransform(model);
+				bgfx::setVertexBuffer(0, m_instancedGeometryVertices);
+				bgfx::setIndexBuffer(m_instancedGeometryIndices);
+				bgfx::setBuffer(2, m_bufferCulledSubd, bgfx::Access::Read);
+				bgfx::setBuffer(3, m_geometryVertices, bgfx::Access::Read);
+				bgfx::setBuffer(4, m_geometryIndices, bgfx::Access::Read);
+				bgfx::setState(BGFX_STATE_WRITE_RGB | BGFX_STATE_WRITE_Z | BGFX_STATE_DEPTH_TEST_LESS);
+
+				m_uniforms.submit();
+
+				// Indirect draw, arguments taken from entry 0 of m_dispatchIndirect.
+				bgfx::submit(1, m_programsDraw[m_shading], m_dispatchIndirect, 0, true);
+
+				// Swap the roles of the two subdivision buffers for the next frame.
+				m_pingPong = 1 - m_pingPong;
+
+				imguiEndFrame();
+
+				// Advance to next frame. Rendering thread will be kicked to
+				// process submitted rendering primitives.
+				bgfx::frame(false);
+
+				return true;
+			}
+
+			return false;
+		}
+
+		/**
+		 * Creates the buffer holding the three 32-bit counters that the
+		 * compute shaders access as their atomic counter buffer (slot 4).
+		 */
+		void createAtomicCounters()
+		{
+			m_bufferCounter = bgfx::createDynamicIndexBuffer(3, BGFX_BUFFER_INDEX32 | BGFX_BUFFER_COMPUTE_READ_WRITE);
+		}
+
+		/**
+		 * Recomputes the per-frame uniforms: the LOD factor (from the field of
+		 * view, window width, patch level and target pixel length) and the
+		 * displacement factor (the displacement map scale).
+		 */
+		void configureUniforms()
+		{
+			const float halfFovy = bx::toRad(m_fovy) / 2.0f;
+			const int patchScale = 1 << (int)m_uniforms.gpuSubd;
+
+			m_uniforms.lodFactor = 2.0f * bx::tan(halfFovy)
+				/ m_width * patchScale
+				* m_primitivePixelLengthTarget;
+
+			m_uniforms.dmapFactor = m_dmap.scale;
+		}
+
+		/**
+		* Load the Terrain Program
+		*
+		* Creates the two sampler uniforms, the parameter uniforms, the two
+		* draw programs and the four compute programs. The programs render an
+		* adaptive terrain using the implicit subdivision technique described
+		* in GPU Zen 2.
+		**/
+		void loadPrograms()
+		{
+			m_samplers[TERRAIN_DMAP_SAMPLER] = bgfx::createUniform("u_DmapSampler", bgfx::UniformType::Sampler);
+			m_samplers[TERRAIN_SMAP_SAMPLER] = bgfx::createUniform("u_SmapSampler", bgfx::UniformType::Sampler);
+
+			m_uniforms.init();
+
+			// Both draw programs share the vertex shader and differ only in
+			// the fragment shader.
+			m_programsDraw[PROGRAM_TERRAIN] = loadProgram("vs_terrain_render", "fs_terrain_render");
+			m_programsDraw[PROGRAM_TERRAIN_NORMAL] = loadProgram("vs_terrain_render", "fs_terrain_render_normal");
+
+			m_programsCompute[PROGRAM_SUBD_CS_LOD] = bgfx::createProgram(loadShader("cs_terrain_lod"), true);
+			m_programsCompute[PROGRAM_UPDATE_INDIRECT] = bgfx::createProgram(loadShader("cs_terrain_update_indirect"), true);
+			m_programsCompute[PROGRAM_UPDATE_DRAW] = bgfx::createProgram(loadShader("cs_terrain_update_draw"), true);
+			m_programsCompute[PROGRAM_INIT_INDIRECT] = bgfx::createProgram(loadShader("cs_terrain_init"), true);
+		}
+
+		/**
+		 * Build the Slope Texture
+		 *
+		 * Derives an RG32F slope map from the 16-bit displacement map that
+		 * loadDmapTexture() stored in `dmap`: for every texel the central
+		 * difference of the neighbouring heights (clamped at the borders) is
+		 * stored as (slope_x, slope_y). Must run after loadDmapTexture().
+		 */
+		void loadSmapTexture()
+		{
+			int w = dmap->m_width;
+			int h = dmap->m_height;
+
+			const uint16_t *texels = (const uint16_t *)dmap->m_data;
+
+			const bgfx::Memory* mem = bgfx::alloc(w * h * 2 * sizeof(float));
+			float* smap = (float*)mem->data;
+
+			for (int j = 0; j < h; ++j) {
+				for (int i = 0; i < w; ++i) {
+					// neighbour coordinates, clamped to the image
+					int i1 = bx::max(0, i - 1);
+					int i2 = bx::min(w - 1, i + 1);
+					int j1 = bx::max(0, j - 1);
+					int j2 = bx::min(h - 1, j + 1);
+					uint16_t px_l = texels[i1 + w * j]; // in [0,2^16-1]
+					uint16_t px_r = texels[i2 + w * j]; // in [0,2^16-1]
+					uint16_t px_b = texels[i + w * j1]; // in [0,2^16-1]
+					uint16_t px_t = texels[i + w * j2]; // in [0,2^16-1]
+					float z_l = (float)px_l / 65535.0f; // in [0, 1]
+					float z_r = (float)px_r / 65535.0f; // in [0, 1]
+					float z_b = (float)px_b / 65535.0f; // in [0, 1]
+					float z_t = (float)px_t / 65535.0f; // in [0, 1]
+					float slope_x = (float)w * 0.5f * (z_r - z_l);
+					float slope_y = (float)h * 0.5f * (z_t - z_b);
+
+					smap[2 * (i + w * j)] = slope_x;
+					smap[1 + 2 * (i + w * j)] = slope_y;
+				}
+			}
+
+			// `mem` holds exactly one w * h level, so the texture is created
+			// without a mip chain. The previous code passed `hasMips = true`
+			// whenever the displacement map had mips, which would declare more
+			// levels than the supplied data contains.
+			m_textures[TEXTURE_SMAP] = bgfx::createTexture2D((uint16_t)w, (uint16_t)h, false, 1, bgfx::TextureFormat::RG32F,
+				BGFX_TEXTURE_NONE, mem);
+
+		}
+
+
+		/**
+		 * Load the Displacement Texture
+		 *
+		 * This loads the image at m_dmap.pathToFile as R16 into `dmap` and
+		 * creates the displacement texture from it. The texture memory is a
+		 * reference to the image data (bgfx::makeRef), not a copy.
+		 * NOTE(review): the result of imageLoad() is used without a null
+		 * check - confirm how a missing file should be handled.
+		 */
+		void loadDmapTexture()
+		{
+			dmap = imageLoad(m_dmap.pathToFile.getCPtr(), bgfx::TextureFormat::R16);
+
+			m_textures[TEXTURE_DMAP] = bgfx::createTexture2D((uint16_t)dmap->m_width, (uint16_t)dmap->m_height, false, 1, bgfx::TextureFormat::R16,
+				BGFX_TEXTURE_NONE, bgfx::makeRef(dmap->m_data, dmap->m_size));
+		}
+
+		/**
+		 * Load All Textures
+		 *
+		 * The displacement map is loaded first because loadSmapTexture()
+		 * reads the image that loadDmapTexture() stores in `dmap`.
+		 */
+		void loadTextures()
+		{
+			loadDmapTexture();
+			loadSmapTexture();
+		}
+
+		/**
+		 * Load the Geometry Buffer
+		 *
+		 * Uploads the coarse terrain geometry: four vec4 corners of a quad
+		 * spanning [-1, 1] in x and y (z = 0, w = 1) and six 32-bit indices
+		 * that split it into two triangles. Both buffers are created with
+		 * BGFX_BUFFER_COMPUTE_READ.
+		 */
+		void loadGeometryBuffers()
+		{
+			const float quadCorners[] = {
+				-1.0f, -1.0f, 0.0f, 1.0f,
+				+1.0f, -1.0f, 0.0f, 1.0f,
+				+1.0f, +1.0f, 0.0f, 1.0f,
+				-1.0f, +1.0f, 0.0f, 1.0f
+			};
+
+			// Two triangles: (0, 1, 3) and (2, 3, 1).
+			const uint32_t quadTriangles[] = {
+				0, 1, 3,
+				2, 3, 1
+			};
+
+			m_geometryDecl
+				.begin()
+				.add(bgfx::Attrib::Position, 4, bgfx::AttribType::Float)
+				.end();
+
+			m_geometryVertices = bgfx::createVertexBuffer(
+				bgfx::copy(quadCorners, sizeof(quadCorners)),
+				m_geometryDecl,
+				BGFX_BUFFER_COMPUTE_READ);
+
+			m_geometryIndices = bgfx::createIndexBuffer(
+				bgfx::copy(quadTriangles, sizeof(quadTriangles)),
+				BGFX_BUFFER_COMPUTE_READ | BGFX_BUFFER_INDEX32);
+		}
+
+		/**
+		 * Create the Subdivision Buffers
+		 *
+		 * Allocates the two m_bufferSubd entries and m_bufferCulledSubd as
+		 * dynamic 32-bit index buffers with compute read/write access, each
+		 * sized for 1 << 27 elements.
+		 */
+		void loadSubdivisionBuffers()
+		{
+			const size_t bufferCapacity = 1 << 27;
+			const uint16_t subdFlags = BGFX_BUFFER_COMPUTE_READ_WRITE | BGFX_BUFFER_INDEX32;
+
+			for (int slot = 0; slot < 2; ++slot) {
+				m_bufferSubd[BUFFER_SUBD + slot] = bgfx::createDynamicIndexBuffer(bufferCapacity, subdFlags);
+			}
+
+			m_bufferCulledSubd = bgfx::createDynamicIndexBuffer(bufferCapacity, subdFlags);
+		}
+
+		/**
+		 * Load All Buffers
+		 *
+		 * Creates, in order, the subdivision buffers, the coarse terrain
+		 * geometry buffers and the instanced patch geometry buffers.
+		 */
+		void loadBuffers()
+		{
+			loadSubdivisionBuffers();
+			loadGeometryBuffers();
+			loadInstancedGeometryBuffers();
+		}
+
+		/**
+		 * Load the Instanced Patch Geometry
+		 *
+		 * Picks the pre-built triangle grid matching m_uniforms.gpuSubd
+		 * (0, 1 or 2; any other value selects the level-3 grid), records its
+		 * vertex and primitive counts, and creates the vertex/index buffers
+		 * that are instantiated for each subdivision key present in the subd
+		 * buffer. The grid tables are referenced through bgfx::makeRef.
+		 */
+		void loadInstancedGeometryBuffers()
+		{
+			// Start from the finest grid; it is also the fallback for any
+			// gpuSubd value other than 0, 1 or 2.
+			const float* gridVertices = verticesL3;
+			const uint32_t* gridIndexes = indexesL3;
+			uint32_t vertexCount = 45;
+			uint32_t primitiveCount = 64;
+
+			if (m_uniforms.gpuSubd == 0) {
+				gridVertices = verticesL0;
+				gridIndexes = indexesL0;
+				vertexCount = 3;
+				primitiveCount = 1;
+			}
+			else if (m_uniforms.gpuSubd == 1) {
+				gridVertices = verticesL1;
+				gridIndexes = indexesL1;
+				vertexCount = 6;
+				primitiveCount = 4;
+			}
+			else if (m_uniforms.gpuSubd == 2) {
+				gridVertices = verticesL2;
+				gridIndexes = indexesL2;
+				vertexCount = 15;
+				primitiveCount = 16;
+			}
+
+			m_instancedMeshVertexCount = vertexCount;
+			m_instancedMeshPrimitiveCount = primitiveCount;
+
+			m_instancedGeometryDecl
+				.begin()
+				.add(bgfx::Attrib::TexCoord0, 2, bgfx::AttribType::Float)
+				.end();
+
+			// Two floats per vertex, three 32-bit indices per primitive.
+			m_instancedGeometryVertices = bgfx::createVertexBuffer(
+				bgfx::makeRef(gridVertices, sizeof(float) * 2 * m_instancedMeshVertexCount),
+				m_instancedGeometryDecl);
+			m_instancedGeometryIndices = bgfx::createIndexBuffer(
+				bgfx::makeRef(gridIndexes, sizeof(uint32_t) * m_instancedMeshPrimitiveCount * 3),
+				BGFX_BUFFER_INDEX32);
+		}
+
+		Uniforms m_uniforms; // shader parameters; gpuSubd selects the instanced patch grid
+
+		bgfx::ProgramHandle m_programsCompute[PROGRAM_COUNT]; // compute programs, indexed by PROGRAM_* ids
+		bgfx::ProgramHandle m_programsDraw[SHADING_COUNT]; // render programs (PROGRAM_TERRAIN, PROGRAM_TERRAIN_NORMAL)
+		bgfx::TextureHandle m_textures[TEXTURE_COUNT]; // TEXTURE_DMAP (displacement) and TEXTURE_SMAP (slope)
+		bgfx::UniformHandle m_samplers[SAMPLER_COUNT];
+
+		bgfx::DynamicIndexBufferHandle m_bufferSubd[2]; // created in loadSubdivisionBuffers(); NOTE(review): presumably swapped via m_pingPong - confirm
+		bgfx::DynamicIndexBufferHandle m_bufferCulledSubd; // created in loadSubdivisionBuffers()
+
+		bgfx::DynamicIndexBufferHandle m_bufferCounter; // NOTE(review): creation not visible here - confirm purpose
+
+		bgfx::IndexBufferHandle m_geometryIndices; // coarse terrain quad, see loadGeometryBuffers()
+		bgfx::VertexBufferHandle m_geometryVertices;
+		bgfx::VertexDecl m_geometryDecl; // one 4-float Position attribute
+
+		bgfx::IndexBufferHandle m_instancedGeometryIndices; // per-key patch grid, see loadInstancedGeometryBuffers()
+		bgfx::VertexBufferHandle m_instancedGeometryVertices;
+		bgfx::VertexDecl m_instancedGeometryDecl; // one 2-float TexCoord0 attribute
+
+		bgfx::IndirectBufferHandle m_dispatchIndirect;
+
+		bimg::ImageContainer* dmap; // decoded displacement image, assigned by loadDmapTexture()
+
+		float m_viewMtx[16];
+		float m_projMtx[16];
+
+		uint32_t m_width;
+		uint32_t m_height;
+		uint32_t m_debug;
+		uint32_t m_reset;
+
+		uint32_t m_oldWidth;
+		uint32_t m_oldHeight;
+		uint32_t m_oldReset;
+
+		uint32_t m_instancedMeshVertexCount; // vertices in the selected patch grid
+		uint32_t m_instancedMeshPrimitiveCount; // triangles in the selected patch grid
+
+		entry::MouseState m_mouseState;
+
+		int64_t m_timeOffset;
+
+		struct {
+			bx::FilePath pathToFile; // image read by loadDmapTexture()
+			float scale; // NOTE(review): presumably the displacement scale - confirm against its users
+		} m_dmap;
+
+
+		int m_computeThreadCount;
+		int m_shading;
+		int m_gpuSubd;
+		float m_primitivePixelLengthTarget;
+		float m_fovy;
+		int m_pingPong;
+		bool m_reset_gpu;
+		bool is_wireframe;
+		bool is_culled;
+		bool is_frozen;
+	};
+
+} // namespace
+
+ENTRY_IMPLEMENT_MAIN(ExampleTessellation, "41-tess", "Adaptive Gpu Tessellation.");

+ 15 - 0
examples/41-tess/uniforms.sh

@@ -0,0 +1,15 @@
+// Packed shader parameters: two vec4s addressed through the macros below.
+uniform vec4 u_params[2];
+// u_params[0] = (DmapFactor, LodFactor, cull, freeze)
+// u_params[1].x = gpu_subd, converted to int on read
+#define u_DmapFactor u_params[0].x
+#define u_LodFactor u_params[0].y
+#define u_cull u_params[0].z
+#define u_freeze u_params[0].w
+#define u_gpu_subd  int(u_params[1].x)
+
+// NOTE(review): presumably the compute work-group size and the divisor used when updating the indirect arguments - confirm against the cs_terrain_* shaders.
+#define COMPUTE_THREAD_COUNT 32u
+#define UPDATE_INDIRECT_VALUE_DIVIDE 32u
+
+

+ 13 - 0
examples/41-tess/varying.def.sc

@@ -0,0 +1,13 @@
+vec2 v_texcoord0 : TEXCOORD0 = vec2(0.0, 0.0);
+vec3 v_position  : TEXCOORD1 = vec3(0.0, 0.0, 0.0);
+vec3 v_view      : TEXCOORD2 = vec3(0.0, 0.0, 0.0);
+vec3 v_normal    : NORMAL    = vec3(0.0, 0.0, 1.0);
+vec3 v_tangent   : TANGENT   = vec3(1.0, 0.0, 0.0);
+vec3 v_bitangent : BINORMAL  = vec3(0.0, 1.0, 0.0);
+vec4 v_color0    : COLOR     = vec4(1.0, 0.0, 0.0, 1.0);
+
+vec3 a_position  : POSITION;
+vec4 a_normal    : NORMAL;
+vec4 a_tangent   : TANGENT;
+vec2 a_texcoord0 : TEXCOORD0;
+vec4 a_color0    : COLOR0;

+ 41 - 0
examples/41-tess/vs_terrain_render.sc

@@ -0,0 +1,41 @@
+$input a_texcoord0
+$output v_texcoord0
+
+#include "terrain_common.sh"
+
+BUFFER_RO(u_CulledSubdBuffer, uint, 2);
+BUFFER_RO(u_VertexBuffer, vec4, 3);
+BUFFER_RO(u_IndexBuffer, uint, 4);
+
+
+// Vertex stage: one instance is drawn per subdivision key. Each key occupies
+// two consecutive uints in u_CulledSubdBuffer: the coarse primitive id
+// followed by the subdivision key itself.
+void main()
+{
+    int keyOffset = gl_InstanceID * 2;
+
+    uint primID = u_CulledSubdBuffer[keyOffset];
+    uint key    = u_CulledSubdBuffer[keyOffset + 1];
+
+    // fetch the three corners of the coarse triangle
+    uint firstIndex = primID * 3;
+
+    vec4 coarseTri[3];
+    coarseTri[0] = u_VertexBuffer[u_IndexBuffer[firstIndex    ]];
+    coarseTri[1] = u_VertexBuffer[u_IndexBuffer[firstIndex + 1]];
+    coarseTri[2] = u_VertexBuffer[u_IndexBuffer[firstIndex + 2]];
+
+    // derive the sub-triangle selected by the key
+    vec4 subTri[3];
+    subd(key, coarseTri, subTri);
+
+    // place this patch vertex inside the sub-triangle, then displace it along z
+    vec4 finalVertex = berp(subTri, a_texcoord0);
+    finalVertex.z += dmap(finalVertex.xy);
+
+    // remap xy from [-1, 1] to [0, 1] for the fragment stage
+    v_texcoord0 = finalVertex.xy * 0.5 + 0.5;
+
+    gl_Position = mul(u_modelViewProj, finalVertex);
+}

BIN
examples/runtime/textures/dmap.png


+ 1 - 0
scripts/genie.lua

@@ -494,6 +494,7 @@ or _OPTIONS["with-combined-examples"] then
 		, "38-bloom"
 		, "38-bloom"
 		, "39-assao"
 		, "39-assao"
 		, "40-svt"
 		, "40-svt"
+		, "41-tess"
 		)
 		)
 
 
 	-- C99 source doesn't compile under WinRT settings
 	-- C99 source doesn't compile under WinRT settings