Browse Source

MeshBatch: Added storage buffer support (SSBO in GL, RWStructuredBuffer in DX12)

TothBenoit 1 year ago
parent
commit
633360f72c
3 changed files with 65 additions and 35 deletions
  1. 18 16
      h3d/scene/MeshBatch.hx
  2. 3 1
      hxsl/BatchShader.hx
  3. 44 18
      hxsl/Cache.hx

+ 18 - 16
h3d/scene/MeshBatch.hx

@@ -50,9 +50,8 @@ class ComputeIndirect extends hxsl.Shader {
 			var position : Vec3;
 		}
 
-		@const(4096) var MAX_INSTANCE : Int;
-		@param var commandBuffer : RWBuffer<Int, MAX_INSTANCE>;
-		@param var instanceData : PartialBuffer<{ modelView : Mat4}, MAX_INSTANCE>;
+		@param var commandBuffer : RWBuffer<Int, 65535>;
+		@param var instanceData : RWPartialBuffer<{ modelView : Mat4}, 65535>;
 		@param var radius : Float;
 
 		@const(16) var MATERIAL_COUNT : Int = 1;
@@ -140,11 +139,11 @@ class MeshBatch extends MultiMaterial {
 	var enableLOD(get, never) : Bool;
 	function get_enableLOD() return meshBatchFlags.has( EnableLod );
 	var enableGPUCulling(get, never) : Bool;
-	function get_enableGPUCulling() return meshBatchFlags.has( EnableGpuCulling ); 
-	
-	var computeBufferFormat(get, never) : Bool;
-	function get_computeBufferFormat() return meshBatchFlags.has(EnableGpuUpdate) || enableGPUCulling || enableLOD;
-	
+	function get_enableGPUCulling() return meshBatchFlags.has( EnableGpuCulling );
+
+	var useStorageBuffer(get, never) : Bool;
+	function get_useStorageBuffer() return meshBatchFlags.has(EnableGpuUpdate) || enableGPUCulling || enableLOD;
+
 	var matInfos : h3d.Buffer;
 
 	/**
@@ -254,7 +253,7 @@ class MeshBatch extends MultiMaterial {
 				p.dynamicParameters = true;
 				p.batchMode = true;
 
-				if( computeBufferFormat ) {
+				if( useStorageBuffer ) {
 					var pl = [];
 					var p = b.params;
 					while( p != null ) {
@@ -299,9 +298,10 @@ class MeshBatch extends MultiMaterial {
 					b.shaders.push(sl.s);
 					sl = sl.next;
 				}
+				shader.Batch_UseStorage = useStorageBuffer;
 				shader.Batch_Count = b.maxInstance * b.paramsCount;
 				shader.Batch_HasOffset = primitiveSubPart != null || enableLOD || enableGPUCulling;
-				shader.constBits = (shader.Batch_Count << 1) | (shader.Batch_HasOffset ? 1 : 0);
+				shader.constBits = (shader.Batch_Count << 2) | (shader.Batch_UseStorage ? ( 1 << 1 ) : 0) | (shader.Batch_HasOffset ? 1 : 0);
 				shader.updateConstants(null);
 			}
 		}
@@ -316,7 +316,7 @@ class MeshBatch extends MultiMaterial {
 
 	public function begin( emitCountTip = -1, ?flags : haxe.EnumFlags<MeshBatchFlag> ) {
 		if ( flags != null ) {
-			#if !js
+			#if (!js && !(hldx && !dx12))
 			// TODO : Add LOD and GPU Culling support for mesh batch using sub parts
 			var allowedLOD = flags.has(EnableLod) && primitiveSubPart == null && @:privateAccess instanced.primitive.lodCount() > 1;
 			flags.setTo(EnableLod, allowedLOD);
@@ -540,12 +540,12 @@ class MeshBatch extends MultiMaterial {
     				return n + 1;
 				}
 
-				var maxVertexCount = (computeBufferFormat) ? p.maxInstance : MAX_BUFFER_ELEMENTS;
+				var maxVertexCount = (useStorageBuffer) ? p.maxInstance : MAX_BUFFER_ELEMENTS;
 				var vertexCount = Std.int( count * (( 4 * p.paramsCount ) / p.bufferFormat.stride) );
 				var vertexCountAllocated = #if js Std.int( MAX_BUFFER_ELEMENTS * 4 / p.bufferFormat.stride ) #else hxd.Math.imin( nextPowerOfTwo( vertexCount ), maxVertexCount ) #end;
 
 				if( buf == null || buf.isDisposed() || buf.vertices < vertexCountAllocated ) {
-					var bufferFlags : hxd.impl.Allocator.BufferFlags = meshBatchFlags.has(EnableGpuUpdate) ? UniformReadWrite : UniformDynamic;
+					var bufferFlags : hxd.impl.Allocator.BufferFlags = useStorageBuffer ? UniformReadWrite : UniformDynamic;
 					if ( buf != null )
 						alloc.disposeBuffer(buf);
 					buf = alloc.allocBuffer( vertexCountAllocated, p.bufferFormat,bufferFlags );
@@ -592,7 +592,6 @@ class MeshBatch extends MultiMaterial {
 					var computePass = new h3d.mat.Pass("batchUpdate");
 					var computeShader = new ComputeIndirect();
 					computePass.addShader(computeShader);
-					computeShader.MAX_INSTANCE = p.maxInstance;
 					computeShader.ENABLE_LOD = enableLOD;
 					computeShader.ENABLE_CULLING = enableGPUCulling;
 					p.computePass = computePass;
@@ -673,7 +672,10 @@ class MeshBatch extends MultiMaterial {
 		while( true ) {
 			if( p.pass == ctx.drawPass.pass ) {
 				var bufferIndex = ctx.drawPass.index & 0xFFFF;
-				p.shader.Batch_Buffer = p.buffers[bufferIndex];
+				if ( useStorageBuffer )
+					p.shader.Batch_StorageBuffer = p.buffers[bufferIndex];
+				else
+					p.shader.Batch_Buffer = p.buffers[bufferIndex];
 				if( p.instanceBuffers == null ) {
 					var count = hxd.Math.imin( instanceCount - p.maxInstance * bufferIndex, p.maxInstance );
 					instanced.commands.setCommand(count,p.indexCount,p.indexStart);
@@ -697,7 +699,7 @@ class MeshBatch extends MultiMaterial {
 		var p = dataPasses;
 		while( p != null ) {
 			var pass = p.pass;
-			
+
 			// Triggers upload
 			if ( enableGPUCulling )
 				ctx.getCameraFrustumBuffer();

+ 3 - 1
hxsl/BatchShader.hx

@@ -4,8 +4,10 @@ class BatchShader extends hxsl.Shader {
 
 	static var SRC = {
 		@const var Batch_HasOffset : Bool;
-		@const(65536) var Batch_Count : Int;
+		@const var Batch_UseStorage : Bool;
+		@const(4096) var Batch_Count : Int;
 		@param var Batch_Buffer : Buffer<Vec4,Batch_Count>;
+		@param var Batch_StorageBuffer : RWBuffer<Vec4, 65535>;
 	};
 
 	public var params : RuntimeShader.AllocParam;

+ 44 - 18
hxsl/Cache.hx

@@ -622,18 +622,22 @@ class Cache {
 		hasOffset.qualifiers = [Const()];
 		inputOffset.qualifiers = [PerInstance(1)];
 
+		var useStorage = declVar("Batch_UseStorage",TBool,Param);
 		var vcount = declVar("Batch_Count",TInt,Param);
-		var vbuffer = declVar("Batch_Buffer",TBuffer(TVec(4,VFloat),SVar(vcount),Uniform),Param);
+		var vuniformBuffer = declVar("Batch_Buffer",TBuffer(TVec(4,VFloat),SVar(vcount),Uniform),Param);
+		var vstorageBuffer = declVar("Batch_StorageBuffer",TBuffer(TVec(4,VFloat),SConst(65535),RW),Param);
 		var voffset = declVar("Batch_Offset", TInt, Local);
-		var ebuffer = { e : TVar(vbuffer), p : pos, t : vbuffer.type };
+		var euniformBuffer = { e : TVar(vuniformBuffer), p : pos, t : vuniformBuffer.type };
+		var estorageBuffer = { e : TVar(vstorageBuffer), p : pos, t : vstorageBuffer.type };
 		var eoffset = { e : TVar(voffset), p : pos, t : voffset.type };
 		var tvec4 = TVec(4,VFloat);
 		var countBits = 16;
 		vcount.qualifiers = [Const(1 << countBits)];
+		useStorage.qualifiers = [Const()];
 
 		s.data = {
 			name : "batchShader_"+id,
-			vars : [vcount,hasOffset,vbuffer,voffset,inputOffset],
+			vars : [vcount,hasOffset,useStorage,vuniformBuffer,vstorageBuffer,voffset,inputOffset],
 			funs : [],
 		};
 
@@ -764,11 +768,11 @@ class Cache {
 		var parentVars = new Map();
 		var swiz = [[X],[Y],[Z],[W]];
 
-		function readOffset( index : Int ) : TExpr {
+		function readOffset( ebuffer, index : Int ) : TExpr {
 			return { e : TArray(ebuffer,{ e : TBinop(OpAdd,eoffset,{ e : TConst(CInt(index)), t : TInt, p : pos }), t : TInt, p : pos }), t : tvec4, p : pos };
 		}
 
-		function extractVar( v : AllocParam ) {
+		function declareLocalVar( v : AllocParam ) {
 			var vreal : TVar = declVar(v.name, v.type, Local);
 			if( v.perObjectGlobal != null ) {
 				var path = v.perObjectGlobal.path.split(".");
@@ -791,43 +795,49 @@ class Cache {
 				}
 			}
 			s.data.vars.push(vreal);
+			return vreal;
+		}
+
+		function extractVar( vreal, ebuffer, v : AllocParam ) {
 			var index = (v.pos>>2);
 			var extract = switch( v.type ) {
 			case TMat4:
 				{ p : pos, t : v.type, e : TCall({ e : TGlobal(Mat4), t : TVoid, p : pos },[
-					readOffset(index),
-					readOffset(index + 1),
-					readOffset(index + 2),
-					readOffset(index + 3),
+					readOffset(ebuffer, index),
+					readOffset(ebuffer, index + 1),
+					readOffset(ebuffer, index + 2),
+					readOffset(ebuffer, index + 3),
 				]) };
 			case TVec(4,VFloat):
-				readOffset(index);
+				readOffset(ebuffer, index);
 			case TVec(3,VFloat):
-				{ p : pos, t : v.type, e : TSwiz(readOffset(index),v.pos&3 == 0 ? [X,Y,Z] : [Y,Z,W]) };
+				{ p : pos, t : v.type, e : TSwiz(readOffset(ebuffer, index),v.pos&3 == 0 ? [X,Y,Z] : [Y,Z,W]) };
 			case TVec(2,VFloat):
 				var swiz = switch( v.pos & 3 ) {
 				case 0: [X,Y];
 				case 1: [Y,Z];
 				default: [Z,W];
 				}
-				{ p : pos, t : v.type, e : TSwiz(readOffset(index),swiz) };
+				{ p : pos, t : v.type, e : TSwiz(readOffset(ebuffer, index),swiz) };
 			case TFloat:
-				{ p : pos, t : v.type, e : TSwiz(readOffset(index),swiz[v.pos&3]) }
+				{ p : pos, t : v.type, e : TSwiz(readOffset(ebuffer, index),swiz[v.pos&3]) }
 			default:
 				throw "assert";
 			}
 			return { p : pos, e : TBinop(OpAssign, { e : TVar(vreal), p : pos, t : v.type }, extract), t : TVoid };
 		}
 
-		var exprs = [];
+		var exprsUniform = [];
+		var exprsStorage = [];
 		var stride = used.length;
 		var p = params;
 		while( p != null ) {
-			exprs.push(extractVar(p));
+			var vreal = declareLocalVar(p);
+			exprsUniform.push(extractVar(vreal, euniformBuffer, p));
+			exprsStorage.push(extractVar(vreal, estorageBuffer, p));
 			p = p.next;
 		}
 
-
 		var inits = [];
 
 		inits.push({
@@ -853,19 +863,35 @@ class Cache {
 			e : TBinop(OpAssignOp(OpMult),eoffset,{ e : TConst(CInt(stride)), t : TInt, p : pos }),
 		});
 
+		inits.push({
+			p : pos,
+			e : TIf({ e : TVar(useStorage), t : TBool, p : pos },{
+				p : pos,
+				e : TBlock(exprsStorage),
+				t : TVoid,
+			}, {
+				p : pos,
+				e : TBlock(exprsUniform),
+				t : TVoid,
+			}),
+			t : TVoid,
+		});
+
 		var fv : TVar = declVar("init",TFun([]), Function);
 		var f : TFunction = {
 			kind : Init,
 			ref : fv,
 			args : [],
 			ret : TVoid,
-			expr : { e : TBlock(inits.concat(exprs)), p : pos, t : TVoid },
+			expr : { e : TBlock(inits), p : pos, t : TVoid },
 		};
 		s.data.funs.push(f);
-		s.consts = new SharedShader.ShaderConst(vcount,1,countBits+1);
+		s.consts = new SharedShader.ShaderConst(vcount,2,countBits+1);
 		s.consts.globalId = 0;
 		s.consts.next = new SharedShader.ShaderConst(hasOffset,0,1);
 		s.consts.next.globalId = 0;
+		s.consts.next.next = new SharedShader.ShaderConst(useStorage,1,1);
+		s.consts.next.next.globalId = 0;
 
 		return { shader : s, params : params, size : stride };
 	}