Browse Source

added mesh batch primitiveSubPart for multi indirect batching

Nicolas Cannasse 3 years ago
parent
commit
7a0edd0811
3 changed files with 114 additions and 15 deletions
  1. 80 10
      h3d/scene/MeshBatch.hx
  2. 1 0
      hxsl/BatchShader.hx
  3. 33 5
      hxsl/Cache.hx

+ 80 - 10
h3d/scene/MeshBatch.hx

@@ -9,6 +9,7 @@ private class BatchData {
 	public var matIndex : Int;
 	public var indexCount : Int;
 	public var indexStart : Int;
+	public var instanceBuffers : Array<h3d.impl.InstanceBuffer>;
 	public var buffers : Array<h3d.Buffer> = [];
 	public var data : hxd.FloatBuffer;
 	public var params : hxsl.RuntimeShader.AllocParam;
@@ -22,6 +23,15 @@ private class BatchData {
 
 }
 
+class MeshBatchPart { // describes the sub part of the primitive that MeshBatch draws when its primitiveSubPart field is set
+	public var indexStart : Int; // presumably the first index to draw; emitInstance writes it at byte offset 8 of the instance's 20-byte command
+	public var indexCount : Int; // presumably the number of indices to draw; emitInstance writes it at byte offset 0 of the command
+	public var baseVertex : Int; // emitInstance writes it at byte offset 12 of the command
+	public var bounds : h3d.col.Bounds; // emitInstance loads these into a temporary, transforms it by the instance position and adds it to the batch bounds
+	public function new() { // empty constructor: no field is initialized here, so the caller fills them in (bounds is read by emitInstance)
+	}
+}
+
 /**
 	h3d.scene.MeshBatch allows drawing multiple meshes in a single draw call.
 	See samples/MeshBatch.hx for an example.
@@ -52,6 +62,12 @@ class MeshBatch extends MultiMaterial {
 	**/
 	public var shadersChanged = true;
 
+	/**
+		Tells the mesh batch to draw only a subpart of the primitive
+	**/
+	public var primitiveSubPart : MeshBatchPart;
+	var primitiveSubBytes : haxe.io.Bytes;
+
 	public function new( primitive, ?material, ?parent ) {
 		instanced = new h3d.prim.Instanced();
 		instanced.commands = new h3d.impl.InstanceBuffer();
@@ -72,6 +88,10 @@ class MeshBatch extends MultiMaterial {
 			dataPasses.pass.removeShader(dataPasses.shader);
 			for( b in dataPasses.buffers )
 				alloc.disposeBuffer(b);
+			if( dataPasses.instanceBuffers != null ) {
+				for( b in dataPasses.instanceBuffers )
+					b.dispose();
+			}
 			alloc.disposeFloats(dataPasses.data);
 			dataPasses = dataPasses.next;
 		}
@@ -118,7 +138,8 @@ class MeshBatch extends MultiMaterial {
 					sl = sl.next;
 				}
 				shader.Batch_Count = b.maxInstance * b.paramsCount;
-				shader.constBits = b.maxInstance * b.paramsCount;
+				shader.Batch_HasOffset = primitiveSubPart != null;
+				shader.constBits = (shader.Batch_Count << 1) | (shader.Batch_HasOffset ? 1 : 0);
 				shader.updateConstants(null);
 			}
 		}
@@ -237,7 +258,29 @@ class MeshBatch extends MultiMaterial {
 
 	public function emitInstance() {
 		if( worldPosition == null ) syncPos();
-		instanced.addInstanceBounds(worldPosition == null ? absPos : worldPosition);
+		var ps = primitiveSubPart;
+		if( ps != null ) @:privateAccess {
+			instanced.tmpBounds.load(primitiveSubPart.bounds);
+			instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);
+			instanced.bounds.add(instanced.tmpBounds);
+
+			if( primitiveSubBytes == null ) {
+				primitiveSubBytes = haxe.io.Bytes.alloc(128);
+				instanced.commands = null;
+			}
+			if( primitiveSubBytes.length < (instanceCount+1) * 20 ) {
+				var next = haxe.io.Bytes.alloc(Std.int(primitiveSubBytes.length*3/2));
+				next.blit(0, primitiveSubBytes, 0, instanceCount * 20);
+				primitiveSubBytes = next;
+			}
+			var p = instanceCount * 20;
+			primitiveSubBytes.setInt32(p, ps.indexCount);
+			primitiveSubBytes.setInt32(p + 4, 1);
+			primitiveSubBytes.setInt32(p + 8, ps.indexStart);
+			primitiveSubBytes.setInt32(p + 12, ps.baseVertex);
+			primitiveSubBytes.setInt32(p + 16, 0);
+		} else
+			instanced.addInstanceBounds(worldPosition == null ? absPos : worldPosition);
 		var p = dataPasses;
 		while( p != null ) {
 			syncData(p);
@@ -251,22 +294,33 @@ class MeshBatch extends MultiMaterial {
 		if( instanceCount == 0 ) return;
 		var p = dataPasses;
 		var alloc = hxd.impl.Allocator.get();
+		var psBytes = primitiveSubBytes;
 		while( p != null ) {
 			var index = 0;
 			var start = 0;
 			while( start < instanceCount ) {
 				var upload = needUpload;
 				var buf = p.buffers[index];
-				if( buf == null || buf.isDisposed() ) {
-					buf = alloc.allocBuffer(MAX_BUFFER_ELEMENTS,4,UniformDynamic);
-					p.buffers[index] = buf;
-					upload = true;
-				}
 				var count = instanceCount - start;
 				if( count > p.maxInstance )
 					count = p.maxInstance;
-				if( upload )
+				if( buf == null || buf.isDisposed() ) {
+					buf = alloc.allocBuffer(MAX_BUFFER_ELEMENTS,4,UniformDynamic);
+					p.buffers[index] = buf;
 					buf.uploadVector(p.data, start * p.paramsCount * 4, count * p.paramsCount);
+				}
+				if( psBytes != null ) {
+					if( p.instanceBuffers == null ) p.instanceBuffers = [];
+					var buf = p.instanceBuffers[index];
+					if( buf == null /*|| buf.isDisposed()*/ ) {
+						buf = new h3d.impl.InstanceBuffer();
+						var sub = psBytes.sub(start*20,count*20);
+						for( i in 0...count )
+							sub.setInt32(i*20+16, i);
+						buf.setBuffer(count, sub);
+						p.instanceBuffers[index] = buf;
+					}
+				}
 				start += count;
 				index++;
 			}
@@ -274,6 +328,19 @@ class MeshBatch extends MultiMaterial {
 				alloc.disposeBuffer(p.buffers.pop());
 			p = p.next;
 		}
+		if( psBytes != null ) {
+			var prim = cast(primitive,h3d.prim.MeshPrimitive);
+			var offsets = @:privateAccess prim.getBuffer("Batch_Start");
+			if( offsets == null || offsets.vertices < instanceCount || offsets.isDisposed() ) {
+				if( offsets != null ) offsets.dispose();
+				var tmp = haxe.io.Bytes.alloc(4 * instanceCount);
+				for( i in 0...instanceCount )
+					tmp.setFloat(i<<2, i);
+				offsets = new h3d.Buffer(instanceCount, 1);
+				offsets.uploadBytes(tmp,0,instanceCount);
+				@:privateAccess prim.addBuffer("Batch_Start", offsets);
+			}
+		}
 		needUpload = false;
 	}
 
@@ -283,8 +350,11 @@ class MeshBatch extends MultiMaterial {
 			if( p.pass == ctx.drawPass.pass ) {
 				var bufferIndex = ctx.drawPass.index & 0xFFFF;
 				p.shader.Batch_Buffer = p.buffers[bufferIndex];
-				var count = instanceCount - p.maxInstance * bufferIndex;
-				instanced.commands.setCommand(count,p.indexCount,p.indexStart);
+				if( p.instanceBuffers == null ) {
+					var count = instanceCount - p.maxInstance * bufferIndex;
+					instanced.commands.setCommand(count,p.indexCount,p.indexStart);
+				} else
+					instanced.commands = p.instanceBuffers[bufferIndex];
 				break;
 			}
 			p = p.next;

+ 1 - 0
hxsl/BatchShader.hx

@@ -3,6 +3,7 @@ package hxsl;
 class BatchShader extends hxsl.Shader {
 
 	static var SRC = {
+		@const var Batch_HasOffset : Bool;
 		@const(65536) var Batch_Count : Int;
 		@param var Batch_Buffer : Buffer<Vec4,Batch_Count>;
 	};

+ 33 - 5
hxsl/Cache.hx

@@ -493,6 +493,12 @@ class Cache {
 		}
 
 		var pos = null;
+
+		var hasOffset = declVar("Batch_HasOffset",TBool,Param);
+		var inputOffset = declVar("Batch_Start",TFloat,Input);
+		hasOffset.qualifiers = [Const()];
+		inputOffset.qualifiers = [PerInstance(1)];
+
 		var vcount = declVar("Batch_Count",TInt,Param);
 		var vbuffer = declVar("Batch_Buffer",TBuffer(TVec(4,VFloat),SVar(vcount)),Param);
 		var voffset = declVar("Batch_Offset", TInt, Local);
@@ -504,7 +510,7 @@ class Cache {
 
 		s.data = {
 			name : "batchShader_"+id,
-			vars : [vcount,vbuffer,voffset],
+			vars : [vcount,hasOffset,vbuffer,voffset,inputOffset],
 			funs : [],
 		};
 
@@ -689,23 +695,45 @@ class Cache {
 			p = p.next;
 		}
 
-		exprs.unshift({
+
+		var inits = [];
+
+		inits.push({
 			p : pos,
-			e : TBinop(OpAssign, eoffset, { p : pos, t : TInt, e : TBinop(OpMult,{ e : TGlobal(InstanceID), t : TInt, p : pos },{ e : TConst(CInt(stride)), p : pos, t : TInt }) }),
+			e : TBinop(OpAssign, eoffset, { e : TGlobal(InstanceID), t : TInt, p : pos }),
 			t : TVoid,
 		});
 
+		// when Batch_HasOffset is true, add the per-instance Batch_Start input to InstanceID so that it somewhat emulates DrawID
+		inits.push({
+			p : pos,
+			e : TIf({ e : TVar(hasOffset), t : TBool, p : pos },{
+				p : pos,
+				e : TBinop(OpAssignOp(OpAdd), eoffset, { e : TCall({ e : TGlobal(ToInt), t : TVoid, p : pos },[{ p : pos, t : TFloat, e : TVar(inputOffset) }]), t : TInt, p : pos }),
+				t : TVoid,
+			}, null),
+			t : TVoid,
+		});
+
+		inits.push({
+			p : pos,
+			t : TInt,
+			e : TBinop(OpAssignOp(OpMult),eoffset,{ e : TConst(CInt(stride)), t : TInt, p : pos }),
+		});
+
 		var fv : TVar = declVar("init",TFun([]), Function);
 		var f : TFunction = {
 			kind : Init,
 			ref : fv,
 			args : [],
 			ret : TVoid,
-			expr : { e : TBlock(exprs), p : pos, t : TVoid },
+			expr : { e : TBlock(inits.concat(exprs)), p : pos, t : TVoid },
 		};
 		s.data.funs.push(f);
-		s.consts = new SharedShader.ShaderConst(vcount,0,countBits);
+		s.consts = new SharedShader.ShaderConst(vcount,1,countBits+1);
 		s.consts.globalId = 0;
+		s.consts.next = new SharedShader.ShaderConst(hasOffset,0,1);
+		s.consts.next.globalId = 0;
 
 		return { shader : s, params : params, size : stride };
 	}