Pārlūkot izejas kodu

directx draw instanced indirect support

ncannasse 7 gadi atpakaļ
vecāks
revīzija
ec9c808110

+ 12 - 0
h3d/Engine.hx

@@ -199,6 +199,18 @@ class Engine {
 		}
 	}
 
+	/**
+		Selects `buffers` on the driver, then asks it to perform the instanced draw
+		described by `commands` using `indexes`.
+		The draw is skipped when `indexes` has been disposed or when `commands` holds no command.
+	**/
+	public function renderInstanced( buffers : Buffer.BufferOffset, indexes : Indexes, commands : h3d.impl.InstanceBuffer ) {
+		flushTarget();
+		// buffers are selected even if the draw ends up being skipped below
+		driver.selectMultiBuffers(buffers);
+		if( indexes.isDisposed() || commands.commandCount <= 0 )
+			return;
+		driver.drawInstanced(indexes.ibuf, commands);
+		// statistics: all triangles described by the commands, counted as a single draw call
+		drawTriangles += commands.triCount;
+		drawCalls++;
+	}
+
 	function set_debug(d) {
 		debug = d;
 		driver.setDebug(debug);

+ 31 - 1
h3d/impl/DirectXDriver.hx

@@ -872,6 +872,13 @@ class DirectXDriver extends h3d.impl.Driver {
 				if( v.kind == Input ) {
 					var e = new LayoutElement();
 					var name = hxsl.HlslOut.semanticName(v.name);
+					var perInst = 0;
+					if( v.qualifiers != null )
+						for( q in v.qualifiers )
+							switch( q ) {
+							case PerInstance(k): perInst = k;
+							default:
+							}
 					e.semanticName = @:privateAccess name.toUtf8();
 					e.inputSlot = layout.length;
 					e.format = switch( v.type ) {
@@ -883,7 +890,11 @@ class DirectXDriver extends h3d.impl.Driver {
 					default:
 						throw "Unsupported input type " + hxsl.Ast.Tools.toString(v.type);
 					};
-					e.inputSlotClass = PerVertexData;
+					if( perInst > 0 ) {
+						e.inputSlotClass = PerInstanceData;
+						e.instanceDataStepRate = perInst;
+					} else
+						e.inputSlotClass = PerVertexData;
 					layout.push(e);
 					s.offsets.push(offset);
 					s.inputs.push(v.name);
@@ -1093,6 +1104,25 @@ class DirectXDriver extends h3d.impl.Driver {
 		dx.Driver.drawIndexed(ntriangles * 3, startIndex, 0);
 	}
 
+	/**
+		Creates the DirectX buffer backing `b` and stores it in `b.data`.
+		The buffer is sized for `b.commandCount` commands; `buf` is handed to `createBuffer`
+		(presumably as the initial contents - confirm against dx.Driver).
+	**/
+	override function allocInstanceBuffer(b:InstanceBuffer, buf : haxe.io.Bytes) {
+		// one command is 5 values of 4 bytes each
+		var byteSize = b.commandCount * 5 * 4;
+		var res = dx.Driver.createBuffer(byteSize, Default, UnorderedAccess, None, DrawIndirectArgs, 4, buf);
+		b.data = res;
+	}
+
+	/**
+		Releases the DirectX resource stored in `b.data`, then clears the reference.
+	**/
+	override function disposeInstanceBuffer(b:InstanceBuffer) {
+		var res : dx.Resource = b.data;
+		res.release();
+		b.data = null;
+	}
+
+	/**
+		Draws every command stored in `commands`, using `ibuf` as index buffer.
+		Does nothing while `allowDraw` is false.
+
+		Each command occupies 5 values of 4 bytes in `commands.data` (see `allocInstanceBuffer`).
+		A single indirect draw only consumes one argument structure, so one call is issued
+		per command, each at its own byte offset. Drawing only at offset 0 would silently
+		ignore every command after the first one.
+	**/
+	override function drawInstanced(ibuf:IndexBuffer, commands:InstanceBuffer) {
+		if( !allowDraw )
+			return;
+		if( currentIndex != ibuf ) {
+			currentIndex = ibuf;
+			dx.Driver.iaSetIndexBuffer(ibuf.res,false,0);
+		}
+		// byte size of one command: 5 values of 4 bytes
+		var stride = 5 * 4;
+		for( i in 0...commands.commandCount )
+			dx.Driver.drawIndexedInstancedIndirect(commands.data, i * stride);
+	}
+
 	static var COMPARE : Array<ComparisonFunc> = [
 		Always,
 		Never,

+ 9 - 0
h3d/impl/Driver.hx

@@ -201,6 +201,9 @@ class Driver {
 	public function draw( ibuf : IndexBuffer, startIndex : Int, ntriangles : Int ) {
 	}
 
+	/**
+		Draws the instances described by `commands` using the index buffer `ibuf`.
+		Empty in the base driver: drivers that support instancing override it.
+	**/
+	public function drawInstanced( ibuf : IndexBuffer, commands : h3d.impl.InstanceBuffer ) {
+	}
+
 	public function setRenderZone( x : Int, y : Int, width : Int, height : Int ) {
 	}
 
@@ -242,6 +245,9 @@ class Driver {
 		return null;
 	}
 
+	/**
+		Allocates the driver side storage for the instance buffer `b` from the command data in `bytes`.
+		Empty in the base driver: drivers that support instancing override it.
+	**/
+	public function allocInstanceBuffer( b : h3d.impl.InstanceBuffer, bytes : haxe.io.Bytes ) {
+	}
+
 	public function disposeTexture( t : h3d.mat.Texture ) {
 	}
 
@@ -251,6 +257,9 @@ class Driver {
 	public function disposeVertexes( v : VertexBuffer ) {
 	}
 
+	/**
+		Releases the driver side storage of the instance buffer `b`.
+		Empty in the base driver: drivers that support instancing override it.
+	**/
+	public function disposeInstanceBuffer( b : h3d.impl.InstanceBuffer ) {
+	}
+
 	public function uploadIndexBuffer( i : IndexBuffer, startIndice : Int, indiceCount : Int, buf : hxd.IndexBuffer, bufPos : Int ) {
 	}
 

+ 37 - 0
h3d/impl/InstanceBuffer.hx

@@ -0,0 +1,37 @@
+package h3d.impl;
+
+@:allow(h3d.impl.Driver)
+class InstanceBuffer {
+
+	// driver specific resource, filled in by the driver in allocInstanceBuffer
+	var data : Dynamic;
+	// driver that was current when the buffer was created, also used to dispose it
+	var driver : h3d.impl.Driver;
+	/** Sum of the triangle counts of all commands, computed at construction. **/
+	public var triCount(default,null) : Int = 0;
+	/** Number of commands stored in the buffer. **/
+	public var commandCount(default, null) : Int;
+
+	/**
+		Builds an instance buffer from `commandCount` consecutive commands read from `bytes`.
+		Each command is a structure of 5 i32 (20 bytes) with the following values:
+		- indexCount : number of indexes per instance
+		- instanceCount : number of instances to draw
+		- startIndexLocation : offset in indexes
+		- baseVertexLocation : offset in buffer
+		- startInstanceLocation : offset in per instance buffer
+
+		The storage itself is allocated through the current engine driver.
+	**/
+	public function new( commandCount : Int, bytes : haxe.io.Bytes ) {
+		var total = 0;
+		for( i in 0...commandCount ) {
+			var base = i * 20;
+			var indexCount = bytes.getInt32(base);
+			var instanceCount = bytes.getInt32(base + 4);
+			// three indexes per triangle, rounded down for each command
+			total += Std.int((indexCount * instanceCount) / 3);
+		}
+		triCount = total;
+
+		this.commandCount = commandCount;
+		driver = h3d.Engine.getCurrent().driver;
+		driver.allocInstanceBuffer(this, bytes);
+	}
+
+	/**
+		Asks the driver to release the buffer storage, if there is any.
+	**/
+	public function dispose() {
+		if( data == null )
+			return;
+		driver.disposeInstanceBuffer(this);
+	}
+
+}

+ 31 - 0
h3d/prim/Instanced.hx

@@ -0,0 +1,31 @@
+package h3d.prim;
+
+/**
+	Mesh primitive rendered through `h3d.Engine.renderInstanced`: it borrows the
+	buffers of another mesh and draws them using a set of instance commands.
+**/
+class Instanced extends MeshPrimitive {
+
+	/** Draw commands handed to the engine on render. **/
+	public var commands : h3d.impl.InstanceBuffer;
+	/** Buffer bound by `defineBuffer` when its `isInst` argument is true. **/
+	public var instanceBuffer : h3d.Buffer;
+
+	public function new() {
+	}
+
+	/**
+		Uses the geometry of `m`: its main buffer, its indexes (or the engine default
+		triangle indexes when it has none) and every named buffer it has registered.
+		`m` is allocated first if it has no buffer yet.
+	**/
+	public function setMesh( m : MeshPrimitive ) {
+		var engine = h3d.Engine.getCurrent();
+		if( m.buffer == null )
+			m.alloc(engine);
+		buffer = m.buffer;
+		var idx = m.indexes;
+		if( idx == null )
+			idx = engine.mem.triIndexes;
+		indexes = idx;
+		// NOTE(review): assumes m.bufferCache is never null once m is allocated - confirm
+		var cache = m.bufferCache;
+		for( bid in cache.keys() ) {
+			var entry = cache.get(bid);
+			addBuffer(hxsl.Globals.getIDName(bid), entry.buffer, entry.offset);
+		}
+	}
+
+	/**
+		Registers the input `name` at `offset`, either in `instanceBuffer` (when `isInst`
+		is true) or in the mesh buffer.
+	**/
+	public function defineBuffer( name, offset, isInst = false ) {
+		var target = isInst ? instanceBuffer : buffer;
+		addBuffer(name, target, offset);
+	}
+
+	override function render( engine : h3d.Engine ) {
+		var bufs = getBuffers(engine);
+		engine.renderInstanced(bufs, indexes, commands);
+	}
+
+}

+ 4 - 0
hxsl/Ast.hx

@@ -81,6 +81,7 @@ enum VarQualifier {
 	Precision( p : Prec );
 	Range( min : Float, max : Float );
 	Ignore; // the variable is ignored in reflection (inspector)
+	PerInstance( v : Int );
 }
 
 enum Prec {
@@ -234,6 +235,9 @@ enum TGlobal {
 	// debug / internal
 	ChannelRead;
 	Trace;
+	// instancing
+	VertexID;
+	InstanceID;
 }
 
 enum Component {

+ 5 - 0
hxsl/Checker.hx

@@ -126,10 +126,14 @@ class Checker {
 				[{ args : [{ name : "uv", type : vec2 }], ret : vec2 }];
 			case Trace:
 				[];
+			case VertexID, InstanceID:
+				null;
 			}
 			if( def != null )
 				globals.set(g.toString(), { t : TFun(def), g : g } );
 		}
+		globals.set("vertexID", { t : TInt, g : VertexID });
+		globals.set("instanceID", { t : TInt, g : InstanceID });
 		globals.set("int", globals.get("toInt"));
 		globals.set("float", globals.get("toFloat"));
 		globals.set("reflect", globals.get("lReflect"));
@@ -705,6 +709,7 @@ class Checker {
 					}
 					if( tv.kind != Global && tv.kind != Param ) error("@const only allowed on parameter or global", pos);
 				case PerObject: if( tv.kind != Global ) error("@perObject only allowed on global", pos);
+				case PerInstance(_): if( tv.kind != Input ) error("@perInstance only allowed on input", pos);
 				case Nullable: if( tv.kind != Param ) error("@nullable only allowed on parameter or global", pos);
 				case Name(_):
 					if( parent != null ) error("Cannot have an explicit name for a structure variable", pos);

+ 3 - 0
hxsl/Globals.hx

@@ -76,5 +76,8 @@ class Globals {
 		}
 		return id;
 	}
+	/**
+		Returns the entry of `ALL` stored at index `id` (presumably the name the id
+		was allocated for - confirm against allocID).
+	**/
+	public static function getIDName( id : Int ) : String {
+		var name = ALL[id];
+		return name;
+	}
 
 }

+ 18 - 0
hxsl/HlslOut.hx

@@ -25,6 +25,8 @@ class HlslOut {
 		m.set(Fract, "frac");
 		m.set(Mix, "lerp");
 		m.set(Inversesqrt, "rsqrt");
+		m.set(VertexID,"_in.vertexID");
+		m.set(InstanceID,"_in.instanceID");
 		for( g in m )
 			KWDS.set(g, true);
 		m;
@@ -32,6 +34,8 @@ class HlslOut {
 
 	var SV_POSITION = "SV_POSITION";
 	var SV_TARGET = "SV_TARGET";
+	var SV_VertexID = "SV_VertexID";
+	var SV_InstanceID = "SV_InstanceID";
 	var STATIC = "static ";
 	var buf : StringBuf;
 	var exprIds = 0;
@@ -507,6 +511,12 @@ class HlslOut {
 		}
 	}
 
+	/**
+		Records in `m` every global referenced by `e` or by any of its sub expressions.
+	**/
+	function collectGlobals( m : Map<TGlobal,Bool>, e : TExpr ) {
+		switch( e.e ) {
+		case TGlobal(g):
+			m.set(g, true);
+		default:
+			// not a global itself: look into the children
+			e.iter(function(sub) collectGlobals(m, sub));
+		}
+	}
 
 	function initVars( s : ShaderData ) {
 		var index = 0;
@@ -521,12 +531,20 @@ class HlslOut {
 			varAccess.set(v.id, prefix);
 		}
 
+		var foundGlobals = new Map();
+		for( f in s.funs )
+			collectGlobals(foundGlobals, f.expr);
+
 		add("struct s_input {\n");
 		if( !isVertex )
 			add("\tfloat4 __pos__ : "+SV_POSITION+";\n");
 		for( v in s.vars )
 			if( v.kind == Input || (v.kind == Var && !isVertex) )
 				declVar("_in.", v);
+		if( foundGlobals.exists(VertexID) )
+			add("\tuint vertexID : "+SV_VertexID+";\n");
+		if( foundGlobals.exists(InstanceID) )
+			add("\tuint instanceID : "+SV_InstanceID+";\n");
 		add("};\n\n");
 
 		add("struct s_output {\n");

+ 5 - 0
hxsl/MacroParser.hx

@@ -23,6 +23,9 @@ class MacroParser {
 		case [ { expr : EConst(CInt(a) | CFloat(a)) }, { expr : EConst(CInt(b) | CFloat(b)) } ] if( m.name == "range" ):
 			v.qualifiers.push(Range(Std.parseFloat(a),Std.parseFloat(b)));
 			return;
+		case [ { expr : EConst(CInt(a)) } ] if( m.name == "perInstance" ):
+			v.qualifiers.push(PerInstance(Std.parseInt(a)));
+			return;
 		default:
 			error("Invalid meta parameter for "+m.name, m.pos);
 		}
@@ -53,6 +56,8 @@ class MacroParser {
 			v.qualifiers.push(Precision(High));
 		case "ignore":
 			v.qualifiers.push(Ignore);
+		case "perInstance":
+			v.qualifiers.push(PerInstance(1));
 		default:
 			error("Unsupported qualifier " + m.name, m.pos);
 		}

+ 1 - 0
hxsl/Printer.hx

@@ -60,6 +60,7 @@ class Printer {
 				case Precision(p): p.getName().toLowerCase() + "p";
 				case Range(min, max): "range(" + min + "," + max + ")";
 				case Ignore: "ignore";
+				case PerInstance(n): "perInstance("+n+")";
 				}) + " ");
 		}
 		if( v.kind != defKind )

+ 49 - 0
samples/ShaderAdvanced.hx

@@ -31,6 +31,26 @@ class TestTextureArray extends hxsl.Shader {
 
 }
 
+/**
+	Sample shader for instanced rendering: moves and tints each instance using a
+	per-instance `offset` input together with the `instanceID` and `vertexID` globals.
+**/
+class InstancedOffsetShader extends hxsl.Shader {
+
+	static var SRC = {
+
+		@:import h3d.shader.BaseMesh;
+
+		// per-instance input with a step rate of 2 (presumably one value shared by two consecutive instances - confirm)
+		@perInstance(2) @input var offset : Vec2;
+
+		function vertex() {
+			// shift by the per-instance offset
+			transformedPosition.xy += offset;
+			// extra shift applied to odd instances only
+			transformedPosition.xy += float(instanceID & 1) * vec2(0.2,0.1);
+			// each instance is moved a bit further along z
+			transformedPosition.z += float(instanceID) * 0.01;
+			// color the instance from its id (red) and the vertex id (green)
+			pixelColor.r = float(instanceID) / 16.;
+			pixelColor.g = float(vertexID) / 8.;
+		}
+
+	};
+
+}
+
 class ShaderAdvanced extends hxd.App {
 
 	var updates : Array<Float -> Void> = [];
@@ -116,6 +136,35 @@ class ShaderAdvanced extends hxd.App {
 		tarr.textures.clear(0xFF4040,1,0);
 		tarr.textures.clear(0x40FF40,1,1);
 		tarr.textures.clear(0x4040FF,1,2);
+
+		// draw instanced
+		var cube = h3d.prim.Cube.defaultUnitCube();
+
+		var prim = new h3d.prim.Instanced();
+		var bytes = new haxe.io.BytesOutput();
+		bytes.writeInt32(cube.triCount() * 3);
+		bytes.writeInt32(16);
+		bytes.writeInt32(0);
+		bytes.writeInt32(0);
+		bytes.writeInt32(0);
+
+		prim.setMesh(cube);
+		prim.commands = new h3d.impl.InstanceBuffer(1, bytes.getBytes());
+
+		new h3d.scene.DirLight(new h3d.Vector(-1,-2,-5),s3d);
+		new h3d.scene.CameraController(s3d).loadFromCamera();
+
+		var buf = new hxd.FloatBuffer();
+		for( i in 0...16 ) {
+			buf.push(i * 0.4);
+			buf.push(i * 0.2);
+		}
+		prim.instanceBuffer = h3d.Buffer.ofFloats(buf,2);
+		prim.defineBuffer("offset",0,true);
+
+		var m = new h3d.scene.Mesh(prim, s3d);
+		m.material.mainPass.addShader(new InstancedOffsetShader());
+		m.material.shadows = false;
 	}
 
 	override function update(dt:Float) {