Selaa lähdekoodia

fixed dx12 driver vs buffer changes

Nicolas Cannasse 2 vuotta sitten
vanhempi
commit
7b590302e0
1 muutettua tiedostoa jossa 61 lisäystä ja 54 poistoa
  1. 61 54
      h3d/impl/DX12Driver.hx

+ 61 - 54
h3d/impl/DX12Driver.hx

@@ -102,13 +102,12 @@ class ShaderRegisters {
 class CompiledShader {
 	public var vertexRegisters : ShaderRegisters;
 	public var fragmentRegisters : ShaderRegisters;
-	public var inputCount : Int;
-	public var inputNames : InputNames;
+	public var format : hxd.BufferFormat;
 	public var pipeline : GraphicsPipelineStateDesc;
 	public var pipelines : Map<Int,hl.NativeArray<CachedPipeline>> = new Map();
 	public var rootSignature : RootSignature;
 	public var inputLayout : hl.CArray<InputElementDesc>;
-	public var inputOffsets : Array<Int>;
+	public var inputCount : Int;
 	public var shader : hxsl.RuntimeShader;
 	public function new() {
 	}
@@ -299,7 +298,7 @@ class DX12Driver extends h3d.impl.Driver {
 	static inline var PSIGN_STENCIL_MASK = PSIGN_UNUSED + 1;
 	static inline var PSIGN_STENCIL_OPS = PSIGN_STENCIL_MASK + 2;
 	static inline var PSIGN_RENDER_TARGETS = PSIGN_STENCIL_OPS + 4;
-	static inline var PSIGN_BUF_OFFSETS = PSIGN_RENDER_TARGETS + 8;
+	static inline var PSIGN_LAYOUT = PSIGN_RENDER_TARGETS + 8;
 
 	var pipelineSignature = new hl.Bytes(64);
 	var adlerOut = new hl.Bytes(4);
@@ -962,12 +961,10 @@ class DX12Driver extends h3d.impl.Driver {
 		var sign = new RootSignature(signBytes,signSize);
 
 		var inputLayout = hl.CArray.alloc(InputElementDesc, inputs.length);
-		var inputOffsets = [];
-		var offset = 0;
+		var format : Array<hxd.BufferFormat.BufferInput> = [];
 		for( i => v in inputs ) {
 			var d = inputLayout[i];
 			var perInst = 0;
-			inputOffsets.push(offset);
 			if( v.qualifiers != null )
 				for( q in v.qualifiers )
 					switch( q ) {
@@ -975,16 +972,8 @@ class DX12Driver extends h3d.impl.Driver {
 					default:
 					}
 			d.semanticName = @:privateAccess hxsl.HlslOut.semanticName(v.name).toUtf8();
-			d.format = switch( v.type ) {
-				case TFloat: offset++; R32_FLOAT;
-				case TVec(2, VFloat): offset += 2; R32G32_FLOAT;
-				case TVec(3, VFloat): offset += 3;R32G32B32_FLOAT;
-				case TVec(4, VFloat): offset += 4;R32G32B32A32_FLOAT;
-				case TBytes(4): offset++; R8G8B8A8_UINT;
-				default:
-					throw "Unsupported input type " + hxsl.Ast.Tools.toString(v.type);
-			};
 			d.inputSlot = i;
+			format.push({ name : v.name, type : hxd.BufferFormat.InputFormat.fromHXSL(v.type) });
 			if( perInst > 0 ) {
 				d.inputSlotClass = PER_INSTANCE_DATA;
 				d.instanceDataStepRate = perInst;
@@ -1011,12 +1000,11 @@ class DX12Driver extends h3d.impl.Driver {
 
 		//Driver.createGraphicsPipelineState(p);
 
-		c.inputNames = InputNames.get([for( v in inputs ) v.name]);
+		c.format = hxd.BufferFormat.make(format);
 		c.pipeline = p;
 		c.rootSignature = sign;
 		c.inputLayout = inputLayout;
 		c.inputCount = inputs.length;
-		c.inputOffsets = inputOffsets;
 		c.shader = shader;
 
 		for( i in 0...inputs.length )
@@ -1024,10 +1012,6 @@ class DX12Driver extends h3d.impl.Driver {
 	   return c;
 	}
 
-	override function getShaderInputNames() : InputNames {
-		return currentShader.inputNames;
-	}
-
 	function disposeResource( r : ResourceData ) {
 		frame.toRelease.push(r.res);
 		r.res = null;
@@ -1036,7 +1020,7 @@ class DX12Driver extends h3d.impl.Driver {
 
 	// ----- BUFFERS
 
-	function allocBuffer( size : Int, heapType, state ) {
+	function allocGPU( size : Int, heapType, state ) {
 		var desc = new ResourceDesc();
 		var flags = new haxe.EnumFlags();
 		desc.dimension = BUFFER;
@@ -1050,20 +1034,20 @@ class DX12Driver extends h3d.impl.Driver {
 		return Driver.createCommittedResource(tmp.heap, flags, desc, state, null);
 	}
 
-	override function allocVertexes( m : ManagedBuffer ) : VertexBuffer {
+	override function allocBuffer( m : h3d.Buffer ) : GPUBuffer {
 		var buf = new VertexBufferData();
-		var size = (m.size * m.stride) << 2;
+		var size = m.getMemSize();
 		var bufSize = m.flags.has(UniformBuffer) ? calcCBVSize(size) : size;
 		buf.state = COPY_DEST;
-		buf.res = allocBuffer(bufSize, DEFAULT, COPY_DEST);
+		buf.res = allocGPU(bufSize, DEFAULT, COPY_DEST);
 		if( !m.flags.has(UniformBuffer) ) {
 			var view = new VertexBufferView();
 			view.bufferLocation = buf.res.getGpuVirtualAddress();
 			view.sizeInBytes = size;
-			view.strideInBytes = m.stride << 2;
+			view.strideInBytes = m.format.strideBytes;
 			buf.view = view;
 		}
-		buf.stride = m.stride;
+		buf.stride = m.format.strideBytes;
 		buf.size = bufSize;
 		buf.uploaded = m.flags.has(Dynamic);
 		return buf;
@@ -1075,7 +1059,7 @@ class DX12Driver extends h3d.impl.Driver {
 		buf.count = count;
 		buf.bits = is32?2:1;
 		var size = count << buf.bits;
-		buf.res = allocBuffer(size, DEFAULT, COPY_DEST);
+		buf.res = allocGPU(size, DEFAULT, COPY_DEST);
 		var view = new IndexBufferView();
 		view.bufferLocation = buf.res.getGpuVirtualAddress();
 		view.format = is32 ? R32_UINT : R16_UINT;
@@ -1086,7 +1070,7 @@ class DX12Driver extends h3d.impl.Driver {
 
 	override function allocInstanceBuffer(b:InstanceBuffer, bytes:haxe.io.Bytes) {
 		var dataSize = b.commandCount * 5 * 4;
-		var buf = allocBuffer(dataSize, DEFAULT, COPY_DEST);
+		var buf = allocGPU(dataSize, DEFAULT, COPY_DEST);
 		var tmpBuf = allocDynamicBuffer(bytes, dataSize);
 		frame.commandList.copyBufferRegion(buf, 0, tmpBuf, 0, dataSize);
 		b.data = buf;
@@ -1098,7 +1082,7 @@ class DX12Driver extends h3d.impl.Driver {
 		frame.commandList.resourceBarrier(b);
 	}
 
-	override function disposeVertexes(v:VertexBuffer) {
+	override function disposeBuffer(v:GPUBuffer) {
 		disposeResource(v);
 	}
 
@@ -1117,7 +1101,7 @@ class DX12Driver extends h3d.impl.Driver {
 			tmpBuf = allocDynamicBuffer(bytes.offset(startByte), bytesCount);
 		else {
 			var size = calcCBVSize(bytesCount);
-			tmpBuf = allocBuffer(size, UPLOAD, GENERIC_READ);
+			tmpBuf = allocGPU(size, UPLOAD, GENERIC_READ);
 			var ptr = tmpBuf.map(0, null);
 			ptr.blit(0, bytes, 0, bytesCount);
 			tmpBuf.unmap(0,null);
@@ -1141,16 +1125,16 @@ class DX12Driver extends h3d.impl.Driver {
 		transition(i, INDEX_BUFFER);
 	}
 
-	override function uploadVertexBuffer(v:VertexBuffer, startVertex:Int, vertexCount:Int, buf:hxd.FloatBuffer, bufPos:Int) {
+	override function uploadBufferData(v:GPUBuffer, startVertex:Int, vertexCount:Int, buf:hxd.FloatBuffer, bufPos:Int) {
 		var data = hl.Bytes.getArray(buf.getNative()).offset(bufPos<<2);
 		transition(v, COPY_DEST);
-		updateBuffer(v, data, startVertex * v.stride << 2, vertexCount * v.stride << 2);
+		updateBuffer(v, data, startVertex * v.stride, vertexCount * v.stride);
 		transition(v, VERTEX_AND_CONSTANT_BUFFER);
 	}
 
-	override function uploadVertexBytes(v:VertexBuffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
+	override function uploadBufferBytes(v:GPUBuffer, startVertex:Int, vertexCount:Int, buf:haxe.io.Bytes, bufPos:Int) {
 		transition(v, COPY_DEST);
-		updateBuffer(v, @:privateAccess buf.b.offset(bufPos << 2), startVertex * v.stride << 2, vertexCount * v.stride << 2);
+		updateBuffer(v, @:privateAccess buf.b.offset(bufPos << 2), startVertex * v.stride, vertexCount * v.stride);
 		transition(v, VERTEX_AND_CONSTANT_BUFFER);
 	}
 
@@ -1288,7 +1272,7 @@ class DX12Driver extends h3d.impl.Driver {
 		tmp.heap.type = UPLOAD;
 		var subRes = mipLevel + side * t.mipLevels;
 		var tmpSize = t.t.res.getRequiredIntermediateSize(subRes, 1).low;
-		var tmpBuf = allocBuffer(tmpSize, UPLOAD, GENERIC_READ);
+		var tmpBuf = allocGPU(tmpSize, UPLOAD, GENERIC_READ);
 
 		var upd = new SubResourceData();
 		var stride = @:privateAccess pixels.stride;
@@ -1370,7 +1354,7 @@ class DX12Driver extends h3d.impl.Driver {
 			b = b.next;
 		}
 		if( tmpBuf == null ) {
-			tmpBuf = allocBuffer(size, UPLOAD, GENERIC_READ);
+			tmpBuf = allocGPU(size, UPLOAD, GENERIC_READ);
 			var b = new TempBuffer();
 			b.buffer = tmpBuf;
 			b.size = size;
@@ -1479,7 +1463,7 @@ class DX12Driver extends h3d.impl.Driver {
 				for( i in 0...shader.bufferCount ) {
 					var srv = frame.shaderResourceViews.alloc(1);
 					var b = buf.buffers[i];
-					var cbv = @:privateAccess b.buffer.vbuf;
+					var cbv = @:privateAccess b.vbuf;
 					if( cbv.view != null )
 						throw "Buffer was allocated without UniformBuffer flag";
 					transition(cbv, VERTEX_AND_CONSTANT_BUFFER);
@@ -1527,36 +1511,37 @@ class DX12Driver extends h3d.impl.Driver {
 
 	override function selectBuffer(buffer:Buffer) {
 		var views = tmp.vertexViews;
-		var bview = @:privateAccess buffer.buffer.vbuf.view;
+		var bview = @:privateAccess buffer.vbuf.view;
+		var map = buffer.format.resolveMapping(currentShader.format);
+		var vbuf = @:privateAccess buffer.vbuf;
 		for( i in 0...currentShader.inputCount ) {
 			var v = views[i];
+			var inf = map[i];
 			v.bufferLocation = bview.bufferLocation;
 			v.sizeInBytes = bview.sizeInBytes;
 			v.strideInBytes = bview.strideInBytes;
-			pipelineSignature.setUI8(PSIGN_BUF_OFFSETS + i, currentShader.inputOffsets[i]);
+			pipelineSignature.setUI8(PSIGN_LAYOUT + i, inf.offset | inf.precision.toInt());
 		}
 		needPipelineFlush = true;
 		frame.commandList.iaSetVertexBuffers(0, currentShader.inputCount, views[0]);
 	}
 
-	override function selectMultiBuffers(buffers:h3d.Buffer.BufferOffset) {
+	override function selectMultiBuffers(formats:hxd.BufferFormat.MultiFormat,buffers:Array<h3d.Buffer>) {
 		var views = tmp.vertexViews;
-		var bufferCount = 0;
-		while( buffers != null ) {
-			var v = views[bufferCount];
-			var bview = @:privateAccess buffers.buffer.buffer.vbuf.view;
+		var map = formats.resolveMapping(currentShader.format);
+		for( i in 0...map.length ) {
+			var v = views[i];
+			var inf = map[i];
+			var bview = @:privateAccess buffers[inf.bufferIndex].vbuf.view;
 			v.bufferLocation = bview.bufferLocation;
 			v.sizeInBytes = bview.sizeInBytes;
 			v.strideInBytes = bview.strideInBytes;
-			pipelineSignature.setUI8(PSIGN_BUF_OFFSETS + bufferCount, buffers.offset);
-			buffers = buffers.next;
-			bufferCount++;
+			pipelineSignature.setUI8(PSIGN_LAYOUT + i, inf.offset | inf.precision.toInt());
 		}
 		needPipelineFlush = true;
-		frame.commandList.iaSetVertexBuffers(0, bufferCount, views[0]);
+		frame.commandList.iaSetVertexBuffers(0, map.length, views[0]);
 	}
 
-
 	static var CULL : Array<CullMode> = [NONE,BACK,FRONT,NONE];
 	static var BLEND_OP : Array<BlendOp> = [ADD,SUBTRACT,REV_SUBTRACT,MIN,MAX];
 	static var COMP : Array<ComparisonFunc> = [ALWAYS, NEVER, EQUAL, NOT_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL];
@@ -1618,7 +1603,29 @@ class DX12Driver extends h3d.impl.Driver {
 
 		for( i in 0...shader.inputCount ) {
 			var d = shader.inputLayout[i];
-			d.alignedByteOffset = pipelineSignature.getUI8(PSIGN_BUF_OFFSETS + i) << 2;
+			var offset = pipelineSignature.getUI8(PSIGN_LAYOUT + i);
+			trace(offset);
+			d.alignedByteOffset = offset & ~3;
+			d.format = @:privateAccess switch( [shader.format.inputs[i].type, new hxd.BufferFormat.Precision(offset&3)] ) {
+			case [DFloat, F32]: R32_FLOAT;
+			case [DFloat, F16]: R16_FLOAT;
+			case [DFloat, S8]: R8_SNORM;
+			case [DFloat, U8]: R8_UNORM;
+			case [DVec2, F32]: R32G32_FLOAT;
+			case [DVec2, F16]: R16G16_FLOAT;
+			case [DVec2, S8]: R8G8_SNORM;
+			case [DVec2, U8]: R8G8_UNORM;
+			case [DVec3, F32]: R32G32B32_FLOAT;
+			case [DVec3, F16]: R16G16B16A16_FLOAT; // padding
+			case [DVec3, S8]: R8G8B8A8_SNORM; // padding
+			case [DVec3, U8]: R8G8B8A8_UNORM; // padding
+			case [DVec4, F32]: R32G32B32A32_FLOAT;
+			case [DVec4, F16]: R16G16B16A16_FLOAT;
+			case [DVec4, S8]: R8G8B8A8_SNORM;
+			case [DVec4, U8]: R8G8B8A8_UNORM;
+			case [DBytes4, _]: R8G8B8A8_UINT;
+			default: throw "assert";
+			};
 		}
 
 		var stencil = stencilMask != 0 || stencilOp != 0;
@@ -1646,7 +1653,7 @@ class DX12Driver extends h3d.impl.Driver {
 		if( !needPipelineFlush ) return;
 		needPipelineFlush = false;
 		var signature = pipelineSignature;
-		var signatureSize = PSIGN_BUF_OFFSETS + currentShader.inputCount;
+		var signatureSize = PSIGN_LAYOUT + currentShader.inputCount;
 		adlerOut.setI32(0, 0);
 		hl.Format.digest(adlerOut, signature, signatureSize, 3);
 		var hash = adlerOut.getI32(0);
@@ -1754,7 +1761,7 @@ class DX12Driver extends h3d.impl.Driver {
 		if( frame.queryCurrentHeap == 0 )
 			return;
 		if( frame.queryBuffer == null )
-			frame.queryBuffer = allocBuffer(frame.queryHeaps.length * QUERY_COUNT * 8, READBACK, COPY_DEST);
+			frame.queryBuffer = allocGPU(frame.queryHeaps.length * QUERY_COUNT * 8, READBACK, COPY_DEST);
 		var position = 0;
 		for( i in 0...frame.queryCurrentHeap ) {
 			var count = i < frame.queryCurrentHeap - 1 ? QUERY_COUNT : frame.queryHeapOffset;