Jelajahi Sumber

DX12: Fix Texture3D upload and remove some allocations during texture operations.

TothBenoit 6 bulan lalu
induk
melakukan
f79a6890d6
1 mengubah file dengan 69 tambahan dan 35 penghapusan
  1. 69 35
      h3d/impl/DX12Driver.hx

+ 69 - 35
h3d/impl/DX12Driver.hx

@@ -210,6 +210,7 @@ class CompiledShader {
 
 	public var renderTargets : hl.BytesAccess<Address>;
 	public var depthStencils : hl.BytesAccess<Address>;
+	public var copyableInfosBytes : hl.Bytes;
 	public var vertexViews : hl.CArray<VertexBufferView>;
 	public var descriptors2 : hl.NativeArray<DescriptorHeap>;
 	public var barriers : hl.CArray<ResourceBarrier>;
@@ -232,12 +233,15 @@ class CompiledShader {
 	@:packed public var wtexDesc(default,null) : UAVTextureViewDesc;
 	@:packed public var subResourceData(default, null) : SubResourceData;
 	@:packed public var bumpAllocation(default,null) : BumpAllocation;
+	@:packed public var srcTextureLocation(default, null) : TextureCopyLocation;
+	@:packed public var dstTextureLocation(default, null) : TextureCopyLocation;
 
 	public var pass : h3d.mat.Pass;
 
 	public function new() {
 		renderTargets = new hl.Bytes(8 * 8);
 		depthStencils = new hl.Bytes(8);
+		copyableInfosBytes = new hl.Bytes(8 * 3);
 		vertexViews = hl.CArray.alloc(VertexBufferView, 16);
 		maxBarriers = 100;
 		barriers = hl.CArray.alloc( ResourceBarrier, maxBarriers );
@@ -958,13 +962,14 @@ class DX12Driver extends h3d.impl.Driver {
 		if( pixels.width == 0 || pixels.height == 0 )
 			return;
 
-		var totalSize : hl.BytesAccess<Int64> = new hl.Bytes(8);
-		var src = new TextureCopyLocation();
+		var totalSize : hl.BytesAccess<Int64> = tmp.copyableInfosBytes;
+		var src = tmp.srcTextureLocation;
 		src.res = tex.t.res;
+		src.type = SUBRESOURCE_INDEX;
 		src.subResourceIndex = mipLevel + layer * tex.mipLevels;
 		var srcDesc = makeTextureDesc(tex);
 
-		var dst = new TextureCopyLocation();
+		var dst = tmp.dstTextureLocation;
 		dst.type = PLACED_FOOTPRINT;
 		Driver.getCopyableFootprints(srcDesc, src.subResourceIndex, 1, 0, dst.placedFootprint, null, null, totalSize);
 
@@ -1584,7 +1589,7 @@ class DX12Driver extends h3d.impl.Driver {
 
 	function makeTextureDesc(t:h3d.mat.Texture) {
 		var desc = new ResourceDesc();
-		desc.dimension = TEXTURE2D;
+		desc.dimension = t.flags.has(Is3D) ? TEXTURE3D : TEXTURE2D;
 		desc.width = t.width;
 		desc.height = t.height;
 		desc.depthOrArraySize = t.layerCount;
@@ -1682,19 +1687,12 @@ class DX12Driver extends h3d.impl.Driver {
 		if ( side != 0 )
 			offset += t.t.res.getRequiredIntermediateSize( 0, t.mipLevels ) * side;
 
-		var stride = @:privateAccess pixels.stride;
-		switch( t.format ) {
-		case S3TC(n): stride = pixels.width * ((n == 1 || n == 4) ? 2 : 4); // "uncompressed" stride ?
-		default:
-		}
-
-		var upd = tmp.subResourceData;
-		upd.data = (pixels.bytes:hl.Bytes).offset(pixels.offset);
-		upd.rowPitch = stride;
-		upd.slicePitch = pixels.dataSize;
-
-		var subRes = mipLevel + side * t.mipLevels;
+		var is3d = t.flags.has(Is3D);
+		var subRes = is3d ? mipLevel : mipLevel + side * t.mipLevels;
 		var tmpSize = t.t.res.getRequiredIntermediateSize(subRes, 1).low;
+		if ( is3d )
+			tmpSize = Std.int(tmpSize / t.layerCount );
+
 		#if (hldx >= version("1.15.0"))
 		var textureAlignment = Driver.getConstant(TEXTURE_DATA_PLACEMENT_ALIGNMENT);
 		#else
@@ -1704,8 +1702,36 @@ class DX12Driver extends h3d.impl.Driver {
 
 		transition(t.t, COPY_DEST);
 		flushTransitions();
-		if( !Driver.updateSubResource(frame.commandList, t.t.res, allocation.resource, allocation.offset, subRes, 1, upd) )
-			throw "Failed to update sub resource";
+
+		var dst = tmp.dstTextureLocation;
+		dst.res = t.t.res;
+		dst.subResourceIndex = subRes;
+		dst.type = SUBRESOURCE_INDEX;
+		var dstDesc = makeTextureDesc(t);
+
+		var src = tmp.srcTextureLocation;
+		src.res = allocation.resource;
+		src.type = PLACED_FOOTPRINT;
+
+		var numRow : hl.BytesAccess<Int64> = tmp.copyableInfosBytes;
+		var rowSizeInBytes : hl.BytesAccess<Int64> = tmp.copyableInfosBytes.offset(8);
+		Driver.getCopyableFootprints(dstDesc, subRes, 1, allocation.offset, src.placedFootprint, numRow, rowSizeInBytes, null);
+
+		var stride = @:privateAccess pixels.stride;
+		switch( t.format ) {
+		case S3TC(n): stride = pixels.width * ((n == 1 || n == 4) ? 2 : 4); // "uncompressed" stride ?
+		default:
+		}
+		var rowPitch = src.placedFootprint.footprint.rowPitch;
+		var offset = src.placedFootprint.offset.low - allocation.offset;
+		var data = (pixels.bytes:hl.Bytes).offset(pixels.offset);
+		var numRow = numRow[0].low;
+		var rowSizeInBytes = rowSizeInBytes[0].low;
+		for ( i in 0...numRow)
+			allocation.cpuAddress.blit(rowPitch * i + offset, data, stride * i, rowSizeInBytes);
+
+		src.placedFootprint.footprint.depth = 1;
+		frame.commandList.copyTextureRegion(dst, 0, 0, is3d ? side : 0, src, null);
 		transition(t.t, PIXEL_SHADER_RESOURCE);
 
 		t.flags.set(WasCleared);
@@ -1725,10 +1751,14 @@ class DX12Driver extends h3d.impl.Driver {
 		transition( from.t, COPY_SOURCE);
 		transition( to.t, COPY_DEST);
 		flushTransitions();
-		var dst = new TextureCopyLocation();
-		var src = new TextureCopyLocation();
+		var dst = tmp.dstTextureLocation;
+		var src = tmp.srcTextureLocation;
 		dst.res = to.t.res;
+		dst.type = SUBRESOURCE_INDEX;
+		dst.subResourceIndex = 0;
 		src.res = from.t.res;
+		src.type = SUBRESOURCE_INDEX;
+		src.subResourceIndex = 0;
 		frame.commandList.copyTextureRegion(dst, 0, 0, 0, src, null);
 		to.flags.set(WasCleared);
 		for( t in currentRenderTargets )
@@ -1799,6 +1829,20 @@ class DX12Driver extends h3d.impl.Driver {
 		}
 	}
 
+	/**
+		Returns the DXGI format used to read a depth texture of the given
+		format through a shader resource view.
+		Throws for any format other than Depth16, Depth24, Depth24Stencil8 and Depth32.
+	**/
+	inline function toDepthFormat(format : h3d.mat.Data.TextureFormat ) : DxgiFormat {
+		return switch (format) {
+			case Depth16: R16_UNORM;
+			case Depth24 | Depth24Stencil8: R24_UNORM_X8_TYPELESS;
+			case Depth32: R32_FLOAT;
+			default: throw "Unsupported depth format "+ format;
+		}
+	}
+
 	function createSRV( t : h3d.mat.Texture, srvAddr : Address, samplerAddr : Address ) {
 		if (!srvThreadLaunched) {
 			srvThreadLaunched = true;
@@ -1832,27 +1876,17 @@ class DX12Driver extends h3d.impl.Driver {
 			desc.arraySize = t.layerCount;
 			desc.planeSlice = 0;
 			desc.resourceMinLODClamp = 0;
-		} else if ( t.isDepth() ) {
-			var desc = srvArgs.resourceDesc;
-			switch (t.format) {
-				case Depth16:
-					desc.format = R16_UNORM;
-				case Depth24, Depth24Stencil8:
-					desc.format = R24_UNORM_X8_TYPELESS;
-				case Depth32:
-					desc.format = R32_FLOAT;
-				default:
-					throw "Unsupported depth format "+ t.format;
-			}
-			desc.dimension = TEXTURE2D;
+		} else if ( t.flags.has(Is3D) ) {
+			var desc = unsafeCastTo(srvArgs.resourceDesc, Tex3DSRV);
+			desc.format = t.t.format;
+			desc.dimension = TEXTURE3D;
 			desc.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
 			desc.mostDetailedMip = t.startingMip;
 			desc.mipLevels = -1;
-			desc.planeSlice = 0;
 			desc.resourceMinLODClamp = 0;
 		} else {
 			var desc = srvArgs.resourceDesc;
-			desc.format = t.t.format;
+			desc.format = t.isDepth() ? toDepthFormat(t.format) : t.t.format;
 			desc.dimension = TEXTURE2D;
 			desc.shader4ComponentMapping = ShaderComponentMapping.DEFAULT;
 			desc.mostDetailedMip = t.startingMip;