소스 검색

[HXSL] Added StorageBuffer which maps to StructuredBuffer in DX12 and readonly SSBO in GL.

TothBenoit 9 달 전
부모
커밋
9a2985a72e
10개의 파일이 변경됨: 138줄 추가, 68줄 삭제
  1. 90 53
      h3d/impl/DX12Driver.hx
  2. 11 1
      h3d/impl/GlDriver.hx
  3. 2 2
      h3d/scene/MeshBatch.hx
  4. 6 2
      hxsl/Ast.hx
  5. 2 1
      hxsl/Flatten.hx
  6. 9 5
      hxsl/GlslOut.hx
  7. 10 1
      hxsl/HlslOut.hx
  8. 6 2
      hxsl/MacroParser.hx
  9. 1 1
      hxsl/Macros.hx
  10. 1 0
      hxsl/SharedShader.hx

+ 90 - 53
h3d/impl/DX12Driver.hx

@@ -169,6 +169,7 @@ class ShaderRegisters {
 	public var params : Int;
 	public var buffers : Int;
 	public var cbvCount : Int;
+	public var storageCount : Int;
 	public var textures : Int;
 	public var samplers : Int;
 	public var texturesCount : Int;
@@ -326,11 +327,7 @@ class ResourceData {
 	}
 }
 
-class BufferData extends ResourceData {
-	public var uploaded : Bool;
-}
-
-class VertexBufferData extends BufferData {
+class VertexBufferData extends ResourceData {
 	public var view : dx.Dx12.VertexBufferView;
 	public var iview : dx.Dx12.IndexBufferView;
 	public var size : Int;
@@ -1084,15 +1081,15 @@ class DX12Driver extends h3d.impl.Driver {
 						var baseShaderRegister = descRange.baseShaderRegister;
 						switch ( descRange.rangeType) {
 							case CBV:
-								s += 'CBV(b${baseShaderRegister}, numDescriptors = ${descRange.numDescriptors},';
+								s += 'CBV(b${baseShaderRegister}, numDescriptors = ${descRange.numDescriptors}),';
 							case SRV:
-								s += 'SRV(t${baseShaderRegister}, numDescriptors = ${descRange.numDescriptors},';
+								s += 'SRV(t${baseShaderRegister}, numDescriptors = ${descRange.numDescriptors}),';
 							case SAMPLER:
 								var baseShaderRegister = descRange.baseShaderRegister;
-								s += 'Sampler(s${baseShaderRegister}, space=${descRange.registerSpace}, numDescriptors = ${descRange.numDescriptors},';
+								s += 'Sampler(s${baseShaderRegister}, space=${descRange.registerSpace}, numDescriptors = ${descRange.numDescriptors}),';
 							case UAV:
 								var reg = descRange.baseShaderRegister;
-								s += 'UAV(u${reg}, numDescriptors = ${descRange.numDescriptors},';
+								s += 'UAV(u${reg}, numDescriptors = ${descRange.numDescriptors}),';
 						}
 					}
 					s += 'visibility = ${vis}),';
@@ -1188,14 +1185,24 @@ class DX12Driver extends h3d.impl.Driver {
 					default: throw "assert";
 					}
 					regs.bufferTypes.push(kind);
-					if ( kind == Uniform )
-						regs.cbvCount += 1;
-					else
-						uavCount += 1;
+					switch ( kind ) {
+						case Uniform, Partial:
+							regs.cbvCount++;
+						case Storage, StoragePartial:
+							regs.storageCount++;
+						case RW, RWPartial:
+							uavCount++;
+						default:
+							throw "assert";
+					}
 					p = p.next;
 				}
 
-				var rangArr = allocDescTable(vis, regs.cbvCount > 0 && uavCount > 0 ? 2 : 1);
+				var rangeCount = 0;
+				rangeCount += regs.cbvCount > 0 ? 1 : 0;
+				rangeCount += regs.storageCount > 0 ? 1 : 0;
+				rangeCount += uavCount > 0 ? 1 : 0;
+				var rangArr = allocDescTable(vis, rangeCount);
 				var i = 0;
 				if ( regs.cbvCount > 0 ) {
 					var r = rangArr[i];
@@ -1206,6 +1213,14 @@ class DX12Driver extends h3d.impl.Driver {
 					regCount += regs.cbvCount;
 					i++;
 				}
+				if ( regs.storageCount > 0 ) {
+					var r = rangArr[i];
+					r.rangeType = SRV;
+					r.baseShaderRegister = regs.texturesCount;
+					r.registerSpace = 0;
+					r.numDescriptors = regs.storageCount;
+					i++;
+				}
 				if ( uavCount > 0 ) {
 					var r = rangArr[i];
 					r.rangeType = UAV;
@@ -1217,8 +1232,8 @@ class DX12Driver extends h3d.impl.Driver {
 				}
 			}
 			if( sh.texturesCount > 0 ) {
-				regs.texturesCount = 0;
 				regs.texturesTypes = [];
+				var uavCount = 0;
 
 				var p = sh.data.vars;
 				for( v in sh.data.vars ) {
@@ -1229,21 +1244,26 @@ class DX12Driver extends h3d.impl.Driver {
 						if( t.match(TSampler(_)) )
 							regs.texturesCount += n;
 						else {
-							for( i in 0...n )
-								allocConsts(1, vis, UAV);
+							uavCount += n;
 						}
 					default:
 					}
 				}
 
+				regs.textures = paramsCount;
+				var rangeCount = 0;
+				rangeCount += regs.texturesCount > 0 ? 1 : 0;
+				rangeCount += uavCount > 0 ? 1 : 0;
+				var rangeArr = allocDescTable(vis, rangeCount);
+				var i = 0;
 				if( regs.texturesCount > 0 ) {
-					regs.textures = paramsCount;
 
-					var r = allocDescTable(vis)[0];
+					var r = rangeArr[i];
 					r.rangeType = SRV;
-					r.baseShaderRegister = 0;
+					r.baseShaderRegister = regs.storageCount;
 					r.registerSpace = 0;
 					r.numDescriptors = regs.texturesCount;
+					i++;
 
 					regs.samplers = paramsCount;
 					var r = allocDescTable(vis)[0];
@@ -1252,6 +1272,16 @@ class DX12Driver extends h3d.impl.Driver {
 					r.registerSpace = 0;
 					r.numDescriptors = regs.texturesCount;
 				}
+
+				if ( uavCount > 0 ) {
+					var r = rangeArr[i];
+					r.rangeType = UAV;
+					r.baseShaderRegister = regCount;
+					r.registerSpace = 0;
+					r.numDescriptors = uavCount;
+					regCount += uavCount;
+					i++;
+				}
 			}
 			return regs;
 		}
@@ -1456,7 +1486,6 @@ class DX12Driver extends h3d.impl.Driver {
 			buf.view = view;
 		}
 		buf.size = bufSize;
-		buf.uploaded = m.flags.has(Dynamic);
 		return buf;
 	}
 
@@ -1480,7 +1509,7 @@ class DX12Driver extends h3d.impl.Driver {
 		b.data = null;
 	}
 
-	function updateBuffer( b : BufferData, bytes : hl.Bytes, startByte : Int, bytesCount : Int ) {
+	function updateBuffer( b : ResourceData, bytes : hl.Bytes, startByte : Int, bytesCount : Int ) {
 		var alloc = allocDynamicBuffer(bytes, bytesCount);
 		frame.commandList.copyBufferRegion(b.res, startByte, alloc.resource, alloc.offset, bytesCount);
 	}
@@ -1896,13 +1925,14 @@ class DX12Driver extends h3d.impl.Driver {
 				if ( hasBuffersTexturesChanged(buf, regs) ) {
 					regs.lastHeapCount = heapCount;
 					regs.srv = frame.shaderResourceViews.alloc(shader.texturesCount);
-					regs.samplersView = frame.samplerViews.alloc(shader.texturesCount);
+					regs.samplersView = frame.samplerViews.alloc(regs.texturesCount);
 					if ( regs.lastTextures.length < shader.texturesCount ) {
 						regs.lastTextures.resize(shader.texturesCount);
 						regs.lastTexturesBits.resize(shader.texturesCount);
 					}
 					var regIndex = regs.buffers + shader.bufferCount;
-					var outIndex = 0;
+					var textureIndex = 0;
+					var uavIndex = regs.texturesCount;
 					for( i in 0...shader.texturesCount ) {
 						var t = buf.tex[i];
 						var pt = regs.texturesTypes[i];
@@ -1963,54 +1993,52 @@ class DX12Driver extends h3d.impl.Driver {
 								desc.firstArraySlice = 0;
 								desc.arraySize = 1;
 							}
-							Driver.createUnorderedAccessView(t.t.res, null, desc, srv);
-							if( currentShader.isCompute )
-								frame.commandList.setComputeRootDescriptorTable(regIndex++, frame.shaderResourceViews.toGPU(srv));
-							else
-								frame.commandList.setGraphicsRootDescriptorTable(regIndex++, frame.shaderResourceViews.toGPU(srv));
+							Driver.createUnorderedAccessView(t.t.res, null, desc, regs.srv.offset(uavIndex * frame.shaderResourceViews.stride));
+							uavIndex++;
 							continue;
 						default:
+							t.lastFrame = frameCount;
+							var state = if ( shader.kind == Fragment )
+								PIXEL_SHADER_RESOURCE;
+							else
+								NON_PIXEL_SHADER_RESOURCE;
+							transition(t.t, state);
+							createSRV(t, regs.srv.offset(textureIndex * frame.shaderResourceViews.stride), regs.samplersView.offset(textureIndex * frame.samplerViews.stride));
+							textureIndex++;
 						}
-
-						t.lastFrame = frameCount;
-						var state = if ( shader.kind == Fragment )
-							PIXEL_SHADER_RESOURCE;
-						else
-							NON_PIXEL_SHADER_RESOURCE;
-						transition(t.t, state);
-						createSRV(t, regs.srv.offset(outIndex * frame.shaderResourceViews.stride), regs.samplersView.offset(outIndex * frame.samplerViews.stride));
-						outIndex++;
 					}
-				}
-				else {
-					for( i in 0...regs.texturesCount ) {
+				} else {
+					for( i in 0...shader.texturesCount ) {
 						var t = buf.tex[i];
 						if (t == null || t.t == null)
 							continue;
-
-						var state = if ( shader.kind == Fragment )
-							PIXEL_SHADER_RESOURCE;
-						else
-							NON_PIXEL_SHADER_RESOURCE;
+						var pt = regs.texturesTypes[i];
+						var state = switch( pt ) {
+						case TRWTexture(_,_,_):
+							UNORDERED_ACCESS;
+						default:
+							(shader.kind == Fragment ? PIXEL_SHADER_RESOURCE : NON_PIXEL_SHADER_RESOURCE);
+						}
 						transition(t.t, state);
 					}
 				}
 
-				if( regs.texturesCount > 0 ) {
-					if( currentShader.isCompute ) {
-						frame.commandList.setComputeRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
+				if( currentShader.isCompute ) {
+					frame.commandList.setComputeRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
+					if ( regs.texturesCount > 0 )
 						frame.commandList.setComputeRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(regs.samplersView));
-					} else {
-						frame.commandList.setGraphicsRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
+				} else {
+					frame.commandList.setGraphicsRootDescriptorTable(regs.textures, frame.shaderResourceViews.toGPU(regs.srv));
+					if ( regs.texturesCount > 0 )
 						frame.commandList.setGraphicsRootDescriptorTable(regs.samplers, frame.samplerViews.toGPU(regs.samplersView));
-					}
 				}
 			}
 		case Buffers:
 			if( shader.bufferCount > 0 ) {
 				var srv = frame.shaderResourceViews.alloc(shader.bufferCount);
-				var uavIndex = regs.cbvCount;
 				var cbvIndex = 0;
+				var storageIndex = regs.cbvCount;
+				var uavIndex = regs.cbvCount + regs.storageCount;
 				for( i in 0...shader.bufferCount ) {
 					var b = buf.buffers[i];
 					var cbv = b.vbuf;
@@ -2024,6 +2052,15 @@ class DX12Driver extends h3d.impl.Driver {
 						desc.sizeInBytes = cbv.size;
 						Driver.createConstantBufferView(desc, srv.offset(cbvIndex * frame.shaderResourceViews.stride));
 						cbvIndex++;
+					case Storage:
+						var state = shader.kind == Fragment ? PIXEL_SHADER_RESOURCE : NON_PIXEL_SHADER_RESOURCE;
+						transition(cbv, state);
+						var desc = tmp.bufferSRV;
+						desc.numElements = b.vertices;
+						desc.structureByteStride = b.format.strideBytes;
+						desc.flags = NONE;
+						Driver.createShaderResourceView(cbv.res, desc, srv.offset(storageIndex * frame.shaderResourceViews.stride));
+						storageIndex++;
 					case RW:
 						if( !b.flags.has(ReadWriteBuffer) )
 							throw "Buffer was allocated without ReadWriteBuffer flag";

+ 11 - 1
h3d/impl/GlDriver.hx

@@ -427,6 +427,14 @@ class GlDriver extends Driver {
 			}
 			s.buffers = [for( i in 0...shader.bufferCount ) {
 				switch( s.bufferTypes[i] ) {
+				case Storage:
+					#if js
+					throw "Storage buffer not supported in WebGL";
+					#elseif (hl_ver < version("1.15.0"))
+					throw "Storage buffer support requires -D hl-ver=1.15.0";
+					#else
+					gl.getProgramResourceIndex(p.p,GL.SHADER_STORAGE_BLOCK, "storage_uniform_buffer"+i);
+					#end
 				case RW:
 					#if js
 					throw "RW buffer not supported in WebGL";
@@ -447,7 +455,7 @@ class GlDriver extends Driver {
 				switch( s.bufferTypes[i] ) {
 				case Uniform:
 					gl.uniformBlockBinding(p.p,s.buffers[i],i + start);
-				case RW:
+				case RW, Storage:
 					#if (hl_ver >= version("1.15.0"))
 					gl.shaderStorageBlockBinding(p.p,s.buffers[i], i + start);
 					#end
@@ -619,6 +627,8 @@ class GlDriver extends Driver {
 					switch( s.bufferTypes[i] ) {
 					case Uniform:
 						gl.bindBufferBase(GL.UNIFORM_BUFFER, i + start, buf.buffers[i].vbuf);
+					case Storage:
+						gl.bindBufferBase(0x90D2 /*GL.SHADER_STORAGE_BUFFER*/, i + start, buf.buffers[i].vbuf);
 					case RW:
 						if ( !buf.buffers[i].flags.has(ReadWriteBuffer) )
 							throw "Buffer was allocated without ReadWriteBuffer flag";

+ 2 - 2
h3d/scene/MeshBatch.hx

@@ -70,9 +70,9 @@ class ComputeIndirect extends hxsl.Shader {
 		// n : material offset, n + 1 : subPart ID
 		@const var ENABLE_COUNT_BUFFER : Bool;
 		@param var countBuffer : RWBuffer<Int>;
-		@param var instanceOffsets: RWBuffer<Int>;
+		@param var instanceOffsets: StorageBuffer<Int>;
 		@param var commandBuffer : RWBuffer<Int>;
-		@param var instanceData : RWPartialBuffer<{ modelView : Mat4 }>;
+		@param var instanceData : StoragePartialBuffer<{ modelView : Mat4 }>;
 		@param var radius : Float;
 
 		@const var USING_SUB_PART : Bool = false;

+ 6 - 2
hxsl/Ast.hx

@@ -2,8 +2,10 @@ package hxsl;
 
 enum BufferKind {
 	Uniform;
+	Storage;
 	RW;
 	Partial;
+	StoragePartial;
 	RWPartial;
 }
 
@@ -405,7 +407,7 @@ class Tools {
 				}
 		case TChannel(_):
 			return 3 + MAX_CHANNELS_BITS;
-		case TBuffer(_, _, Partial|RWPartial):
+		case TBuffer(_, _, Partial|StoragePartial|RWPartial):
 			return MAX_PARTIAL_MAPPINGS_BITS;
 		default:
 		}
@@ -413,7 +415,7 @@ class Tools {
 	}
 
 	public static function isConst( v : TVar ) {
-		if( v.type.match(TChannel(_)|TBuffer(_,_,Partial|RWPartial)) )
+		if( v.type.match(TChannel(_)|TBuffer(_,_,Partial|StoragePartial|RWPartial)) )
 			return true;
 		if( v.qualifiers != null )
 			for( q in v.qualifiers )
@@ -473,8 +475,10 @@ class Tools {
 		case TBuffer(t, s, k):
 			var prefix = switch( k ) {
 			case Uniform: "Buffer";
+			case Storage: "StorageBuffer";
 			case RW: "RWBuffer";
 			case Partial: "PartialBuffer";
+			case StoragePartial: "StoragePartialBuffer";
 			case RWPartial: "RWPartialBuffer";
 			};
 			prefix+" "+toString(t) + "[" + (switch( s ) { case SConst(i): "" + i; case SVar(v): v.name; } ) + "]";

+ 2 - 1
hxsl/Flatten.hx

@@ -64,6 +64,7 @@ class Flatten {
 			packTextures(prefix + "Textures" + name, allVars, t.rw == 0 ? TSampler(t.dim, t.arr) : TRWTexture(t.dim, t.arr, t.rw));
 		}
 		packBuffers("buffers", allVars, Uniform);
+		packBuffers("storagebuffers", allVars, Storage);
 		packBuffers("rwbuffers", allVars, RW);
 		var funs = [for( f in s.funs ) mapFun(f, mapExpr)];
 		return {
@@ -416,7 +417,7 @@ class Flatten {
 			case TBuffer(t,SConst(size),k) if( kind == k ):
 				var stride = Math.ceil(t.size()/4);
 				var bt = switch( t ) {
-				case TInt|TFloat if( kind.match( RW|RWPartial ) ) :
+				case TInt|TFloat if( kind.match( Storage|RW|StoragePartial|RWPartial ) ) :
 					v.type;
 				default:
 					// for buffers of complex types, let's perform our own remaping

+ 9 - 5
hxsl/GlslOut.hx

@@ -227,6 +227,8 @@ class GlslOut {
 		case TBuffer(t, size, kind):
 			switch( kind ) {
 			case Uniform, Partial:
+			case Storage, StoragePartial:
+				add("storage_");
 			case RW, RWPartial:
 				add("rw_");
 			}
@@ -484,7 +486,7 @@ class GlslOut {
 			add("clamp(");
 			addValue(e, tabs);
 			add(", 0., 1.)");
-		case TCall( { e : TGlobal(AtomicAdd) }, args):			
+		case TCall( { e : TGlobal(AtomicAdd) }, args):
 			add("atomicAdd(");
 			addValue(args[0], tabs);
 			add("[");
@@ -717,6 +719,10 @@ class GlslOut {
 		switch( v.kind ) {
 		case Param, Global:
 			switch( v.type ) {
+			case TBuffer(_, _, Storage|StoragePartial):
+				if ( version < 430 )
+					throw "SSBO are available since version 4.3";
+				add("layout(std430) readonly buffer ");
 			case TBuffer(_, _, RW|RWPartial):
 				if ( version < 430 )
 					throw "SSBO are available since version 4.3";
@@ -726,10 +732,8 @@ class GlslOut {
 				switch( kind ) {
 				case Uniform, Partial:
 					add("uniform ");
-				case RW, RWPartial:
-					if ( version < 430 )
-						throw "SSBO are available since version 4.3";
-					add("buffer ");
+				default:
+					throw "assert";
 				}
 			case TArray(TRWTexture(_, _, chans), _):
 				var format = "rgba".substr(0, chans);

+ 10 - 1
hxsl/HlslOut.hx

@@ -214,6 +214,11 @@ class HlslOut {
 				kind : v.kind,
 			});
 			addArraySize(size);
+		case TBuffer(t, size, Storage):
+			add('StructuredBuffer<');
+			addType(t);
+			add('> ');
+			ident(v);
 		case TBuffer(t, size, RW):
 			add('RWStructuredBuffer<');
 			addType(t);
@@ -855,12 +860,16 @@ class HlslOut {
 		add("};\n\n");
 
 		var regCount = baseRegister + 2;
+		var storageRegister = 0;
 		for( b in buffers.concat(uavs) ) {
 			switch( b.type ) {
 			case TBuffer(t, size, Uniform):
 				add('cbuffer _buffer$regCount : register(b${regCount++}) { ');
 				addVar(b);
 				add("; };\n");
+			case TBuffer(t, size, Storage):
+				addVar(b);
+				add(' : register(t${storageRegister++});\n');
 			default:
 				addVar(b);
 				add(' : register(u${regCount++});\n');
@@ -869,7 +878,7 @@ class HlslOut {
 		if( buffers.length + uavs.length > 0 ) add("\n");
 
 		var ctx = new Samplers();
-		var texCount = 0;
+		var texCount = storageRegister;
 		for( v in textures ) {
 			addVar(v);
 			add(' : register(t${texCount});\n');

+ 6 - 2
hxsl/MacroParser.hx

@@ -147,18 +147,20 @@ class MacroParser {
 			var t = getTexDim(name.substr(9), (dim,arr) -> TRWTexture(dim,arr,chans));
 			if( t != null )
 				return t;
-		case TPath( { pack : [], name : name = ("RWBuffer"|"RWPartialBuffer"), sub : null, params : [t] } ):
+		case TPath( { pack : [], name : name = ("StorageBuffer"|"RWBuffer"|"StoragePartialBuffer"|"RWPartialBuffer"), sub : null, params : [t] } ):
 			var t = switch( t ) {
 				case TPType(t): parseType(t, pos);
 				default: null;
 				}
 			if( t != null )
 				return switch( name ) {
+				case "StorageBuffer" : TBuffer(t, SConst(0), Storage);
 				case "RWBuffer": TBuffer(t,SConst(0),RW);
+				case "StoragePartialBuffer" : TBuffer(t, SConst(0), StoragePartial);
 				case "RWPartialBuffer": TBuffer(t,SConst(0),RWPartial);
 				default: throw "assert";
 				}
-		case TPath( { pack : [], name : name = ("Array"|"Buffer"|"RWBuffer"|"PartialBuffer"|"RWPartialBuffer"), sub : null, params : [t, size] } ):
+		case TPath( { pack : [], name : name = ("Array"|"Buffer"|"StorageBuffer"|"RWBuffer"|"PartialBuffer"|"StoragePartialBuffer"|"RWPartialBuffer"), sub : null, params : [t, size] } ):
 			var t = switch( t ) {
 			case TPType(t): parseType(t, pos);
 			default: null;
@@ -175,8 +177,10 @@ class MacroParser {
 				return switch( name ) {
 				case "Array": TArray(t, size);
 				case "Buffer": TBuffer(t,size,Uniform);
+				case "StorageBuffer": TBuffer(t,size,Storage);
 				case "RWBuffer": TBuffer(t,size,RW);
 				case "PartialBuffer": TBuffer(t,size,Partial);
+				case "StoragePartialBuffer": TBuffer(t,size,StoragePartial);
 				case "RWPartialBuffer": TBuffer(t,size,RWPartial);
 				default: throw "assert";
 				}

+ 1 - 1
hxsl/Macros.hx

@@ -261,7 +261,7 @@ class Macros {
 					if( $p == null ) $psel = Unknown else if( $psel == Unknown ) $defFormat;
 					constBits |= ((globals.allocChannelID($p) << 3) | Type.enumIndex($psel)) << $v{ c.pos };
 				});
-			case TBuffer(_,_,Partial|RWPartial):
+			case TBuffer(_,_,Partial|StoragePartial|RWPartial):
 				var psel = getPath(c.v,"Format");
 				exprs.push(macro {
 					if( $p == null ) throw "Partial buffer is not set";

+ 1 - 0
hxsl/SharedShader.hx

@@ -154,6 +154,7 @@ class SharedShader {
 				var fullT = makeBufferType(v, t, fmt);
 				v.type = TBuffer(fullT, size, switch( kind ) {
 					case Partial: Uniform;
+					case StoragePartial: Storage;
 					case RWPartial: RW;
 					default: throw "assert";
 				});