Преглед на файлове

metal: implement storage buffer bindings

Alex Szpakowski преди 3 години
родител
ревизия
30fe00d767
променени са 3 файла, в които са добавени 124 реда и са изтрити 50 реда
  1. 25 11
      src/modules/graphics/metal/Graphics.mm
  2. 4 1
      src/modules/graphics/metal/Shader.h
  3. 95 38
      src/modules/graphics/metal/Shader.mm

+ 25 - 11
src/modules/graphics/metal/Graphics.mm

@@ -742,7 +742,7 @@ id<MTLSamplerState> Graphics::getCachedSampler(const SamplerState &s)
 
 	if (isClampOne(s.wrapU) || isClampOne(s.wrapV) || isClampOne(s.wrapW))
 	{
-		if (@available(macOS 10.12, iOS 10.14, *))
+		if (@available(macOS 10.12, iOS 14.0, *))
 			desc.borderColor = MTLSamplerBorderColorOpaqueWhite;
 	}
 
@@ -1110,8 +1110,10 @@ void Graphics::applyShaderUniforms(id<MTLRenderCommandEncoder> renderEncoder, lo
 	}
 }
 
-static void setVertexBuffers(id<MTLRenderCommandEncoder> encoder, const BufferBindings *buffers, Graphics::RenderEncoderBindings &bindings)
+static void setVertexBuffers(id<MTLRenderCommandEncoder> encoder, love::graphics::Shader *shader, const BufferBindings *buffers, Graphics::RenderEncoderBindings &bindings)
 {
+	Shader *s = (Shader *)shader;
+	int firstBinding = s->getFirstVertexBufferBinding();
 	uint32 allbits = buffers->useBits;
 	uint32 i = 0;
 	while (allbits)
@@ -1122,7 +1124,7 @@ static void setVertexBuffers(id<MTLRenderCommandEncoder> encoder, const BufferBi
 		{
 			auto b = buffers->info[i];
 			id<MTLBuffer> buffer = getMTLBuffer(b.buffer);
-			setBuffer(encoder, bindings, SHADERSTAGE_VERTEX, i + VERTEX_BUFFER_BINDING_START, buffer, b.offset);
+			setBuffer(encoder, bindings, SHADERSTAGE_VERTEX, firstBinding + i, buffer, b.offset);
 		}
 
 		i++;
@@ -1143,7 +1145,7 @@ void Graphics::draw(const DrawCommand &cmd)
 	applyRenderState(encoder, *cmd.attributes);
 	applyShaderUniforms(encoder, Shader::current, cmd.texture);
 
-	setVertexBuffers(encoder, cmd.buffers, renderBindings);
+	setVertexBuffers(encoder, Shader::current, cmd.buffers, renderBindings);
 
 	[encoder drawPrimitives:getMTLPrimitiveType(cmd.primitiveType)
 				vertexStart:cmd.vertexStart
@@ -1166,9 +1168,7 @@ void Graphics::draw(const DrawIndexedCommand &cmd)
 	applyRenderState(encoder, *cmd.attributes);
 	applyShaderUniforms(encoder, Shader::current, cmd.texture);
 
-	[encoder setCullMode:MTLCullModeNone];
-
-	setVertexBuffers(encoder, cmd.buffers, renderBindings);
+	setVertexBuffers(encoder, Shader::current, cmd.buffers, renderBindings);
 
 	auto indexType = cmd.indexType == INDEX_UINT32 ? MTLIndexTypeUInt32 : MTLIndexTypeUInt16;
 
@@ -1226,7 +1226,7 @@ void Graphics::drawQuads(int start, int count, const VertexAttributes &attribute
 	// Some older iOS devices don't support base vertex rendering.
 	if (families.apple[3] || families.mac[1] || families.macCatalyst[1])
 	{
-		setVertexBuffers(encoder, &buffers, renderBindings);
+		setVertexBuffers(encoder, Shader::current, &buffers, renderBindings);
 
 		int basevertex = start * 4;
 
@@ -1255,7 +1255,7 @@ void Graphics::drawQuads(int start, int count, const VertexAttributes &attribute
 
 		for (int quadindex = 0; quadindex < count; quadindex += MAX_QUADS_PER_DRAW)
 		{
-			setVertexBuffers(encoder, &bufferscopy, renderBindings);
+			setVertexBuffers(encoder, Shader::current, &bufferscopy, renderBindings);
 
 			int quadcount = std::min(MAX_QUADS_PER_DRAW, count - quadindex);
 
@@ -1791,6 +1791,7 @@ bool Graphics::isPixelFormatSupported(PixelFormat format, PixelFormatUsageFlags
 		format = getSRGBPixelFormat(format);
 
 	const uint32 sample = PIXELFORMATUSAGEFLAGS_SAMPLE;
+	const uint32 linear = PIXELFORMATUSAGEFLAGS_LINEAR;
 	const uint32 rt = PIXELFORMATUSAGEFLAGS_RENDERTARGET;
 	const uint32 blend = PIXELFORMATUSAGEFLAGS_BLEND;
 	const uint32 msaa = PIXELFORMATUSAGEFLAGS_MSAA;
@@ -1827,7 +1828,7 @@ bool Graphics::isPixelFormatSupported(PixelFormat format, PixelFormatUsageFlags
 		case PIXELFORMAT_R32_FLOAT:
 			if (families.apple[1])
 				flags |= sample | rt | blend | msaa | computewrite;
-			if (families.mac[1])
+			if (families.mac[1] || families.macCatalyst[1])
 				flags |= all;
 			break;
 
@@ -2114,11 +2115,24 @@ void Graphics::initCapabilities()
 		capabilities.limits[LIMIT_TEXTURE_SIZE] = 8192;
 		capabilities.limits[LIMIT_CUBE_TEXTURE_SIZE] = 8192;
 	}
+
+	// TODO: metal doesn't have a good API to query this?
 	capabilities.limits[LIMIT_TEXEL_BUFFER_SIZE] = 128 * 1024 * 1024;
-	capabilities.limits[LIMIT_SHADER_STORAGE_BUFFER_SIZE] = 128 * 1024 * 1024; // TODO;
+
+	if (@available(macOS 10.14, iOS 12.0, *))
+	{
+		NSUInteger buffersize = [device maxBufferLength];
+		capabilities.limits[LIMIT_SHADER_STORAGE_BUFFER_SIZE] = buffersize;
+	}
+	else
+	{
+		capabilities.limits[LIMIT_SHADER_STORAGE_BUFFER_SIZE] = 128 * 1024 * 1024;
+	}
+
 	capabilities.limits[LIMIT_THREADGROUPS_X] = LOVE_INT32_MAX; // TODO: is there a real limit?
 	capabilities.limits[LIMIT_THREADGROUPS_Y] = LOVE_INT32_MAX;
 	capabilities.limits[LIMIT_THREADGROUPS_Z] = LOVE_INT32_MAX;
+
 	if (families.mac[1] || families.macCatalyst[1] || families.apple[2])
 		capabilities.limits[LIMIT_RENDER_TARGETS] = 8;
 	else

+ 4 - 1
src/modules/graphics/metal/Shader.h

@@ -47,7 +47,6 @@ namespace metal
 {
 
 static const int DEFAULT_VERTEX_BUFFER_BINDING = 1;
-static const int VERTEX_BUFFER_BINDING_START = 2;
 
 class Shader final : public love::graphics::Shader
 {
@@ -110,6 +109,8 @@ public:
 	id<MTLComputePipelineState> getComputePipeline() const { return computePipeline; }
 
 	static int getUniformBufferBinding();
+	int getFirstVertexBufferBinding() const { return firstVertexBufferBinding; }
+
 	const std::vector<TextureBinding> &getTextureBindings() const { return textureBindings; }
 	const std::vector<BufferBinding> &getBufferBindings() const { return bufferBindings; }
 
@@ -139,6 +140,8 @@ private:
 	size_t localUniformBufferSize;
 	size_t builtinUniformDataOffset;
 
+	int firstVertexBufferBinding;
+
 	std::map<std::string, int> attributes;
 
 	std::vector<TextureBinding> textureBindings;

+ 95 - 38
src/modules/graphics/metal/Shader.mm

@@ -237,6 +237,11 @@ static inline id<MTLSamplerState> getMTLSampler(love::graphics::Texture *tex)
 	return tex ? (__bridge id<MTLSamplerState>)(void *) tex->getSamplerHandle() : nil;
 }
 
+static inline id<MTLBuffer> getMTLBuffer(love::graphics::Buffer *buffer) // Returns the buffer's handle viewed as an id<MTLBuffer>.
+{
+	return buffer ? (__bridge id<MTLBuffer>)(void *) buffer->getHandle() : nil; // Null buffer yields nil; __bridge casts without changing ownership.
+}
+
 static EShLanguage getGLSLangStage(ShaderStageType stage)
 {
 	switch (stage)
@@ -257,6 +262,7 @@ Shader::Shader(id<MTLDevice> device, StrongRef<love::graphics::ShaderStage> stag
 	, localUniformBufferData(nullptr)
 	, localUniformBufferSize(0)
 	, builtinUniformDataOffset(0)
+	, firstVertexBufferBinding(DEFAULT_VERTEX_BUFFER_BINDING + 1)
 { @autoreleasepool {
 	using namespace glslang;
 
@@ -380,6 +386,11 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 	std::map<std::string, int> varyings;
 	int nextVaryingLocation = 0;
 
+	int metalBufferIndices[SHADERSTAGE_MAX_ENUM];
+	for (int i = 0; i < SHADERSTAGE_MAX_ENUM; i++)
+		metalBufferIndices[i] = getUniformBufferBinding() + 1;
+	metalBufferIndices[SHADERSTAGE_VERTEX] = DEFAULT_VERTEX_BUFFER_BINDING + 1;
+
 	for (int stageindex = 0; stageindex < SHADERSTAGE_MAX_ENUM; stageindex++)
 	{
 		auto glslangstage = getGLSLangStage((ShaderStageType) stageindex);
@@ -420,6 +431,7 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 				u.name = resource.name;
 				u.count = type.array.empty() ? 1 : type.array[0];
 				u.isDepthSampler = type.image.depth;
+				u.components = 1;
 
 				switch (imagetype.basetype)
 				{
@@ -461,7 +473,8 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 					break;
 				}
 
-				u.data = malloc(sizeof(int) * u.count);
+				u.dataSize = sizeof(int) * u.count;
+				u.data = malloc(u.dataSize);
 				for (int i = 0; i < u.count; i++)
 					u.ints[i] = -1; // Initialized below, after compiling.
 
@@ -489,12 +502,13 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 
 			for (const auto &resource : resources.uniform_buffers)
 			{
+				MSLResourceBinding binding;
+				binding.stage = msl.get_execution_model();
+				binding.binding = msl.get_decoration(resource.id, spv::DecorationBinding);
+				binding.desc_set = msl.get_decoration(resource.id, spv::DecorationDescriptorSet);
+
 				if (resource.name == "gl_DefaultUniformBlock")
 				{
-					MSLResourceBinding binding;
-					binding.stage = msl.get_execution_model();
-					binding.binding = msl.get_decoration(resource.id, spv::DecorationBinding);
-					binding.desc_set = msl.get_decoration(resource.id, spv::DecorationDescriptorSet);
 					binding.msl_buffer = getUniformBufferBinding();
 					msl.add_msl_resource_binding(binding);
 
@@ -527,6 +541,7 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 						u.name = msl.get_member_name(type.self, uindex);
 						u.dataSize = membersize;
 						u.count = membertype.array.empty() ? 1 : membertype.array[0];
+						u.components = 1;
 
 						switch (membertype.basetype)
 						{
@@ -568,19 +583,60 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 							builtinUniformInfo[builtin] = &uniforms[u.name];
 						}
 					}
-
+				}
+				else
+				{
+					binding.msl_buffer = metalBufferIndices[stageindex]++;
+					msl.add_msl_resource_binding(binding);
 				}
 			}
 
 			for (const auto &resource : resources.storage_buffers)
 			{
+				MSLResourceBinding binding;
+				binding.stage = msl.get_execution_model();
+				binding.binding = msl.get_decoration(resource.id, spv::DecorationBinding);
+				binding.desc_set = msl.get_decoration(resource.id, spv::DecorationDescriptorSet);
+				binding.msl_buffer = metalBufferIndices[stageindex]++;
+				msl.add_msl_resource_binding(binding);
+
 				auto it = uniforms.find(resource.name);
 				if (it != uniforms.end())
+					continue;
+
+				const SPIRType &type = msl.get_type(resource.type_id);
+
+				UniformInfo u = {};
+				u.baseType = UNIFORM_STORAGEBUFFER;
+				u.components = 1;
+				u.name = resource.name;
+				u.count = type.array.empty() ? 1 : type.array[0];
+
+				const auto reflectionit = validationReflection.storageBuffers.find(u.name);
+				if (reflectionit != validationReflection.storageBuffers.end())
+				{
+					u.bufferStride = reflectionit->second.stride;
+					u.bufferMemberCount = reflectionit->second.memberCount;
+					u.access = reflectionit->second.access;
+				}
+				else
 				{
+					// No reflection info - maybe glslang was better at detecting
+					// dead code than the driver's compiler?
 					continue;
 				}
 
-				// TODO
+				u.buffers = new love::graphics::Buffer*[u.count];
+				u.dataSize = sizeof(int) * u.count;
+				u.data = malloc(u.dataSize);
+
+				for (int i = 0; i < u.count; i++)
+				{
+					u.ints[i] = -1; // Initialized below, after compiling.
+					u.buffers[i] = nullptr; // TODO
+				}
+
+				uniforms[u.name] = u;
 			}
 
 			if (stageindex == SHADERSTAGE_VERTEX)
@@ -741,6 +797,8 @@ void Shader::compileFromGLSLang(id<MTLDevice> device, const glslang::TProgram &p
 			printf("Error parsing SPIR-V shader source: %s\n", e.what());
 		}
 	}
+
+	firstVertexBufferBinding = metalBufferIndices[SHADERSTAGE_VERTEX];
 }
 
 Shader::~Shader()
@@ -896,6 +954,7 @@ void Shader::sendBuffers(const UniformInfo *info, love::graphics::Buffer **buffe
 		else if (storagebinding)
 		{
 			// TODO
+			bufferBindings[info->ints[i]].buffer = getMTLBuffer(buffer);
 		}
 	}
 }
@@ -947,7 +1006,7 @@ id<MTLRenderPipelineState> Shader::getCachedRenderPipeline(const RenderPipelineK
 
 		MTLRenderPipelineColorAttachmentDescriptor *attachment = desc.colorAttachments[i];
 
-		if (@available(macOS 10.15, iOS 13, *))
+		if (@available(macOS 10.15, iOS 13.0, *))
 		{
 			// We already don't really support metal on older systems, this just
 			// silences a compiler warning about it.
@@ -998,47 +1057,45 @@ id<MTLRenderPipelineState> Shader::getCachedRenderPipeline(const RenderPipelineK
 		}
 	}
 
+	MTLVertexDescriptor *vertdesc = [MTLVertexDescriptor vertexDescriptor];
+	const auto &attributes = key.vertexAttributes;
+
+	for (const auto &pair : this->attributes)
 	{
-		MTLVertexDescriptor *vertdesc = [MTLVertexDescriptor vertexDescriptor];
-		const auto &attributes = key.vertexAttributes;
+		int i = pair.second;
+		uint32 bit = 1u << i;
 
-		for (const auto &pair : this->attributes)
+		if (attributes.enableBits & bit)
 		{
-			int i = pair.second;
-			uint32 bit = 1u << i;
+			const auto &attrib = attributes.attribs[i];
+			int metalBufferIndex = firstVertexBufferBinding + attrib.bufferIndex;
 
-			if (attributes.enableBits & bit)
-			{
-				const auto &attrib = attributes.attribs[i];
-				int metalBufferIndex = attrib.bufferIndex + VERTEX_BUFFER_BINDING_START;
+			vertdesc.attributes[i].format = getMTLVertexFormat(attrib.format);
+			vertdesc.attributes[i].offset = attrib.offsetFromVertex;
+			vertdesc.attributes[i].bufferIndex = metalBufferIndex;
 
-				vertdesc.attributes[i].format = getMTLVertexFormat(attrib.format);
-				vertdesc.attributes[i].offset = attrib.offsetFromVertex;
-				vertdesc.attributes[i].bufferIndex = metalBufferIndex;
+			const auto &layout = attributes.bufferLayouts[attrib.bufferIndex];
 
-				const auto &layout = attributes.bufferLayouts[attrib.bufferIndex];
+			bool instanced = attributes.instanceBits & (1u << attrib.bufferIndex);
+			auto step = instanced ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
 
-				bool instanced = attributes.instanceBits & (1u << attrib.bufferIndex);
-				auto step = instanced ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
-
-				vertdesc.layouts[metalBufferIndex].stride = layout.stride;
-				vertdesc.layouts[metalBufferIndex].stepFunction = step;
-			}
-			else
-			{
-				vertdesc.attributes[i].format = MTLVertexFormatFloat4;
-				vertdesc.attributes[i].offset = 0;
-				vertdesc.attributes[i].bufferIndex = DEFAULT_VERTEX_BUFFER_BINDING;
-
-				vertdesc.layouts[DEFAULT_VERTEX_BUFFER_BINDING].stride = sizeof(float) * 4;
-				vertdesc.layouts[DEFAULT_VERTEX_BUFFER_BINDING].stepFunction = MTLVertexStepFunctionConstant;
-				vertdesc.layouts[DEFAULT_VERTEX_BUFFER_BINDING].stepRate = 0;
-			}
+			vertdesc.layouts[metalBufferIndex].stride = layout.stride;
+			vertdesc.layouts[metalBufferIndex].stepFunction = step;
 		}
+		else
+		{
+			vertdesc.attributes[i].format = MTLVertexFormatFloat4;
+			vertdesc.attributes[i].offset = 0;
+			vertdesc.attributes[i].bufferIndex = DEFAULT_VERTEX_BUFFER_BINDING;
 
-		desc.vertexDescriptor = vertdesc;
+			vertdesc.layouts[DEFAULT_VERTEX_BUFFER_BINDING].stride = sizeof(float) * 4;
+			vertdesc.layouts[DEFAULT_VERTEX_BUFFER_BINDING].stepFunction = MTLVertexStepFunctionConstant;
+			vertdesc.layouts[DEFAULT_VERTEX_BUFFER_BINDING].stepRate = 0;
+		}
 	}
 
+	desc.vertexDescriptor = vertdesc;
+
 	NSError *err = nil;
 	id<MTLRenderPipelineState> pipeline = [device newRenderPipelineStateWithDescriptor:desc error:&err];