Browse Source

Slightly less heavy-handed compute memory barrier implementation.

Also validate whether there are any unbound writable resources when dispatching compute shader work.
Alex Szpakowski 4 years ago
parent
commit
4caeb27047

+ 10 - 1
src/modules/graphics/Graphics.cpp

@@ -1095,8 +1095,17 @@ void Graphics::dispatchThreadgroups(Shader* shader, int x, int y, int z)
 	}
 
 	flushBatchedDraws();
+
+	auto prevshader = Shader::current;
 	shader->attach();
-	dispatch(x, y, z);
+
+	bool success = dispatch(x, y, z);
+
+	if (prevshader != nullptr)
+		prevshader->attach();
+
+	if (!success)
+		throw love::Exception("Compute shader must have resources bound to all writable texture and buffer variables.");
 }
 
 Graphics::BatchedVertexData Graphics::requestBatchedDraw(const BatchedDrawCommand &cmd)

+ 1 - 1
src/modules/graphics/Graphics.h

@@ -938,7 +938,7 @@ protected:
 	virtual Shader *newShaderInternal(StrongRef<ShaderStage> stages[SHADERSTAGE_MAX_ENUM]) = 0;
 	virtual StreamBuffer *newStreamBuffer(BufferUsage type, size_t size) = 0;
 
-	virtual void dispatch(int x, int y, int z) = 0;
+	virtual bool dispatch(int x, int y, int z) = 0;
 
 	virtual void setRenderTargetsInternal(const RenderTargets &rts, int w, int h, int pixelw, int pixelh, bool hasSRGBtexture) = 0;
 

+ 7 - 0
src/modules/graphics/Shader.cpp

@@ -792,6 +792,13 @@ bool Shader::validateInternal(StrongRef<ShaderStage> stages[], std::string &err,
 			bufferReflection.stride = (size_t) info.size;
 			bufferReflection.memberCount = (size_t) info.numMembers;
 
+			if (qualifiers.isReadOnly())
+				bufferReflection.access = ACCESS_READ;
+			else if (qualifiers.isWriteOnly())
+				bufferReflection.access = ACCESS_WRITE;
+			else
+				bufferReflection.access = (Access)(ACCESS_READ | ACCESS_WRITE);
+
 			reflection.storageBuffers[info.name] = bufferReflection;
 		}
 		else

+ 9 - 0
src/modules/graphics/Shader.h

@@ -99,6 +99,13 @@ public:
 		ENTRYPOINT_RAW,
 	};
 
+	enum Access
+	{
+		ACCESS_NONE = 0,
+		ACCESS_READ = (1 << 0),
+		ACCESS_WRITE = (1 << 1),
+	};
+
 	struct SourceInfo
 	{
 		Language language;
@@ -124,6 +131,7 @@ public:
 		};
 
 		UniformType baseType;
+		Access access;
 		TextureType textureType;
 		DataBaseType texelBufferType;
 		bool isDepthSampler;
@@ -240,6 +248,7 @@ protected:
 	{
 		size_t stride;
 		size_t memberCount;
+		Access access;
 	};
 
 	struct ValidationReflection

+ 75 - 2
src/modules/graphics/opengl/Graphics.cpp

@@ -469,10 +469,83 @@ void Graphics::setActive(bool enable)
 	active = enable;
 }
 
-void Graphics::dispatch(int x, int y, int z)
+static bool computeDispatchBarriers(Shader *shader, GLbitfield &preDispatchBarriers, GLbitfield &postDispatchBarriers)
 {
+	// TODO: handle indirect argument buffer types, when those are added.
+	for (auto buffer : shader->getActiveWritableStorageBuffers())
+	{
+		if (buffer == nullptr)
+			return false;
+
+		auto usage = buffer->getUsageFlags();
+
+		postDispatchBarriers |= GL_BUFFER_UPDATE_BARRIER_BIT;
+
+		if (usage & BUFFERUSAGEFLAG_SHADER_STORAGE)
+		{
+			preDispatchBarriers |= GL_SHADER_STORAGE_BARRIER_BIT;
+			postDispatchBarriers |= GL_SHADER_STORAGE_BARRIER_BIT;
+		}
+
+		if (usage & BUFFERUSAGEFLAG_TEXEL)
+			postDispatchBarriers |= GL_TEXTURE_FETCH_BARRIER_BIT;
+
+		if (usage & BUFFERUSAGEFLAG_INDEX)
+			postDispatchBarriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
+
+		if (usage & BUFFERUSAGEFLAG_VERTEX)
+			postDispatchBarriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
+
+		if (usage & (BUFFERUSAGEFLAG_COPY_SOURCE | BUFFERUSAGEFLAG_COPY_DEST))
+			postDispatchBarriers |= GL_PIXEL_BUFFER_BARRIER_BIT;
+	}
+
+	for (auto texture : shader->getActiveWritableTextures())
+	{
+		if (texture == nullptr)
+			return false;
+
+		preDispatchBarriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
+
+		postDispatchBarriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
+			| GL_TEXTURE_UPDATE_BARRIER_BIT
+			| GL_TEXTURE_FETCH_BARRIER_BIT;
+
+		if (texture->isRenderTarget())
+			postDispatchBarriers |= GL_FRAMEBUFFER_BARRIER_BIT;
+	}
+
+	return true;
+}
+
+bool Graphics::dispatch(int x, int y, int z)
+{
+	// Set by higher level code before calling dispatch(x, y, z).
+	auto shader = (Shader *) Shader::current;
+
+	GLbitfield preDispatchBarriers = 0;
+	GLbitfield postDispatchBarriers = 0;
+
+	if (!computeDispatchBarriers(shader, preDispatchBarriers, postDispatchBarriers))
+		return false;
+
+	// glMemoryBarrier before dispatch to make sure non-compute-read ->
+	// compute-write is synced.
+	// TODO: is this needed? spec language around GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
+	// makes me think so.
+	// This is overly conservative (dispatch -> dispatch will have redundant
+	// barriers).
+	if (preDispatchBarriers != 0)
+		glMemoryBarrier(preDispatchBarriers);
+
 	glDispatchCompute(x, y, z);
-	glMemoryBarrier(GL_ALL_BARRIER_BITS); // TODO: Improve synchronization
+
+	// Not as (theoretically) efficient as issuing the barrier right before
+	// they're used later, but much less complicated.
+	if (postDispatchBarriers != 0)
+		glMemoryBarrier(postDispatchBarriers);
+
+	return true;
 }
 
 void Graphics::draw(const DrawCommand &cmd)

+ 1 - 1
src/modules/graphics/opengl/Graphics.h

@@ -68,7 +68,7 @@ public:
 
 	void setActive(bool active) override;
 
-	void dispatch(int x, int y, int z) override;
+	bool dispatch(int x, int y, int z) override;
 
 	void draw(const DrawCommand &cmd) override;
 	void draw(const DrawIndexedCommand &cmd) override;

+ 23 - 8
src/modules/graphics/opengl/Shader.cpp

@@ -120,6 +120,7 @@ void Shader::mapActiveUniforms()
 		u.name = std::string(cname, (size_t) namelen);
 		u.location = glGetUniformLocation(program, u.name.c_str());
 		u.baseType = getUniformBaseType(gltype);
+		u.access = ACCESS_READ;
 		u.textureType = getUniformTextureType(gltype);
 		u.texelBufferType = getUniformTexelBufferType(gltype);
 		u.isDepthSampler = isDepthTextureType(gltype);
@@ -325,6 +326,7 @@ void Shader::mapActiveUniforms()
 		{
 			UniformInfo u = {};
 			u.baseType = UNIFORM_STORAGEBUFFER;
+			u.access = ACCESS_READ;
 
 			GLsizei namelength = 0;
 			glGetProgramResourceName(program, GL_SHADER_STORAGE_BLOCK, sindex, 2048, &namelength, namebuffer);
@@ -337,6 +339,7 @@ void Shader::mapActiveUniforms()
 			{
 				u.bufferStride = reflectionit->second.stride;
 				u.bufferMemberCount = reflectionit->second.memberCount;
+				u.access = reflectionit->second.access;
 			}
 
 			// Make sure previously set uniform data is preserved, and shader-
@@ -369,10 +372,17 @@ void Shader::mapActiveUniforms()
 			if (binding.bindingindex >= 0)
 			{
 				int activeindex = (int)activeStorageBufferBindings.size();
+				activeStorageBufferBindings.push_back(binding);
 
-				storageBufferBindingIndexToActiveBinding[binding.bindingindex] = activeindex;
+				auto p = std::make_pair(activeindex, -1);
 
-				activeStorageBufferBindings.push_back(binding);
+				if (u.access & ACCESS_WRITE)
+				{
+					p.second = (int)activeWritableStorageBuffers.size();
+					activeWritableStorageBuffers.push_back(u.buffers[0]);
+				}
+
+				storageBufferBindingIndexToActiveBinding[binding.bindingindex] = p;
 			}
 
 			uniforms[u.name] = u;
@@ -433,8 +443,9 @@ bool Shader::loadVolatile()
 	textureUnits.clear();
 	textureUnits.push_back(TextureUnit());
 
-	storageBufferBindingIndexToActiveBinding.resize(gl.getMaxShaderStorageBufferBindings(), -1);
+	storageBufferBindingIndexToActiveBinding.resize(gl.getMaxShaderStorageBufferBindings(), std::make_pair(-1, -1));
 	activeStorageBufferBindings.clear();
+	activeWritableStorageBuffers.clear();
 
 	for (const auto &stage : stages)
 	{
@@ -888,7 +899,7 @@ void Shader::sendBuffers(const UniformInfo *info, love::graphics::Buffer **buffe
 		if (texelbinding)
 		{
 			GLuint gltex = 0;
-			if (buffers[i] != nullptr)
+			if (buffer != nullptr)
 				gltex = (GLuint) buffer->getTexelBufferHandle();
 			else
 				gltex = gl.getDefaultTexelBuffer();
@@ -906,7 +917,7 @@ void Shader::sendBuffers(const UniformInfo *info, love::graphics::Buffer **buffe
 			int bindingindex = info->ints[i];
 
 			GLuint glbuffer = 0;
-			if (buffers[i] != nullptr)
+			if (buffer != nullptr)
 				glbuffer = (GLuint) buffer->getHandle();
 			else
 				glbuffer = gl.getDefaultStorageBuffer();
@@ -914,9 +925,13 @@ void Shader::sendBuffers(const UniformInfo *info, love::graphics::Buffer **buffe
 			if (shaderactive)
 				gl.bindIndexedBuffer(glbuffer, BUFFERUSAGE_SHADER_STORAGE, bindingindex);
 
-			int activeindex = storageBufferBindingIndexToActiveBinding[bindingindex];
-			if (activeindex >= 0)
-				activeStorageBufferBindings[activeindex].buffer = glbuffer;
+			auto activeindex = storageBufferBindingIndexToActiveBinding[bindingindex];
+
+			if (activeindex.first >= 0)
+				activeStorageBufferBindings[activeindex.first].buffer = glbuffer;
+
+			if (activeindex.second >= 0)
+				activeWritableStorageBuffers[activeindex.second] = buffer;
 		}
 	}
 }

+ 7 - 1
src/modules/graphics/opengl/Shader.h

@@ -65,6 +65,9 @@ public:
 
 	void updateBuiltinUniforms(love::graphics::Graphics *gfx, int viewportW, int viewportH);
 
+	const std::vector<Buffer *> &getActiveWritableStorageBuffers() const { return activeWritableStorageBuffers; }
+	const std::vector<Texture *> &getActiveWritableTextures() const { return activeWritableTextures; }
+
 private:
 
 	struct TextureUnit
@@ -118,9 +121,12 @@ private:
 	// Texture unit pool for setting textures
 	std::vector<TextureUnit> textureUnits;
 
-	std::vector<int> storageBufferBindingIndexToActiveBinding;
+	std::vector<std::pair<int, int>> storageBufferBindingIndexToActiveBinding;
 	std::vector<BufferBinding> activeStorageBufferBindings;
 
+	std::vector<Buffer *> activeWritableStorageBuffers;
+	std::vector<Texture *> activeWritableTextures;
+
 	std::vector<std::pair<const UniformInfo *, int>> pendingUniformUpdates;
 
 }; // Shader