9 лет назад · c35935da81
--- a/src/renderer_mtl.h
+++ b/src/renderer_mtl.h
@@ -23,6 +23,8 @@ namespace bgfx { namespace mtl
 
				 	// objects with creation functions starting with 'new' have a refcount of 1 after creation; the object must be destroyed with release.
			
 
				 	// commandBuffer, commandEncoders are autoreleased objects. Needs AutoreleasePool!
			
 
				 
			
 
				+#define MTL_MAX_FRAMES_IN_FLIGHT (3)
			
 
				+	
			
 
				 #define MTL_CLASS(name) \
			
 
				 	class name \
			
 
				 	{ \
			
@@ -35,6 +37,13 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 		typedef void (*mtlCallback)(void* userData);
			
 
				 
			
 
				+	MTL_CLASS(BlitCommandEncoder) // C++ wrapper class (generated by MTL_CLASS) around a blit command encoder object held in m_obj
			
 
				+		void endEncoding() // Finishes encoding on the wrapped encoder.
			
 
				+		{
			
 
				+			[m_obj endEncoding]; // plain forward to the Objective-C object; no state is kept in the wrapper
			
 
				+		}
			
 
				+	MTL_CLASS_END
			
 
				+	
			
 
				 	MTL_CLASS(Buffer)
			
 
				 		void* contents()
			
 
				 		{
			
@@ -148,8 +157,14 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 		id<MTLLibrary> newLibraryWithSource(const char* _source)
			
 
				 		{
			
 
				+			MTLCompileOptions* options = [MTLCompileOptions new];
			
 
				+			//NOTE: fast math is turned off on iOS, because 'When using the fast variants, math functions execute more quickly,
			
 
				+			//      but operate over a **LIMITED RANGE** and their behavior when handling NaN values is not defined.'
			
 
				+			if (BX_ENABLED(BX_PLATFORM_IOS))
			
 
				+				options.fastMathEnabled = NO;
			
 
				+
			
 
				 			NSError* error;
			
 
				-			id<MTLLibrary> lib = [m_obj newLibraryWithSource:@(_source) options:nil error:&error];
			
 
				+			id<MTLLibrary> lib = [m_obj newLibraryWithSource:@(_source) options:options error:&error];
			
 
				 			BX_WARN(NULL == error
			
 
				 				, "Shader compilation failed: %s"
			
 
				 				, [error.localizedDescription cStringUsingEncoding:NSASCIIStringEncoding]
			
@@ -583,10 +598,12 @@ namespace bgfx { namespace mtl
 
				 	struct BufferMtl
			
 
				 	{
			
 
				 		BufferMtl()
			
 
				-			: m_buffer(NULL)
			
 
				-			, m_flags(BGFX_BUFFER_NONE)
			
 
				+			: m_flags(BGFX_BUFFER_NONE)
			
 
				 			, m_dynamic(false)
			
 
				+			, m_bufferIndex(0)
			
 
				 		{
			
 
				+			for (uint32_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
			
 
				+				m_buffers[ii] = NULL;
			
 
				 		}
			
 
				 
			
 
				 		void create(uint32_t _size, void* _data, uint16_t _flags, uint16_t _stride = 0, bool _vertex = false);
			
@@ -594,18 +611,22 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 		void destroy()
			
 
				 		{
			
 
				-			if (NULL != m_buffer)
			
 
				+			for (uint32_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
			
 
				 			{
			
 
				-				[m_buffer release];
			
 
				-				m_buffer = NULL;
			
 
				-				m_dynamic = false;
			
 
				+				MTL_RELEASE(m_buffers[ii]);
			
 
				 			}
			
 
				+			m_dynamic = false;
			
 
				 		}
			
 
				+		
			
 
				+		Buffer getBuffer() const { return m_buffers[m_bufferIndex]; }
			
 
				 
			
 
				-		Buffer   m_buffer;
			
 
				 		uint32_t m_size;
			
 
				 		uint16_t m_flags;
			
 
				+		
			
 
				 		bool m_dynamic;
			
 
				+	private:
			
 
				+		uint8_t  m_bufferIndex;
			
 
				+		Buffer   m_buffers[MTL_MAX_FRAMES_IN_FLIGHT];
			
 
				 	};
			
 
				 
			
 
				 	typedef BufferMtl IndexBufferMtl;
			
--- a/src/renderer_mtl.mm
+++ b/src/renderer_mtl.mm
@@ -18,34 +18,35 @@
 
				 #import <Foundation/Foundation.h>
			
 
				 
			
 
				 #define UNIFORM_BUFFER_SIZE (8*1024*1024)
			
 
				-#define UNIFORM_BUFFER_COUNT (3)
			
 
				 
			
 
				 /*
			
 
				  // known metal shader generation issues:
			
 
				-   03-raymarch: OSX nothing is visible  ( depth/color order should be swapped in fragment output struct)
			
 
				    15-shadowmaps-simple: shader compilation error
			
 
				    16-shadowmaps:  //problem with essl -> metal: SAMPLER2D(u_shadowMap0, 4);  sampler index is lost. Shadowmap is set to slot 4, but
			
 
				       metal shader uses sampler/texture slot 0. this could require changes outside of renderer_mtl?
			
 
				 	  packFloatToRGBA needs highp. currently it uses half.
			
 
				    24-nbody: no generated compute shaders for metal
			
 
				    27-terrain: shaderc generates invalid metal shader for vs_terrain_height_texture. vertex output: half4 gl_Position [[position]], should be float4
			
 
				-
			
 
				+ 
			
 
				 Known issues(driver problems??):
			
 
				   OSX mac mini(late 2014), OSX10.11.3 : nanovg-rendering: color writemask off causes problem...
			
 
				-  iPad mini 2,  iOS 8.1.1:  21-deferred: scissor not working properly
			
 
				-							26-occlusion: doesn't work with two rendercommandencoders, merge should fix this
			
 
				+TODO: check if swap really solves this?	03-raymarch: OSX nothing is visible  ( depth/color order should be swapped in fragment output struct)
			
 
				 
			
 
				+  iPad mini 2,  iOS 8.1.1:  21-deferred: scissor not working properly
			
 
				+							26-occlusion: query doesn't work with two rendercommandencoders, merge should fix this
			
 
				+			Only on this device ( no problem on iPad Air 2 with iOS9.3.1)
			
 
				+ 
			
 
				 TODOs:
			
 
				   07-callback, saveScreenshot should be implemented with one frame latency (using saveScreenshotBegin and End)
			
 
				   - iOS device orientation change is not handled properly
			
 
				-
			
 
				+ 
			
 
				  22-windows: todo support multiple windows
			
 
				-
			
 
				- - optimization: remove heavy sync, merge views with same fb and no clear.
			
 
				+ 
			
 
				+ - optimization: remove sync points, merge views with same fb and no clear.
			
 
				       13-stencil and 16-shadowmaps are very inefficient. every view stores/loads backbuffer data
			
 
				-
			
 
				+ 
			
 
				   - 15-shadowmaps-simple (example needs modification mtxCrop znew = z * 0.5 + 0.5 is not needed ) could be hacked in shader too
			
 
				-
			
 
				+ 
			
 
				  BGFX_RESET_FLIP_AFTER_RENDER on low level renderers should be true? (crashes even with BGFX_RESET_FLIP_AFTER_RENDER because there is
			
 
				  one rendering frame before reset). Do I have absolutely need to send result to View at flip or can I do it in submit?
			
 
				  */
			
@@ -334,7 +335,7 @@ namespace bgfx { namespace mtl
 
				 			: m_metalLayer(NULL)
			
 
				 			, m_backBufferPixelFormatHash(0)
			
 
				 			, m_maxAnisotropy(1)
			
 
				-			, m_uniformBufferIndex(0)
			
 
				+			, m_bufferIndex(0)
			
 
				 			, m_numWindows(1)
			
 
				 			, m_rtMsaa(false)
			
 
				 			, m_drawable(NULL)
			
@@ -405,7 +406,8 @@ namespace bgfx { namespace mtl
 
				 			m_textureDescriptor = newTextureDescriptor();
			
 
				 			m_samplerDescriptor = newSamplerDescriptor();
			
 
				 
			
 
				-			for (uint8_t i=0; i < UNIFORM_BUFFER_COUNT; ++i)
			
 
				+			m_framesSemaphore.post(MTL_MAX_FRAMES_IN_FLIGHT);
			
 
				+			for (uint8_t i=0; i < MTL_MAX_FRAMES_IN_FLIGHT; ++i)
			
 
				 			{
			
 
				 				m_uniformBuffers[i] = m_device.newBufferWithLength(UNIFORM_BUFFER_SIZE, 0);
			
 
				 			}
			
@@ -585,7 +587,7 @@ namespace bgfx { namespace mtl
 
				 				MTL_RELEASE(m_backBufferStencil);
			
 
				 			}
			
 
				 
			
 
				-			for (uint8_t i=0; i < UNIFORM_BUFFER_COUNT; ++i)
			
 
				+			for (uint8_t i=0; i < MTL_MAX_FRAMES_IN_FLIGHT; ++i)
			
 
				 			{
			
 
				 				MTL_RELEASE(m_uniformBuffers[i]);
			
 
				 			}
			
@@ -806,7 +808,7 @@ namespace bgfx { namespace mtl
 
				 				return;
			
 
				 			}
			
 
				 
			
 
				-			//TODO: we should wait for completion of pending commandBuffers
			
 
				+			sync();
			
 
				 			//TODO: implement this with saveScreenshotBegin/End
			
 
				 
			
 
				 			Texture backBuffer = m_drawable.texture;
			
@@ -908,7 +910,7 @@ namespace bgfx { namespace mtl
 
				 			}
			
 
				 
			
 
				 			VertexBufferMtl& vb = m_vertexBuffers[_blitter.m_vb->handle.idx];
			
 
				-			rce.setVertexBuffer(vb.m_buffer, 0, 1);
			
 
				+			rce.setVertexBuffer(vb.getBuffer(), 0, 1);
			
 
				 
			
 
				 			float proj[16];
			
 
				 			bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f);
			
@@ -925,13 +927,20 @@ namespace bgfx { namespace mtl
 
				 			const uint32_t numVertices = _numIndices*4/6;
			
 
				 			if (0 < numVertices)
			
 
				 			{
			
 
				-				m_indexBuffers [_blitter.m_ib->handle.idx].update(0, _numIndices*2, _blitter.m_ib->data);
			
 
				+				m_indexBuffers [_blitter.m_ib->handle.idx].update(0, _numIndices*2, _blitter.m_ib->data, true);
			
 
				 				m_vertexBuffers[_blitter.m_vb->handle.idx].update(0, numVertices*_blitter.m_decl.m_stride, _blitter.m_vb->data, true);
			
 
				 
			
 
				-				m_renderCommandEncoder.drawIndexedPrimitives(MTLPrimitiveTypeTriangle, _numIndices, MTLIndexTypeUInt16, m_indexBuffers[_blitter.m_ib->handle.idx].m_buffer, 0, 1);
			
 
				+				m_renderCommandEncoder.drawIndexedPrimitives(MTLPrimitiveTypeTriangle, _numIndices, MTLIndexTypeUInt16, m_indexBuffers[_blitter.m_ib->handle.idx].getBuffer(), 0, 1);
			
 
				 			}
			
 
				 		}
			
 
				 
			
 
				+		static void commandBufferFinishedCallback(void* _data) // Completion handler for a frame's command buffer; _data is the RendererContextMtl registered with addCompletedHandler in flip().
			
 
				+		{
			
 
				+			RendererContextMtl* renderer = (RendererContextMtl*)_data;
			
 
				+			if ( renderer ) // tolerate a NULL user-data pointer
			
 
				+				renderer->m_framesSemaphore.post(); // signals the semaphore that submit() waits on before starting a frame
			
 
				+		}
			
 
				+
			
 
				 		void flip(HMD& /*_hmd*/) BX_OVERRIDE
			
 
				 		{
			
 
				 			if (NULL == m_drawable
			
@@ -944,11 +953,13 @@ namespace bgfx { namespace mtl
 
				 			m_commandBuffer.presentDrawable(m_drawable);
			
 
				 			MTL_RELEASE(m_drawable);
			
 
				 
			
 
				+			m_commandBuffer.addCompletedHandler(commandBufferFinishedCallback, this);
			
 
				+
			
 
				 			m_commandBuffer.commit();
			
 
				 
			
 
				-			//  using heavy syncing now
			
 
				-			//  TODO: refactor it with double/triple buffering frame data
			
 
				-			m_commandBuffer.waitUntilCompleted();
			
 
				+			MTL_RELEASE(m_prevCommandBuffer);
			
 
				+			m_prevCommandBuffer = m_commandBuffer;
			
 
				+			retain(m_commandBuffer);
			
 
				 
			
 
				 			MTL_RELEASE(m_commandBuffer);
			
 
				 
			
@@ -1306,6 +1317,29 @@ namespace bgfx { namespace mtl
 
				 			return m_backBufferDepth.height();
			
 
				 		}
			
 
				 
			
 
				+		void sync() // Blocks the caller until the command buffer stored in m_prevCommandBuffer (set in flip() after commit) has completed.
			
 
				+		{
			
 
				+			if ( m_prevCommandBuffer ) // skip when there is no previous command buffer to wait for
			
 
				+				m_prevCommandBuffer.waitUntilCompleted();
			
 
				+		}
			
 
				+
			
 
				+		BlitCommandEncoder getBlitCommandEncoder() // Returns the cached blit command encoder, creating it (and a command buffer, if none is open) on first use.
			
 
				+		{
			
 
				+			if ( m_blitCommandEncoder == NULL) // no encoder cached yet
			
 
				+			{
			
 
				+				if ( m_commandBuffer == NULL ) // no command buffer open yet: start one from the queue
			
 
				+				{
			
 
				+					m_commandBuffer = m_commandQueue.commandBuffer();
			
 
				+					retain(m_commandBuffer); // command buffers are autoreleased; keep a reference so it stays valid beyond this call
			
 
				+				}
			
 
				+				
			
 
				+				m_blitCommandEncoder = m_commandBuffer.blitCommandEncoder(); // stays cached until submit() ends encoding and clears it
			
 
				+			}
			
 
				+			
			
 
				+			return m_blitCommandEncoder;
			
 
				+		}
			
 
				+
			
 
				+
			
 
				 		Device        m_device;
			
 
				 		CommandQueue  m_commandQueue;
			
 
				 		CAMetalLayer* m_metalLayer;
			
@@ -1320,11 +1354,14 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 		OcclusionQueryMTL m_occlusionQuery;
			
 
				 
			
 
				+		bx::Semaphore m_framesSemaphore;
			
 
				+
			
 
				 		Buffer   m_uniformBuffer;
			
 
				-		Buffer   m_uniformBuffers[UNIFORM_BUFFER_COUNT];
			
 
				+		Buffer   m_uniformBuffers[MTL_MAX_FRAMES_IN_FLIGHT];
			
 
				 		uint32_t m_uniformBufferVertexOffset;
			
 
				 		uint32_t m_uniformBufferFragmentOffset;
			
 
				-		uint8_t  m_uniformBufferIndex;
			
 
				+
			
 
				+		uint8_t  m_bufferIndex;
			
 
				 
			
 
				 		uint16_t          m_numWindows;
			
 
				 		FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS];
			
@@ -1361,6 +1398,8 @@ namespace bgfx { namespace mtl
 
				 		// currently active objects data
			
 
				 		id <CAMetalDrawable> m_drawable;
			
 
				 		CommandBuffer m_commandBuffer;
			
 
				+		CommandBuffer m_prevCommandBuffer;
			
 
				+		BlitCommandEncoder m_blitCommandEncoder;
			
 
				 		RenderCommandEncoder m_renderCommandEncoder;
			
 
				 	};
			
 
				 
			
@@ -1454,14 +1493,6 @@ namespace bgfx { namespace mtl
 
				 		char* temp = (char*)alloca(tempLen);
			
 
				 		bx::StaticMemoryBlockWriter writer(temp, tempLen);
			
 
				 
			
 
				-		//TODO: remove this hack. some shaders have problem with half<->float conversion
			
 
				-		writeString(&writer
			
 
				-					, "#define half float\n"
			
 
				-					 "#define half2 float2\n"
			
 
				-					 "#define half3 float3\n"
			
 
				-					 "#define half4 float4\n"
			
 
				-					);
			
 
				-
			
 
				 		bx::write(&writer, code, codeLen);
			
 
				 		bx::write(&writer, '\0');
			
 
				 		code = temp;
			
@@ -1892,14 +1923,16 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 		m_size = _size;
			
 
				 		m_flags = _flags;
			
 
				+		m_dynamic = false; //NULL == _data;
			
 
				 
			
 
				 		if (NULL == _data)
			
 
				 		{
			
 
				-			m_buffer = s_renderMtl->m_device.newBufferWithLength(_size, 0);
			
 
				+			for (uint32_t ii = 0; ii < MTL_MAX_FRAMES_IN_FLIGHT; ++ii)
			
 
				+				m_buffers[ii] = s_renderMtl->m_device.newBufferWithLength(_size, 0);
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			m_buffer = s_renderMtl->m_device.newBufferWithBytes(_data, _size, 0);
			
 
				+			m_buffers[0] = s_renderMtl->m_device.newBufferWithBytes(_data, _size, 0);
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1907,7 +1940,12 @@ namespace bgfx { namespace mtl
 
				 	{
			
 
				 		BX_UNUSED(_discard);
			
 
				 
			
 
				-		memcpy( (uint8_t*)m_buffer.contents() + _offset, _data, _size);
			
 
				+			//TODO: cannot call this more than once per frame
			
 
				+		if ( m_dynamic && _discard )
			
 
				+			m_bufferIndex = (m_bufferIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
			
 
				+		else
			
 
				+			s_renderMtl->sync();
			
 
				+		memcpy( (uint8_t*)getBuffer().contents() + _offset, _data, _size);
			
 
				 	}
			
 
				 
			
 
				 	void VertexBufferMtl::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags)
			
@@ -2007,7 +2045,7 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 				desc.storageMode = (MTLStorageMode)(writeOnly||isDepth(TextureFormat::Enum(m_textureFormat))
			
 
				 													? 2 /*MTLStorageModePrivate*/
			
 
				-													: 1 /*MTLStorageModeManaged*/
			
 
				+													: ((BX_ENABLED(BX_PLATFORM_IOS)) ? 0 /* MTLStorageModeShared */ :  1 /*MTLStorageModeManaged*/)
			
 
				 													);
			
 
				 
			
 
				 				desc.usage = MTLTextureUsageShaderRead;
			
@@ -2109,6 +2147,8 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 	void TextureMtl::update(uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem)
			
 
				 	{
			
 
				+		s_renderMtl->sync();
			
 
				+		
			
 
				 		MTLRegion region =
			
 
				 		{
			
 
				 			{ _rect.m_x,     _rect.m_y,      _z     },
			
@@ -2268,8 +2308,19 @@ namespace bgfx { namespace mtl
 
				 
			
 
				 	void RendererContextMtl::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) BX_OVERRIDE
			
 
				 	{
			
 
				-		m_commandBuffer = m_commandQueue.commandBuffer();
			
 
				-		retain(m_commandBuffer); // keep alive to be useable at 'flip'
			
 
				+		m_framesSemaphore.wait();
			
 
				+
			
 
				+		if ( m_commandBuffer == NULL )
			
 
				+		{
			
 
				+			m_commandBuffer = m_commandQueue.commandBuffer();
			
 
				+			retain(m_commandBuffer); // keep alive to be useable at 'flip'
			
 
				+		}
			
 
				+		
			
 
				+		if ( m_blitCommandEncoder )
			
 
				+		{
			
 
				+			m_blitCommandEncoder.endEncoding();
			
 
				+			m_blitCommandEncoder = 0;
			
 
				+		}
			
 
				 
			
 
				 		//TODO: multithreading with multiple commandbuffer
			
 
				 		// is there a FAST way to tell which view is active?
			
@@ -2280,8 +2331,8 @@ namespace bgfx { namespace mtl
 
				 		retain(m_drawable); // keep alive to be useable at 'flip'
			
 
				 #endif
			
 
				 
			
 
				-		m_uniformBuffer = m_uniformBuffers[m_uniformBufferIndex];
			
 
				-		m_uniformBufferIndex = (m_uniformBufferIndex + 1) % UNIFORM_BUFFER_COUNT;
			
 
				+		m_uniformBuffer = m_uniformBuffers[m_bufferIndex];
			
 
				+		m_bufferIndex = (m_bufferIndex + 1) % MTL_MAX_FRAMES_IN_FLIGHT;
			
 
				 		m_uniformBufferVertexOffset = 0;
			
 
				 		m_uniformBufferFragmentOffset = 0;
			
 
				 
			
@@ -2299,13 +2350,13 @@ namespace bgfx { namespace mtl
 
				 		if (0 < _render->m_iboffset)
			
 
				 		{
			
 
				 			TransientIndexBuffer* ib = _render->m_transientIb;
			
 
				-			m_indexBuffers[ib->handle.idx].update(0, _render->m_iboffset, ib->data);
			
 
				+			m_indexBuffers[ib->handle.idx].update(0, _render->m_iboffset, ib->data, true);
			
 
				 		}
			
 
				 
			
 
				 		if (0 < _render->m_vboffset)
			
 
				 		{
			
 
				 			TransientVertexBuffer* vb = _render->m_transientVb;
			
 
				-			m_vertexBuffers[vb->handle.idx].update(0, _render->m_vboffset, vb->data);
			
 
				+			m_vertexBuffers[vb->handle.idx].update(0, _render->m_vboffset, vb->data, true);
			
 
				 		}
			
 
				 
			
 
				 		_render->sort();
			
@@ -2818,12 +2869,12 @@ namespace bgfx { namespace mtl
 
				 						const VertexDecl& vertexDecl = m_vertexDecls[decl];
			
 
				 						uint32_t offset = draw.m_startVertex  * vertexDecl.getStride();
			
 
				 
			
 
				-						rce.setVertexBuffer(vb.m_buffer, offset, 1);
			
 
				+						rce.setVertexBuffer(vb.getBuffer(), offset, 1);
			
 
				 
			
 
				 						if (isValid(draw.m_instanceDataBuffer) )
			
 
				 						{
			
 
				 							const VertexBufferMtl& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx];
			
 
				-							rce.setVertexBuffer(inst.m_buffer, draw.m_instanceDataOffset, 2);
			
 
				+							rce.setVertexBuffer(inst.getBuffer(), draw.m_instanceDataOffset, 2);
			
 
				 						}
			
 
				 					}
			
 
				 				}
			
@@ -2868,7 +2919,7 @@ namespace bgfx { namespace mtl
 
				 								numInstances      = draw.m_numInstances;
			
 
				 								numPrimsRendered  = numPrimsSubmitted*draw.m_numInstances;
			
 
				 
			
 
				-								rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.m_buffer, 0, draw.m_numInstances);
			
 
				+								rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.getBuffer(), 0, draw.m_numInstances);
			
 
				 							}
			
 
				 							else if (prim.m_min <= draw.m_numIndices)
			
 
				 							{
			
@@ -2878,7 +2929,7 @@ namespace bgfx { namespace mtl
 
				 								numInstances      = draw.m_numInstances;
			
 
				 								numPrimsRendered  = numPrimsSubmitted*draw.m_numInstances;
			
 
				 
			
 
				-								rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.m_buffer, draw.m_startIndex * indexSize,numInstances);
			
 
				+								rce.drawIndexedPrimitives(prim.m_type, numIndices, indexType, ib.getBuffer(), draw.m_startIndex * indexSize,numInstances);
			
 
				 							}
			
 
				 						}
			
 
				 						else