Browse Source

Use a more efficient method of streaming automatically batched vertices on AMD Windows and Linux systems.

--HG--
branch : minor
Alex Szpakowski 7 years ago
parent
commit
8672ff2272

+ 2 - 0
CMakeLists.txt

@@ -274,6 +274,8 @@ set(LOVE_SRC_COMMON
 	src/common/Matrix.h
 	src/common/Memoizer.cpp
 	src/common/Memoizer.h
+	src/common/memory.cpp
+	src/common/memory.h
 	src/common/Module.cpp
 	src/common/Module.h
 	src/common/Object.cpp

+ 26 - 1
platform/xcode/Images.xcassets/iOS AppIcon.appiconset/Contents.json

@@ -1,5 +1,15 @@
 {
   "images" : [
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "3x"
+    },
     {
       "size" : "29x29",
       "idiom" : "iphone",
@@ -54,6 +64,16 @@
       "filename" : "[email protected]",
       "scale" : "3x"
     },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
     {
       "size" : "29x29",
       "idiom" : "ipad",
@@ -119,10 +139,15 @@
       "idiom" : "ipad",
       "filename" : "[email protected]",
       "scale" : "2x"
+    },
+    {
+      "idiom" : "ios-marketing",
+      "size" : "1024x1024",
+      "scale" : "1x"
     }
   ],
   "info" : {
     "version" : 1,
     "author" : "xcode"
   }
-}
+}

+ 10 - 0
platform/xcode/liblove.xcodeproj/project.pbxproj

@@ -885,6 +885,9 @@
 		FA4F2C121DE936FE00CA37D7 /* unixtcp.c in Sources */ = {isa = PBXBuildFile; fileRef = 217DFBCF1D9F6D490055D849 /* unixtcp.c */; };
 		FA4F2C131DE936FE00CA37D7 /* unixudp.c in Sources */ = {isa = PBXBuildFile; fileRef = 217DFBD11D9F6D490055D849 /* unixudp.c */; };
 		FA4F2C141DE936FE00CA37D7 /* usocket.c in Sources */ = {isa = PBXBuildFile; fileRef = 217DFBD51D9F6D490055D849 /* usocket.c */; };
+		FA56AA381FAFF02000A43D5F /* memory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FA56AA361FAFF02000A43D5F /* memory.cpp */; };
+		FA56AA391FAFF02000A43D5F /* memory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FA56AA361FAFF02000A43D5F /* memory.cpp */; };
+		FA56AA3A1FAFF02000A43D5F /* memory.h in Headers */ = {isa = PBXBuildFile; fileRef = FA56AA371FAFF02000A43D5F /* memory.h */; };
 		FA56D9BC1C208A0200D8D3C7 /* libmodplug.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FA56D9BA1C2089EE00D8D3C7 /* libmodplug.a */; };
 		FA577AB016C7507900860150 /* Cocoa.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FA577A7916C71A1700860150 /* Cocoa.framework */; };
 		FA577AC216C7512D00860150 /* FreeType.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FA577A6716C719D900860150 /* FreeType.framework */; };
@@ -1772,6 +1775,8 @@
 		FA4F2BE01DE6650600CA37D7 /* Transform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Transform.h; sourceTree = "<group>"; };
 		FA4F2BE11DE6650600CA37D7 /* wrap_Transform.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = wrap_Transform.cpp; sourceTree = "<group>"; };
 		FA4F2BE21DE6650600CA37D7 /* wrap_Transform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wrap_Transform.h; sourceTree = "<group>"; };
+		FA56AA361FAFF02000A43D5F /* memory.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = memory.cpp; sourceTree = "<group>"; };
+		FA56AA371FAFF02000A43D5F /* memory.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = memory.h; sourceTree = "<group>"; };
 		FA56D9BA1C2089EE00D8D3C7 /* libmodplug.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = libmodplug.a; sourceTree = "<group>"; };
 		FA577A6716C719D900860150 /* FreeType.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = FreeType.framework; path = /Library/Frameworks/FreeType.framework; sourceTree = "<absolute>"; };
 		FA577A6D16C719EA00860150 /* Lua.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Lua.framework; path = /Library/Frameworks/Lua.framework; sourceTree = "<absolute>"; };
@@ -2084,6 +2089,8 @@
 				FA0B79031A958E3B000E1D17 /* Matrix.h */,
 				FA0B79041A958E3B000E1D17 /* Memoizer.cpp */,
 				FA0B79051A958E3B000E1D17 /* Memoizer.h */,
+				FA56AA361FAFF02000A43D5F /* memory.cpp */,
+				FA56AA371FAFF02000A43D5F /* memory.h */,
 				FA0B79061A958E3B000E1D17 /* Module.cpp */,
 				FA0B79071A958E3B000E1D17 /* Module.h */,
 				FA0B79081A958E3B000E1D17 /* Object.cpp */,
@@ -3687,6 +3694,7 @@
 				FA9D8DDB1DEF8411002CD881 /* Stream.h in Headers */,
 				FA0B7AA91A958EA3000E1D17 /* b2RopeJoint.h in Headers */,
 				FACA02F71F5E396B0084B28F /* wrap_DataModule.h in Headers */,
+				FA56AA3A1FAFF02000A43D5F /* memory.h in Headers */,
 				FA0B7E441A95902C000E1D17 /* wrap_CircleShape.h in Headers */,
 				FA0B7EB41A95902C000E1D17 /* System.h in Headers */,
 				FAF1405A1E20934C00F898D2 /* ConstantUnion.h in Headers */,
@@ -4195,6 +4203,7 @@
 				FA1583E21E196180005E603B /* wrap_Shader.cpp in Sources */,
 				FA0B7AB91A958EA3000E1D17 /* enet.cpp in Sources */,
 				FA0B7E281A95902C000E1D17 /* PulleyJoint.cpp in Sources */,
+				FA56AA391FAFF02000A43D5F /* memory.cpp in Sources */,
 				FA0B7A4C1A958EA3000E1D17 /* b2BlockAllocator.cpp in Sources */,
 				FAF1409E1E20934C00F898D2 /* reflection.cpp in Sources */,
 				FA15DFAE1F9B8D360042AB22 /* lutf8lib.c in Sources */,
@@ -4561,6 +4570,7 @@
 				FA0B7E271A95902C000E1D17 /* PulleyJoint.cpp in Sources */,
 				FA1BA0B71E17043400AA2803 /* wrap_Shader.cpp in Sources */,
 				FA0B7B301A958EA3000E1D17 /* wuff.c in Sources */,
+				FA56AA381FAFF02000A43D5F /* memory.cpp in Sources */,
 				FA0B7E031A95902C000E1D17 /* Contact.cpp in Sources */,
 				FA0B7D821A95902C000E1D17 /* CompressedImageData.cpp in Sources */,
 				FAF1409D1E20934C00F898D2 /* reflection.cpp in Sources */,

+ 70 - 0
src/common/memory.cpp

@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) 2006-2017 LOVE Development Team
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ **/
+
+#include "config.h"
+#include "memory.h"
+
+#include <stdlib.h>
+
+#ifdef LOVE_WINDOWS
+#include <malloc.h>
+#else
+#include <unistd.h> // Assume POSIX support.
+#endif
+
+namespace love
+{
+
+/**
+ * Allocates 'size' bytes whose address is a multiple of 'alignment' and
+ * stores the resulting pointer in '*mem'. Uses _aligned_malloc on Windows
+ * and posix_memalign everywhere else.
+ *
+ * posix_memalign additionally requires 'alignment' to be a power of two and
+ * a multiple of sizeof(void *).
+ *
+ * @param mem Receives the allocated pointer, or nullptr on failure.
+ * @param size Number of bytes to allocate.
+ * @param alignment Required alignment of the allocation, in bytes.
+ * @return true if the allocation succeeded, false otherwise.
+ **/
+bool alignedMalloc(void **mem, size_t size, size_t alignment)
+{
+#ifdef LOVE_WINDOWS
+	*mem = _aligned_malloc(size, alignment);
+	return *mem != nullptr;
+#else
+	// posix_memalign returns 0 on success and an error number on failure.
+	if (posix_memalign(mem, alignment, size) != 0)
+	{
+		// The value left in *mem is unspecified on failure; normalize it so
+		// both platforms hand back nullptr when the allocation fails.
+		*mem = nullptr;
+		return false;
+	}
+
+	return true;
+#endif
+}
+
+/**
+ * Releases a pointer using the deallocator that matches the platform's
+ * aligned allocator: free() on non-Windows systems, _aligned_free() on
+ * Windows.
+ **/
+void alignedFree(void *mem)
+{
+#ifndef LOVE_WINDOWS
+	free(mem);
+#else
+	_aligned_free(mem);
+#endif
+}
+
+/**
+ * Returns the size of a memory page in bytes. Non-Windows systems query
+ * sysconf(_SC_PAGESIZE) once and reuse the result; 4096 is returned if the
+ * query fails, and always on Windows.
+ **/
+size_t getPageSize()
+{
+#ifndef LOVE_WINDOWS
+	// Function-local static: the query only runs on the first call.
+	static const long queried = sysconf(_SC_PAGESIZE);
+
+	if (queried <= 0)
+		return 4096;
+
+	return (size_t) queried;
+#else
+	// TODO: Do an actual query.
+	return 4096;
+#endif
+}
+
+/**
+ * Rounds 'size' up to the nearest multiple of 'alignment'. The bit mask
+ * used here only gives correct results when 'alignment' is a power of two.
+ **/
+size_t alignUp(size_t size, size_t alignment)
+{
+	const size_t mask = alignment - 1;
+	const size_t bumped = size + mask;
+	return bumped & ~mask;
+}
+
+} // love

+ 38 - 0
src/common/memory.h

@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) 2006-2017 LOVE Development Team
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ **/
+
+#pragma once
+
+#include <stddef.h>
+
+namespace love
+{
+
+/**
+ * Allocates 'size' bytes aligned to 'alignment' bytes and stores the
+ * resulting pointer in '*mem'.
+ * NOTE(review): callers treat a false return value as an allocation
+ * failure - confirm every platform branch of the implementation follows
+ * that convention.
+ **/
+bool alignedMalloc(void **mem, size_t size, size_t alignment);
+/**
+ * Releases memory with the platform's matching aligned deallocator.
+ * Presumably only valid for pointers obtained from alignedMalloc - confirm.
+ **/
+void alignedFree(void *mem);
+
+/**
+ * Returns the memory page size in bytes (currently a fixed 4096 on
+ * Windows, and the fallback value when the system query fails elsewhere).
+ **/
+size_t getPageSize();
+
+/**
+ * Rounds 'size' up to the nearest multiple of 'alignment'.
+ * 'alignment' must be a power of two.
+ **/
+size_t alignUp(size_t size, size_t alignment);
+
+} // love

+ 112 - 1
src/modules/graphics/opengl/StreamBuffer.cpp

@@ -24,6 +24,7 @@
 #include "FenceSync.h"
 #include "graphics/Volatile.h"
 #include "common/Exception.h"
+#include "common/memory.h"
 
 #include <vector>
 #include <algorithm>
@@ -427,11 +428,121 @@ private:
 
 }; // StreamBufferPersistentMapSync
 
+/**
+ * Stream buffer whose storage is a page-aligned block of client memory
+ * handed to OpenGL through the GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD target
+ * (AMD_pinned_memory). The CPU writes vertex data directly into that block
+ * and fence syncs are waited on before a region is handed out again.
+ *
+ * bufferSize, frameIndex, frameGPUReadOffset, syncs, syncSize, mode,
+ * BUFFER_FRAMES and MAX_SYNCS_PER_FRAME are not declared in this class;
+ * presumably they come from StreamBufferSync or its bases - confirm.
+ **/
+class StreamBufferPinnedMemory final : public StreamBufferSync, public Volatile
+{
+public:
+
+	/**
+	 * Allocates page-aligned memory large enough for BUFFER_FRAMES frames of
+	 * 'size' bytes each (rounded up to a whole number of pages) and creates
+	 * the GL buffer object that refers to it.
+	 * Throws love::Exception if the aligned allocation reports failure.
+	 **/
+	StreamBufferPinnedMemory(BufferType type, size_t size)
+		: StreamBufferSync(type, size)
+		, vbo(0)
+		, glMode(OpenGL::getGLBufferType(mode))
+		, data(nullptr)
+		, alignedSize(0)
+	{
+		size_t alignment = getPageSize();
+		alignedSize = alignUp(size * BUFFER_FRAMES, alignment);
+
+		if (!alignedMalloc((void **) &data, alignedSize, alignment))
+			throw love::Exception("Out of memory.");
+
+		loadVolatile();
+	}
+
+	/**
+	 * Destroys the GL buffer (after waiting on the fences) before releasing
+	 * the client memory it refers to.
+	 **/
+	~StreamBufferPinnedMemory()
+	{
+		unloadVolatile();
+		alignedFree(data);
+	}
+
+	/**
+	 * Number of bytes left in the current frame's section of the buffer.
+	 **/
+	size_t getUsableSize() const override
+	{
+		return bufferSize - frameGPUReadOffset;
+	}
+
+	/**
+	 * Returns a pointer to the unused remainder of the current frame's
+	 * section, after waiting on every sync object covering that range.
+	 * The requested minimum size is ignored.
+	 **/
+	MapInfo map(size_t /*minsize*/) override
+	{
+		MapInfo info;
+		info.size = bufferSize - frameGPUReadOffset;
+		info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
+
+		// Indices of the first and last sync objects (within this frame)
+		// that cover the range being handed out.
+		int firstSyncIndex = frameGPUReadOffset / syncSize;
+		int lastSyncIndex = (bufferSize - 1) / syncSize;
+
+		// We're mapping the full range of space left in the buffer, so we
+		// need to wait on all of it...
+		// FIXME: is it even worth it to have multiple sync objects per frame?
+		for (int i = firstSyncIndex; i <= lastSyncIndex; i++)
+			syncs[frameIndex * MAX_SYNCS_PER_FRAME + i].cpuWait();
+
+		return info;
+	}
+
+	/**
+	 * Binds the buffer, flushes 'usedsize' bytes starting at the current
+	 * write position, and returns that position as a byte offset from the
+	 * start of the whole buffer.
+	 * NOTE(review): this class never maps the buffer through GL (map()
+	 * returns a pointer into 'data' directly), so confirm that
+	 * glFlushMappedBufferRange is valid and needed here.
+	 **/
+	size_t unmap(size_t usedsize) override
+	{
+		size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
+
+		gl.bindBuffer(mode, vbo);
+		glFlushMappedBufferRange(glMode, offset, usedsize);
+
+		return offset;
+	}
+
+	/** Returns the GL buffer object name. **/
+	ptrdiff_t getHandle() const override { return vbo; }
+
+	/**
+	 * Creates the GL buffer object and points it at the client memory block.
+	 * Does nothing if the buffer object already exists. Always returns true.
+	 **/
+	bool loadVolatile() override
+	{
+		if (vbo != 0)
+			return true;
+
+		glGenBuffers(1, &vbo);
+
+		// With this target, glBufferData receives the client memory pointer
+		// in 'data' together with its page-aligned size.
+		glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, vbo);
+		glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, alignedSize, data, GL_STREAM_DRAW);
+
+		// Start writing from the beginning of the first frame's section.
+		frameGPUReadOffset = 0;
+		frameIndex = 0;
+
+		return true;
+	}
+
+	/**
+	 * Waits on all sync objects, deletes the GL buffer object if it exists,
+	 * and cleans up the sync objects. The client memory block is not freed
+	 * here.
+	 **/
+	void unloadVolatile() override
+	{
+		if (vbo != 0)
+		{
+			// Make sure the GPU has completed work using the memory before
+			// freeing it. TODO: Do we need a full glFinish() or is this
+			// sufficient?
+			glFlush();
+			for (FenceSync &sync : syncs)
+				sync.cpuWait();
+
+			gl.bindBuffer(mode, vbo);
+			gl.deleteBuffer(vbo);
+			vbo = 0;
+		}
+
+		for (FenceSync &sync : syncs)
+			sync.cleanup();
+	}
+
+private:
+
+	// GL buffer object name; 0 while no buffer object exists.
+	GLuint vbo;
+	// GL enum corresponding to the buffer type, used for the flush call.
+	GLenum glMode;
+	// Page-aligned client memory backing the buffer; owned by this object.
+	uint8 *data;
+	// Size of 'data' in bytes, rounded up to a multiple of the page size.
+	size_t alignedSize;
+
+}; // StreamBufferPinnedMemory
+
 love::graphics::StreamBuffer *CreateStreamBuffer(BufferType mode, size_t size)
 {
 	if (gl.isCoreProfile())
 	{
-		if (GLAD_VERSION_4_4 || GLAD_ARB_buffer_storage)
+		// AMD's pinned memory seems to be faster than persistent mapping, on
+		// AMD GPUs.
+		if (GLAD_AMD_pinned_memory)
+			return new StreamBufferPinnedMemory(mode, size);
+		else if (GLAD_VERSION_4_4 || GLAD_ARB_buffer_storage)
 			return new StreamBufferPersistentMapSync(mode, size);
 		else
 			return new StreamBufferSubDataOrphan(mode, size);