123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556 |
- /**
- * Copyright (c) 2006-2017 LOVE Development Team
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the authors be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- **/
- #include "common/config.h"
- #include "StreamBuffer.h"
- #include "OpenGL.h"
- #include "FenceSync.h"
- #include "graphics/Volatile.h"
- #include "common/Exception.h"
- #include "common/memory.h"
- #include <vector>
- #include <algorithm>
- namespace love
- {
- namespace graphics
- {
- namespace opengl
- {
// Number of per-frame sections the fence-synchronized stream buffers in this
// file rotate through (frameIndex wraps modulo this value).
static constexpr int BUFFER_FRAMES = 3;

// Number of FenceSync slots reserved for each of those per-frame sections.
static constexpr int MAX_SYNCS_PER_FRAME = 4;
- class StreamBufferClientMemory final : public love::graphics::StreamBuffer
- {
- public:
- StreamBufferClientMemory(BufferType mode, size_t size)
- : love::graphics::StreamBuffer(mode, size)
- , data(nullptr)
- {
- try
- {
- data = new uint8[size];
- }
- catch (std::exception &)
- {
- throw love::Exception("Out of memory.");
- }
- }
- virtual ~StreamBufferClientMemory()
- {
- delete[] data;
- }
- size_t getUsableSize() const override
- {
- return bufferSize;
- }
- MapInfo map(size_t /*minsize*/) override
- {
- return MapInfo(data, bufferSize);
- }
- size_t unmap(size_t /*usedsize*/) override
- {
- return (size_t) data;
- }
- void markUsed(size_t /*usedsize*/) override { }
- ptrdiff_t getHandle() const override { return 0; }
- private:
- uint8 *data;
- }; // StreamBufferClientMemory
- class StreamBufferSubDataOrphan final : public love::graphics::StreamBuffer, public Volatile
- {
- public:
- StreamBufferSubDataOrphan(BufferType mode, size_t size)
- : love::graphics::StreamBuffer(mode, size)
- , vbo(0)
- , glMode(OpenGL::getGLBufferType(mode))
- , data(nullptr)
- , offset(0)
- , frameOffset(0)
- {
- try
- {
- data = new uint8[size];
- }
- catch (std::exception &)
- {
- throw love::Exception("Out of memory.");
- }
- loadVolatile();
- }
- virtual ~StreamBufferSubDataOrphan()
- {
- unloadVolatile();
- delete[] data;
- }
- size_t getUsableSize() const override
- {
- return bufferSize - frameOffset;
- }
- MapInfo map(size_t minsize) override
- {
- if (offset + minsize > bufferSize)
- {
- offset = 0;
- frameOffset = 0;
- gl.bindBuffer(mode, vbo);
- glBufferData(glMode, bufferSize, nullptr, GL_STREAM_DRAW);
- }
- return MapInfo(data, bufferSize - offset);
- }
- size_t unmap(size_t usedsize) override
- {
- gl.bindBuffer(mode, vbo);
- glBufferSubData(glMode, offset, usedsize, data);
- return offset;
- }
- void markUsed(size_t usedsize) override
- {
- offset += usedsize;
- frameOffset += usedsize;
- }
- void nextFrame() override
- {
- frameOffset = 0;
- }
- ptrdiff_t getHandle() const override { return vbo; }
- bool loadVolatile() override
- {
- if (vbo != 0)
- return true;
- glGenBuffers(1, &vbo);
- gl.bindBuffer(mode, vbo);
- glBufferData(glMode, bufferSize, nullptr, GL_STREAM_DRAW);
- offset = 0;
- frameOffset = 0;
- return true;
- }
- void unloadVolatile() override
- {
- if (vbo == 0)
- return;
- gl.deleteBuffer(vbo);
- vbo = 0;
- }
- protected:
- GLuint vbo;
- GLenum glMode;
- uint8 *data;
- size_t offset;
- size_t frameOffset;
- }; // StreamBufferSubDataOrphan
- class StreamBufferSync : public love::graphics::StreamBuffer
- {
- public:
- StreamBufferSync(BufferType type, size_t size)
- : love::graphics::StreamBuffer(type, size)
- , syncSize((size + MAX_SYNCS_PER_FRAME - 1) / MAX_SYNCS_PER_FRAME)
- , frameIndex(0)
- , frameGPUReadOffset(0)
- , syncs()
- {}
- virtual ~StreamBufferSync() {}
- void nextFrame() override
- {
- getCurrentSync()->fence();
- frameIndex = (frameIndex + 1) % BUFFER_FRAMES;
- frameGPUReadOffset = 0;
- }
- void markUsed(size_t usedsize) override
- {
- int firstSyncIndex = frameGPUReadOffset / syncSize;
- int lastSyncIndex = std::min((frameGPUReadOffset + usedsize), bufferSize - 1) / syncSize;
- // Insert fences for all sync buckets completely filled by this section
- // of the data. The last bucket before the end of the frame will also be
- // handled by nextFrame().
- for (int i = firstSyncIndex; i < lastSyncIndex; i++)
- syncs[frameIndex * MAX_SYNCS_PER_FRAME + i].fence();
- frameGPUReadOffset += usedsize;
- }
- protected:
- const size_t syncSize;
- int frameIndex;
- size_t frameGPUReadOffset;
- FenceSync syncs[MAX_SYNCS_PER_FRAME * BUFFER_FRAMES];
- FenceSync *getCurrentSync()
- {
- return &syncs[frameIndex * MAX_SYNCS_PER_FRAME + frameGPUReadOffset / syncSize];
- }
- }; // StreamBufferSync
- class StreamBufferMapSync final : public StreamBufferSync, public Volatile
- {
- public:
- StreamBufferMapSync(BufferType type, size_t size)
- : StreamBufferSync(type, size)
- , vbo(0)
- , glMode(OpenGL::getGLBufferType(mode))
- {
- loadVolatile();
- }
- ~StreamBufferMapSync()
- {
- unloadVolatile();
- }
- size_t getUsableSize() const override
- {
- return bufferSize - frameGPUReadOffset;
- }
- MapInfo map(size_t /*minsize*/) override
- {
- gl.bindBuffer(mode, vbo);
- MapInfo info;
- info.size = bufferSize - frameGPUReadOffset;
- int firstSyncIndex = frameGPUReadOffset / syncSize;
- int lastSyncIndex = (bufferSize - 1) / syncSize;
- // We're mapping the full range of space left in the buffer, so we
- // need to wait on all of it...
- // FIXME: is it even worth it to have multiple sync objects per frame?
- for (int i = firstSyncIndex; i <= lastSyncIndex; i++)
- syncs[frameIndex * MAX_SYNCS_PER_FRAME + i].cpuWait();
- GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
- size_t mapoffset = (frameIndex * bufferSize) + frameGPUReadOffset;
- info.data = (uint8 *) glMapBufferRange(glMode, mapoffset, info.size, flags);
- return info;
- }
- size_t unmap(size_t usedsize) override
- {
- gl.bindBuffer(mode, vbo);
- glFlushMappedBufferRange(glMode, 0, usedsize);
- glUnmapBuffer(glMode);
- return (frameIndex * bufferSize) + frameGPUReadOffset;
- }
- ptrdiff_t getHandle() const override { return vbo; }
- bool loadVolatile() override
- {
- if (vbo != 0)
- return true;
- glGenBuffers(1, &vbo);
- gl.bindBuffer(mode, vbo);
- glBufferData(glMode, bufferSize * BUFFER_FRAMES, nullptr, GL_STREAM_DRAW);
- frameGPUReadOffset = 0;
- frameIndex = 0;
- return true;
- }
- void unloadVolatile() override
- {
- if (vbo != 0)
- {
- gl.deleteBuffer(vbo);
- vbo = 0;
- }
- for (FenceSync &sync : syncs)
- sync.cleanup();
- }
- private:
- GLuint vbo;
- GLenum glMode;
- }; // StreamBufferMapSync
- class StreamBufferPersistentMapSync final : public StreamBufferSync, public Volatile
- {
- public:
- StreamBufferPersistentMapSync(BufferType type, size_t size)
- : StreamBufferSync(type, size)
- , vbo(0)
- , glMode(OpenGL::getGLBufferType(mode))
- , data(nullptr)
- {
- loadVolatile();
- }
- ~StreamBufferPersistentMapSync()
- {
- unloadVolatile();
- }
- size_t getUsableSize() const override
- {
- return bufferSize - frameGPUReadOffset;
- }
- MapInfo map(size_t /*minsize*/) override
- {
- MapInfo info;
- info.size = bufferSize - frameGPUReadOffset;
- info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
- int firstSyncIndex = frameGPUReadOffset / syncSize;
- int lastSyncIndex = (bufferSize - 1) / syncSize;
- // We're mapping the full range of space left in the buffer, so we
- // need to wait on all of it...
- // FIXME: is it even worth it to have multiple sync objects per frame?
- for (int i = firstSyncIndex; i <= lastSyncIndex; i++)
- syncs[frameIndex * MAX_SYNCS_PER_FRAME + i].cpuWait();
- return info;
- }
- size_t unmap(size_t usedsize) override
- {
- size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
- gl.bindBuffer(mode, vbo);
- glFlushMappedBufferRange(glMode, offset, usedsize);
- return offset;
- }
- ptrdiff_t getHandle() const override { return vbo; }
- bool loadVolatile() override
- {
- if (vbo != 0)
- return true;
- glGenBuffers(1, &vbo);
- gl.bindBuffer(mode, vbo);
- GLbitfield storageflags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
- GLbitfield mapflags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
- glBufferStorage(glMode, bufferSize * BUFFER_FRAMES, nullptr, storageflags);
- data = (uint8 *) glMapBufferRange(glMode, 0, bufferSize * BUFFER_FRAMES, mapflags);
- frameGPUReadOffset = 0;
- frameIndex = 0;
- return true;
- }
- void unloadVolatile() override
- {
- if (vbo != 0)
- {
- gl.bindBuffer(mode, vbo);
- glUnmapBuffer(glMode);
- gl.deleteBuffer(vbo);
- vbo = 0;
- }
- for (FenceSync &sync : syncs)
- sync.cleanup();
- }
- private:
- GLuint vbo;
- GLenum glMode;
- uint8 *data;
- }; // StreamBufferPersistentMapSync
- class StreamBufferPinnedMemory final : public StreamBufferSync, public Volatile
- {
- public:
- StreamBufferPinnedMemory(BufferType type, size_t size)
- : StreamBufferSync(type, size)
- , vbo(0)
- , glMode(OpenGL::getGLBufferType(mode))
- , data(nullptr)
- , alignedSize(0)
- {
- size_t alignment = getPageSize();
- alignedSize = alignUp(size * BUFFER_FRAMES, alignment);
- if (!alignedMalloc((void **) &data, alignedSize, alignment))
- throw love::Exception("Out of memory.");
- loadVolatile();
- }
- ~StreamBufferPinnedMemory()
- {
- unloadVolatile();
- alignedFree(data);
- }
- size_t getUsableSize() const override
- {
- return bufferSize - frameGPUReadOffset;
- }
- MapInfo map(size_t /*minsize*/) override
- {
- MapInfo info;
- info.size = bufferSize - frameGPUReadOffset;
- info.data = data + (frameIndex * bufferSize) + frameGPUReadOffset;
- int firstSyncIndex = frameGPUReadOffset / syncSize;
- int lastSyncIndex = (bufferSize - 1) / syncSize;
- // We're mapping the full range of space left in the buffer, so we
- // need to wait on all of it...
- // FIXME: is it even worth it to have multiple sync objects per frame?
- for (int i = firstSyncIndex; i <= lastSyncIndex; i++)
- syncs[frameIndex * MAX_SYNCS_PER_FRAME + i].cpuWait();
- return info;
- }
- size_t unmap(size_t usedsize) override
- {
- size_t offset = (frameIndex * bufferSize) + frameGPUReadOffset;
- gl.bindBuffer(mode, vbo);
- glFlushMappedBufferRange(glMode, offset, usedsize);
- return offset;
- }
- ptrdiff_t getHandle() const override { return vbo; }
- bool loadVolatile() override
- {
- if (vbo != 0)
- return true;
- glGenBuffers(1, &vbo);
- glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, vbo);
- glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, alignedSize, data, GL_STREAM_DRAW);
- frameGPUReadOffset = 0;
- frameIndex = 0;
- return true;
- }
- void unloadVolatile() override
- {
- if (vbo != 0)
- {
- // Make sure the GPU has completed work using the memory before
- // freeing it. TODO: Do we need a full glFinish() or is this
- // sufficient?
- glFlush();
- for (FenceSync &sync : syncs)
- sync.cpuWait();
- gl.bindBuffer(mode, vbo);
- gl.deleteBuffer(vbo);
- vbo = 0;
- }
- for (FenceSync &sync : syncs)
- sync.cleanup();
- }
- private:
- GLuint vbo;
- GLenum glMode;
- uint8 *data;
- size_t alignedSize;
- }; // StreamBufferPinnedMemory
- love::graphics::StreamBuffer *CreateStreamBuffer(BufferType mode, size_t size)
- {
- if (gl.isCoreProfile())
- {
- // AMD's pinned memory seems to be faster than persistent mapping, on
- // AMD GPUs.
- if (GLAD_AMD_pinned_memory)
- return new StreamBufferPinnedMemory(mode, size);
- else if (GLAD_VERSION_4_4 || GLAD_ARB_buffer_storage)
- return new StreamBufferPersistentMapSync(mode, size);
- else
- return new StreamBufferSubDataOrphan(mode, size);
- }
- else
- return new StreamBufferClientMemory(mode, size);
- }
- } // opengl
- } // graphics
- } // love
|