Browse Source

Merge branch 'master' of https://github.com/BearishSun/BansheeEngine.git

BearishSun 11 years ago
parent
commit
e39fac04e0

+ 6 - 6
BansheeCore/BansheeCore.vcxproj

@@ -276,7 +276,7 @@
     <ClInclude Include="Include\BsBindableGpuParamBlock.h" />
     <ClInclude Include="Include\BsBindableGpuParams.h" />
     <ClInclude Include="Include\BsCoreThread.h" />
-    <ClInclude Include="Include\BsCPUProfiler.h" />
+    <ClInclude Include="Include\BsProfilerCPU.h" />
     <ClInclude Include="Include\BsDefaultRenderQueue.h" />
     <ClInclude Include="Include\BsDeferredCallManager.h" />
     <ClInclude Include="Include\BsDrawOps.h" />
@@ -287,7 +287,7 @@
     <ClInclude Include="Include\BsGameObjectHandleRTTI.h" />
     <ClInclude Include="Include\BsGameObjectManager.h" />
     <ClInclude Include="Include\BsGameObjectRTTI.h" />
-    <ClInclude Include="Include\BsGPUProfiler.h" />
+    <ClInclude Include="Include\BsProfilerGPU.h" />
     <ClInclude Include="Include\BsGpuResourceData.h" />
     <ClInclude Include="Include\BsGpuParamBlockBuffer.h" />
     <ClInclude Include="Include\BsGpuResource.h" />
@@ -304,7 +304,7 @@
     <ClInclude Include="Include\BsPixelUtil.h" />
     <ClInclude Include="Include\BsPixelVolume.h" />
     <ClInclude Include="Include\BsPlatform.h" />
-    <ClInclude Include="Include\BsProfiler.h" />
+    <ClInclude Include="Include\BsProfilingManager.h" />
     <ClInclude Include="Include\BsQueryManager.h" />
     <ClInclude Include="Include\BsRenderOperation.h" />
     <ClInclude Include="Include\BsRenderQueue.h" />
@@ -420,7 +420,7 @@
     <ClCompile Include="Source\BsBindableGpuParamBlock.cpp" />
     <ClCompile Include="Source\BsBindableGpuParams.cpp" />
     <ClCompile Include="Source\BsCoreThread.cpp" />
-    <ClCompile Include="Source\BsCPUProfiler.cpp" />
+    <ClCompile Include="Source\BsProfilerCPU.cpp" />
     <ClCompile Include="Source\BsDefaultRenderQueue.cpp" />
     <ClCompile Include="Source\BsDeferredCallManager.cpp" />
     <ClCompile Include="Source\BsDrawOps.cpp" />
@@ -443,7 +443,7 @@
     <ClCompile Include="Source\BsGpuParamBlock.cpp" />
     <ClCompile Include="Source\BsGpuParamBlockBuffer.cpp" />
     <ClCompile Include="Source\BsGpuParams.cpp" />
-    <ClCompile Include="Source\BsGPUProfiler.cpp" />
+    <ClCompile Include="Source\BsProfilerGPU.cpp" />
     <ClCompile Include="Source\BsGpuProgInclude.cpp" />
     <ClCompile Include="Source\BsGpuProgram.cpp" />
     <ClCompile Include="Source\BsGpuProgramImporter.cpp" />
@@ -467,7 +467,7 @@
     <ClCompile Include="Source\BsPixelUtil.cpp" />
     <ClCompile Include="Source\BsPixelVolume.cpp" />
     <ClCompile Include="Source\BsPlatform.cpp" />
-    <ClCompile Include="Source\BsProfiler.cpp" />
+    <ClCompile Include="Source\BsProfilingManager.cpp" />
     <ClCompile Include="Source\BsQueryManager.cpp" />
     <ClCompile Include="Source\BsRenderer.cpp" />
     <ClCompile Include="Source\BsRenderQueue.cpp" />

+ 18 - 18
BansheeCore/BansheeCore.vcxproj.filters

@@ -90,9 +90,6 @@
     </Filter>
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="Include\BsGPUProfiler.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
     <ClInclude Include="Include\BsRenderStats.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -108,9 +105,6 @@
     <ClInclude Include="Include\BsUUID.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="Include\BsProfiler.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
     <ClInclude Include="Include\BsPixelVolume.h">
       <Filter>Header Files</Filter>
     </ClInclude>
@@ -120,9 +114,6 @@
     <ClInclude Include="Include\BsDeferredCallManager.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="Include\BsCPUProfiler.h">
-      <Filter>Header Files</Filter>
-    </ClInclude>
     <ClInclude Include="Include\Win32\BsWin32Defs.h">
       <Filter>Header Files\Win32</Filter>
     </ClInclude>
@@ -519,11 +510,17 @@
     <ClInclude Include="Include\BsCorePrerequisites.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="Include\BsProfilerCPU.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Include\BsProfilerGPU.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="Include\BsProfilingManager.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
-    <ClCompile Include="Source\BsGPUProfiler.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="Source\BsCoreApplication.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -533,9 +530,6 @@
     <ClCompile Include="Source\BsUUID.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="Source\BsProfiler.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="Source\BsPlatform.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -551,9 +545,6 @@
     <ClCompile Include="Source\BsDeferredCallManager.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="Source\BsCPUProfiler.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="Source\Win32\BsPlatformWndProc.cpp">
       <Filter>Source Files\Win32</Filter>
     </ClCompile>
@@ -821,5 +812,14 @@
     <ClCompile Include="Source\BsCoreSceneManager.cpp">
       <Filter>Source Files\Scene</Filter>
     </ClCompile>
+    <ClCompile Include="Source\BsProfilerCPU.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Source\BsProfilerGPU.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="Source\BsProfilingManager.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>

+ 0 - 146
BansheeCore/Include/BsProfiler.h

@@ -1,146 +0,0 @@
-#pragma once
-
-#include "BsCorePrerequisites.h"
-#include "BsModule.h"
-#include "BsCPUProfiler.h"
-
-namespace BansheeEngine
-{
-/**
- * @brief	Shortcut for profiling a single function call.
- */
-#define PROFILE_CALL(call, name)						\
-	BansheeEngine::gProfiler().beginSample(##name##);	\
-	call;												\
-	BansheeEngine::gProfiler().endSample(##name##);
-
-	/**
-	 * @brief	Contains data about a profiling session.
-	 */
-	struct ProfilerReport
-	{
-		CPUProfilerReport cpuReport;
-	};
-
-	/**
-	 * @brief	Type of thread used by the profiler.
-	 */
-	enum class ProfiledThread
-	{
-		Sim,
-		Core
-	};
-
-	/**
-	 * @brief	Provides performance measuring methods for the CPU.
-	 */
-	class BS_CORE_EXPORT Profiler : public Module<Profiler>
-	{
-	public:
-		Profiler();
-		~Profiler();
-
-		/**
-		 * @copydoc CPUProfiler::beginThread
-		 */
-		void beginThread(const ProfilerString& name) 
-		{ 
-#if BS_PROFILING_ENABLED
-			mCPUProfiler->beginThread(name); 
-#endif
-		}
-
-		/**
-		 * @copydoc CPUProfiler::endThread
-		 */
-		void endThread() 
-		{ 
-#if BS_PROFILING_ENABLED
-			mCPUProfiler->endThread(); 
-#endif
-		}
-
-		/**
-		 * @copydoc CPUProfiler::beginSample
-		 */
-		void beginSample(const ProfilerString& name) 
-		{ 
-#if BS_PROFILING_ENABLED
-			mCPUProfiler->beginSample(name); 
-#endif
-		}
-
-		/**
-		 * @copydoc CPUProfiler::endSample
-		 */
-		void endSample(const ProfilerString& name) 
-		{ 
-#if BS_PROFILING_ENABLED
-			mCPUProfiler->endSample(name); 
-#endif
-		}
-
-		/**
-		 * @copydoc CPUProfiler::beginSamplePrecise
-		 */
-		void beginSamplePrecise(const ProfilerString& name) 
-		{ 
-#if BS_PROFILING_ENABLED
-			mCPUProfiler->beginSamplePrecise(name); 
-#endif
-		}
-
-		/**
-		 * @copydoc CPUProfiler::endSamplePrecise
-		 */
-		void endSamplePrecise(const ProfilerString& name) 
-		{ 
-#if BS_PROFILING_ENABLED
-			mCPUProfiler->endSamplePrecise(name); 
-#endif
-		}
-
-		/**
-		 * @brief	Called every frame.
-		 *
-		 * @note	Internal method.
-		 */
-		void _update();
-
-		/**
-		 * @brief	Called every frame from the core thread.
-		 * 			
-		 * @note	Internal method. Only call from core thread.
-		 */
-		void _updateCore();
-
-		/**
-		 * @brief	Returns a profiler report for the specified frame, for the specified thread.
-		 *
-		 * @param	Profiler report index, ranging [0, NUM_SAVED_FRAMES]. 0 always returns the latest
-		 * 					 report. Increasing indexes return reports for older and older frames. Out of range
-		 * 					 indexes will be clamped.
-		 *
-		 * @note	Profiler reports get updated every frame. Oldest reports that no longer fit in the saved reports buffer
-		 * 			are discarded.
-		 */
-		const ProfilerReport& getReport(ProfiledThread thread, UINT32 idx = 0) const;
-
-	private:
-		static const UINT32 NUM_SAVED_FRAMES;
-		ProfilerReport* mSavedSimReports;
-		UINT32 mNextSimReportIdx;
-
-		ProfilerReport* mSavedCoreReports;
-		UINT32 mNextCoreReportIdx;
-
-		CPUProfiler* mCPUProfiler;
-
-		BS_MUTEX(mSync);
-	};
-
-	/**
-	 * @brief	Quick way to access the profiler.
-	 */
-	BS_CORE_EXPORT Profiler& gProfiler();
-}

+ 18 - 4
BansheeCore/Include/BsCPUProfiler.h → BansheeCore/Include/BsProfilerCPU.h

@@ -1,6 +1,7 @@
 #pragma once
 
 #include "BsCorePrerequisites.h"
+#include "BsModule.h"
 
 namespace BansheeEngine
 {
@@ -12,7 +13,7 @@ namespace BansheeEngine
 	 * @note	Thread safe. Matching begin*\end* calls
 	 * 			must belong to the same thread though.
 	 */
-	class BS_CORE_EXPORT CPUProfiler
+	class BS_CORE_EXPORT ProfilerCPU : public Module<ProfilerCPU>
 	{
 		/**
 		 * @brief	Timer class responsible for tracking elapsed time.
@@ -272,8 +273,8 @@ namespace BansheeEngine
 		};
 
 	public:
-		CPUProfiler();
-		~CPUProfiler();
+		ProfilerCPU();
+		~ProfilerCPU();
 
 		/**
 		 * @brief	Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
@@ -445,9 +446,22 @@ namespace BansheeEngine
 		const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
 
 	private:
-		friend class CPUProfiler;
+		friend class ProfilerCPU;
 
 		CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
 		CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
 	};
+
+	/**
+	* @brief	Quick way to access the CPU profiler.
+	*/
+	BS_CORE_EXPORT ProfilerCPU& gProfilerCPU();
+
+	/**
+	* @brief	Shortcut for profiling a single function call. Expands to a
+	*			gProfilerCPU().beginSample(name) / gProfilerCPU().endSample(name) pair
+	*			wrapped around "call".
+	*
+	* @note	"name" is forwarded unchanged to beginSample/endSample, so it must be usable
+	*			as a ProfilerString argument. No token pasting is performed on it, because
+	*			pasting "(" or ")" onto the argument does not form a valid preprocessing token.
+	*			The macro expands to several statements; do not use it as the unbraced body
+	*			of an if/else/loop.
+	*/
+#define PROFILE_CALL(call, name)							\
+	BansheeEngine::gProfilerCPU().beginSample(name);		\
+	call;													\
+	BansheeEngine::gProfilerCPU().endSample(name);
 }

+ 2 - 2
BansheeCore/Include/BsGPUProfiler.h → BansheeCore/Include/BsProfilerGPU.h

@@ -56,7 +56,7 @@ namespace BansheeEngine
 	 *
 	 * @note	Core thread only.
 	 */
-	class BS_CORE_EXPORT GPUProfiler : public Module<GPUProfiler>
+	class BS_CORE_EXPORT ProfilerGPU : public Module<ProfilerGPU>
 	{
 	private:
 		struct ActiveSample
@@ -75,7 +75,7 @@ namespace BansheeEngine
 		};
 
 	public:
-		GPUProfiler();
+		ProfilerGPU();
 
 		/**
 		 * @brief	Signals a start of a new frame. Every frame will generate a separate profiling report.

+ 78 - 0
BansheeCore/Include/BsProfilingManager.h

@@ -0,0 +1,78 @@
+#pragma once
+
+#include "BsCorePrerequisites.h"
+#include "BsModule.h"
+#include "BsProfilerCPU.h"
+
+namespace BansheeEngine
+{
+	/**
+	 * @brief	Contains data about a profiling session.
+	 */
+	struct ProfilerReport
+	{
+		CPUProfilerReport cpuReport;
+	};
+
+	/**
+	 * @brief	Type of thread used by the profiler.
+	 */
+	enum class ProfiledThread
+	{
+		Sim,
+		Core
+	};
+
+	/**
+	 * @brief	Tracks CPU profiling information with each frame for sim and core threads.
+	 *
+	 * @note	Sim thread only unless specified otherwise.
+	 */
+	class BS_CORE_EXPORT ProfilingManager : public Module<ProfilingManager>
+	{
+	public:
+		ProfilingManager();
+		~ProfilingManager();
+
+		/**
+		 * @brief	Called every frame. When profiling is enabled (BS_PROFILING_ENABLED), stores the
+		 *			report generated by the CPU profiler into the next slot of the saved sim thread
+		 *			reports, resets the CPU profiler and advances the slot index, wrapping around
+		 *			at NUM_SAVED_FRAMES.
+		 *
+		 * @note	Internal method.
+		 */
+		void _update();
+
+		/**
+		 * @brief	Called every frame from the core thread. When profiling is enabled
+		 *			(BS_PROFILING_ENABLED), stores the report generated by the CPU profiler into the
+		 *			next slot of the saved core thread reports, resets the CPU profiler and advances
+		 *			the slot index, wrapping around at NUM_SAVED_FRAMES. The internal mutex is locked
+		 *			while this happens.
+		 *
+		 * @note	Internal method. Core thread only.
+		 */
+		void _updateCore();
+
+		/**
+		 * @brief	Returns a profiler report for the specified frame, for the specified thread.
+		 *
+		 * @param	thread	Thread (sim or core) whose saved reports to look up.
+		 * @param	idx		Profiler report index, ranging [0, NUM_SAVED_FRAMES - 1]. 0 always returns the latest
+		 * 					report. Increasing indexes return reports for older and older frames. Out of range
+		 * 					indexes will be clamped.
+		 *
+		 * @note	Profiler reports get updated every frame. Oldest reports that no longer fit in the saved reports buffer
+		 * 			are discarded.
+		 */
+		const ProfilerReport& getReport(ProfiledThread thread, UINT32 idx = 0) const;
+
+	private:
+		static const UINT32 NUM_SAVED_FRAMES;
+		ProfilerReport* mSavedSimReports;
+		UINT32 mNextSimReportIdx;
+
+		ProfilerReport* mSavedCoreReports;
+		UINT32 mNextCoreReportIdx;
+
+		BS_MUTEX(mSync);
+	};
+
+	/**
+	 * @brief	Quick way to access the profiling manager.
+	 */
+	BS_CORE_EXPORT ProfilingManager& gProfiler();
+}

+ 14 - 7
BansheeCore/Source/BsCoreApplication.cpp

@@ -30,7 +30,9 @@
 #include "BsDeferredCallManager.h"
 #include "BsCoreThread.h"
 #include "BsStringTable.h"
-#include "BsProfiler.h"
+#include "BsProfilingManager.h"
+#include "BsProfilerCPU.h"
+#include "BsProfilerGPU.h"
 #include "BsQueryManager.h"
 #include "BsThreadPool.h"
 #include "BsTaskScheduler.h"
@@ -54,7 +56,8 @@ namespace BansheeEngine
 		MemStack::beginThread();
 
 		UUIDGenerator::startUp();
-		Profiler::startUp();
+		ProfilerCPU::startUp();
+		ProfilingManager::startUp();
 		ThreadPool::startUp<TThreadPool<ThreadBansheePolicy>>((numWorkerThreads));
 		TaskScheduler::startUp();
 		TaskScheduler::instance().removeWorker();
@@ -79,6 +82,7 @@ namespace BansheeEngine
 
 		loadPlugin(desc.sceneManager, &mSceneManagerPlugin);
 
+		ProfilerGPU::startUp();
 		MeshManager::startUp();
 		MaterialManager::startUp();
 		FontManager::startUp();
@@ -100,6 +104,7 @@ namespace BansheeEngine
 		FontManager::shutDown();
 		MaterialManager::shutDown();
 		MeshManager::shutDown();
+		ProfilerGPU::shutDown();
 
 		unloadPlugin(mSceneManagerPlugin);
 
@@ -119,7 +124,8 @@ namespace BansheeEngine
 		CoreThread::shutDown();
 		TaskScheduler::shutDown();
 		ThreadPool::shutDown();
-		Profiler::shutDown();
+		ProfilingManager::shutDown();
+		ProfilerCPU::shutDown();
 		UUIDGenerator::shutDown();
 
 		MemStack::endThread();
@@ -132,7 +138,7 @@ namespace BansheeEngine
 
 		while(mRunMainLoop)
 		{
-			gProfiler().beginThread("Sim");
+			gProfilerCPU().beginThread("Sim");
 
 			gCoreThread().update();
 			Platform::_update();
@@ -169,10 +175,11 @@ namespace BansheeEngine
 
 			gCoreThread().queueCommand(&Platform::_coreUpdate);
 			gCoreThread().submitAccessors();
+			gCoreThread().queueCommand(std::bind(&ProfilerGPU::_update, ProfilerGPU::instancePtr()));
 			gCoreThread().queueCommand(std::bind(&CoreApplication::endCoreProfiling, this));
 			gCoreThread().queueCommand(std::bind(&CoreApplication::frameRenderingFinishedCallback, this));
 
-			gProfiler().endThread();
+			gProfilerCPU().endThread();
 			gProfiler()._update();
 		}
 	}
@@ -198,12 +205,12 @@ namespace BansheeEngine
 
 	void CoreApplication::beginCoreProfiling()
 	{
-		gProfiler().beginThread("Core");
+		gProfilerCPU().beginThread("Core");
 	}
 
 	void CoreApplication::endCoreProfiling()
 	{
-		gProfiler().endThread();
+		gProfilerCPU().endThread();
 		gProfiler()._updateCore();
 	}
 

+ 0 - 2
BansheeCore/Source/BsMesh.cpp

@@ -12,8 +12,6 @@
 #include "BsVertexDataDesc.h"
 #include "BsResources.h"
 
-#include "BsProfiler.h"
-
 namespace BansheeEngine
 {
 	Mesh::Mesh(UINT32 numVertices, UINT32 numIndices, const VertexDataDescPtr& vertexDesc, 

+ 43 - 38
BansheeCore/Source/BsCPUProfiler.cpp → BansheeCore/Source/BsProfilerCPU.cpp

@@ -1,55 +1,55 @@
-#include "BsCPUProfiler.h"
+#include "BsProfilerCPU.h"
 #include "BsDebug.h"
 #include "BsPlatform.h"
 
 namespace BansheeEngine
 {
-	CPUProfiler::Timer::Timer()
+	ProfilerCPU::Timer::Timer()
 	{
 		time = 0.0f;
 	}
 
-	void CPUProfiler::Timer::start()
+	void ProfilerCPU::Timer::start()
 	{
 		startTime = getCurrentTime();
 	}
 
-	void CPUProfiler::Timer::stop()
+	void ProfilerCPU::Timer::stop()
 	{
 		time += getCurrentTime() - startTime;
 	}
 
-	void CPUProfiler::Timer::reset()
+	void ProfilerCPU::Timer::reset()
 	{
 		time = 0.0f;
 	}
 
-	inline double CPUProfiler::Timer::getCurrentTime() 
+	inline double ProfilerCPU::Timer::getCurrentTime() 
 	{
 		return Platform::queryPerformanceTimerMs();
 	}
 
-	CPUProfiler::TimerPrecise::TimerPrecise()
+	ProfilerCPU::TimerPrecise::TimerPrecise()
 	{
 		cycles = 0;
 	}
 
-	void CPUProfiler::TimerPrecise::start()
+	void ProfilerCPU::TimerPrecise::start()
 	{
 		startCycles = getNumCycles();
 	}
 
-	void CPUProfiler::TimerPrecise::stop()
+	void ProfilerCPU::TimerPrecise::stop()
 	{
 		cycles += getNumCycles() - startCycles;
 	}
 
-	void CPUProfiler::TimerPrecise::reset()
+	void ProfilerCPU::TimerPrecise::reset()
 	{
 		cycles = 0;
 	}
 
-	inline UINT64 CPUProfiler::TimerPrecise::getNumCycles() 
+	inline UINT64 ProfilerCPU::TimerPrecise::getNumCycles() 
 	{
 #if BS_COMPILER == BS_COMPILER_GNUC
 		asm volatile("cpuid" : : : "%eax", "%ebx", "%ecx", "%edx" );
@@ -64,7 +64,7 @@ namespace BansheeEngine
 #endif		
 	}
 
-	void CPUProfiler::ProfileData::beginSample()
+	void ProfilerCPU::ProfileData::beginSample()
 	{
 		memAllocs = MemoryCounter::getNumAllocs();
 		memFrees = MemoryCounter::getNumFrees();
@@ -73,7 +73,7 @@ namespace BansheeEngine
 		timer.start();
 	}
 
-	void CPUProfiler::ProfileData::endSample()
+	void ProfilerCPU::ProfileData::endSample()
 	{
 		timer.stop();
 
@@ -83,13 +83,13 @@ namespace BansheeEngine
 		samples.push_back(ProfileSample(timer.time, numAllocs, numFrees));
 	}
 
-	void CPUProfiler::ProfileData::resumeLastSample()
+	void ProfilerCPU::ProfileData::resumeLastSample()
 	{
 		timer.start();
 		samples.erase(samples.end() - 1);
 	}
 
-	void CPUProfiler::PreciseProfileData::beginSample()
+	void ProfilerCPU::PreciseProfileData::beginSample()
 	{
 		memAllocs = MemoryCounter::getNumAllocs();
 		memFrees = MemoryCounter::getNumFrees();
@@ -98,7 +98,7 @@ namespace BansheeEngine
 		timer.start();
 	}
 
-	void CPUProfiler::PreciseProfileData::endSample()
+	void ProfilerCPU::PreciseProfileData::endSample()
 	{
 		timer.stop();
 
@@ -108,21 +108,21 @@ namespace BansheeEngine
 		samples.push_back(PreciseProfileSample(timer.cycles, numAllocs, numFrees));
 	}
 
-	void CPUProfiler::PreciseProfileData::resumeLastSample()
+	void ProfilerCPU::PreciseProfileData::resumeLastSample()
 	{
 		timer.start();
 		samples.erase(samples.end() - 1);
 	}
 
-	BS_THREADLOCAL CPUProfiler::ThreadInfo* CPUProfiler::ThreadInfo::activeThread = nullptr;
+	BS_THREADLOCAL ProfilerCPU::ThreadInfo* ProfilerCPU::ThreadInfo::activeThread = nullptr;
 
-	CPUProfiler::ThreadInfo::ThreadInfo()
+	ProfilerCPU::ThreadInfo::ThreadInfo()
 		:isActive(false), rootBlock(nullptr)
 	{
 
 	}
 
-	void CPUProfiler::ThreadInfo::begin(const ProfilerString& _name)
+	void ProfilerCPU::ThreadInfo::begin(const ProfilerString& _name)
 	{
 		if(isActive)
 		{
@@ -141,7 +141,7 @@ namespace BansheeEngine
 		isActive = true;
 	}
 
-	void CPUProfiler::ThreadInfo::end()
+	void ProfilerCPU::ThreadInfo::end()
 	{
 		if(activeBlock.type == ActiveSamplingType::Basic)
 			activeBlock.block->basic.endSample();
@@ -174,7 +174,7 @@ namespace BansheeEngine
 		activeBlock = ActiveBlock();
 	}
 
-	void CPUProfiler::ThreadInfo::reset()
+	void ProfilerCPU::ThreadInfo::reset()
 	{
 		if(isActive)
 			end();
@@ -185,22 +185,22 @@ namespace BansheeEngine
 		rootBlock = nullptr;
 	}
 
-	CPUProfiler::ProfiledBlock* CPUProfiler::ThreadInfo::getBlock()
+	ProfilerCPU::ProfiledBlock* ProfilerCPU::ThreadInfo::getBlock()
 	{
 		// TODO - Pool this, if possible using the memory allocator stuff
 		// TODO - Also consider moving all samples in ThreadInfo, and also pool them (otherwise I can't pool ProfiledBlock since it will be variable size)
 		return bs_new<ProfiledBlock, ProfilerAlloc>();
 	}
 
-	void CPUProfiler::ThreadInfo::releaseBlock(CPUProfiler::ProfiledBlock* block)
+	void ProfilerCPU::ThreadInfo::releaseBlock(ProfilerCPU::ProfiledBlock* block)
 	{
 		bs_delete<ProfilerAlloc>(block);
 	}
 
-	CPUProfiler::ProfiledBlock::ProfiledBlock()
+	ProfilerCPU::ProfiledBlock::ProfiledBlock()
 	{ }
 
-	CPUProfiler::ProfiledBlock::~ProfiledBlock()
+	ProfilerCPU::ProfiledBlock::~ProfiledBlock()
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 
@@ -210,7 +210,7 @@ namespace BansheeEngine
 		children.clear();
 	}
 
-	CPUProfiler::ProfiledBlock* CPUProfiler::ProfiledBlock::findChild(const ProfilerString& name) const
+	ProfilerCPU::ProfiledBlock* ProfilerCPU::ProfiledBlock::findChild(const ProfilerString& name) const
 	{
 		for(auto& child : children)
 		{
@@ -221,7 +221,7 @@ namespace BansheeEngine
 		return nullptr;
 	}
 
-	CPUProfiler::CPUProfiler()
+	ProfilerCPU::ProfilerCPU()
 		:mBasicTimerOverhead(0.0), mPreciseTimerOverhead(0), mBasicSamplingOverheadMs(0.0), mPreciseSamplingOverheadCycles(0),
 		mBasicSamplingOverheadCycles(0), mPreciseSamplingOverheadMs(0.0)
 	{
@@ -230,7 +230,7 @@ namespace BansheeEngine
 		estimateTimerOverhead();
 	}
 
-	CPUProfiler::~CPUProfiler()
+	ProfilerCPU::~ProfilerCPU()
 	{
 		reset();
 
@@ -240,7 +240,7 @@ namespace BansheeEngine
 			bs_delete<ProfilerAlloc>(threadInfo);
 	}
 
-	void CPUProfiler::beginThread(const ProfilerString& name)
+	void ProfilerCPU::beginThread(const ProfilerString& name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		if(thread == nullptr)
@@ -258,13 +258,13 @@ namespace BansheeEngine
 		thread->begin(name);
 	}
 
-	void CPUProfiler::endThread()
+	void ProfilerCPU::endThread()
 	{
 		// I don't do a null check here on purpose, so endSample can be called ASAP
 		ThreadInfo::activeThread->end();
 	}
 
-	void CPUProfiler::beginSample(const ProfilerString& name)
+	void ProfilerCPU::beginSample(const ProfilerString& name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		if(thread == nullptr || !thread->isActive)
@@ -296,7 +296,7 @@ namespace BansheeEngine
 		block->basic.beginSample();
 	}
 
-	void CPUProfiler::endSample(const ProfilerString& name)
+	void ProfilerCPU::endSample(const ProfilerString& name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		ProfiledBlock* block = thread->activeBlock.block;
@@ -332,7 +332,7 @@ namespace BansheeEngine
 			thread->activeBlock = ActiveBlock();
 	}
 
-	void CPUProfiler::beginSamplePrecise(const ProfilerString& name)
+	void ProfilerCPU::beginSamplePrecise(const ProfilerString& name)
 	{
 		// Note: There is a (small) possibility a context switch will happen during this measurement in which case result will be skewed. 
 		// Increasing thread priority might help. This is generally only a problem with code that executes a long time (10-15+ ms - depending on OS quant length)
@@ -364,7 +364,7 @@ namespace BansheeEngine
 		block->precise.beginSample();
 	}
 
-	void CPUProfiler::endSamplePrecise(const ProfilerString& name)
+	void ProfilerCPU::endSamplePrecise(const ProfilerString& name)
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 		ProfiledBlock* block = thread->activeBlock.block;
@@ -400,7 +400,7 @@ namespace BansheeEngine
 			thread->activeBlock = ActiveBlock();
 	}
 
-	void CPUProfiler::reset()
+	void ProfilerCPU::reset()
 	{
 		ThreadInfo* thread = ThreadInfo::activeThread;
 
@@ -408,7 +408,7 @@ namespace BansheeEngine
 			thread->reset();
 	}
 
-	CPUProfilerReport CPUProfiler::generateReport()
+	CPUProfilerReport ProfilerCPU::generateReport()
 	{
 		CPUProfilerReport report;
 
@@ -708,7 +708,7 @@ namespace BansheeEngine
 		return report;
 	}
 
-	void CPUProfiler::estimateTimerOverhead()
+	void ProfilerCPU::estimateTimerOverhead()
 	{
 		// Get an idea of how long timer calls and RDTSC takes
 		const UINT32 reps = 1000, sampleReps = 100;
@@ -963,4 +963,9 @@ namespace BansheeEngine
 	{
 
 	}
+
+	ProfilerCPU& gProfilerCPU()
+	{
+		return ProfilerCPU::instance();
+	}
 }

+ 15 - 15
BansheeCore/Source/BsGPUProfiler.cpp → BansheeCore/Source/BsProfilerGPU.cpp

@@ -1,4 +1,4 @@
-#include "BsGPUProfiler.h"
+#include "BsProfilerGPU.h"
 #include "BsRenderSystem.h"
 #include "BsTimerQuery.h"
 #include "BsOcclusionQuery.h"
@@ -6,11 +6,11 @@
 
 namespace BansheeEngine
 {
-	GPUProfiler::GPUProfiler()
+	ProfilerGPU::ProfilerGPU()
 		:mNumActiveSamples(0), mIsFrameActive(false)
 	{ }
 
-	void GPUProfiler::beginFrame()
+	void ProfilerGPU::beginFrame()
 	{
 		if (mIsFrameActive)
 			BS_EXCEPT(InvalidStateException, "Cannot begin a frame because another frame is active.");
@@ -23,7 +23,7 @@ namespace BansheeEngine
 		mIsFrameActive = true;
 	}
 
-	void GPUProfiler::endFrame()
+	void ProfilerGPU::endFrame()
 	{
 		if (mNumActiveSamples > 0)
 			BS_EXCEPT(InvalidStateException, "Attempting to end a frame while a sample is active.");
@@ -37,7 +37,7 @@ namespace BansheeEngine
 		mIsFrameActive = false;
 	}
 
-	void GPUProfiler::beginSample(const ProfilerString& name)
+	void ProfilerGPU::beginSample(const ProfilerString& name)
 	{
 		if (!mIsFrameActive)
 			BS_EXCEPT(InvalidStateException, "Cannot begin a sample because no frame is active.");
@@ -50,7 +50,7 @@ namespace BansheeEngine
 		mNumActiveSamples++;
 	}
 
-	void GPUProfiler::endSample(const ProfilerString& name)
+	void ProfilerGPU::endSample(const ProfilerString& name)
 	{
 		if (mNumActiveSamples == 0)
 			return;
@@ -68,12 +68,12 @@ namespace BansheeEngine
 		mNumActiveSamples--;
 	}
 
-	UINT32 GPUProfiler::getNumAvailableReports()
+	UINT32 ProfilerGPU::getNumAvailableReports()
 	{
 		return (UINT32)mReadyReports.size();
 	}
 
-	GPUProfilerReport GPUProfiler::getNextReport()
+	GPUProfilerReport ProfilerGPU::getNextReport()
 	{
 		if (mReadyReports.empty())
 			BS_EXCEPT(InvalidStateException, "No reports are available.")
@@ -84,7 +84,7 @@ namespace BansheeEngine
 		return report;
 	}
 
-	void GPUProfiler::_update()
+	void ProfilerGPU::_update()
 	{
 		while (!mUnresolvedFrames.empty())
 		{
@@ -104,7 +104,7 @@ namespace BansheeEngine
 		}
 	}
 
-	GPUProfilerReport GPUProfiler::resolveFrame(ActiveFrame& frame)
+	GPUProfilerReport ProfilerGPU::resolveFrame(ActiveFrame& frame)
 	{
 		GPUProfilerReport report;
 		
@@ -121,7 +121,7 @@ namespace BansheeEngine
 		return report;
 	}
 
-	void GPUProfiler::resolveSample(const ActiveSample& sample, GPUProfileSample& reportSample)
+	void ProfilerGPU::resolveSample(const ActiveSample& sample, GPUProfileSample& reportSample)
 	{
 		reportSample.name = String(sample.sampleName.c_str());
 		reportSample.timeMs = sample.activeTimeQuery->getTimeMs();
@@ -156,7 +156,7 @@ namespace BansheeEngine
 		mFreeOcclusionQueries.push(sample.activeOcclusionQuery);
 	}
 
-	void GPUProfiler::beginSampleInternal(ActiveSample& sample)
+	void ProfilerGPU::beginSampleInternal(ActiveSample& sample)
 	{
 		sample.startStats = RenderSystem::instance().getRenderStats();
 		sample.activeTimeQuery = getTimerQuery();
@@ -166,14 +166,14 @@ namespace BansheeEngine
 		sample.activeOcclusionQuery->begin();
 	}
 
-	void GPUProfiler::endSampleInternal(ActiveSample& sample)
+	void ProfilerGPU::endSampleInternal(ActiveSample& sample)
 	{
 		sample.endStats = RenderSystem::instance().getRenderStats();
 		sample.activeOcclusionQuery->end();
 		sample.activeTimeQuery->end();
 	}
 
-	TimerQueryPtr GPUProfiler::getTimerQuery() const
+	TimerQueryPtr ProfilerGPU::getTimerQuery() const
 	{
 		if (!mFreeTimerQueries.empty())
 		{
@@ -186,7 +186,7 @@ namespace BansheeEngine
 		return TimerQuery::create();
 	}
 
-	OcclusionQueryPtr GPUProfiler::getOcclusionQuery() const
+	OcclusionQueryPtr ProfilerGPU::getOcclusionQuery() const
 	{
 		if (!mFreeOcclusionQueries.empty())
 		{

+ 14 - 21
BansheeCore/Source/BsProfiler.cpp → BansheeCore/Source/BsProfilingManager.cpp

@@ -1,27 +1,20 @@
-#include "BsProfiler.h"
+#include "BsProfilingManager.h"
 #include "BsMath.h"
 
 namespace BansheeEngine
 {
-	const UINT32 Profiler::NUM_SAVED_FRAMES = 200;
+	const UINT32 ProfilingManager::NUM_SAVED_FRAMES = 200;
 
-	Profiler::Profiler()
-		:mSavedSimReports(nullptr), mCPUProfiler(nullptr), mNextSimReportIdx(0),
+	ProfilingManager::ProfilingManager()
+		:mSavedSimReports(nullptr), mNextSimReportIdx(0),
 		mSavedCoreReports(nullptr), mNextCoreReportIdx(0)
 	{
-#if BS_PROFILING_ENABLED
-		mCPUProfiler = bs_new<CPUProfiler, ProfilerAlloc>();
-#endif
-
 		mSavedSimReports = bs_newN<ProfilerReport, ProfilerAlloc>(NUM_SAVED_FRAMES);
 		mSavedCoreReports = bs_newN<ProfilerReport, ProfilerAlloc>(NUM_SAVED_FRAMES);
 	}
 
-	Profiler::~Profiler()
+	ProfilingManager::~ProfilingManager()
 	{
-		if(mCPUProfiler != nullptr)
-			bs_delete<ProfilerAlloc>(mCPUProfiler);
-
 		if(mSavedSimReports != nullptr)
 			bs_deleteN<ProfilerAlloc>(mSavedSimReports, NUM_SAVED_FRAMES);
 
@@ -29,30 +22,30 @@ namespace BansheeEngine
 			bs_deleteN<ProfilerAlloc>(mSavedCoreReports, NUM_SAVED_FRAMES);
 	}
 
-	void Profiler::_update()
+	void ProfilingManager::_update()
 	{
 #if BS_PROFILING_ENABLED
-		mSavedSimReports[mNextSimReportIdx].cpuReport = mCPUProfiler->generateReport();
+		mSavedSimReports[mNextSimReportIdx].cpuReport = gProfilerCPU().generateReport();
 
-		mCPUProfiler->reset();
+		gProfilerCPU().reset();
 
 		mNextSimReportIdx = (mNextSimReportIdx + 1) % NUM_SAVED_FRAMES;
 #endif
 	}
 
-	void Profiler::_updateCore()
+	void ProfilingManager::_updateCore()
 	{
 #if BS_PROFILING_ENABLED
 		BS_LOCK_MUTEX(mSync);
-		mSavedCoreReports[mNextCoreReportIdx].cpuReport = mCPUProfiler->generateReport();
+		mSavedCoreReports[mNextCoreReportIdx].cpuReport = gProfilerCPU().generateReport();
 
-		mCPUProfiler->reset();
+		gProfilerCPU().reset();
 
 		mNextCoreReportIdx = (mNextCoreReportIdx + 1) % NUM_SAVED_FRAMES;
 #endif
 	}
 
-	const ProfilerReport& Profiler::getReport(ProfiledThread thread, UINT32 idx) const
+	const ProfilerReport& ProfilingManager::getReport(ProfiledThread thread, UINT32 idx) const
 	{
 		idx = Math::clamp(idx, 0U, (UINT32)(NUM_SAVED_FRAMES - 1));
 
@@ -74,8 +67,8 @@ namespace BansheeEngine
 		}
 	}
 
-	Profiler& gProfiler()
+	ProfilingManager& gProfiler()
 	{
-		return Profiler::instance();
+		return ProfilingManager::instance();
 	}
 }

+ 9 - 9
BansheeCore/Source/BsRenderSystem.cpp

@@ -10,7 +10,7 @@
 #include "BsGpuResource.h"
 #include "BsCoreThread.h"
 #include "BsMesh.h"
-#include "BsProfiler.h"
+#include "BsProfilerCPU.h"
 
 using namespace std::placeholders;
 
@@ -208,7 +208,7 @@ namespace BansheeEngine {
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		gProfiler().beginSample("render");
+		gProfilerCPU().beginSample("render");
 
 		if (mClipPlanesDirty)
 		{
@@ -261,7 +261,7 @@ namespace BansheeEngine {
 
 		mesh->_notifyUsedOnGPU();
 
-		gProfiler().endSample("render");
+		gProfilerCPU().endSample("render");
 	}
 
 	void RenderSystem::swapBuffers(RenderTargetPtr target)
@@ -280,30 +280,30 @@ namespace BansheeEngine {
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		gProfiler().beginSample("writeSubresource");
+		gProfilerCPU().beginSample("writeSubresource");
 
 		resource->writeSubresource(subresourceIdx, *data, discardEntireBuffer);
 
-		gProfiler().endSample("writeSubresource");
+		gProfilerCPU().endSample("writeSubresource");
 
-		gProfiler().beginSample("writeSubresourceB");
+		gProfilerCPU().beginSample("writeSubresourceB");
 
 		data->_unlock();
 		asyncOp._completeOperation();
 
-		gProfiler().endSample("writeSubresourceB");
+		gProfilerCPU().endSample("writeSubresourceB");
 	}
 
 	void RenderSystem::readSubresource(GpuResourcePtr resource, UINT32 subresourceIdx, GpuResourceDataPtr& data, AsyncOp& asyncOp)
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
-		gProfiler().beginSample("readSubresource");
+		gProfilerCPU().beginSample("readSubresource");
 
 		resource->readSubresource(subresourceIdx, *data);
 		data->_unlock();
 		asyncOp._completeOperation();
 
-		gProfiler().endSample("readSubresource");
+		gProfilerCPU().endSample("readSubresource");
 	}
 }

+ 3 - 3
BansheeD3D11RenderSystem/Source/BsD3D11HardwareBuffer.cpp

@@ -4,7 +4,7 @@
 #include "BsException.h"
 #include "BsDebug.h"
 
-#include "BsProfiler.h"
+#include "BsProfilerCPU.h"
 
 namespace BansheeEngine
 {
@@ -169,14 +169,14 @@ namespace BansheeEngine
 			D3D11_MAPPED_SUBRESOURCE mappedSubResource;
 			mappedSubResource.pData = NULL;
 			mDevice.clearErrors();
-			gProfiler().beginSample("Map");
+			gProfilerCPU().beginSample("Map");
 			HRESULT hr = mDevice.getImmediateContext()->Map(mD3DBuffer, 0, mapType, 0, &mappedSubResource);
 			if (FAILED(hr) || mDevice.hasError())
 			{
 				String msg = mDevice.getErrorDescription();
 				BS_EXCEPT(RenderingAPIException, "Error calling Map: " + msg);
 			}
-			gProfiler().endSample("Map");
+			gProfilerCPU().endSample("Map");
 
 			pRet = static_cast<void*>(static_cast<char*>(mappedSubResource.pData) + offset);
 

+ 2 - 2
BansheeEditor/Source/BsMainEditorWindow.cpp

@@ -6,7 +6,7 @@
 #include "BsSceneObject.h"
 #include "BsRenderTexture.h"
 #include "BsApplication.h"
-#include "BsProfiler.h"
+#include "BsProfilingManager.h"
 #include "BsGUIArea.h"
 #include "BsGUILayout.h"
 
@@ -78,7 +78,7 @@ namespace BansheeEngine
 		//DrawHelper3D::instance().drawAABox(sceneCamera, dbgBox, Color::Green, 250.0f);
 
 		ProfilerOverlay::startUp(sceneCamera->getViewport());
-		ProfilerOverlay::instance().show();
+		ProfilerOverlay::instance().show(ProfilerOverlayType::CPUSamples);
 	}
 
 	MainEditorWindow::~MainEditorWindow()

+ 84 - 32
BansheeEngine/Include/BsProfilerOverlay.h

@@ -1,11 +1,18 @@
 #pragma once
 
 #include "BsPrerequisites.h"
+#include "BsProfilerGPU.h"
 #include "BsModule.h"
 #include "BsEvent.h"
 
 namespace BansheeEngine
 {
+	enum class ProfilerOverlayType // Selects which group of overlay areas ProfilerOverlay::show() enables
+	{
+		CPUSamples, // CPU view: the basic and precise CPU sample areas
+		GPUSamples // GPU view: the GPU frame contents and frame samples areas
+	};
+
 	class BS_EXPORT ProfilerOverlay : public Module<ProfilerOverlay>
 	{
 	public:
@@ -45,13 +52,23 @@ namespace BansheeEngine
 			HString totalCyclesSelf;
 		};
 
+		struct GPUSampleRow // One row of the GPU samples list shown by the overlay
+		{
+			GUILayout* layout; // Layout that holds this row's GUI elements
+
+			Vector<GUIElement*> elements; // GUI elements created for this row, kept so they can be destroyed together with the row
+
+			HString name; // String bound to the row's name label
+			HString time; // String bound to the row's time label; parameter 0 receives the sample time
+		};
+
 	public:
 		ProfilerOverlay(const ViewportPtr& target);
 		~ProfilerOverlay();
 
 		void setTarget(const ViewportPtr& target);
 
-		void show();
+		void show(ProfilerOverlayType type);
 		void hide();
 
 		/**
@@ -61,48 +78,83 @@ namespace BansheeEngine
 	private:
 		static const UINT32 MAX_DEPTH;
 
+		ProfilerOverlayType mType;
 		ViewportPtr mTarget;
 
 		HSceneObject mWidgetSO;
 		GameObjectHandle<GUIWidget> mWidget;
-		GUIArea* mBasicAreaLabels;
-		GUIArea* mPreciseAreaLabels;
-		GUIArea* mBasicAreaContents;
-		GUIArea* mPreciseAreaContents;
-
-		GUILayout* mBasicLayoutLabels;
-		GUILayout* mPreciseLayoutLabels;
-		GUILayout* mBasicLayoutContents;
-		GUILayout* mPreciseLayoutContents;
-
-		GUIElement* mTitleBasicName;
-		GUIElement* mTitleBasicPctOfParent;
-		GUIElement* mTitleBasicNumCalls;
-		GUIElement* mTitleBasicNumAllocs;
-		GUIElement* mTitleBasicNumFrees;
-		GUIElement* mTitleBasicAvgTime;
-		GUIElement* mTitleBasicTotalTime;
-		GUIElement* mTitleBasicAvgTitleSelf;
-		GUIElement* mTitleBasicTotalTimeSelf;
-
-		GUIElement* mTitlePreciseName;
-		GUIElement* mTitlePrecisePctOfParent;
-		GUIElement* mTitlePreciseNumCalls;
-		GUIElement* mTitlePreciseNumAllocs;
-		GUIElement* mTitlePreciseNumFrees;
-		GUIElement* mTitlePreciseAvgCycles;
-		GUIElement* mTitlePreciseTotalCycles;
-		GUIElement* mTitlePreciseAvgCyclesSelf;
-		GUIElement* mTitlePreciseTotalCyclesSelf;
+		GUIArea* mCPUBasicAreaLabels = nullptr;
+		GUIArea* mCPUPreciseAreaLabels = nullptr;
+		GUIArea* mCPUBasicAreaContents = nullptr;
+		GUIArea* mCPUPreciseAreaContents = nullptr;
+
+		GUILayout* mBasicLayoutLabels = nullptr;
+		GUILayout* mPreciseLayoutLabels = nullptr;
+		GUILayout* mBasicLayoutContents = nullptr;
+		GUILayout* mPreciseLayoutContents = nullptr;
+
+		GUIElement* mTitleBasicName = nullptr;
+		GUIElement* mTitleBasicPctOfParent = nullptr;
+		GUIElement* mTitleBasicNumCalls = nullptr;
+		GUIElement* mTitleBasicNumAllocs = nullptr;
+		GUIElement* mTitleBasicNumFrees = nullptr;
+		GUIElement* mTitleBasicAvgTime = nullptr;
+		GUIElement* mTitleBasicTotalTime = nullptr;
+		GUIElement* mTitleBasicAvgTitleSelf = nullptr;
+		GUIElement* mTitleBasicTotalTimeSelf = nullptr;
+
+		GUIElement* mTitlePreciseName = nullptr;
+		GUIElement* mTitlePrecisePctOfParent = nullptr;
+		GUIElement* mTitlePreciseNumCalls = nullptr;
+		GUIElement* mTitlePreciseNumAllocs = nullptr;
+		GUIElement* mTitlePreciseNumFrees = nullptr;
+		GUIElement* mTitlePreciseAvgCycles = nullptr;
+		GUIElement* mTitlePreciseTotalCycles = nullptr;
+		GUIElement* mTitlePreciseAvgCyclesSelf = nullptr;
+		GUIElement* mTitlePreciseTotalCyclesSelf = nullptr;
+
+		GUIArea* mGPUAreaFrameContents = nullptr;
+		GUIArea* mGPUAreaFrameSamples = nullptr;
+		GUILayout* mGPULayoutFrameContentsLeft = nullptr;
+		GUILayout* mGPULayoutFrameContentsRight = nullptr;
+		GUILayout* mGPULayoutSamples = nullptr;
+
+		HString mGPUFrameNumStr;
+		HString mGPUTimeStr;
+		HString mGPUDrawCallsStr;
+		HString mGPURenTargetChangesStr;
+		HString mGPUPresentsStr;
+		HString mGPUClearsStr;
+		HString mGPUVerticesStr;
+		HString mGPUPrimitivesStr;
+		HString mGPUSamplesStr;
+		HString mGPUBlendStateChangesStr;
+		HString mGPURasterStateChangesStr;
+		HString mGPUDepthStencilStateChangesStr;
+
+		HString mGPUObjectsCreatedStr;
+		HString mGPUObjectsDestroyedStr;
+		HString mGPUResourceWritesStr;
+		HString mGPUResourceReadsStr;
+		HString mGPUTextureBindsStr;
+		HString mGPUSamplerBindsStr;
+		HString mGPUVertexBufferBindsStr;
+		HString mGPUIndexBufferBindsStr;
+		HString mGPUGPUProgramBufferBindsStr;
+		HString mGPUGPUProgramBindsStr;
 
 		Vector<BasicRow> mBasicRows;
 		Vector<PreciseRow> mPreciseRows;
+		Vector<GPUSampleRow> mGPUSampleRows;
 
 		HEvent mTargetResizedConn;
 		bool mIsShown;
 
 		void targetResized();
-		void updateAreaSizes();
-		void updateContents(const ProfilerReport& simReport, const ProfilerReport& coreReport);
+		void updateCPUSampleAreaSizes();
+		void updateGPUSampleAreaSizes();
+
+		void updateCPUSampleContents(const ProfilerReport& simReport, const ProfilerReport& coreReport);
+		void updateGPUSampleContents(const GPUProfilerReport& gpuReport);
 	};
 }

+ 1 - 1
BansheeEngine/Source/BsApplication.cpp

@@ -10,7 +10,7 @@
 #include "BsGLBuiltinMaterialFactory.h"
 #include "BsBuiltinResources.h"
 #include "BsScriptManager.h"
-#include "BsProfiler.h"
+#include "BsProfilingManager.h"
 #include "BsVirtualInput.h"
 #include "BsCursor.h"
 

+ 5 - 5
BansheeEngine/Source/BsGUIManager.cpp

@@ -28,7 +28,7 @@
 #include "BsDragAndDropManager.h"
 #include "BsGUIDropDownBoxManager.h"
 #include "BsGUIContextMenu.h"
-#include "BsProfiler.h"
+#include "BsProfilerCPU.h"
 #include "BsMeshHeap.h"
 #include "BsTransientMesh.h"
 #include "BsVirtualInput.h"
@@ -181,12 +181,12 @@ namespace BansheeEngine
 		DragAndDropManager::instance().update();
 
 		// Update layouts
-		gProfiler().beginSample("UpdateLayout");
+		gProfilerCPU().beginSample("UpdateLayout");
 		for(auto& widgetInfo : mWidgets)
 		{
 			widgetInfo.widget->_updateLayout();
 		}
-		gProfiler().endSample("UpdateLayout");
+		gProfilerCPU().endSample("UpdateLayout");
 
 		// Blink caret
 		float curTime = gTime().getTime();
@@ -340,7 +340,7 @@ namespace BansheeEngine
 		{
 			GUIRenderData& renderData = cachedMeshData.second;
 
-			gProfiler().beginSample("UM_A");
+			gProfilerCPU().beginSample("UM_A");
 
 			// Check if anything is dirty. If nothing is we can skip the update
 			bool isDirty = renderData.isDirty;
@@ -354,7 +354,7 @@ namespace BansheeEngine
 				}
 			}
 
-			gProfiler().endSample("UM_A");
+			gProfilerCPU().endSample("UM_A");
 
 			if(!isDirty)
 				continue;

+ 3 - 3
BansheeEngine/Source/BsImageSprite.cpp

@@ -4,7 +4,7 @@
 #include "BsSpriteTexture.h"
 #include "BsTexture.h"
 
-#include "BsProfiler.h"
+#include "BsProfilerCPU.h"
 
 namespace BansheeEngine
 {
@@ -22,7 +22,7 @@ namespace BansheeEngine
 			return;
 		}
 
-		gProfiler().beginSample("UpdateImageSprite");
+		gProfilerCPU().beginSample("UpdateImageSprite");
 
 		// Actually generate a mesh
 		if(mCachedRenderElements.size() < 1)
@@ -252,6 +252,6 @@ namespace BansheeEngine
 
 		updateBounds();
 
-		gProfiler().endSample("UpdateImageSprite");
+		gProfilerCPU().endSample("UpdateImageSprite");
 	}
 }

+ 226 - 42
BansheeEngine/Source/BsProfilerOverlay.cpp

@@ -6,8 +6,9 @@
 #include "BsGUIElement.h"
 #include "BsGUILabel.h"
 #include "BsGUISpace.h"
+#include "BsTime.h"
 #include "BsBuiltinResources.h"
-#include "BsProfiler.h"
+#include "BsProfilingManager.h"
 #include "BsViewport.h"
 
 namespace BansheeEngine
@@ -220,16 +221,68 @@ namespace BansheeEngine
 		}
 	};
 
+	/**
+	 * @brief	Helper that fills the GPU sample rows of the profiler overlay, one row per call to addData.
+	 *			Rows that already exist are reused. Rows that did not receive data by the time the filler
+	 *			is destroyed are removed from the layout and their GUI elements are destroyed.
+	 *
+	 * @note	Assumes the last child of the provided layout is a flexible space and that the children
+	 *			before it are the row layouts created by this class, in the same order as "rows".
+	 */
+	class GPUSampleRowFiller
+	{
+	public:
+		UINT32 curIdx;
+		GUILayout& layout;
+		GUIWidget& widget;
+		Vector<ProfilerOverlay::GPUSampleRow>& rows;
+
+		/**
+		 * @brief	Starts a new fill pass over the provided rows.
+		 *
+		 * @param	_rows	Persistent row storage that is reused between fill passes.
+		 * @param	_layout	Layout the row layouts are inserted into.
+		 * @param	_widget	Widget the overlay belongs to.
+		 */
+		GPUSampleRowFiller(Vector<ProfilerOverlay::GPUSampleRow>& _rows, GUILayout& _layout, GUIWidget& _widget)
+			:curIdx(0), layout(_layout), widget(_widget), rows(_rows) // Listed in declaration order, which is the order members are initialized in
+		{ }
+
+		~GPUSampleRowFiller()
+		{
+			// Drop the rows that received no data during this pass. Work from the back so the layout
+			// being removed always belongs to the row whose elements are destroyed right after.
+			while ((UINT32)rows.size() > curIdx)
+			{
+				// The last child is the flexible space, so the last row layout sits directly before it
+				layout.removeChildAt(layout.getNumChildren() - 2);
+
+				ProfilerOverlay::GPUSampleRow& row = rows.back();
+
+				for (auto& element : row.elements)
+					GUIElement::destroy(element);
+
+				rows.pop_back();
+			}
+		}
+
+		/**
+		 * @brief	Writes one sample into the next row, creating the row and its labels if needed.
+		 *
+		 * @param	name	Name of the sample, shown in the row's name label.
+		 * @param	timeMs	Time of the sample, shown in the row's time label.
+		 */
+		void addData(const String& name, float timeMs)
+		{
+			if (curIdx >= rows.size())
+			{
+				rows.push_back(ProfilerOverlay::GPUSampleRow());
+
+				ProfilerOverlay::GPUSampleRow& newRow = rows.back();
+
+				// Both strings consist of a single parameter that is filled in below on every pass
+				newRow.name = HString(L"{0}");
+				newRow.time = HString(L"{0}");
+
+				newRow.layout = &layout.insertLayoutX(layout.getNumChildren() - 1); // Insert before flexible space
+
+				GUILabel* nameLabel = GUILabel::create(newRow.name, GUIOptions(GUIOption::fixedWidth(100)));
+				GUILabel* timeLabel = GUILabel::create(newRow.time, GUIOptions(GUIOption::fixedWidth(100)));
+
+				newRow.layout->addElement(nameLabel);
+				newRow.layout->addElement(timeLabel);
+
+				newRow.elements.push_back(nameLabel);
+				newRow.elements.push_back(timeLabel);
+			}
+
+			// Rows are reused between passes, so both the name and the time have to be refreshed
+			ProfilerOverlay::GPUSampleRow& row = rows[curIdx];
+			row.name.setParameter(0, toWString(name));
+			row.time.setParameter(0, toWString(timeMs));
+
+			curIdx++;
+		}
+	};
+
 	const UINT32 ProfilerOverlay::MAX_DEPTH = 4;
 
 	ProfilerOverlay::ProfilerOverlay(const ViewportPtr& target)
-		:mIsShown(false), mBasicAreaLabels(nullptr), mPreciseAreaLabels(nullptr), mBasicAreaContents(nullptr), mPreciseAreaContents(nullptr),
-		mBasicLayoutLabels(nullptr), mPreciseLayoutLabels(nullptr), mBasicLayoutContents(nullptr), mPreciseLayoutContents(nullptr),
-		mTitleBasicName(nullptr), mTitleBasicPctOfParent(nullptr), mTitleBasicNumCalls(nullptr), mTitleBasicNumAllocs(nullptr), mTitleBasicNumFrees(nullptr),
-		mTitleBasicAvgTime(nullptr), mTitleBasicTotalTime(nullptr), mTitleBasicAvgTitleSelf(nullptr), mTitleBasicTotalTimeSelf(nullptr), 
-		mTitlePreciseName(nullptr), mTitlePrecisePctOfParent(nullptr), mTitlePreciseNumCalls(nullptr), mTitlePreciseNumAllocs(nullptr), 
-		mTitlePreciseNumFrees(nullptr), mTitlePreciseAvgCycles(nullptr), mTitlePreciseTotalCycles(nullptr), mTitlePreciseAvgCyclesSelf(nullptr), 
-		mTitlePreciseTotalCyclesSelf(nullptr)
+		// Initializers are listed in declaration order (mType is declared before mIsShown).
+		// The GUI pointer members rely on their in-class nullptr initializers.
+		:mType(ProfilerOverlayType::CPUSamples), mIsShown(false)
 	{
 		setTarget(target);
 	}
@@ -263,17 +316,18 @@ namespace BansheeEngine
 		mWidget->setDepth(127);
 		mWidget->setSkin(BuiltinResources::instance().getGUISkin());
 
-		mBasicAreaLabels = GUIArea::create(*mWidget, 0, 0);
-		mPreciseAreaLabels = GUIArea::create(*mWidget, 0, 0);
-		mBasicAreaContents = GUIArea::create(*mWidget, 0, 0);
-		mPreciseAreaContents = GUIArea::create(*mWidget, 0, 0);
+		// Set up CPU sample areas
+		mCPUBasicAreaLabels = GUIArea::create(*mWidget, 0, 0);
+		mCPUPreciseAreaLabels = GUIArea::create(*mWidget, 0, 0);
+		mCPUBasicAreaContents = GUIArea::create(*mWidget, 0, 0);
+		mCPUPreciseAreaContents = GUIArea::create(*mWidget, 0, 0);
 
-		mBasicLayoutLabels = &mBasicAreaLabels->getLayout().addLayoutY();
-		mPreciseLayoutLabels = &mPreciseAreaLabels->getLayout().addLayoutY();
-		mBasicLayoutContents = &mBasicAreaContents->getLayout().addLayoutY();
-		mPreciseLayoutContents = &mPreciseAreaContents->getLayout().addLayoutY();
+		mBasicLayoutLabels = &mCPUBasicAreaLabels->getLayout().addLayoutY();
+		mPreciseLayoutLabels = &mCPUPreciseAreaLabels->getLayout().addLayoutY();
+		mBasicLayoutContents = &mCPUBasicAreaContents->getLayout().addLayoutY();
+		mPreciseLayoutContents = &mCPUPreciseAreaContents->getLayout().addLayoutY();
 
-		// Set up title bars
+		// Set up CPU sample title bars
 		mTitleBasicName = GUILabel::create(HString(L"Name"), GUIOptions(GUIOption::fixedWidth(200)));
 		mTitleBasicPctOfParent = GUILabel::create(HString(L"% parent"), GUIOptions(GUIOption::fixedWidth(50)));
 		mTitleBasicNumCalls = GUILabel::create(HString(L"# calls"), GUIOptions(GUIOption::fixedWidth(50)));
@@ -324,18 +378,86 @@ namespace BansheeEngine
 		mBasicLayoutContents->addFlexibleSpace();
 		mPreciseLayoutContents->addFlexibleSpace();
 
-		updateAreaSizes();
+		// Set up GPU sample areas
+		mGPUAreaFrameContents = GUIArea::create(*mWidget, 0, 0);
+		mGPUAreaFrameSamples = GUIArea::create(*mWidget, 0, 0);
+		mGPULayoutFrameContentsLeft = &mGPUAreaFrameContents->getLayout().addLayoutY();
+		mGPULayoutFrameContentsRight = &mGPUAreaFrameContents->getLayout().addLayoutY();
+		mGPULayoutSamples = &mGPUAreaFrameSamples->getLayout().addLayoutY();
+
+		// Per-frame statistic strings; parameter 0 of each is filled in by updateGPUSampleContents
+		mGPUFrameNumStr = HString(L"__ProfOvFrame", L"Frame #{0}");
+		mGPUTimeStr = HString(L"__ProfOvTime", L"Time: {0}ms");
+		mGPUDrawCallsStr = HString(L"__ProfOvDrawCalls", L"Draw calls: {0}");
+		mGPURenTargetChangesStr = HString(L"__ProfOvRTChanges", L"Render target changes: {0}");
+		mGPUPresentsStr = HString(L"__ProfOvPresents", L"Presents: {0}");
+		mGPUClearsStr = HString(L"__ProfOvClears", L"Clears: {0}");
+		mGPUVerticesStr = HString(L"__ProfOvVertices", L"Num. vertices: {0}");
+		mGPUPrimitivesStr = HString(L"__ProfOvPrimitives", L"Num. primitives: {0}");
+		mGPUSamplesStr = HString(L"__ProfOvSamples", L"Samples: {0}");
+		mGPUBlendStateChangesStr = HString(L"__ProfOvBSChanges", L"Blend state changes: {0}");
+		mGPURasterStateChangesStr = HString(L"__ProfOvRSChanges", L"Rasterizer state changes: {0}");
+		mGPUDepthStencilStateChangesStr = HString(L"__ProfOvDSSChanges", L"Depth/stencil state changes: {0}");
+
+		mGPUObjectsCreatedStr = HString(L"__ProfOvObjsCreated", L"Objects created: {0}");
+		mGPUObjectsDestroyedStr = HString(L"__ProfOvObjsDestroyed", L"Objects destroyed: {0}");
+		mGPUResourceWritesStr = HString(L"__ProfOvResWrites", L"Resource writes: {0}");
+		mGPUResourceReadsStr = HString(L"__ProfOvResReads", L"Resource reads: {0}");
+		mGPUTextureBindsStr = HString(L"__ProfOvTexBinds", L"Texture binds: {0}");
+		mGPUSamplerBindsStr = HString(L"__ProfOvSampBinds", L"Sampler binds: {0}");
+		mGPUVertexBufferBindsStr = HString(L"__ProfOvVBBinds", L"VB binds: {0}");
+		mGPUIndexBufferBindsStr = HString(L"__ProfOvIBBinds", L"IB binds: {0}");
+		mGPUGPUProgramBufferBindsStr = HString(L"__ProfOvProgBuffBinds", L"GPU program buffer binds: {0}");
+		mGPUGPUProgramBindsStr = HString(L"__ProfOvProgBinds", L"GPU program binds: {0}");
+
+		// Left column of the frame contents area
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUFrameNumStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUTimeStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUDrawCallsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPURenTargetChangesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUPresentsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUClearsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUVerticesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUPrimitivesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUSamplesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUBlendStateChangesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPURasterStateChangesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addElement(GUILabel::create(mGPUDepthStencilStateChangesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsLeft->addFlexibleSpace();
+
+		// Right column of the frame contents area
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUObjectsCreatedStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUObjectsDestroyedStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUResourceWritesStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUResourceReadsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUTextureBindsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUSamplerBindsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUVertexBufferBindsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUIndexBufferBindsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUGPUProgramBufferBindsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addElement(GUILabel::create(mGPUGPUProgramBindsStr, GUIOptions(GUIOption::fixedWidth(200))));
+		mGPULayoutFrameContentsRight->addFlexibleSpace();
+
+		// Sample rows are inserted before the last child of this layout, so it needs a trailing
+		// flexible space before the first row gets added
+		mGPULayoutSamples->addFlexibleSpace();
+
+		updateCPUSampleAreaSizes();
+		updateGPUSampleAreaSizes();
 	}
 
-	void ProfilerOverlay::show()
+	/**
+	 * @brief	Shows the overlay using the requested view. Does nothing if the overlay is already
+	 *			shown with that view. If a different view is currently shown, its areas are disabled
+	 *			so that only one view is visible at a time.
+	 *
+	 * @param	type	View (CPU or GPU samples) whose areas should be enabled.
+	 */
+	void ProfilerOverlay::show(ProfilerOverlayType type)
 	{
-		if(mIsShown)
+		if(mIsShown && mType == type)
 			return;
 
-		mBasicAreaLabels->enable();
-		mPreciseAreaLabels->enable();
-		mBasicAreaContents->enable();
-		mPreciseAreaContents->enable();
+		if (type == ProfilerOverlayType::CPUSamples)
+		{
+			// Hide the GPU view in case we are switching away from it
+			mGPUAreaFrameContents->disable();
+			mGPUAreaFrameSamples->disable();
+
+			mCPUBasicAreaLabels->enable();
+			mCPUPreciseAreaLabels->enable();
+			mCPUBasicAreaContents->enable();
+			mCPUPreciseAreaContents->enable();
+		}
+		else
+		{
+			// Hide the CPU view in case we are switching away from it
+			mCPUBasicAreaLabels->disable();
+			mCPUPreciseAreaLabels->disable();
+			mCPUBasicAreaContents->disable();
+			mCPUPreciseAreaContents->disable();
+
+			mGPUAreaFrameContents->enable();
+			mGPUAreaFrameSamples->enable();
+		}
+
+		mType = type;
 		mIsShown = true;
 	}
 
@@ -344,27 +466,38 @@ namespace BansheeEngine
 		if(!mIsShown)
 			return;
 
-		mBasicAreaLabels->disable();
-		mPreciseAreaLabels->disable();
-		mBasicAreaContents->disable();
-		mPreciseAreaContents->disable();
+		mCPUBasicAreaLabels->disable();
+		mCPUPreciseAreaLabels->disable();
+		mCPUBasicAreaContents->disable();
+		mCPUPreciseAreaContents->disable();
+		mGPUAreaFrameContents->disable();
+		mGPUAreaFrameSamples->disable();
 		mIsShown = false;
 	}
 
 	void ProfilerOverlay::update()
 	{
-		const ProfilerReport& latestSimReport = Profiler::instance().getReport(ProfiledThread::Sim);
-		const ProfilerReport& latestCoreReport = Profiler::instance().getReport(ProfiledThread::Core);
+		const ProfilerReport& latestSimReport = ProfilingManager::instance().getReport(ProfiledThread::Sim);
+		const ProfilerReport& latestCoreReport = ProfilingManager::instance().getReport(ProfiledThread::Core);
+
+		updateCPUSampleContents(latestSimReport, latestCoreReport);
 
-		updateContents(latestSimReport, latestCoreReport);
+		while (ProfilerGPU::instance().getNumAvailableReports() > 1)
+			ProfilerGPU::instance().getNextReport(); // Drop any extra reports, we only want the latest
+
+		if (ProfilerGPU::instance().getNumAvailableReports() > 0)
+		{
+			updateGPUSampleContents(ProfilerGPU::instance().getNextReport());
+		}
 	}
 
 	void ProfilerOverlay::targetResized()
 	{
-		updateAreaSizes();
+		updateCPUSampleAreaSizes(); // Reposition and resize the CPU sample areas
+		updateGPUSampleAreaSizes(); // Reposition and resize the GPU frame contents and frame samples areas
 	}
 
-	void ProfilerOverlay::updateAreaSizes()
+	void ProfilerOverlay::updateCPUSampleAreaSizes()
 	{
 		static const INT32 PADDING = 10;
 		static const float LABELS_CONTENT_RATIO = 0.3f;
@@ -375,20 +508,38 @@ namespace BansheeEngine
 		UINT32 labelsWidth = Math::ceilToInt(width * LABELS_CONTENT_RATIO);
 		UINT32 contentWidth = width - labelsWidth;
 
-		mBasicAreaLabels->setPosition(PADDING, PADDING);
-		mBasicAreaLabels->setSize(labelsWidth, height);
+		mCPUBasicAreaLabels->setPosition(PADDING, PADDING);
+		mCPUBasicAreaLabels->setSize(labelsWidth, height);
 
-		mPreciseAreaLabels->setPosition(PADDING, height + PADDING * 2);
-		mPreciseAreaLabels->setSize(labelsWidth, height);
+		mCPUPreciseAreaLabels->setPosition(PADDING, height + PADDING * 2);
+		mCPUPreciseAreaLabels->setSize(labelsWidth, height);
 
-		mBasicAreaContents->setPosition(PADDING + labelsWidth, PADDING);
-		mBasicAreaContents->setSize(contentWidth, height);
+		mCPUBasicAreaContents->setPosition(PADDING + labelsWidth, PADDING);
+		mCPUBasicAreaContents->setSize(contentWidth, height);
 
-		mPreciseAreaContents->setPosition(PADDING + labelsWidth, height + PADDING * 2);
-		mPreciseAreaContents->setSize(contentWidth, height);
+		mCPUPreciseAreaContents->setPosition(PADDING + labelsWidth, height + PADDING * 2);
+		mCPUPreciseAreaContents->setSize(contentWidth, height);
 	}
 
-	void ProfilerOverlay::updateContents(const ProfilerReport& simReport, const ProfilerReport& coreReport)
+	/**
+	 * @brief	Positions and sizes the two GPU areas inside the target. The frame contents area is
+	 *			placed at the top and the frame samples area below it, with padding around and between
+	 *			them. The available height is split between the areas using SAMPLES_FRAME_RATIO.
+	 */
+	void ProfilerOverlay::updateGPUSampleAreaSizes()
+	{
+		static const INT32 PADDING = 10;
+		static const float SAMPLES_FRAME_RATIO = 0.5f;
+
+		// Convert to signed before subtracting the padding, so a target smaller than the padding
+		// yields a negative value that is clamped to zero
+		UINT32 width = (UINT32)std::max(0, (INT32)mTarget->getWidth() - PADDING * 2);
+		UINT32 height = (UINT32)std::max(0, (INT32)mTarget->getHeight() - PADDING * 3);
+
+		UINT32 frameHeight = Math::ceilToInt(height * SAMPLES_FRAME_RATIO);
+		UINT32 samplesHeight = height - frameHeight;
+
+		mGPUAreaFrameContents->setPosition(PADDING, PADDING);
+		mGPUAreaFrameContents->setSize(width, frameHeight);
+
+		// Samples start one padding below the bottom edge of the frame contents area
+		mGPUAreaFrameSamples->setPosition(PADDING, PADDING + frameHeight + PADDING);
+		mGPUAreaFrameSamples->setSize(width, samplesHeight);
+	}
+
+	void ProfilerOverlay::updateCPUSampleContents(const ProfilerReport& simReport, const ProfilerReport& coreReport)
 	{
 		static const UINT32 NUM_ROOT_ENTRIES = 2;
 
@@ -478,4 +629,37 @@ namespace BansheeEngine
 			}
 		}
 	}
+
+	/**
+	 * @brief	Refreshes the GPU view from the provided report: writes the per-frame statistics into
+	 *			the frame strings and then fills one sample row per entry in the report's sample list.
+	 *
+	 * @param	gpuReport	GPU profiler report to display.
+	 */
+	void ProfilerOverlay::updateGPUSampleContents(const GPUProfilerReport& gpuReport)
+	{
+		const auto& frame = gpuReport.frameSample;
+
+		// Strings shown in the left column of the frame contents area
+		mGPUFrameNumStr.setParameter(0, toWString((UINT64)gTime().getCurrentFrameNumber()));
+		mGPUTimeStr.setParameter(0, toWString(frame.timeMs));
+		mGPUDrawCallsStr.setParameter(0, toWString(frame.numDrawCalls));
+		mGPURenTargetChangesStr.setParameter(0, toWString(frame.numRenderTargetChanges));
+		mGPUPresentsStr.setParameter(0, toWString(frame.numPresents));
+		mGPUClearsStr.setParameter(0, toWString(frame.numClears));
+		mGPUVerticesStr.setParameter(0, toWString(frame.numVertices));
+		mGPUPrimitivesStr.setParameter(0, toWString(frame.numPrimitives));
+		mGPUSamplesStr.setParameter(0, toWString(frame.numDrawnSamples));
+		mGPUBlendStateChangesStr.setParameter(0, toWString(frame.numBlendStateChanges));
+		mGPURasterStateChangesStr.setParameter(0, toWString(frame.numRasterizerStateChanges));
+		mGPUDepthStencilStateChangesStr.setParameter(0, toWString(frame.numDepthStencilStateChanges));
+
+		// Strings shown in the right column of the frame contents area
+		mGPUObjectsCreatedStr.setParameter(0, toWString(frame.numObjectsCreated));
+		mGPUObjectsDestroyedStr.setParameter(0, toWString(frame.numObjectsDestroyed));
+		mGPUResourceWritesStr.setParameter(0, toWString(frame.numResourceWrites));
+		mGPUResourceReadsStr.setParameter(0, toWString(frame.numResourceReads));
+		mGPUTextureBindsStr.setParameter(0, toWString(frame.numTextureBinds));
+		mGPUSamplerBindsStr.setParameter(0, toWString(frame.numSamplerBinds));
+		mGPUVertexBufferBindsStr.setParameter(0, toWString(frame.numVertexBufferBinds));
+		mGPUIndexBufferBindsStr.setParameter(0, toWString(frame.numIndexBufferBinds));
+		mGPUGPUProgramBufferBindsStr.setParameter(0, toWString(frame.numGpuParamBufferBinds));
+		mGPUGPUProgramBindsStr.setParameter(0, toWString(frame.numGpuProgramBinds));
+
+		// One row per sample; the filler finishes its work when it goes out of scope at the end of this function
+		GPUSampleRowFiller rowFiller(mGPUSampleRows, *mGPULayoutSamples, *mWidget);
+		for (auto sampleIter = gpuReport.samples.begin(); sampleIter != gpuReport.samples.end(); ++sampleIter)
+			rowFiller.addData(sampleIter->name, sampleIter->timeMs);
+	}
 }

+ 11 - 11
BansheeEngine/Source/BsTextSprite.cpp

@@ -4,7 +4,7 @@
 #include "BsFont.h"
 #include "BsVector2.h"
 
-#include "BsProfiler.h" // PROFILING ONLY
+#include "BsProfilerCPU.h" // PROFILING ONLY
 
 namespace BansheeEngine
 {
@@ -15,11 +15,11 @@ namespace BansheeEngine
 
 	void TextSprite::update(const TEXT_SPRITE_DESC& desc)
 	{
-		gProfiler().beginSample("textUpdateA");
+		gProfilerCPU().beginSample("textUpdateA");
 		TextData textData(desc.text, desc.font, desc.fontSize, desc.width, desc.height, desc.wordWrap);
-		gProfiler().endSample("textUpdateA");
+		gProfilerCPU().endSample("textUpdateA");
 
-		gProfiler().beginSample("textUpdateB");
+		gProfilerCPU().beginSample("textUpdateB");
 
 		UINT32 numLines = textData.getNumLines();
 		UINT32 numPages = textData.getNumPages();
@@ -53,8 +53,8 @@ namespace BansheeEngine
 		if(mCachedRenderElements.size() != numPages)
 			mCachedRenderElements.resize(numPages);
 
-		gProfiler().endSample("textUpdateB");
-		gProfiler().beginSample("textUpdateC");
+		gProfilerCPU().endSample("textUpdateB");
+		gProfilerCPU().beginSample("textUpdateC");
 
 		// Actually generate a mesh
 		UINT32 texPage = 0;
@@ -104,8 +104,8 @@ namespace BansheeEngine
 			texPage++;
 		}
 
-		gProfiler().instance().endSample("textUpdateC");
-		gProfiler().instance().beginSample("textUpdateD");
+		gProfilerCPU().instance().endSample("textUpdateC");
+		gProfilerCPU().instance().beginSample("textUpdateD");
 
 		// Calc alignment and anchor offsets and set final line positions
 		for(UINT32 j = 0; j < numPages; j++)
@@ -116,12 +116,12 @@ namespace BansheeEngine
 				renderElem.vertices, renderElem.uvs, renderElem.indexes, renderElem.numQuads);
 		}
 
-		gProfiler().instance().endSample("textUpdateD");
-		gProfiler().instance().beginSample("textUpdateE");
+		gProfilerCPU().instance().endSample("textUpdateD");
+		gProfilerCPU().instance().beginSample("textUpdateE");
 
 		updateBounds();
 
-		gProfiler().instance().endSample("textUpdateE");
+		gProfilerCPU().instance().endSample("textUpdateE");
 	}
 
 	UINT32 TextSprite::genTextQuads(UINT32 page, const TextData& textData, UINT32 width, UINT32 height, 

+ 19 - 19
BansheeRenderer/Source/BsBansheeRenderer.cpp

@@ -19,7 +19,7 @@
 #include "BsGUIManager.h"
 #include "BsCoreThread.h"
 
-#include "BsProfiler.h"
+#include "BsProfilerCPU.h"
 
 namespace BansheeEngine
 {
@@ -41,7 +41,7 @@ namespace BansheeEngine
 
 	void BansheeRenderer::renderAll() 
 	{
-		gProfiler().beginSample("renderA");
+		gProfilerCPU().beginSample("renderA");
 
 		gBsSceneManager().updateRenderableBounds();
 
@@ -83,8 +83,8 @@ namespace BansheeEngine
 			std::sort(begin(cameras), end(cameras), cameraComparer);
 		}
 
-		gProfiler().endSample("renderA");
-		gProfiler().beginSample("renderB");
+		gProfilerCPU().endSample("renderA");
+		gProfilerCPU().beginSample("renderB");
 
 		// Render everything, target by target
 		for(auto& camerasPerTarget : camerasPerRenderTarget)
@@ -119,12 +119,12 @@ namespace BansheeEngine
 			coreAccessor.swapBuffers(target);
 		}
 
-		gProfiler().endSample("renderB");
+		gProfilerCPU().endSample("renderB");
 	}
 
 	void BansheeRenderer::render(const HCamera& camera) 
 	{
-		gProfiler().beginSample("renderC");
+		gProfilerCPU().beginSample("renderC");
 
 		Vector<HRenderable> allRenderables;
 		
@@ -141,8 +141,8 @@ namespace BansheeEngine
 
 		mRenderQueue->clear();
 
-		gProfiler().endSample("renderC");
-		gProfiler().beginSample("renderD");
+		gProfilerCPU().endSample("renderC");
+		gProfilerCPU().beginSample("renderD");
 
 		// Get scene render operations
 		for(auto iter = allRenderables.begin(); iter != allRenderables.end(); ++iter)
@@ -150,8 +150,8 @@ namespace BansheeEngine
 			(*iter)->render(*mRenderQueue, viewProjMatrix);
 		}
 
-		gProfiler().endSample("renderD");
-		gProfiler().beginSample("renderE");
+		gProfilerCPU().endSample("renderD");
+		gProfilerCPU().beginSample("renderE");
 
 		// Get GUI render operations
 		GUIManager::instance().render(camera->getViewport(), *mRenderQueue);
@@ -163,8 +163,8 @@ namespace BansheeEngine
 		DrawHelper3D::instance().render(camera, *mRenderQueue);
 		DrawHelper2D::instance().render(camera, *mRenderQueue);
 
-		gProfiler().endSample("renderE");
-		gProfiler().beginSample("renderF");
+		gProfilerCPU().endSample("renderE");
+		gProfilerCPU().beginSample("renderF");
 
 		// Get any operations from hooked up callbacks
 		const Viewport* viewportRawPtr = camera->getViewport().get();
@@ -177,30 +177,30 @@ namespace BansheeEngine
 		mRenderQueue->sort();
 		const Vector<SortedRenderOp>& sortedROps =  mRenderQueue->getSortedRenderOps();
 
-		gProfiler().endSample("renderF");
+		gProfilerCPU().endSample("renderF");
 
 		for(auto iter = sortedROps.begin(); iter != sortedROps.end(); ++iter)
 		{
-			gProfiler().beginSample("renderG");
+			gProfilerCPU().beginSample("renderG");
 
 			const RenderOperation& renderOp = *iter->baseOperation;
 			MaterialPtr material = renderOp.material;
 
-			gProfiler().endSample("renderG");
-			gProfiler().beginSample("renderH");
+			gProfilerCPU().endSample("renderG");
+			gProfilerCPU().beginSample("renderH");
 
 			PassPtr pass = material->getPass(iter->passIdx);
 			PassParametersPtr paramsPtr = material->getPassParameters(iter->passIdx);
 
 			coreAccessor.setPass(pass, paramsPtr);
 
-			gProfiler().endSample("renderH");
-			gProfiler().beginSample("renderI");
+			gProfilerCPU().endSample("renderH");
+			gProfilerCPU().beginSample("renderI");
 
 			const SubMesh& subMesh = renderOp.mesh->getSubMesh(renderOp.submeshIdx);
 			coreAccessor.render(renderOp.mesh, subMesh.indexOffset, subMesh.indexCount, true, subMesh.drawOp);
 
-			gProfiler().endSample("renderI");
+			gProfilerCPU().endSample("renderI");
 		}
 	}
 }

+ 8 - 1
BansheeUtility/Include/BsHString.h

@@ -42,9 +42,16 @@ namespace BansheeEngine
 		/**
 		 * @brief	Creates a new localized string with the specified identifier. If the identifier
 		 * 			doesn't previously exist in the string table, identifier value will also be used 
-		 * 			for initializing the English version of the string.
+		 * 			for initializing the default language version of the string.
 		 */
 		explicit HString(const WString& identifierString);
+
+		/**
+		 * @brief	Creates a new localized string with the specified identifier and sets the default language
+		 * 			version of the string. If a string with that identifier already exists, its default language
+		 * 			string will be updated.
+		 *
+		 * @param	identifierString	Identifier of the string in the string table.
+		 * @param	defaultString		Text to store as the default language version of the string.
+		 */
+		explicit HString(const WString& identifierString, const WString& defaultString);
+
 		HString();
 		HString(const HString& copy);
 		~HString();

+ 14 - 0
BansheeUtility/Source/BsHString.cpp

@@ -57,6 +57,20 @@ namespace BansheeEngine
 		mData->mUpdateConn = mData->mStringData->commonData->onStringDataModified.connect(std::bind(&HString::StringData::updateString, mData.get()));
 	}
 
+	HString::HString(const WString& identifierString, const WString& defaultString)
+	{
+		mData = bs_shared_ptr<StringData>();
+		// Store "defaultString" as the default language text for this identifier in the string table.
+		StringTable::instance().setString(identifierString, StringTable::DEFAULT_LANGUAGE, defaultString);
+		// Keep the address of the table's data for this identifier.
+		mData->mStringData = &StringTable::instance().getStringData(identifierString);
+		// Allocate one WString slot per parameter reported for the string, if it reports any.
+		if (mData->mStringData->numParameters > 0)
+			mData->mParameters = bs_newN<WString>(mData->mStringData->numParameters);
+		// Register StringData::updateString on onStringDataModified and keep the returned connection.
+		mData->mUpdateConn = mData->mStringData->commonData->onStringDataModified.connect(std::bind(&HString::StringData::updateString, mData.get()));
+	}
+
 	HString::HString(const HString& copy)
 	{
 		mData = copy.mData;

+ 1 - 8
Polish.txt

@@ -3,9 +3,8 @@
 There's still a crash regarding an uninitialized mCachedPtr on a C# class when shutting down. Attempt to find consistent repro steps.
 
 Finish GPUProfiler:
- - Actually implement sampling and report generation
- - Add timer and occlusion queries
  - Resource writes/reads/creation/destruction are not currently incremented in RenderStats
+ - Test overlay and add title labels for samples
 
  ---------------------------
 
@@ -16,7 +15,6 @@ Finish GPUProfiler:
  Fullscreen stuff:
 
 I should be able to specify resolution when going to windowed mode
- - Maybe just store the windowed and fullscreen resolutions separately and restore automatically?
  - I have a feeling DX9 might start crashing once I resize to sub-fullscreen res and go back to FS - test it
 
  Add VSync toggle to RenderWindow if it doesn't already exist.
@@ -28,11 +26,6 @@ DISREGARD MONITOR INDEX ON DX9
 
  -----------------------------
 
-Consider renaming Profiler to CPUProfiler and CPUProfiler to something else. Since now I have GPUProfiler it's confusing to have one named just Profiler.
-Profiler can only be called from sim/core thread which is also a bit weird.
-
-Find and rename any other CM_ defines
-
 Rename CamelotOIS external library
 
 Refactor Viewport as it is used on both core and sim threads and it has no locking of any kind.