Przeglądaj źródła

Added core thread profiling

Marko Pintera 12 lat temu
rodzic
commit
aeac82e2b1

+ 1 - 1
BansheeEngine/Include/BsProfilerOverlay.h

@@ -107,6 +107,6 @@ namespace BansheeEngine
 
 		void targetResized();
 		void updateAreaSizes();
-		void updateContents(const CM::ProfilerReport& report);
+		void updateContents(const CM::ProfilerReport& simReport, const CM::ProfilerReport& coreReport);
 	};
 }

+ 49 - 29
BansheeEngine/Source/BsProfilerOverlay.cpp

@@ -369,9 +369,10 @@ namespace BansheeEngine
 
 	void ProfilerOverlay::update() // Refreshes the overlay rows from the most recent sim-thread and core-thread profiler reports.
 	{
-		const ProfilerReport& latestReport = Profiler::instance().getReport();
+		const ProfilerReport& latestSimReport = Profiler::instance().getReport(ProfiledThread::Sim); // idx defaults to 0, i.e. the latest saved report
+		const ProfilerReport& latestCoreReport = Profiler::instance().getReport(ProfiledThread::Core); // NOTE(review): reference into a buffer that updateCore() writes from the core thread - confirm nothing overwrites it while it is read here
 
-		updateContents(latestReport);
+		updateContents(latestSimReport, latestCoreReport);
 	}
 
 	void ProfilerOverlay::targetResized()
@@ -403,10 +404,15 @@ namespace BansheeEngine
 		mPreciseAreaContents->setSize(contentWidth, height);
 	}
 
-	void ProfilerOverlay::updateContents(const ProfilerReport& report)
+	void ProfilerOverlay::updateContents(const CM::ProfilerReport& simReport, const CM::ProfilerReport& coreReport)
 	{
-		const CPUProfilerBasicSamplingEntry& basicRootEntry = report.cpuReport.getBasicSamplingData();
-		const CPUProfilerPreciseSamplingEntry& preciseRootEntry = report.cpuReport.getPreciseSamplingData();
+		static const UINT32 NUM_ROOT_ENTRIES = 2;
+
+		const CPUProfilerBasicSamplingEntry& simBasicRootEntry = simReport.cpuReport.getBasicSamplingData();
+		const CPUProfilerPreciseSamplingEntry& simPreciseRootEntry = simReport.cpuReport.getPreciseSamplingData();
+
+		const CPUProfilerBasicSamplingEntry& coreBasicRootEntry = coreReport.cpuReport.getBasicSamplingData();
+		const CPUProfilerPreciseSamplingEntry& corePreciseRootEntry = coreReport.cpuReport.getPreciseSamplingData();
 
 		struct TodoBasic
 		{
@@ -429,47 +435,61 @@ namespace BansheeEngine
 		};
 
 		BasicRowFiller basicRowFiller(mBasicRows, *mBasicLayoutLabels, *mBasicLayoutContents, *mWidget);
-
 		Stack<TodoBasic>::type todoBasic;
-		todoBasic.push(TodoBasic(basicRootEntry, 0));
 
-		while(!todoBasic.empty())
-		{
-			TodoBasic curEntry = todoBasic.top();
-			todoBasic.pop();
+		const CPUProfilerBasicSamplingEntry* basicRootEntries[NUM_ROOT_ENTRIES];
+		basicRootEntries[0] = &simBasicRootEntry;
+		basicRootEntries[1] = &coreBasicRootEntry;
 
-			const CPUProfilerBasicSamplingEntry::Data& data = curEntry.entry.data;
-			basicRowFiller.addData(curEntry.depth, data.name, data.pctOfParent, data.numCalls, data.avgTimeMs, data.totalTimeMs, 
-				data.maxTimeMs, data.avgSelfTimeMs, data.totalSelfTimeMs, data.estimatedOverheadMs, data.estimatedSelfOverheadMs);
+		for(UINT32 i = 0; i < NUM_ROOT_ENTRIES; i++)
+		{
+			todoBasic.push(TodoBasic(*basicRootEntries[i], 0));
 
-			if(curEntry.depth <= MAX_DEPTH)
+			while(!todoBasic.empty())
 			{
-				for(auto& child : curEntry.entry.childEntries)
+				TodoBasic curEntry = todoBasic.top();
+				todoBasic.pop();
+
+				const CPUProfilerBasicSamplingEntry::Data& data = curEntry.entry.data;
+				basicRowFiller.addData(curEntry.depth, data.name, data.pctOfParent, data.numCalls, data.avgTimeMs, data.totalTimeMs, 
+					data.maxTimeMs, data.avgSelfTimeMs, data.totalSelfTimeMs, data.estimatedOverheadMs, data.estimatedSelfOverheadMs);
+
+				if(curEntry.depth <= MAX_DEPTH)
 				{
-					todoBasic.push(TodoBasic(child, curEntry.depth + 1));
+					for(auto& child : curEntry.entry.childEntries)
+					{
+						todoBasic.push(TodoBasic(child, curEntry.depth + 1));
+					}
 				}
 			}
 		}
 
 		PreciseRowFiller preciseRowFiller(mPreciseRows, *mBasicLayoutLabels, *mBasicLayoutContents, *mWidget);
-
 		Stack<TodoPrecise>::type todoPrecise;
-		todoPrecise.push(TodoPrecise(preciseRootEntry, 0));
 
-		while(!todoBasic.empty())
-		{
-			TodoPrecise curEntry = todoPrecise.top();
-			todoPrecise.pop();
+		const CPUProfilerPreciseSamplingEntry* preciseRootEntries[NUM_ROOT_ENTRIES];
+		preciseRootEntries[0] = &simPreciseRootEntry;
+		preciseRootEntries[1] = &corePreciseRootEntry;
 
-			const CPUProfilerPreciseSamplingEntry::Data& data = curEntry.entry.data;
-			preciseRowFiller.addData(curEntry.depth, data.name, data.pctOfParent, data.numCalls, data.avgCycles, data.totalCycles, 
-				data.maxCycles, data.avgSelfCycles, data.totalSelfCycles, data.estimatedOverhead, data.estimatedSelfOverhead);
+		for(UINT32 i = 0; i < NUM_ROOT_ENTRIES; i++) // walk the sim root first, then the core root
+		{
+			todoPrecise.push(TodoPrecise(*preciseRootEntries[i], 0));
 
-			if(curEntry.depth <= MAX_DEPTH)
+			while(!todoPrecise.empty()) // must test the precise stack; todoBasic is already empty here, so testing it skipped every precise entry
 			{
-				for(auto& child : curEntry.entry.childEntries)
+				TodoPrecise curEntry = todoPrecise.top();
+				todoPrecise.pop();
+
+				const CPUProfilerPreciseSamplingEntry::Data& data = curEntry.entry.data;
+				preciseRowFiller.addData(curEntry.depth, data.name, data.pctOfParent, data.numCalls, data.avgCycles, data.totalCycles, 
+					data.maxCycles, data.avgSelfCycles, data.totalSelfCycles, data.estimatedOverhead, data.estimatedSelfOverhead);
+
+				if(curEntry.depth <= MAX_DEPTH) // children are only queued while within the depth limit
 				{
-					todoPrecise.push(TodoPrecise(child, curEntry.depth + 1));
+					for(auto& child : curEntry.entry.childEntries)
+					{
+						todoPrecise.push(TodoPrecise(child, curEntry.depth + 1));
+					}
 				}
 			}
 		}

+ 10 - 0
CamelotCore/Include/CmApplication.h

@@ -100,6 +100,16 @@ namespace CamelotFramework
 		 * @brief	Called when the frame finishes rendering.
 		 */
 		void frameRenderingFinishedCallback();
+
+		/**
+		 * @brief	Called by the core thread to begin profiling.
+		 */
+		void beginCoreProfiling();
+
+		/**
+		 * @brief	Called by the core thread to end profiling.
+		 */
+		void endCoreProfiling();
 	};
 
 	CM_EXPORT Application& gApplication();

+ 22 - 4
CamelotCore/Include/CmProfiler.h

@@ -11,6 +11,12 @@ namespace CamelotFramework
 		CPUProfilerReport cpuReport;
 	};
 
+	enum class ProfiledThread // Selects which thread's saved reports Profiler::getReport() returns.
+	{
+		Sim, /**< Reports stored by Profiler::update(). */
+		Core /**< Reports stored by Profiler::updateCore(). */
+	};
+
 	class CM_EXPORT Profiler : public Module<Profiler>
 	{
 	public:
@@ -83,7 +89,14 @@ namespace CamelotFramework
 		void update();
 
 		/**
-		 * @brief	Returns a profiler report for the specified frame. 
+		 * @brief	Called every frame from the core thread. Internal method.
+		 * 			
+		 * @note	Only call from core thread.
+		 */
+		void updateCore();
+
+		/**
+		 * @brief	Returns a profiler report for the specified frame, for the specified thread.
 		 *
 		 * @param	idx	Profiler report index, ranging [0, NUM_SAVED_FRAMES - 1]. 0 always returns the latest
 		 * 					 report. Increasing indexes return reports for older and older frames. Out of range
@@ -92,14 +105,19 @@ namespace CamelotFramework
 		 * @note	Profiler reports get updated every frame. Oldest reports that no longer fit in the saved reports buffer
 		 * 			are discarded.
 		 */
-		const ProfilerReport& getReport(UINT32 idx = 0) const;
+		const ProfilerReport& getReport(ProfiledThread thread, UINT32 idx = 0) const;
 
 	private:
 		static const UINT32 NUM_SAVED_FRAMES;
-		ProfilerReport* mSavedReports;
-		UINT32 mNextReportIdx;
+		ProfilerReport* mSavedSimReports;
+		UINT32 mNextSimReportIdx;
+
+		ProfilerReport* mSavedCoreReports;
+		UINT32 mNextCoreReportIdx;
 
 		CPUProfiler* mCPUProfiler;
+
+		CM_MUTEX(mSync);
 	};
 
 	CM_EXPORT Profiler& gProfiler();

+ 13 - 0
CamelotCore/Source/CmApplication.cpp

@@ -105,6 +105,7 @@ namespace CamelotFramework
 			if(!mainLoopCallback.empty())
 				mainLoopCallback();
 
+			gCoreThread().queueCommand(boost::bind(&Application::beginCoreProfiling, this));
 			RendererManager::instance().getActive()->renderAll();
 
 			// Core and sim thread run in lockstep. This will result in a larger input latency than if I was 
@@ -121,6 +122,7 @@ namespace CamelotFramework
 			}
 
 			gCoreThread().queueCommand(boost::bind(&Application::updateMessagePump, this));
+			gCoreThread().queueCommand(boost::bind(&Application::endCoreProfiling, this));
 			mPrimaryCoreAccessor->submitToCoreThread();
 			gCoreThread().queueCommand(boost::bind(&Application::frameRenderingFinishedCallback, this));
 
@@ -150,6 +152,17 @@ namespace CamelotFramework
 		CM_THREAD_NOTIFY_ONE(mFrameRenderingFinishedCondition);
 	}
 
+	void Application::beginCoreProfiling() // Queued as a core thread command ahead of renderAll(); begins the profiler thread named "Core".
+	{
+		gProfiler().beginThread("Core");
+	}
+
+	void Application::endCoreProfiling() // Queued as a core thread command after the message pump command; ends the profiler thread and stores its report.
+	{
+		gProfiler().endThread();
+		gProfiler().updateCore(); // saves the generated report into the core report buffer and resets the CPU profiler
+	}
+
 	void Application::shutDown()
 	{
 		mPrimaryWindow->destroy();

+ 39 - 10
CamelotCore/Source/CmProfiler.cpp

@@ -6,13 +6,15 @@ namespace CamelotFramework
 	const UINT32 Profiler::NUM_SAVED_FRAMES = 200;
 
 	Profiler::Profiler() // Allocates one NUM_SAVED_FRAMES-sized report buffer for the sim thread and one for the core thread.
-		:mSavedReports(nullptr), mCPUProfiler(nullptr), mNextReportIdx(0)
+		:mSavedSimReports(nullptr), mCPUProfiler(nullptr), mNextSimReportIdx(0),
+		mSavedCoreReports(nullptr), mNextCoreReportIdx(0)
 	{
 #if CM_PROFILING_ENABLED
 		mCPUProfiler = cm_new<CPUProfiler>(); // only created when profiling is compiled in; otherwise stays nullptr
 #endif
 
-		mSavedReports = cm_newN<ProfilerReport>(NUM_SAVED_FRAMES);
+		mSavedSimReports = cm_newN<ProfilerReport>(NUM_SAVED_FRAMES); // report buffers are allocated regardless of CM_PROFILING_ENABLED
+		mSavedCoreReports = cm_newN<ProfilerReport>(NUM_SAVED_FRAMES);
 	}
 
 	Profiler::~Profiler()
@@ -20,29 +22,56 @@ namespace CamelotFramework
 		if(mCPUProfiler != nullptr)
 			cm_delete(mCPUProfiler);
 
-		if(mSavedReports != nullptr)
-			cm_deleteN(mSavedReports, NUM_SAVED_FRAMES);
+		if(mSavedSimReports != nullptr)
+			cm_deleteN(mSavedSimReports, NUM_SAVED_FRAMES);
+
+		if(mSavedCoreReports != nullptr)
+			cm_deleteN(mSavedCoreReports, NUM_SAVED_FRAMES);
 	}
 
 	void Profiler::update() // Stores the current CPU profiler report in the next sim report slot, resets the profiler and advances the slot index.
 	{
 #if CM_PROFILING_ENABLED
-		mSavedReports[mNextReportIdx].cpuReport = mCPUProfiler->generateReport();
+		mSavedSimReports[mNextSimReportIdx].cpuReport = mCPUProfiler->generateReport(); // NOTE(review): unlike updateCore(), mSync is not taken here - presumably CPUProfiler keeps per-thread state; confirm
 
 		mCPUProfiler->reset();
 
-		mNextReportIdx = (mNextReportIdx + 1) % NUM_SAVED_FRAMES;
+		mNextSimReportIdx = (mNextSimReportIdx + 1) % NUM_SAVED_FRAMES; // wraps around, so the oldest slot is overwritten next
 #endif
 	}
 
-	const ProfilerReport& Profiler::getReport(UINT32 idx) const
+	void Profiler::updateCore() // Core-thread counterpart of update(): stores the report in the next core report slot while holding mSync.
+	{
+#if CM_PROFILING_ENABLED
+		CM_LOCK_MUTEX(mSync); // the same mutex is taken by getReport() when it reads the core slot index
+		mSavedCoreReports[mNextCoreReportIdx].cpuReport = mCPUProfiler->generateReport();
+
+		mCPUProfiler->reset();
+
+		mNextCoreReportIdx = (mNextCoreReportIdx + 1) % NUM_SAVED_FRAMES; // wraps around, so the oldest slot is overwritten next
+#endif
+	}
+
+	const ProfilerReport& Profiler::getReport(ProfiledThread thread, UINT32 idx) const // Returns the saved report for the given thread, idx frames back (0 = most recently stored).
 	{
 		idx = Math::Clamp(idx, 0U, (UINT32)(NUM_SAVED_FRAMES - 1)); // out-of-range values fall back to the oldest saved slot
 
-		UINT32 reportIdx = mNextReportIdx + (UINT32)((INT32)NUM_SAVED_FRAMES - ((INT32)idx + 1));
-		reportIdx = (reportIdx) % NUM_SAVED_FRAMES;
+		if(thread == ProfiledThread::Core)
+		{
+			CM_LOCK_MUTEX(mSync); // NOTE(review): presumably a scoped lock, so it is released on return while the caller still holds the reference - confirm updateCore() cannot overwrite that slot meanwhile
+
+			UINT32 reportIdx = mNextCoreReportIdx + (UINT32)((INT32)NUM_SAVED_FRAMES - ((INT32)idx + 1)); // NUM_SAVED_FRAMES is added so the value stays non-negative before the modulo
+			reportIdx = (reportIdx) % NUM_SAVED_FRAMES;
+
+			return mSavedCoreReports[reportIdx];
+		}
+		else
+		{
+			UINT32 reportIdx = mNextSimReportIdx + (UINT32)((INT32)NUM_SAVED_FRAMES - ((INT32)idx + 1)); // sim index is read without taking mSync
+			reportIdx = (reportIdx) % NUM_SAVED_FRAMES;
 
-		return mSavedReports[reportIdx];
+			return mSavedSimReports[reportIdx];
+		}
 	}
 
 	Profiler& gProfiler()

+ 3 - 9
TODO.txt

@@ -11,15 +11,9 @@ LONGTERM TODO:
 PROFILER:
  TODO: Profiler is right now including windows.h. I need to work around that but don't feel like bothering with it atm
   - Easy way would be to move CPUProfiler outside of Utility and into Core
-
-When rendering GUI in front of scene view it seems to be rendering before the actual scene!?
-
-Update GUIWidget input handling:
- - Recheck the bridging code, especially windowToBridgedCoords method.
- - Add GUIRenderTexture method and ensure that bridging actually works
- - GUIWidget ownerWindowFocusChanged will also likely need to be replaced
-
-Generating a report only generates it on the active thread. I need a way to generate profiler reports on the render thread as well. Maybe extend CoreThreadAccessor?
+ - When rendering GUI in front of scene view it seems to be rendering before the actual scene!?
+ - Generating a report only generates it on the active thread. I need a way to generate profiler 
+   reports on the render thread as well. Maybe extend CoreThreadAccessor?
 
 
 TODO: Viewport can be modified from the sim thread, but is used on the core thread without any synchronization mechanisms. Maybe add a method that returns VIEWPORT_DATA, and have that used on the core thread.