Ver código fonte

Circular buffer in ProfilerGPU to prevent memory leak

MarcoROG 8 anos atrás
pai
commit
b0da22f1c2

+ 55 - 50
Source/BansheeCore/Include/BsProfilerGPU.h

@@ -9,8 +9,8 @@
 namespace bs
 namespace bs
 {
 {
 	/** @addtogroup Profiling
 	/** @addtogroup Profiling
-	 *  @{
-	 */
+	*  @{
+	*/
 
 
 	/** Contains various profiler statistics about a single GPU profiling sample. */
 	/** Contains various profiler statistics about a single GPU profiling sample. */
 	struct GPUProfileSample
 	struct GPUProfileSample
@@ -48,10 +48,10 @@ namespace bs
 	};
 	};
 
 
 	/**
 	/**
-	 * Profiler that measures time and amount of various GPU operations.
-	 *
-	 * @note	Core thread only except where noted otherwise.
-	 */
+	* Profiler that measures time and amount of various GPU operations.
+	*
+	* @note	Core thread only except where noted otherwise.
+	*/
 	class BS_CORE_EXPORT ProfilerGPU : public Module<ProfilerGPU>
 	class BS_CORE_EXPORT ProfilerGPU : public Module<ProfilerGPU>
 	{
 	{
 	private:
 	private:
@@ -72,67 +72,68 @@ namespace bs
 
 
 	public:
 	public:
 		ProfilerGPU();
 		ProfilerGPU();
+		~ProfilerGPU();
 
 
 		/**
 		/**
-		 * Signals a start of a new frame. Every frame will generate a separate profiling report. This call must be followed
-		 * by endFrame(), and any sampling operations must happen between beginFrame() and endFrame().
-		 */
+		* Signals a start of a new frame. Every frame will generate a separate profiling report. This call must be followed
+		* by endFrame(), and any sampling operations must happen between beginFrame() and endFrame().
+		*/
 		void beginFrame();
 		void beginFrame();
 
 
 		/**
 		/**
-		 * Signals an end of the currently sampled frame. Results of the sampling will be available once 
-		 * getNumAvailableReports increments. This may take a while as the sampling is scheduled on the core thread and 
-		 * on the GPU.
-		 */
+		* Signals an end of the currently sampled frame. Results of the sampling will be available once
+		* getNumAvailableReports increments. This may take a while as the sampling is scheduled on the core thread and
+		* on the GPU.
+		*/
 		void endFrame();
 		void endFrame();
 
 
 		/**
 		/**
-		 * Begins sample measurement. Must be followed by endSample().
-		 *
-		 * @param[in]	name	Unique name for the sample you can later use to find the sampling data.
-		 *
-		 * @note	Must be called between beginFrame()/endFrame() calls.
-		 */
+		* Begins sample measurement. Must be followed by endSample().
+		*
+		* @param[in]	name	Unique name for the sample you can later use to find the sampling data.
+		*
+		* @note	Must be called between beginFrame()/endFrame() calls.
+		*/
 		void beginSample(const ProfilerString& name);
 		void beginSample(const ProfilerString& name);
 
 
 		/**
 		/**
-		 * Ends sample measurement.
-		 *
-		 * @param[in]	name	Unique name for the sample.
-		 *
-		 * @note	
-		 * Unique name is primarily needed to more easily identify mismatched begin/end sample pairs. Otherwise the name in 
-		 * beginSample() would be enough. Must be called between beginFrame()/endFrame() calls.
-		 */
+		* Ends sample measurement.
+		*
+		* @param[in]	name	Unique name for the sample.
+		*
+		* @note
+		* Unique name is primarily needed to more easily identify mismatched begin/end sample pairs. Otherwise the name in
+		* beginSample() would be enough. Must be called between beginFrame()/endFrame() calls.
+		*/
 		void endSample(const ProfilerString& name);
 		void endSample(const ProfilerString& name);
 
 
 		/**
 		/**
-		 * Returns number of profiling reports that are ready but haven't been retrieved yet. 
-		 *
-		 * @note	
-		 * There is an internal limit of maximum number of available reports, where oldest ones will get deleted so make 
-		 * sure to call this often if you don't want to miss some.
-		 * @note
-		 * Thread safe.
-		 */
+		* Returns number of profiling reports that are ready but haven't been retrieved yet.
+		*
+		* @note
+		* There is an internal limit of maximum number of available reports, where oldest ones will get deleted so make
+		* sure to call this often if you don't want to miss some.
+		* @note
+		* Thread safe.
+		*/
 		UINT32 getNumAvailableReports();
 		UINT32 getNumAvailableReports();
 
 
 		/**
 		/**
-		 * Gets the oldest report available and removes it from the internal list. Throws an exception if no reports are 
-		 * available.
-		 *
-		 * @note	Thread safe.
-		 */
+		* Gets the oldest report available and removes it from the internal list. Throws an exception if no reports are
+		* available.
+		*
+		* @note	Thread safe.
+		*/
 		GPUProfilerReport getNextReport();
 		GPUProfilerReport getNextReport();
 
 
 	public: // ***** INTERNAL ******
 	public: // ***** INTERNAL ******
-		/** @name Internal
-		 *  @{
-		 */
+			/** @name Internal
+			*  @{
+			*/
 
 
-		/**
-		 * To be called once per frame from the Core thread.
-		 */
+			/**
+			* To be called once per frame from the Core thread.
+			*/
 		void _update();
 		void _update();
 
 
 		/** @} */
 		/** @} */
@@ -151,9 +152,9 @@ namespace bs
 		SPtr<ct::OcclusionQuery> getOcclusionQuery() const;
 		SPtr<ct::OcclusionQuery> getOcclusionQuery() const;
 
 
 		/**
 		/**
-		 * Interprets the active frame results and generates a profiler report for the frame. Provided frame queries must 
-		 * have finished before calling this.
-		 */
+		* Interprets the active frame results and generates a profiler report for the frame. Provided frame queries must
+		* have finished before calling this.
+		*/
 		GPUProfilerReport resolveFrame(ActiveFrame& frame);
 		GPUProfilerReport resolveFrame(ActiveFrame& frame);
 
 
 		/** Resolves an active sample and converts it to report sample. */
 		/** Resolves an active sample and converts it to report sample. */
@@ -165,7 +166,11 @@ namespace bs
 		Stack<UINT32> mActiveSampleIndexes;
 		Stack<UINT32> mActiveSampleIndexes;
 
 
 		Queue<ActiveFrame> mUnresolvedFrames;
 		Queue<ActiveFrame> mUnresolvedFrames;
-		Queue<GPUProfilerReport> mReadyReports;
+		GPUProfilerReport* mReadyReports;
+
+		static const UINT32 MAX_QUEUE_ELEMENTS;
+		UINT32 mReportHeadPos;
+		UINT32 mReportCount;
 
 
 		mutable Stack<SPtr<ct::TimerQuery>> mFreeTimerQueries;
 		mutable Stack<SPtr<ct::TimerQuery>> mFreeTimerQueries;
 		mutable Stack<SPtr<ct::OcclusionQuery>> mFreeOcclusionQueries;
 		mutable Stack<SPtr<ct::OcclusionQuery>> mFreeOcclusionQueries;

+ 25 - 10
Source/BansheeCore/Source/BsProfilerGPU.cpp

@@ -8,9 +8,18 @@
 
 
 namespace bs
 namespace bs
 {
 {
+	const UINT32 ProfilerGPU::MAX_QUEUE_ELEMENTS = 5;
+
 	ProfilerGPU::ProfilerGPU()
 	ProfilerGPU::ProfilerGPU()
-		:mIsFrameActive(false)
-	{ }
+		:mIsFrameActive(false), mReadyReports(nullptr), mReportHeadPos(0), mReportCount(0)
+	{
+		mReadyReports = bs_newN<GPUProfilerReport>(MAX_QUEUE_ELEMENTS);
+	}
+
+	ProfilerGPU::~ProfilerGPU()
+	{
+		bs_deleteN(mReadyReports, MAX_QUEUE_ELEMENTS);
+	}
 
 
 	void ProfilerGPU::beginFrame()
 	void ProfilerGPU::beginFrame()
 	{
 	{
@@ -62,7 +71,7 @@ namespace bs
 		ActiveSample& sample = mActiveFrame.samples[lastSampleIdx];
 		ActiveSample& sample = mActiveFrame.samples[lastSampleIdx];
 		if (sample.sampleName != name)
 		if (sample.sampleName != name)
 		{
 		{
-			String errorStr = "Attempting to end a sample that doesn't match. Got: " + 
+			String errorStr = "Attempting to end a sample that doesn't match. Got: " +
 				String(name.c_str()) + ". Expected: " + String(sample.sampleName.c_str());
 				String(name.c_str()) + ". Expected: " + String(sample.sampleName.c_str());
 
 
 			BS_EXCEPT(InvalidStateException, errorStr);
 			BS_EXCEPT(InvalidStateException, errorStr);
@@ -76,18 +85,20 @@ namespace bs
 	{
 	{
 		Lock lock(mMutex);
 		Lock lock(mMutex);
 
 
-		return (UINT32)mReadyReports.size();
+		return mReportCount;
 	}
 	}
 
 
 	GPUProfilerReport ProfilerGPU::getNextReport()
 	GPUProfilerReport ProfilerGPU::getNextReport()
 	{
 	{
 		Lock lock(mMutex);
 		Lock lock(mMutex);
 
 
-		if (mReadyReports.empty())
+		if (mReportCount == 0)
 			BS_EXCEPT(InvalidStateException, "No reports are available.")
 			BS_EXCEPT(InvalidStateException, "No reports are available.")
 
 
-		GPUProfilerReport report = mReadyReports.front();
-		mReadyReports.pop();
+			GPUProfilerReport report = mReadyReports[mReportHeadPos];
+
+		mReportHeadPos = (mReportHeadPos + 1) % MAX_QUEUE_ELEMENTS;
+		mReportCount--;
 
 
 		return report;
 		return report;
 	}
 	}
@@ -107,7 +118,11 @@ namespace bs
 
 
 				{
 				{
 					Lock lock(mMutex);
 					Lock lock(mMutex);
-					mReadyReports.push(report);
+					mReadyReports[(mReportHeadPos + mReportCount) % MAX_QUEUE_ELEMENTS] = report;
+					if (mReportCount == MAX_QUEUE_ELEMENTS)
+						mReportHeadPos = (mReportHeadPos + 1) % MAX_QUEUE_ELEMENTS;
+					else
+						mReportCount++;
 				}
 				}
 			}
 			}
 			else
 			else
@@ -118,7 +133,7 @@ namespace bs
 	GPUProfilerReport ProfilerGPU::resolveFrame(ActiveFrame& frame)
 	GPUProfilerReport ProfilerGPU::resolveFrame(ActiveFrame& frame)
 	{
 	{
 		GPUProfilerReport report;
 		GPUProfilerReport report;
-		
+
 		resolveSample(frame.frameSample, report.frameSample);
 		resolveSample(frame.frameSample, report.frameSample);
 
 
 		for (auto& sample : frame.samples)
 		for (auto& sample : frame.samples)
@@ -145,7 +160,7 @@ namespace bs
 
 
 		reportSample.numVertices = (UINT32)(sample.endStats.numVertices - sample.startStats.numVertices);
 		reportSample.numVertices = (UINT32)(sample.endStats.numVertices - sample.startStats.numVertices);
 		reportSample.numPrimitives = (UINT32)(sample.endStats.numPrimitives - sample.startStats.numPrimitives);
 		reportSample.numPrimitives = (UINT32)(sample.endStats.numPrimitives - sample.startStats.numPrimitives);
-		
+
 		reportSample.numPipelineStateChanges = (UINT32)(sample.endStats.numPipelineStateChanges - sample.startStats.numPipelineStateChanges);
 		reportSample.numPipelineStateChanges = (UINT32)(sample.endStats.numPipelineStateChanges - sample.startStats.numPipelineStateChanges);
 
 
 		reportSample.numGpuParamBinds = (UINT32)(sample.endStats.numGpuParamBinds - sample.startStats.numGpuParamBinds);
 		reportSample.numGpuParamBinds = (UINT32)(sample.endStats.numGpuParamBinds - sample.startStats.numGpuParamBinds);