Browse Source

Profiler containers now use ProfilerAlloc so their allocations aren't counted by the profiler

Marko Pintera 12 years ago
parent
commit
6ad504f1b3
3 changed files with 40 additions and 22 deletions
  1. 19 7
      CamelotCore/Include/CmCPUProfiler.h
  2. 14 14
      CamelotCore/Source/CmCPUProfiler.cpp
  3. 7 1
      Opts.txt

+ 19 - 7
CamelotCore/Include/CmCPUProfiler.h

@@ -8,6 +8,18 @@ namespace CamelotFramework
 
 	typedef std::basic_string<char, std::char_traits<char>, StdAlloc<char, ProfilerAlloc>> ProfilerString; 
 
+	template <typename T, typename A = StdAlloc<T, ProfilerAlloc>> 
+	struct ProfilerVector 
+	{ 
+		typedef typename std::vector<T, A> type;    
+	}; 
+
+	template <typename T, typename A = StdAlloc<T, ProfilerAlloc>> 
+	struct ProfilerStack
+	{ 
+		typedef typename std::stack<T, std::deque<T, A>> type;    
+	}; 
+
 	/**
 	 * @brief	Provides various performance measuring methods
 	 * 			
@@ -72,7 +84,7 @@ namespace CamelotFramework
 
 		struct ProfileData
 		{
-			Vector<ProfileSample>::type samples;
+			ProfilerVector<ProfileSample>::type samples;
 			Timer timer;
 
 			UINT64 memAllocs;
@@ -85,7 +97,7 @@ namespace CamelotFramework
 
 		struct PreciseProfileData
 		{
-			Vector<PreciseProfileSample>::type samples;
+			ProfilerVector<PreciseProfileSample>::type samples;
 			TimerPrecise timer;
 
 			UINT64 memAllocs;
@@ -109,7 +121,7 @@ namespace CamelotFramework
 			ProfileData basic;
 			PreciseProfileData precise;
 
-			Vector<ProfiledBlock*>::type children;
+			ProfilerVector<ProfiledBlock*>::type children;
 
 			ProfiledBlock* findChild(const ProfilerString& name) const;
 		};
@@ -143,7 +155,7 @@ namespace CamelotFramework
 
 			ProfiledBlock* rootBlock;
 
-			Stack<ActiveBlock>::type activeBlocks;
+			ProfilerStack<ActiveBlock>::type activeBlocks;
 			ActiveBlock activeBlock;
 
 			void begin(const ProfilerString& _name);
@@ -232,7 +244,7 @@ namespace CamelotFramework
 		UINT64 mBasicSamplingOverheadCycles;
 		UINT64 mPreciseSamplingOverheadCycles;
 
-		Vector<ThreadInfo*>::type mActiveThreads;
+		ProfilerVector<ThreadInfo*>::type mActiveThreads;
 		CM_MUTEX(mThreadSync);
 
 		void estimateTimerOverhead();
@@ -263,7 +275,7 @@ namespace CamelotFramework
 			float pctOfParent;
 		} data;
 
-		Vector<CPUProfilerBasicSamplingEntry>::type childEntries;
+		ProfilerVector<CPUProfilerBasicSamplingEntry>::type childEntries;
 	};
 
 	struct CM_EXPORT CPUProfilerPreciseSamplingEntry
@@ -291,7 +303,7 @@ namespace CamelotFramework
 			float pctOfParent;
 		} data;
 
-		Vector<CPUProfilerPreciseSamplingEntry>::type childEntries;
+		ProfilerVector<CPUProfilerPreciseSamplingEntry>::type childEntries;
 	};
 
 	class CM_EXPORT CPUProfilerReport

+ 14 - 14
CamelotCore/Source/CmCPUProfiler.cpp

@@ -170,7 +170,7 @@ namespace CamelotFramework
 		}
 
 		isActive = false;
-		activeBlocks = Stack<ActiveBlock>::type();
+		activeBlocks = ProfilerStack<ActiveBlock>::type();
 		activeBlock = ActiveBlock();
 	}
 
@@ -431,15 +431,15 @@ namespace CamelotFramework
 
 			ProfiledBlock* parentBlock;
 			UINT32 entryIdx;
-			Vector<UINT32>::type childIndexes;
+			ProfilerVector<UINT32>::type childIndexes;
 		};
 
-		Vector<CPUProfilerBasicSamplingEntry>::type basicEntries;
-		Vector<CPUProfilerPreciseSamplingEntry>::type preciseEntries;	
+		ProfilerVector<CPUProfilerBasicSamplingEntry>::type basicEntries;
+		ProfilerVector<CPUProfilerPreciseSamplingEntry>::type preciseEntries;	
 
 		// Fill up flatHierarchy array in a way so we always process children before parents
-		Stack<UINT32>::type todo;
-		Vector<TempEntry>::type flatHierarchy;
+		ProfilerStack<UINT32>::type todo;
+		ProfilerVector<TempEntry>::type flatHierarchy;
 
 		UINT32 entryIdx = 0;
 		todo.push(entryIdx);
@@ -558,9 +558,9 @@ namespace CamelotFramework
 		}
 
 		// Prune empty basic entries
-		Stack<UINT32>::type finalBasicHierarchyTodo;
-		Stack<UINT32>::type parentBasicEntryIndexes;
-		Vector<TempEntry>::type newBasicEntries;
+		ProfilerStack<UINT32>::type finalBasicHierarchyTodo;
+		ProfilerStack<UINT32>::type parentBasicEntryIndexes;
+		ProfilerVector<TempEntry>::type newBasicEntries;
 
 		finalBasicHierarchyTodo.push(0);
 
@@ -600,7 +600,7 @@ namespace CamelotFramework
 
 		if(newBasicEntries.size() > 0)
 		{
-			Vector<CPUProfilerBasicSamplingEntry*>::type finalBasicEntries;
+			ProfilerVector<CPUProfilerBasicSamplingEntry*>::type finalBasicEntries;
 
 			report.mBasicSamplingRootEntry = basicEntries[newBasicEntries[0].entryIdx];
 			finalBasicEntries.push_back(&report.mBasicSamplingRootEntry);
@@ -632,9 +632,9 @@ namespace CamelotFramework
 		}
 
 		// Prune empty precise entries
-		Stack<UINT32>::type finalPreciseHierarchyTodo;
-		Stack<UINT32>::type parentPreciseEntryIndexes;
-		Vector<TempEntry>::type newPreciseEntries;
+		ProfilerStack<UINT32>::type finalPreciseHierarchyTodo;
+		ProfilerStack<UINT32>::type parentPreciseEntryIndexes;
+		ProfilerVector<TempEntry>::type newPreciseEntries;
 
 		finalPreciseHierarchyTodo.push(0);
 
@@ -674,7 +674,7 @@ namespace CamelotFramework
 
 		if(newPreciseEntries.size() > 0)
 		{
-			Vector<CPUProfilerPreciseSamplingEntry*>::type finalPreciseEntries;
+			ProfilerVector<CPUProfilerPreciseSamplingEntry*>::type finalPreciseEntries;
 
 			report.mPreciseSamplingRootEntry = preciseEntries[newPreciseEntries[0].entryIdx];
 			finalPreciseEntries.push_back(&report.mPreciseSamplingRootEntry);

+ 7 - 1
Opts.txt

@@ -27,4 +27,10 @@ FrameAlloc
 
 Make sure BindableGpuParams are not copyable
 
-CPUProfiler still uses normal allocator for containers
+CPUProfiler still uses normal allocator for containers
+
+Refactor stack allocator so I don't need to manually initialize heaps
+ - Use atomic variable counter?
+
+ There is a shit-ton of allocations in CPUProfiler (especially report generation)
+  - Maybe attempt to cut down on them?