Browse Source

Removed GetProcessorTicksPerSecond to avoid exposing code that does not 100% reliably work across platforms (#707)

Jorrit Rouwe 1 year ago
parent
commit
d44f4bad08

+ 6 - 0
Build/CMakeLists.txt

@@ -44,6 +44,12 @@ option(USE_FMADD "Enable FMADD" ON)
 # Enable all warnings
 option(ENABLE_ALL_WARNINGS "Enable all warnings and warnings as errors" ON)
 
+# Setting to periodically trace broadphase stats to help determine if the broadphase layer configuration is optimal
+option(TRACK_BROADPHASE_STATS "Track Broadphase Stats" OFF)
+
+# Setting to periodically trace narrowphase stats to help determine which collision queries could be optimized
+option(TRACK_NARROWPHASE_STATS "Track Narrowphase Stats" OFF)
+
 include(CMakeDependentOption)
 
 # Ability to toggle between the static and DLL versions of the MSVC runtime library

+ 3 - 3
Jolt/Core/Core.h

@@ -315,13 +315,13 @@
 	// Configuration for a popular game console.
 	// This file is not distributed because it would violate an NDA.
 	// Creating one should only be a couple of minutes of work if you have the documentation for the platform
-	// (you only need to define JPH_BREAKPOINT, JPH_PLATFORM_BLUE_GET_TICKS and JPH_PLATFORM_BLUE_GET_TICK_FREQUENCY and include the right header).
+	// (you only need to define JPH_BREAKPOINT, JPH_PLATFORM_BLUE_GET_TICKS, JPH_PLATFORM_BLUE_MUTEX*, JPH_PLATFORM_BLUE_RWLOCK* and include the right header).
 	#include <Jolt/Core/PlatformBlue.h>
 #elif defined(JPH_PLATFORM_LINUX) || defined(JPH_PLATFORM_ANDROID) || defined(JPH_PLATFORM_MACOS) || defined(JPH_PLATFORM_IOS)
 	#if defined(JPH_CPU_X86)
-		#define JPH_BREAKPOINT		__asm volatile ("int $0x3")
+		#define JPH_BREAKPOINT	__asm volatile ("int $0x3")
 	#elif defined(JPH_CPU_ARM)
-		#define JPH_BREAKPOINT		__builtin_trap()
+		#define JPH_BREAKPOINT	__builtin_trap()
 	#endif
 #elif defined(JPH_PLATFORM_WASM)
 	#define JPH_BREAKPOINT		do { } while (false) // Not supported

+ 14 - 0
Jolt/Core/Profiler.cpp

@@ -41,6 +41,20 @@ Profiler *Profiler::sInstance = nullptr;
 
 bool ProfileMeasurement::sOutOfSamplesReported = false;
 
+void Profiler::UpdateReferenceTime()
+{
+	mReferenceTick = GetProcessorTickCount();
+	mReferenceTime = std::chrono::high_resolution_clock::now();
+}
+
+uint64 Profiler::GetProcessorTicksPerSecond() const
+{
+	uint64 ticks = GetProcessorTickCount();
+	std::chrono::high_resolution_clock::time_point time = std::chrono::high_resolution_clock::now();
+
+	return (ticks - mReferenceTick) * 1000000000ULL / std::chrono::duration_cast<std::chrono::nanoseconds>(time - mReferenceTime).count();
+}
+
 void Profiler::NextFrame()
 {
 	std::lock_guard lock(mLock);

+ 12 - 0
Jolt/Core/Profiler.h

@@ -6,6 +6,7 @@
 
 JPH_SUPPRESS_WARNINGS_STD_BEGIN
 #include <mutex>
+#include <chrono>
 JPH_SUPPRESS_WARNINGS_STD_END
 
 #include <Jolt/Core/NonCopyable.h>
@@ -79,6 +80,9 @@ class JPH_EXPORT Profiler : public NonCopyable
 public:
 	JPH_OVERRIDE_NEW_DELETE
 
+	/// Constructor
+								Profiler()															{ UpdateReferenceTime(); }
+
 	/// Increments the frame counter to provide statistics per frame
 	void						NextFrame();
 
@@ -145,12 +149,20 @@ private:
 	/// Helper function to aggregate profile sample data
 	static void					sAggregate(int inDepth, uint32 inColor, ProfileSample *&ioSample, const ProfileSample *inEnd, Aggregators &ioAggregators, KeyToAggregator &ioKeyToAggregator);
 
+	/// Resets the reference time point against which the amount of ticks per second is measured
+	void						UpdateReferenceTime();
+
+	/// Get the amount of ticks per second. Note that this number will never be fully accurate as the amount of ticks per second may vary with CPU load, so it should only be used to give an indication of time for profiling purposes
+	uint64						GetProcessorTicksPerSecond() const;
+
 	/// Dump profiling statistics
 	void						DumpInternal();
 	void						DumpList(const char *inTag, const Aggregators &inAggregators);
 	void						DumpChart(const char *inTag, const Threads &inThreads, const KeyToAggregator &inKeyToAggregators, const Aggregators &inAggregators);
 
 	std::mutex					mLock;																///< Lock that protects mThreads
+	uint64						mReferenceTick;														///< Tick count at the start of the frame
+	std::chrono::high_resolution_clock::time_point mReferenceTime;									///< Time at the start of the frame
 	Array<ProfileThread *>		mThreads;															///< List of all active threads
 	bool						mDump = false;														///< When true, the samples are dumped next frame
 	String						mDumpTag;															///< When not empty, this overrides the auto incrementing number of the dump filename

+ 0 - 21
Jolt/Core/TickCounter.cpp

@@ -18,15 +18,8 @@
 	JPH_SUPPRESS_WARNING_POP
 #endif
 
-JPH_SUPPRESS_WARNINGS_STD_BEGIN
-#include <chrono>
-JPH_SUPPRESS_WARNINGS_STD_END
-
 JPH_NAMESPACE_BEGIN
 
-static uint64 sReferenceTick;
-static std::chrono::high_resolution_clock::time_point sReferenceTime;
-
 #if defined(JPH_PLATFORM_WINDOWS_UWP) || (defined(JPH_PLATFORM_WINDOWS) && defined(JPH_CPU_ARM))
 
 uint64 GetProcessorTickCount()
@@ -38,18 +31,4 @@ uint64 GetProcessorTickCount()
 
 #endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM)
 
-void UpdateReferenceTime()
-{
-	sReferenceTick = GetProcessorTickCount();
-	sReferenceTime = std::chrono::high_resolution_clock::now();
-}
-
-uint64 GetProcessorTicksPerSecond()
-{
-	uint64 ticks = GetProcessorTickCount();
-	std::chrono::high_resolution_clock::time_point time = std::chrono::high_resolution_clock::now();
-
-	return (ticks - sReferenceTick) * 1000000000ULL / std::chrono::duration_cast<std::chrono::nanoseconds>(time - sReferenceTime).count();
-}
-
 JPH_NAMESPACE_END

+ 0 - 6
Jolt/Core/TickCounter.h

@@ -42,10 +42,4 @@ JPH_INLINE uint64 GetProcessorTickCount()
 
 #endif // JPH_PLATFORM_WINDOWS_UWP || (JPH_PLATFORM_WINDOWS && JPH_CPU_ARM)
 
-/// We measure the amount of ticks per second, this function resets the reference time point
-JPH_EXPORT void UpdateReferenceTime();
-
-/// Get the amount of ticks per second, note that this number will never be fully accurate as the amound of ticks per second may vary with CPU load, so this number is only to be used to give an indication of time for profiling purposes
-JPH_EXPORT uint64 GetProcessorTicksPerSecond();
-
 JPH_NAMESPACE_END

+ 10 - 0
Jolt/Jolt.cmake

@@ -504,6 +504,16 @@ if (OBJECT_LAYER_BITS)
 	target_compile_definitions(Jolt PUBLIC JPH_OBJECT_LAYER_BITS=${OBJECT_LAYER_BITS})
 endif()
 
+# Setting to periodically trace broadphase stats to help determine if the broadphase layer configuration is optimal
+if (TRACK_BROADPHASE_STATS)
+	target_compile_definitions(Jolt PUBLIC JPH_TRACK_BROADPHASE_STATS)
+endif()
+
+# Setting to periodically trace narrowphase stats to help determine which collision queries could be optimized
+if (TRACK_NARROWPHASE_STATS)
+	target_compile_definitions(Jolt PUBLIC JPH_TRACK_NARROWPHASE_STATS)
+endif()
+
 # Emit the instruction set definitions to ensure that child projects use the same settings even if they override the used instruction sets (a mismatch causes link errors)
 function(EMIT_X86_INSTRUCTION_SET_DEFINITIONS)
 	if (USE_AVX512)

+ 7 - 2
Jolt/Physics/Collision/BroadPhase/BroadPhaseQuadTree.cpp

@@ -583,9 +583,14 @@ void BroadPhaseQuadTree::FindCollidingPairs(BodyID *ioActiveBodies, int inNumAct
 
 void BroadPhaseQuadTree::ReportStats()
 {
-	Trace("Query Type, Filter Description, Tree Name, Num Queries, Total Time (ms), Total Time Excl. Collector (ms), Nodes Visited, Bodies Visited, Hits Reported, Hits Reported vs Bodies Visited (%%), Hits Reported vs Nodes Visited");
+	Trace("Query Type, Filter Description, Tree Name, Num Queries, Total Time (%%), Total Time Excl. Collector (%%), Nodes Visited, Bodies Visited, Hits Reported, Hits Reported vs Bodies Visited (%%), Hits Reported vs Nodes Visited");
+
+	uint64 total_ticks = 0;
+	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
+		total_ticks += mLayers[l].GetTicks100Pct();
+		
 	for (BroadPhaseLayer::Type l = 0; l < mNumLayers; ++l)
-		mLayers[l].ReportStats();
+		mLayers[l].ReportStats(total_ticks);
 }
 
 #endif // JPH_TRACK_BROADPHASE_STATS

+ 32 - 14
Jolt/Physics/Collision/BroadPhase/QuadTree.cpp

@@ -1607,32 +1607,50 @@ void QuadTree::DumpTree(const NodeID &inRoot, const char *inFileNamePrefix) cons
 
 #ifdef JPH_TRACK_BROADPHASE_STATS
 
-void QuadTree::ReportStats(const char *inName, const LayerToStats &inLayer) const
+uint64 QuadTree::GetTicks100Pct(const LayerToStats &inLayer) const
 {
-	uint64 ticks_per_sec = GetProcessorTicksPerSecond();
+	uint64 total_ticks = 0;
+	for (const LayerToStats::value_type &kv : inLayer)
+		total_ticks += kv.second.mTotalTicks;
+	return total_ticks;
+}
 
+void QuadTree::ReportStats(const char *inName, const LayerToStats &inLayer, uint64 inTicks100Pct) const
+{
 	for (const LayerToStats::value_type &kv : inLayer)
 	{
-		double total_time = 1000.0 * double(kv.second.mTotalTicks) / double(ticks_per_sec);
-		double total_time_excl_collector = 1000.0 * double(kv.second.mTotalTicks - kv.second.mCollectorTicks) / double(ticks_per_sec);
+		double total_pct = 100.0 * double(kv.second.mTotalTicks) / double(inTicks100Pct);
+		double total_pct_excl_collector = 100.0 * double(kv.second.mTotalTicks - kv.second.mCollectorTicks) / double(inTicks100Pct);
 		double hits_reported_vs_bodies_visited = kv.second.mBodiesVisited > 0? 100.0 * double(kv.second.mHitsReported) / double(kv.second.mBodiesVisited) : 100.0;
-		double hits_reported_vs_nodes_visited = kv.second.mNodesVisited > 0? double(kv.second.mHitsReported) / double(kv.second.mNodesVisited) : -1.0f;
+		double hits_reported_vs_nodes_visited = kv.second.mNodesVisited > 0? double(kv.second.mHitsReported) / double(kv.second.mNodesVisited) : -1.0;
 
-		stringstream str;
-		str << inName << ", " << kv.first << ", " << mName << ", " << kv.second.mNumQueries << ", " << total_time << ", " << total_time_excl_collector << ", " << kv.second.mNodesVisited << ", " << kv.second.mBodiesVisited << ", " << kv.second.mHitsReported << ", " << hits_reported_vs_bodies_visited << ", " << hits_reported_vs_nodes_visited;
+		std::stringstream str;
+		str << inName << ", " << kv.first << ", " << mName << ", " << kv.second.mNumQueries << ", " << total_pct << ", " << total_pct_excl_collector << ", " << kv.second.mNodesVisited << ", " << kv.second.mBodiesVisited << ", " << kv.second.mHitsReported << ", " << hits_reported_vs_bodies_visited << ", " << hits_reported_vs_nodes_visited;
 		Trace(str.str().c_str());
 	}
 }
 
-void QuadTree::ReportStats() const
+uint64 QuadTree::GetTicks100Pct() const
+{
+	uint64 total_ticks = 0;
+	total_ticks += GetTicks100Pct(mCastRayStats);
+	total_ticks += GetTicks100Pct(mCollideAABoxStats);
+	total_ticks += GetTicks100Pct(mCollideSphereStats);
+	total_ticks += GetTicks100Pct(mCollidePointStats);
+	total_ticks += GetTicks100Pct(mCollideOrientedBoxStats);
+	total_ticks += GetTicks100Pct(mCastAABoxStats);
+	return total_ticks;
+}
+
+void QuadTree::ReportStats(uint64 inTicks100Pct) const
 {
 	unique_lock lock(mStatsMutex);
-	ReportStats("RayCast", mCastRayStats);
-	ReportStats("CollideAABox", mCollideAABoxStats);
-	ReportStats("CollideSphere", mCollideSphereStats);
-	ReportStats("CollidePoint", mCollidePointStats);
-	ReportStats("CollideOrientedBox", mCollideOrientedBoxStats);
-	ReportStats("CastAABox", mCastAABoxStats);
+	ReportStats("RayCast", mCastRayStats, inTicks100Pct);
+	ReportStats("CollideAABox", mCollideAABoxStats, inTicks100Pct);
+	ReportStats("CollideSphere", mCollideSphereStats, inTicks100Pct);
+	ReportStats("CollidePoint", mCollidePointStats, inTicks100Pct);
+	ReportStats("CollideOrientedBox", mCollideOrientedBoxStats, inTicks100Pct);
+	ReportStats("CastAABox", mCastAABoxStats, inTicks100Pct);
 }
 
 #endif // JPH_TRACK_BROADPHASE_STATS

+ 8 - 2
Jolt/Physics/Collision/BroadPhase/QuadTree.h

@@ -249,8 +249,11 @@ public:
 	void						FindCollidingPairs(const BodyVector &inBodies, const BodyID *inActiveBodies, int inNumActiveBodies, float inSpeculativeContactDistance, BodyPairCollector &ioPairCollector, const ObjectLayerPairFilter &inObjectLayerPairFilter) const;
 
 #ifdef JPH_TRACK_BROADPHASE_STATS
+	/// Sum up all the ticks spent in the various layers
+	uint64						GetTicks100Pct() const;
+
 	/// Trace the stats of this tree to the TTY
-	void						ReportStats() const;
+	void						ReportStats(uint64 inTicks100Pct) const;
 #endif // JPH_TRACK_BROADPHASE_STATS
 
 private:
@@ -335,8 +338,11 @@ private:
 
 	using LayerToStats = UnorderedMap<String, Stat>;
 
+	/// Sum up all the ticks in a layer
+	uint64						GetTicks100Pct(const LayerToStats &inLayer) const;
+
 	/// Trace the stats of a single query type to the TTY
-	void						ReportStats(const char *inName, const LayerToStats &inLayer) const;
+	void						ReportStats(const char *inName, const LayerToStats &inLayer, uint64 inTicks100Pct) const;
 
 	mutable LayerToStats		mCastRayStats;
 	mutable LayerToStats		mCollideAABoxStats;

+ 1 - 1
Jolt/Physics/Collision/CollisionDispatch.h

@@ -52,7 +52,7 @@ public:
 	/// @param ioCollector The collector that receives the results.
 	static inline void		sCastShapeVsShapeLocalSpace(const ShapeCast &inShapeCastLocal, const ShapeCastSettings &inShapeCastSettings, const Shape *inShape, Vec3Arg inScale, const ShapeFilter &inShapeFilter, Mat44Arg inCenterOfMassTransform2, const SubShapeIDCreator &inSubShapeIDCreator1, const SubShapeIDCreator &inSubShapeIDCreator2, CastShapeCollector &ioCollector)
 	{
-		JPH_IF_TRACK_NARROWPHASE_STATS(TrackNarrowPhaseStat track(NarrowPhaseStat::sCastShape[(int)inShapeCast.mShape->GetSubType()][(int)inShape->GetSubType()]);)
+		JPH_IF_TRACK_NARROWPHASE_STATS(TrackNarrowPhaseStat track(NarrowPhaseStat::sCastShape[(int)inShapeCastLocal.mShape->GetSubType()][(int)inShape->GetSubType()]);)
 
 		// Only test shape if it passes the shape filter
 		if (inShapeFilter.ShouldCollide(inShapeCastLocal.mShape, inSubShapeIDCreator1.GetID(), inShape, inSubShapeIDCreator2.GetID()))

+ 19 - 10
Jolt/Physics/Collision/NarrowPhaseStats.cpp

@@ -15,28 +15,37 @@ NarrowPhaseStat	NarrowPhaseStat::sCastShape[NumSubShapeTypes][NumSubShapeTypes];
 
 thread_local TrackNarrowPhaseStat *TrackNarrowPhaseStat::sRoot = nullptr;
 
-void NarrowPhaseStat::ReportStats(const char *inName, EShapeSubType inType1, EShapeSubType inType2) const
+void NarrowPhaseStat::ReportStats(const char *inName, EShapeSubType inType1, EShapeSubType inType2, uint64 inTicks100Pct) const
 {
-	uint64 ticks_per_sec = GetProcessorTicksPerSecond();
+	double total_pct = 100.0 * double(mTotalTicks) / double(inTicks100Pct);
+	double total_pct_excl_children = 100.0 * double(mTotalTicks - mChildTicks) / double(inTicks100Pct);
 
-	double total_time = 1000.0 * double(mTotalTicks) / double(ticks_per_sec);
-	double total_time_excl_children = 1000.0 * double(mTotalTicks - mChildTicks) / double(ticks_per_sec);
-
-	stringstream str;
-	str << inName << ", " << sSubShapeTypeNames[(int)inType1] << ", " << sSubShapeTypeNames[(int)inType2] << ", " << mNumQueries << ", " << total_time << ", " << total_time_excl_children << ", " << total_time_excl_children / mNumQueries << ", " << mHitsReported;
+	std::stringstream str;
+	str << inName << ", " << sSubShapeTypeNames[(int)inType1] << ", " << sSubShapeTypeNames[(int)inType2] << ", " << mNumQueries << ", " << total_pct << ", " << total_pct_excl_children << ", " << total_pct_excl_children / mNumQueries << ", " << mHitsReported;
 	Trace(str.str().c_str());
 }
 
 void NarrowPhaseStat::sReportStats()
 {
-	Trace("Query Type, Shape Type 1, Shape Type 2, Num Queries, Total Time (ms), Total Time Excl Children (ms), Total Time Excl. Children / Query (ms), Hits Reported");
+	Trace("Query Type, Shape Type 1, Shape Type 2, Num Queries, Total Time (%%), Total Time Excl Children (%%), Total Time Excl. Children / Query (%%), Hits Reported");
+
+	uint64 total_ticks = 0;
+	for (EShapeSubType t1 : sAllSubShapeTypes)
+		for (EShapeSubType t2 : sAllSubShapeTypes)
+		{
+			const NarrowPhaseStat &collide_stat = sCollideShape[(int)t1][(int)t2];
+			total_ticks += collide_stat.mTotalTicks - collide_stat.mChildTicks;
+
+			const NarrowPhaseStat &cast_stat = sCastShape[(int)t1][(int)t2];
+			total_ticks += cast_stat.mTotalTicks - cast_stat.mChildTicks;
+		}
 
 	for (EShapeSubType t1 : sAllSubShapeTypes)
 		for (EShapeSubType t2 : sAllSubShapeTypes)
 		{
 			const NarrowPhaseStat &stat = sCollideShape[(int)t1][(int)t2];
 			if (stat.mNumQueries > 0)
-				stat.ReportStats("CollideShape", t1, t2);
+				stat.ReportStats("CollideShape", t1, t2, total_ticks);
 		}
 
 	for (EShapeSubType t1 : sAllSubShapeTypes)
@@ -44,7 +53,7 @@ void NarrowPhaseStat::sReportStats()
 		{
 			const NarrowPhaseStat &stat = sCastShape[(int)t1][(int)t2];
 			if (stat.mNumQueries > 0)
-				stat.ReportStats("CastShape", t1, t2);
+				stat.ReportStats("CastShape", t1, t2, total_ticks);
 		}
 }
 

+ 1 - 1
Jolt/Physics/Collision/NarrowPhaseStats.h

@@ -28,7 +28,7 @@ class NarrowPhaseStat
 {
 public:
 	/// Trace an individual stat in CSV form.
-	void					ReportStats(const char *inName, EShapeSubType inType1, EShapeSubType inType2) const;
+	void					ReportStats(const char *inName, EShapeSubType inType1, EShapeSubType inType2, uint64 inTicks100Pct) const;
 
 	/// Trace the collected narrowphase stats in CSV form.
 	/// This report can be used to judge which collision queries could be optimized.

+ 0 - 3
Jolt/RegisterTypes.cpp

@@ -90,9 +90,6 @@ void RegisterTypesInternal(uint64 inVersionID)
 
 	JPH_ASSERT(Factory::sInstance != nullptr, "Need to create a factory first!");
 
-	// Set the initial reference time
-	UpdateReferenceTime();
-
 	// Initialize dispatcher
 	CollisionDispatch::sInit();