#pragma once #include "BsCorePrerequisites.h" #include "BsModule.h" #include "BsFrameAlloc.h" namespace BansheeEngine { class CPUProfilerReport; /** * @brief Provides various performance measuring methods. * * @note Thread safe. Matching begin*\end* calls * must belong to the same thread though. */ class BS_CORE_EXPORT ProfilerCPU : public Module { /** * @brief Timer class responsible for tracking elapsed time. */ class Timer { public: Timer(); /** * @brief Sets the start time for the timer. */ void start(); /** * @brief Stops the timer and calculates the elapsed time * from start time to now. */ void stop(); /** * @brief Resets the elapsed time to zero. */ void reset(); double time; private: double startTime; /** * @brief Returns time elapsed since CPU was started in millseconds. */ static inline double getCurrentTime(); }; /** * @brief Timer class responsible for tracking number of elapsed CPU cycles. */ class TimerPrecise { public: TimerPrecise(); /** * @brief Starts the counter marking the current number of executed * CPU cycles since CPU was started. */ void start(); /** * @brief Ends the counter and calculates the number of CPU cycles between * now and the start time. */ void stop(); /** * @brief Resets the cycle count to zero. */ void reset(); UINT64 cycles; private: UINT64 startCycles; /** * @brief Queries the CPU for the current number of CPU cycles executed since the * program was started. */ static inline UINT64 getNumCycles(); }; /** * @brief Contains data about a single profiler sample (counting time in milliseconds). * * @note A sample is created whenever a named profile block is entered. e.g. if you have a function * you are profiling, and it gets called 10 times, there will be 10 samples. */ struct ProfileSample { ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees) :time(_time), numAllocs(_numAllocs), numFrees(_numFrees) { } double time; UINT64 numAllocs; UINT64 numFrees; }; /** * @brief Contains data about a single precise profiler sample (counting CPU cycles). * * @note A sample is created whenever a named profile block is entered. e.g. if you have a function * you are profiling, and it gets called 10 times, there will be 10 samples. */ struct PreciseProfileSample { PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees) :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees) { } UINT64 cycles; UINT64 numAllocs; UINT64 numFrees; }; /** * @brief Contains basic (time based) profiling data contained in a profiling block. */ struct ProfileData { ProfileData(FrameAlloc* alloc); /** * @brief Begins a new sample and records current sample state. Previous sample must * not be active. */ void beginSample(); /** * @brief Records current sample state and creates a new sample based on start and end state. * Adds the sample to the sample list. */ void endSample(); /** * @brief Removes the last added sample from the sample list and makes it active again. You must * call endSample when done as if you called beginSample. */ void resumeLastSample(); Vector> samples; Timer timer; UINT64 memAllocs; UINT64 memFrees; }; /** * @brief Contains precise (CPU cycle based) profiling data contained in a profiling block. */ struct PreciseProfileData { PreciseProfileData(FrameAlloc* alloc); /** * @brief Begins a new sample and records current sample state. Previous sample must * not be active. */ void beginSample(); /** * @brief Records current sample state and creates a new sample based on start and end state. * Adds the sample to the sample list. */ void endSample(); /** * @brief Removes the last added sample from the sample list and makes it active again. You must * call endSample when done as if you called beginSample. */ void resumeLastSample(); Vector> samples; TimerPrecise timer; UINT64 memAllocs; UINT64 memFrees; }; /** * @brief Contains all sampling information about a single named profiling block. * Each block has its own sampling information and optionally child blocks. */ struct ProfiledBlock { ProfiledBlock(FrameAlloc* alloc); ~ProfiledBlock(); /** * @brief Attempts to find a child block with the specified name. Returns * null if not found. */ ProfiledBlock* findChild(const char* name) const; char* name; ProfileData basic; PreciseProfileData precise; Vector> children; }; /** * @brief CPU sampling type. */ enum class ActiveSamplingType { Basic, /**< Sample using milliseconds. */ Precise /**< Sample using CPU cycles. */ }; /** * @brief Contains data about the currently active profiling block. */ struct ActiveBlock { ActiveBlock() :type(ActiveSamplingType::Basic), block(nullptr) { } ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block) :type(_type), block(_block) { } ActiveSamplingType type; ProfiledBlock* block; }; /** * @brief Contains data about an active profiling thread. */ struct ThreadInfo { ThreadInfo(); /** * @brief Starts profiling on the thread. New primary profiling block * is created with the given name. */ void begin(const char* _name); /** * @brief Ends profiling on the thread. You should end all samples before calling this, * but if you don't they will be terminated automatically. */ void end(); /** * @brief Deletes all internal profiling data and makes the object ready for another * iteration. Should be called after end in order to delete any existing data. */ void reset(); /** * @brief Gets the primary profiling block used by the thread. */ ProfiledBlock* getBlock(const char* name); /** * @brief Deletes the provided block. */ void releaseBlock(ProfiledBlock* block); static BS_THREADLOCAL ThreadInfo* activeThread; bool isActive; ProfiledBlock* rootBlock; FrameAlloc frameAlloc; ActiveBlock activeBlock; Stack>* activeBlocks; }; public: ProfilerCPU(); ~ProfilerCPU(); /** * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls * are made in that thread. * * @param name Name that will allow you to more easily identify the thread. */ void beginThread(const char* name); /** * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point. */ void endThread(); /** * @brief Begins sample measurement. Must be followed by endSample. * * @param name Unique name for the sample you can later use to find the sampling data. */ void beginSample(const char* name); /** * @brief Ends sample measurement. * * @param name Unique name for the sample. * * @note Unique name is primarily needed to more easily identify mismatched * begin/end sample pairs. Otherwise the name in beginSample would be enough. */ void endSample(const char* name); /** * @brief Begins sample measurement. Must be followed by endSample. * * @param name Unique name for the sample you can later use to find the sampling data. * * @note This method uses very precise CPU counters to determine variety of data not * provided by standard beginSample. However due to the way these counters work you should * not use this method for larger parts of code. It does not consider context switches so if the OS * decides to switch context between measurements you will get invalid data. */ void beginSamplePrecise(const char* name); /** * @brief Ends precise sample measurement. * * @param name Unique name for the sample. * * @note Unique name is primarily needed to more easily identify mismatched * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough. */ void endSamplePrecise(const char* name); /** * @brief Clears all sampling data, and ends any unfinished sampling blocks. */ void reset(); /** * @brief Generates a report from all previously sampled data. * * @note Generating a report will stop all in-progress sampling. You should make sure * you call endSample* manually beforehand so this doesn't have to happen. */ CPUProfilerReport generateReport(); private: /** * @brief Calculates overhead that the timing and sampling methods themselves introduce * so we might get more accurate measurements when creating reports. */ void estimateTimerOverhead(); private: double mBasicTimerOverhead; UINT64 mPreciseTimerOverhead; double mBasicSamplingOverheadMs; double mPreciseSamplingOverheadMs; UINT64 mBasicSamplingOverheadCycles; UINT64 mPreciseSamplingOverheadCycles; ProfilerVector mActiveThreads; BS_MUTEX(mThreadSync); }; /** * @brief Profiling entry containing information about a single CPU profiling block * containing timing information. */ struct BS_CORE_EXPORT CPUProfilerBasicSamplingEntry { struct BS_CORE_EXPORT Data { Data(); String name; /**< Name of the profiling block. */ UINT32 numCalls; /**< Number of times the block was entered. */ UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */ UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */ double avgTimeMs; /**< Average time it took to execute the block, per call. In milliseconds. */ double maxTimeMs; /**< Maximum time of a single call in the block. In milliseconds. */ double totalTimeMs; /**< Total time the block took, across all calls. In milliseconds. */ double avgSelfTimeMs; /**< Average time it took to execute the block, per call. Ignores time used by child blocks. In milliseconds. */ double totalSelfTimeMs; /**< Total time the block took, across all calls. Ignores time used by child blocks. In milliseconds. */ double estimatedSelfOverheadMs; /**< Estimated overhead of profiling methods, only for this exact block. In milliseconds. */ double estimatedOverheadMs; /**< Estimated overhead of profiling methods for this block and all children. In milliseconds. */ float pctOfParent; /**< Percent of parent block time this block took to execute. Ranging [0.0, 1.0]. */ } data; ProfilerVector childEntries; }; /** * @brief Profiling entry containing information about a single CPU profiling block * containing CPU cycle count based information. */ struct BS_CORE_EXPORT CPUProfilerPreciseSamplingEntry { struct BS_CORE_EXPORT Data { Data(); String name; /**< Name of the profiling block. */ UINT32 numCalls; /**< Number of times the block was entered. */ UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */ UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */ UINT64 avgCycles; /**< Average number of cycles it took to execute the block, per call. */ UINT64 maxCycles; /**< Maximum number of cycles of a single call in the block. */ UINT64 totalCycles; /**< Total number of cycles across all calls in the block. */ UINT64 avgSelfCycles; /**< Average number of cycles it took to execute the block, per call. Ignores cycles used by child blocks. */ UINT64 totalSelfCycles; /**< Total number of cycles across all calls in the block. Ignores time used by child blocks. */ UINT64 estimatedSelfOverhead; /**< Estimated overhead of profiling methods, only for this exact block. In cycles. */ UINT64 estimatedOverhead; /**< Estimated overhead of profiling methods for this block and all children. In cycles. */ float pctOfParent; /**< Percent of parent block cycles used by this block. Ranging [0.0, 1.0]. */ } data; ProfilerVector childEntries; }; /** * @brief CPU profiling report containing all profiling information for a single profiling session. */ class BS_CORE_EXPORT CPUProfilerReport { public: CPUProfilerReport(); /** * @brief Returns root entry for the basic (time based) sampling data. Root entry always contains the * profiling block associated with the entire thread. */ const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; } /** * @brief Returns root entry for the precise (CPU cycle based) sampling data. Root entry always contains the * profiling block associated with the entire thread. */ const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; } private: friend class ProfilerCPU; CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry; CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry; }; /** * @brief Quick way to access the CPU profiler. */ BS_CORE_EXPORT ProfilerCPU& gProfilerCPU(); /** * @brief Shortcut for profiling a single function call. */ #define PROFILE_CALL(call, name) \ BansheeEngine::gProfilerCPU().beginSample(##name##); \ call; \ BansheeEngine::gProfilerCPU().endSample(##name##); }