| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473 |
- #pragma once
- #include "BsCorePrerequisites.h"
- #include "BsModule.h"
- #include "BsFrameAlloc.h"
- namespace BansheeEngine
- {
- class CPUProfilerReport;
- /**
- * @brief Provides various performance measuring methods.
- *
- * @note Thread safe. Matching begin*\end* calls
- * must belong to the same thread though.
- */
- class BS_CORE_EXPORT ProfilerCPU : public Module<ProfilerCPU>
- {
- /**
- * @brief Timer class responsible for tracking elapsed time.
- */
- class Timer
- {
- public:
- Timer();
- /**
- * @brief Sets the start time for the timer.
- */
- void start();
- /**
- * @brief Stops the timer and calculates the elapsed time
- * from start time to now.
- */
- void stop();
- /**
- * @brief Resets the elapsed time to zero.
- */
- void reset();
- double time;
- private:
- double startTime;
- /**
- * @brief Returns time elapsed since CPU was started in millseconds.
- */
- static inline double getCurrentTime();
- };
- /**
- * @brief Timer class responsible for tracking number of elapsed CPU cycles.
- */
- class TimerPrecise
- {
- public:
- TimerPrecise();
- /**
- * @brief Starts the counter marking the current number of executed
- * CPU cycles since CPU was started.
- */
- void start();
- /**
- * @brief Ends the counter and calculates the number of CPU cycles between
- * now and the start time.
- */
- void stop();
- /**
- * @brief Resets the cycle count to zero.
- */
- void reset();
- UINT64 cycles;
- private:
- UINT64 startCycles;
- /**
- * @brief Queries the CPU for the current number of CPU cycles executed since the
- * program was started.
- */
- static inline UINT64 getNumCycles();
- };
- /**
- * @brief Contains data about a single profiler sample (counting time in milliseconds).
- *
- * @note A sample is created whenever a named profile block is entered. e.g. if you have a function
- * you are profiling, and it gets called 10 times, there will be 10 samples.
- */
- struct ProfileSample
- {
- ProfileSample(double _time, UINT64 _numAllocs, UINT64 _numFrees)
- :time(_time), numAllocs(_numAllocs), numFrees(_numFrees)
- { }
- double time;
- UINT64 numAllocs;
- UINT64 numFrees;
- };
- /**
- * @brief Contains data about a single precise profiler sample (counting CPU cycles).
- *
- * @note A sample is created whenever a named profile block is entered. e.g. if you have a function
- * you are profiling, and it gets called 10 times, there will be 10 samples.
- */
- struct PreciseProfileSample
- {
- PreciseProfileSample(UINT64 _cycles, UINT64 _numAllocs, UINT64 _numFrees)
- :cycles(_cycles), numAllocs(_numAllocs), numFrees(_numFrees)
- { }
- UINT64 cycles;
- UINT64 numAllocs;
- UINT64 numFrees;
- };
- /**
- * @brief Contains basic (time based) profiling data contained in a profiling block.
- */
- struct ProfileData
- {
- ProfileData(FrameAlloc* alloc);
- /**
- * @brief Begins a new sample and records current sample state. Previous sample must
- * not be active.
- */
- void beginSample();
- /**
- * @brief Records current sample state and creates a new sample based on start and end state.
- * Adds the sample to the sample list.
- */
- void endSample();
- /**
- * @brief Removes the last added sample from the sample list and makes it active again. You must
- * call endSample when done as if you called beginSample.
- */
- void resumeLastSample();
- Vector<ProfileSample, StdFrameAlloc<ProfileSample>> samples;
- Timer timer;
- UINT64 memAllocs;
- UINT64 memFrees;
- };
- /**
- * @brief Contains precise (CPU cycle based) profiling data contained in a profiling block.
- */
- struct PreciseProfileData
- {
- PreciseProfileData(FrameAlloc* alloc);
- /**
- * @brief Begins a new sample and records current sample state. Previous sample must
- * not be active.
- */
- void beginSample();
- /**
- * @brief Records current sample state and creates a new sample based on start and end state.
- * Adds the sample to the sample list.
- */
- void endSample();
- /**
- * @brief Removes the last added sample from the sample list and makes it active again. You must
- * call endSample when done as if you called beginSample.
- */
- void resumeLastSample();
- Vector<PreciseProfileSample, StdFrameAlloc<ProfileSample>> samples;
- TimerPrecise timer;
- UINT64 memAllocs;
- UINT64 memFrees;
- };
- /**
- * @brief Contains all sampling information about a single named profiling block.
- * Each block has its own sampling information and optionally child blocks.
- */
- struct ProfiledBlock
- {
- ProfiledBlock(FrameAlloc* alloc);
- ~ProfiledBlock();
- /**
- * @brief Attempts to find a child block with the specified name. Returns
- * null if not found.
- */
- ProfiledBlock* findChild(const char* name) const;
- char* name;
-
- ProfileData basic;
- PreciseProfileData precise;
- Vector<ProfiledBlock*, StdFrameAlloc<ProfiledBlock*>> children;
- };
- /**
- * @brief CPU sampling type.
- */
- enum class ActiveSamplingType
- {
- Basic, /**< Sample using milliseconds. */
- Precise /**< Sample using CPU cycles. */
- };
- /**
- * @brief Contains data about the currently active profiling block.
- */
- struct ActiveBlock
- {
- ActiveBlock()
- :type(ActiveSamplingType::Basic), block(nullptr)
- { }
- ActiveBlock(ActiveSamplingType _type, ProfiledBlock* _block)
- :type(_type), block(_block)
- { }
- ActiveSamplingType type;
- ProfiledBlock* block;
- };
- /**
- * @brief Contains data about an active profiling thread.
- */
- struct ThreadInfo
- {
- ThreadInfo();
- /**
- * @brief Starts profiling on the thread. New primary profiling block
- * is created with the given name.
- */
- void begin(const char* _name);
- /**
- * @brief Ends profiling on the thread. You should end all samples before calling this,
- * but if you don't they will be terminated automatically.
- */
- void end();
- /**
- * @brief Deletes all internal profiling data and makes the object ready for another
- * iteration. Should be called after end in order to delete any existing data.
- */
- void reset();
- /**
- * @brief Gets the primary profiling block used by the thread.
- */
- ProfiledBlock* getBlock(const char* name);
-
- /**
- * @brief Deletes the provided block.
- */
- void releaseBlock(ProfiledBlock* block);
- static BS_THREADLOCAL ThreadInfo* activeThread;
- bool isActive;
- ProfiledBlock* rootBlock;
- FrameAlloc frameAlloc;
- ActiveBlock activeBlock;
- Stack<ActiveBlock, StdFrameAlloc<ActiveBlock>>* activeBlocks;
- };
- public:
- ProfilerCPU();
- ~ProfilerCPU();
- /**
- * @brief Registers a new thread we will be doing sampling in. This needs to be called before any beginSample*\endSample* calls
- * are made in that thread.
- *
- * @param name Name that will allow you to more easily identify the thread.
- */
- void beginThread(const char* name);
- /**
- * @brief Ends sampling for the current thread. No beginSample*\endSample* calls after this point.
- */
- void endThread();
- /**
- * @brief Begins sample measurement. Must be followed by endSample.
- *
- * @param name Unique name for the sample you can later use to find the sampling data.
- */
- void beginSample(const char* name);
- /**
- * @brief Ends sample measurement.
- *
- * @param name Unique name for the sample.
- *
- * @note Unique name is primarily needed to more easily identify mismatched
- * begin/end sample pairs. Otherwise the name in beginSample would be enough.
- */
- void endSample(const char* name);
- /**
- * @brief Begins sample measurement. Must be followed by endSample.
- *
- * @param name Unique name for the sample you can later use to find the sampling data.
- *
- * @note This method uses very precise CPU counters to determine variety of data not
- * provided by standard beginSample. However due to the way these counters work you should
- * not use this method for larger parts of code. It does not consider context switches so if the OS
- * decides to switch context between measurements you will get invalid data.
- */
- void beginSamplePrecise(const char* name);
- /**
- * @brief Ends precise sample measurement.
- *
- * @param name Unique name for the sample.
- *
- * @note Unique name is primarily needed to more easily identify mismatched
- * begin/end sample pairs. Otherwise the name in beginSamplePrecise would be enough.
- */
- void endSamplePrecise(const char* name);
- /**
- * @brief Clears all sampling data, and ends any unfinished sampling blocks.
- */
- void reset();
- /**
- * @brief Generates a report from all previously sampled data.
- *
- * @note Generating a report will stop all in-progress sampling. You should make sure
- * you call endSample* manually beforehand so this doesn't have to happen.
- */
- CPUProfilerReport generateReport();
- private:
- /**
- * @brief Calculates overhead that the timing and sampling methods themselves introduce
- * so we might get more accurate measurements when creating reports.
- */
- void estimateTimerOverhead();
- private:
- double mBasicTimerOverhead;
- UINT64 mPreciseTimerOverhead;
- double mBasicSamplingOverheadMs;
- double mPreciseSamplingOverheadMs;
- UINT64 mBasicSamplingOverheadCycles;
- UINT64 mPreciseSamplingOverheadCycles;
- ProfilerVector<ThreadInfo*> mActiveThreads;
- BS_MUTEX(mThreadSync);
- };
- /**
- * @brief Profiling entry containing information about a single CPU profiling block
- * containing timing information.
- */
- struct BS_CORE_EXPORT CPUProfilerBasicSamplingEntry
- {
- struct BS_CORE_EXPORT Data
- {
- Data();
- String name; /**< Name of the profiling block. */
- UINT32 numCalls; /**< Number of times the block was entered. */
- UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */
- UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */
- double avgTimeMs; /**< Average time it took to execute the block, per call. In milliseconds. */
- double maxTimeMs; /**< Maximum time of a single call in the block. In milliseconds. */
- double totalTimeMs; /**< Total time the block took, across all calls. In milliseconds. */
- double avgSelfTimeMs; /**< Average time it took to execute the block, per call. Ignores time used by child blocks. In milliseconds. */
- double totalSelfTimeMs; /**< Total time the block took, across all calls. Ignores time used by child blocks. In milliseconds. */
- double estimatedSelfOverheadMs; /**< Estimated overhead of profiling methods, only for this exact block. In milliseconds. */
- double estimatedOverheadMs; /**< Estimated overhead of profiling methods for this block and all children. In milliseconds. */
- float pctOfParent; /**< Percent of parent block time this block took to execute. Ranging [0.0, 1.0]. */
- } data;
- ProfilerVector<CPUProfilerBasicSamplingEntry> childEntries;
- };
- /**
- * @brief Profiling entry containing information about a single CPU profiling block
- * containing CPU cycle count based information.
- */
- struct BS_CORE_EXPORT CPUProfilerPreciseSamplingEntry
- {
- struct BS_CORE_EXPORT Data
- {
- Data();
- String name; /**< Name of the profiling block. */
- UINT32 numCalls; /**< Number of times the block was entered. */
- UINT64 memAllocs; /**< Number of memory allocations that happened within the block. */
- UINT64 memFrees; /**< Number of memory deallocations that happened within the block. */
- UINT64 avgCycles; /**< Average number of cycles it took to execute the block, per call. */
- UINT64 maxCycles; /**< Maximum number of cycles of a single call in the block. */
- UINT64 totalCycles; /**< Total number of cycles across all calls in the block. */
- UINT64 avgSelfCycles; /**< Average number of cycles it took to execute the block, per call. Ignores cycles used by child blocks. */
- UINT64 totalSelfCycles; /**< Total number of cycles across all calls in the block. Ignores time used by child blocks. */
- UINT64 estimatedSelfOverhead; /**< Estimated overhead of profiling methods, only for this exact block. In cycles. */
- UINT64 estimatedOverhead; /**< Estimated overhead of profiling methods for this block and all children. In cycles. */
- float pctOfParent; /**< Percent of parent block cycles used by this block. Ranging [0.0, 1.0]. */
- } data;
- ProfilerVector<CPUProfilerPreciseSamplingEntry> childEntries;
- };
- /**
- * @brief CPU profiling report containing all profiling information for a single profiling session.
- */
- class BS_CORE_EXPORT CPUProfilerReport
- {
- public:
- CPUProfilerReport();
- /**
- * @brief Returns root entry for the basic (time based) sampling data. Root entry always contains the
- * profiling block associated with the entire thread.
- */
- const CPUProfilerBasicSamplingEntry& getBasicSamplingData() const { return mBasicSamplingRootEntry; }
- /**
- * @brief Returns root entry for the precise (CPU cycle based) sampling data. Root entry always contains the
- * profiling block associated with the entire thread.
- */
- const CPUProfilerPreciseSamplingEntry& getPreciseSamplingData() const { return mPreciseSamplingRootEntry; }
- private:
- friend class ProfilerCPU;
- CPUProfilerBasicSamplingEntry mBasicSamplingRootEntry;
- CPUProfilerPreciseSamplingEntry mPreciseSamplingRootEntry;
- };
- /**
- * @brief Quick way to access the CPU profiler.
- */
- BS_CORE_EXPORT ProfilerCPU& gProfilerCPU();
/**
 * @brief	Shortcut for profiling a single function call.
 *
 * @param	call	Expression or statement to execute between the begin/end sample calls.
 * @param	name	Sample name, forwarded unchanged to both beginSample and endSample.
 *
 * @note	Expands to three separate statements (the last one already terminated by a
 *			semicolon), so do not use it as the body of an unbraced if/else/loop.
 *			The name argument is expanded twice. It is passed through as-is, without
 *			token pasting: pasting it onto the adjacent parentheses does not form a
 *			valid preprocessing token.
 */
#define PROFILE_CALL(call, name)						\
	BansheeEngine::gProfilerCPU().beginSample(name);	\
	call;												\
	BansheeEngine::gProfilerCPU().endSample(name);
- }
|