| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961 |
- #include "CmCPUProfiler.h"
- #include "CmDebug.h"
- #include "CmPlatform.h"
- namespace CamelotFramework
- {
- CPUProfiler::Timer::Timer()
- {
- time = 0.0f;
- }
- void CPUProfiler::Timer::start()
- {
- startTime = getCurrentTime();
- }
- void CPUProfiler::Timer::stop()
- {
- time += getCurrentTime() - startTime;
- }
- void CPUProfiler::Timer::reset()
- {
- time = 0.0f;
- }
- inline double CPUProfiler::Timer::getCurrentTime()
- {
- return Platform::queryPerformanceTimerMs();
- }
- CPUProfiler::TimerPrecise::TimerPrecise()
- {
- cycles = 0;
- }
- void CPUProfiler::TimerPrecise::start()
- {
- startCycles = getNumCycles();
- }
- void CPUProfiler::TimerPrecise::stop()
- {
- cycles += getNumCycles() - startCycles;
- }
- void CPUProfiler::TimerPrecise::reset()
- {
- cycles = 0;
- }
- inline UINT64 CPUProfiler::TimerPrecise::getNumCycles()
- {
- #if CM_COMPILER == CM_COMPILER_GNUC
- asm volatile("cpuid" : : : "%eax", "%ebx", "%ecx", "%edx" );
- UINT32 __a,__d;
- asm volatile("rdtsc" : "=a" (__a), "=d" (__d));
- return ( UINT64(__a) | UINT64(__d) << 32 );
- #else
- int a[4];
- int b = 0;
- __cpuid(a, b);
- return __rdtsc();
- #endif
- }
- void CPUProfiler::ProfileData::beginSample()
- {
- memAllocs = MemoryCounter::getNumAllocs();
- memFrees = MemoryCounter::getNumFrees();
- timer.reset();
- timer.start();
- }
- void CPUProfiler::ProfileData::endSample()
- {
- timer.stop();
- UINT64 numAllocs = MemoryCounter::getNumAllocs() - memAllocs;
- UINT64 numFrees = MemoryCounter::getNumFrees() - memFrees;
- samples.push_back(ProfileSample(timer.time, numAllocs, numFrees));
- }
- void CPUProfiler::ProfileData::resumeLastSample()
- {
- timer.start();
- samples.erase(samples.end() - 1);
- }
- void CPUProfiler::PreciseProfileData::beginSample()
- {
- memAllocs = MemoryCounter::getNumAllocs();
- memFrees = MemoryCounter::getNumFrees();
- timer.reset();
- timer.start();
- }
- void CPUProfiler::PreciseProfileData::endSample()
- {
- timer.stop();
- UINT64 numAllocs = MemoryCounter::getNumAllocs() - memAllocs;
- UINT64 numFrees = MemoryCounter::getNumFrees() - memFrees;
- samples.push_back(PreciseProfileSample(timer.cycles, numAllocs, numFrees));
- }
- void CPUProfiler::PreciseProfileData::resumeLastSample()
- {
- timer.start();
- samples.erase(samples.end() - 1);
- }
- CM_THREADLOCAL CPUProfiler::ThreadInfo* CPUProfiler::ThreadInfo::activeThread = nullptr;
- CPUProfiler::ThreadInfo::ThreadInfo()
- :isActive(false), rootBlock(nullptr)
- {
- }
- void CPUProfiler::ThreadInfo::begin(const String& _name)
- {
- if(isActive)
- {
- LOGWRN("Profiler::beginThread called on a thread that was already being sampled");
- return;
- }
- if(rootBlock == nullptr)
- rootBlock = getBlock();
- activeBlock = ActiveBlock(ActiveSamplingType::Basic, rootBlock);
- activeBlocks.push(activeBlock);
-
- rootBlock->name = _name;
- rootBlock->basic.beginSample();
- isActive = true;
- }
- void CPUProfiler::ThreadInfo::end()
- {
- if(activeBlock.type == ActiveSamplingType::Basic)
- activeBlock.block->basic.endSample();
- else
- activeBlock.block->precise.endSample();
- activeBlocks.pop();
- if(!isActive)
- LOGWRN("Profiler::endThread called on a thread that isn't being sampled.");
- if(activeBlocks.size() > 0)
- {
- LOGWRN("Profiler::endThread called but not all sample pairs were closed. Sampling data will not be valid.");
- while(activeBlocks.size() > 0)
- {
- ActiveBlock& curBlock = activeBlocks.top();
- if(curBlock.type == ActiveSamplingType::Basic)
- curBlock.block->basic.endSample();
- else
- curBlock.block->precise.endSample();
- activeBlocks.pop();
- }
- }
- isActive = false;
- activeBlocks = Stack<ActiveBlock>::type();
- activeBlock = ActiveBlock();
- }
- void CPUProfiler::ThreadInfo::reset()
- {
- if(isActive)
- end();
- if(rootBlock != nullptr)
- releaseBlock(rootBlock);
- rootBlock = nullptr;
- }
- CPUProfiler::ProfiledBlock* CPUProfiler::ThreadInfo::getBlock()
- {
- // TODO - Pool this, if possible using the memory allocator stuff
- // TODO - Also consider moving all samples in ThreadInfo, and also pool them (otherwise I can't pool ProfiledBlock since it will be variable size)
- return cm_new<ProfiledBlock>();
- }
- void CPUProfiler::ThreadInfo::releaseBlock(CPUProfiler::ProfiledBlock* block)
- {
- cm_delete(block);
- }
- CPUProfiler::ProfiledBlock::ProfiledBlock()
- { }
- CPUProfiler::ProfiledBlock::~ProfiledBlock()
- {
- ThreadInfo* thread = ThreadInfo::activeThread;
- for(auto& child : children)
- thread->releaseBlock(child);
- children.clear();
- }
- CPUProfiler::ProfiledBlock* CPUProfiler::ProfiledBlock::findChild(const String& name) const
- {
- for(auto& child : children)
- {
- if(child->name == name)
- return child;
- }
- return nullptr;
- }
- CPUProfiler::CPUProfiler()
- :mBasicTimerOverhead(0.0), mPreciseTimerOverhead(0), mBasicSamplingOverheadMs(0.0), mPreciseSamplingOverheadCycles(0),
- mBasicSamplingOverheadCycles(0), mPreciseSamplingOverheadMs(0.0)
- {
- // TODO - We only estimate overhead on program start. It might be better to estimate it each time beginThread is called,
- // and keep separate values per thread.
- estimateTimerOverhead();
- }
- CPUProfiler::~CPUProfiler()
- {
- reset();
- CM_LOCK_MUTEX(mThreadSync);
- for(auto& threadInfo : mActiveThreads)
- cm_delete(threadInfo);
- }
- void CPUProfiler::beginThread(const String& name)
- {
- ThreadInfo* thread = ThreadInfo::activeThread;
- if(thread == nullptr)
- {
- ThreadInfo::activeThread = cm_new<ThreadInfo>();
- thread = ThreadInfo::activeThread;
- {
- CM_LOCK_MUTEX(mThreadSync);
- mActiveThreads.push_back(thread);
- }
- }
- thread->begin(name);
- }
- void CPUProfiler::endThread()
- {
- // I don't do a nullcheck where on purpose, so endSample can be called ASAP
- ThreadInfo::activeThread->end();
- }
- void CPUProfiler::beginSample(const String& name)
- {
- ThreadInfo* thread = ThreadInfo::activeThread;
- if(thread == nullptr || !thread->isActive)
- beginThread("Unknown");
- ProfiledBlock* parent = thread->activeBlock.block;
- ProfiledBlock* block = nullptr;
-
- if(parent != nullptr)
- block = parent->findChild(name);
- if(block == nullptr)
- {
- block = thread->getBlock();
- block->name = name;
- if(parent != nullptr)
- parent->children.push_back(block);
- else
- thread->rootBlock->children.push_back(block);
- }
- thread->activeBlock = ActiveBlock(ActiveSamplingType::Basic, block);
- thread->activeBlocks.push(thread->activeBlock);
- block->basic.beginSample();
- }
- void CPUProfiler::endSample(const String& name)
- {
- ThreadInfo* thread = ThreadInfo::activeThread;
- ProfiledBlock* block = thread->activeBlock.block;
- #if CM_DEBUG_MODE
- if(block == nullptr)
- {
- LOGWRN("Mismatched CPUProfiler::endSample. No beginSample was called.");
- return;
- }
- if(thread->activeBlock.type == ActiveSamplingType::Precise)
- {
- LOGWRN("Mismatched CPUProfiler::endSample. Was expecting Profiler::endSamplePrecise.");
- return;
- }
- if(block->name != name)
- {
- LOGWRN("Mismatched CPUProfiler::endSample. Was expecting \"" + block->name + "\" but got \"" + name + "\". Sampling data will not be valid.");
- return;
- }
- #endif
- block->basic.endSample();
- thread->activeBlocks.pop();
- if(!thread->activeBlocks.empty())
- thread->activeBlock = thread->activeBlocks.top();
- else
- thread->activeBlock = ActiveBlock();
- }
- void CPUProfiler::beginSamplePrecise(const String& name)
- {
- // Note: There is a (small) possibility a context switch will happen during this measurement in which case result will be skewed.
- // Increasing thread priority might help. This is generally only a problem with code that executes a long time (10-15+ ms - depending on OS quant length)
-
- ThreadInfo* thread = ThreadInfo::activeThread;
- if(thread == nullptr || !thread->isActive)
- beginThread("Unknown");
- ProfiledBlock* parent = thread->activeBlock.block;
- ProfiledBlock* block = nullptr;
-
- if(parent != nullptr)
- block = parent->findChild(name);
- if(block == nullptr)
- {
- block = thread->getBlock();
- block->name = name;
- if(parent != nullptr)
- parent->children.push_back(block);
- else
- thread->rootBlock->children.push_back(block);
- }
- thread->activeBlock = ActiveBlock(ActiveSamplingType::Precise, block);
- thread->activeBlocks.push(thread->activeBlock);
- block->precise.beginSample();
- }
- void CPUProfiler::endSamplePrecise(const String& name)
- {
- ThreadInfo* thread = ThreadInfo::activeThread;
- ProfiledBlock* block = thread->activeBlock.block;
- #if CM_DEBUG_MODE
- if(block == nullptr)
- {
- LOGWRN("Mismatched Profiler::endSamplePrecise. No beginSamplePrecise was called.");
- return;
- }
- if(thread->activeBlock.type == ActiveSamplingType::Basic)
- {
- LOGWRN("Mismatched CPUProfiler::endSamplePrecise. Was expecting Profiler::endSample.");
- return;
- }
- if(block->name != name)
- {
- LOGWRN("Mismatched Profiler::endSamplePrecise. Was expecting \"" + block->name + "\" but got \"" + name + "\". Sampling data will not be valid.");
- return;
- }
- #endif
- block->precise.endSample();
- thread->activeBlocks.pop();
- if(!thread->activeBlocks.empty())
- thread->activeBlock = thread->activeBlocks.top();
- else
- thread->activeBlock = ActiveBlock();
- }
- void CPUProfiler::reset()
- {
- ThreadInfo* thread = ThreadInfo::activeThread;
- if(thread != nullptr)
- thread->reset();
- }
- CPUProfilerReport CPUProfiler::generateReport()
- {
- CPUProfilerReport report;
- ThreadInfo* thread = ThreadInfo::activeThread;
- if(thread == nullptr)
- return report;
- if(thread->isActive)
- thread->end();
- // We need to separate out basic and precise data and form two separate hierarchies
- if(thread->rootBlock == nullptr)
- return report;
- struct TempEntry
- {
- TempEntry(ProfiledBlock* _parentBlock, UINT32 _entryIdx)
- :parentBlock(_parentBlock), entryIdx(_entryIdx)
- { }
- ProfiledBlock* parentBlock;
- UINT32 entryIdx;
- Vector<UINT32>::type childIndexes;
- };
- Vector<CPUProfilerBasicSamplingEntry>::type basicEntries;
- Vector<CPUProfilerPreciseSamplingEntry>::type preciseEntries;
- // Fill up flatHierarchy array in a way so we always process children before parents
- Stack<UINT32>::type todo;
- Vector<TempEntry>::type flatHierarchy;
- UINT32 entryIdx = 0;
- todo.push(entryIdx);
- flatHierarchy.push_back(TempEntry(thread->rootBlock, entryIdx));
- entryIdx++;
- while(!todo.empty())
- {
- UINT32 curDataIdx = todo.top();
- ProfiledBlock* curBlock = flatHierarchy[curDataIdx].parentBlock;
- todo.pop();
- for(auto& child : curBlock->children)
- {
- flatHierarchy[curDataIdx].childIndexes.push_back(entryIdx);
- todo.push(entryIdx);
- flatHierarchy.push_back(TempEntry(child, entryIdx));
- entryIdx++;
- }
- }
-
- // Calculate sampling data for all entries
- basicEntries.resize(flatHierarchy.size());
- preciseEntries.resize(flatHierarchy.size());
- for(auto& iter = flatHierarchy.rbegin(); iter != flatHierarchy.rend(); ++iter)
- {
- TempEntry& curData = *iter;
- ProfiledBlock* curBlock = curData.parentBlock;
- CPUProfilerBasicSamplingEntry* entryBasic = &basicEntries[curData.entryIdx];
- CPUProfilerPreciseSamplingEntry* entryPrecise = &preciseEntries[curData.entryIdx];
- // Calculate basic data
- entryBasic->data.name = curBlock->name;
- entryBasic->data.memAllocs = 0;
- entryBasic->data.memFrees = 0;
- entryBasic->data.totalTimeMs = 0.0;
- entryBasic->data.maxTimeMs = 0.0;
- for(auto& sample : curBlock->basic.samples)
- {
- entryBasic->data.totalTimeMs += sample.time;
- entryBasic->data.maxTimeMs = std::max(entryBasic->data.maxTimeMs, sample.time);
- entryBasic->data.memAllocs += sample.numAllocs;
- entryBasic->data.memFrees += sample.numFrees;
- }
- entryBasic->data.numCalls = (UINT32)curBlock->basic.samples.size();
- if(entryBasic->data.numCalls > 0)
- entryBasic->data.avgTimeMs = entryBasic->data.totalTimeMs / entryBasic->data.numCalls;
- double totalChildTime = 0.0;
- for(auto& childIdx : curData.childIndexes)
- {
- CPUProfilerBasicSamplingEntry* childEntry = &basicEntries[childIdx];
- totalChildTime += childEntry->data.totalTimeMs;
- childEntry->data.pctOfParent = (float)(childEntry->data.totalTimeMs / entryBasic->data.totalTimeMs);
- entryBasic->data.estimatedOverheadMs += childEntry->data.estimatedOverheadMs;
- }
- entryBasic->data.estimatedOverheadMs += curBlock->basic.samples.size() * mBasicSamplingOverheadMs;
- entryBasic->data.estimatedOverheadMs += curBlock->precise.samples.size() * mPreciseSamplingOverheadMs;
- entryBasic->data.totalSelfTimeMs = entryBasic->data.totalTimeMs - totalChildTime;
- if(entryBasic->data.numCalls > 0)
- entryBasic->data.avgSelfTimeMs = entryBasic->data.totalSelfTimeMs / entryBasic->data.numCalls;
- entryBasic->data.estimatedSelfOverheadMs = mBasicTimerOverhead;
- // Calculate precise data
- entryPrecise->data.name = curBlock->name;
- entryPrecise->data.memAllocs = 0;
- entryPrecise->data.memFrees = 0;
- entryPrecise->data.totalCycles = 0;
- entryPrecise->data.maxCycles = 0;
- for(auto& sample : curBlock->precise.samples)
- {
- entryPrecise->data.totalCycles += sample.cycles;
- entryPrecise->data.maxCycles = std::max(entryPrecise->data.maxCycles, sample.cycles);
- entryPrecise->data.memAllocs += sample.numAllocs;
- entryPrecise->data.memFrees += sample.numFrees;
- }
- entryPrecise->data.numCalls = (UINT32)curBlock->precise.samples.size();
- if(entryPrecise->data.numCalls > 0)
- entryPrecise->data.avgCycles = entryPrecise->data.totalCycles / entryPrecise->data.numCalls;
- UINT64 totalChildCycles = 0;
- for(auto& childIdx : curData.childIndexes)
- {
- CPUProfilerPreciseSamplingEntry* childEntry = &preciseEntries[childIdx];
- totalChildCycles += childEntry->data.totalCycles;
- childEntry->data.pctOfParent = childEntry->data.totalCycles / (float)entryPrecise->data.totalCycles;
- entryPrecise->data.estimatedOverhead += childEntry->data.estimatedOverhead;
- }
- entryPrecise->data.estimatedOverhead += curBlock->precise.samples.size() * mPreciseSamplingOverheadCycles;
- entryPrecise->data.estimatedOverhead += curBlock->basic.samples.size() * mBasicSamplingOverheadCycles;
- entryPrecise->data.totalSelfCycles = entryPrecise->data.totalCycles - totalChildCycles;
- if(entryPrecise->data.numCalls > 0)
- entryPrecise->data.avgSelfCycles = entryPrecise->data.totalSelfCycles / entryPrecise->data.numCalls;
- entryPrecise->data.estimatedSelfOverhead = mPreciseTimerOverhead;
- }
- // Prune empty basic entries
- Stack<UINT32>::type finalBasicHierarchyTodo;
- Stack<UINT32>::type parentBasicEntryIndexes;
- Vector<TempEntry>::type newBasicEntries;
- finalBasicHierarchyTodo.push(0);
- entryIdx = 0;
- parentBasicEntryIndexes.push(entryIdx);
- newBasicEntries.push_back(TempEntry(nullptr, entryIdx));
- entryIdx++;
- while(!finalBasicHierarchyTodo.empty())
- {
- UINT32 parentEntryIdx = parentBasicEntryIndexes.top();
- parentBasicEntryIndexes.pop();
- UINT32 curEntryIdx = finalBasicHierarchyTodo.top();
- TempEntry& curEntry = flatHierarchy[curEntryIdx];
- finalBasicHierarchyTodo.pop();
- for(auto& childIdx : curEntry.childIndexes)
- {
- finalBasicHierarchyTodo.push(childIdx);
- CPUProfilerBasicSamplingEntry& basicEntry = basicEntries[childIdx];
- if(basicEntry.data.numCalls > 0)
- {
- newBasicEntries.push_back(TempEntry(nullptr, childIdx));
- newBasicEntries[parentEntryIdx].childIndexes.push_back(entryIdx);
- parentBasicEntryIndexes.push(entryIdx);
- entryIdx++;
- }
- else
- parentBasicEntryIndexes.push(parentEntryIdx);
- }
- }
- if(newBasicEntries.size() > 0)
- {
- Vector<CPUProfilerBasicSamplingEntry*>::type finalBasicEntries;
- report.mBasicSamplingRootEntry = basicEntries[newBasicEntries[0].entryIdx];
- finalBasicEntries.push_back(&report.mBasicSamplingRootEntry);
- finalBasicHierarchyTodo.push(0);
- while(!finalBasicHierarchyTodo.empty())
- {
- UINT32 curEntryIdx = finalBasicHierarchyTodo.top();
- finalBasicHierarchyTodo.pop();
- TempEntry& curEntry = newBasicEntries[curEntryIdx];
- CPUProfilerBasicSamplingEntry* basicEntry = finalBasicEntries[curEntryIdx];
- basicEntry->childEntries.resize(curEntry.childIndexes.size());
- UINT32 idx = 0;
- for(auto& childIdx : curEntry.childIndexes)
- {
- TempEntry& childEntry = newBasicEntries[childIdx];
- basicEntry->childEntries[idx] = basicEntries[childEntry.entryIdx];
- finalBasicEntries.push_back(&(basicEntry->childEntries[idx]));
- finalBasicHierarchyTodo.push(childIdx);
- idx++;
- }
- }
- }
- // Prune empty precise entries
- Stack<UINT32>::type finalPreciseHierarchyTodo;
- Stack<UINT32>::type parentPreciseEntryIndexes;
- Vector<TempEntry>::type newPreciseEntries;
- finalPreciseHierarchyTodo.push(0);
- entryIdx = 0;
- parentPreciseEntryIndexes.push(entryIdx);
- newPreciseEntries.push_back(TempEntry(nullptr, entryIdx));
- entryIdx++;
- while(!finalPreciseHierarchyTodo.empty())
- {
- UINT32 parentEntryIdx = parentPreciseEntryIndexes.top();
- parentPreciseEntryIndexes.pop();
- UINT32 curEntryIdx = finalPreciseHierarchyTodo.top();
- TempEntry& curEntry = flatHierarchy[curEntryIdx];
- finalPreciseHierarchyTodo.pop();
- for(auto& childIdx : curEntry.childIndexes)
- {
- finalPreciseHierarchyTodo.push(childIdx);
- CPUProfilerPreciseSamplingEntry& preciseEntry = preciseEntries[childIdx];
- if(preciseEntry.data.numCalls > 0)
- {
- newPreciseEntries.push_back(TempEntry(nullptr, childIdx));
- newPreciseEntries[parentEntryIdx].childIndexes.push_back(entryIdx);
- parentPreciseEntryIndexes.push(entryIdx);
- entryIdx++;
- }
- else
- parentPreciseEntryIndexes.push(parentEntryIdx);
- }
- }
- if(newPreciseEntries.size() > 0)
- {
- Vector<CPUProfilerPreciseSamplingEntry*>::type finalPreciseEntries;
- report.mPreciseSamplingRootEntry = preciseEntries[newPreciseEntries[0].entryIdx];
- finalPreciseEntries.push_back(&report.mPreciseSamplingRootEntry);
- finalPreciseHierarchyTodo.push(0);
- while(!finalPreciseHierarchyTodo.empty())
- {
- UINT32 curEntryIdx = finalPreciseHierarchyTodo.top();
- finalPreciseHierarchyTodo.pop();
- TempEntry& curEntry = newPreciseEntries[curEntryIdx];
- CPUProfilerPreciseSamplingEntry* preciseEntry = finalPreciseEntries[curEntryIdx];
- preciseEntry->childEntries.resize(curEntry.childIndexes.size());
- UINT32 idx = 0;
- for(auto& childIdx : curEntry.childIndexes)
- {
- TempEntry& childEntry = newPreciseEntries[childIdx];
- preciseEntry->childEntries[idx] = preciseEntries[childEntry.entryIdx];
- finalPreciseEntries.push_back(&preciseEntry->childEntries.back());
- finalPreciseHierarchyTodo.push(childIdx);
- idx++;
- }
- }
- }
- return report;
- }
- void CPUProfiler::estimateTimerOverhead()
- {
- // Get an idea of how long timer calls and RDTSC takes
- const UINT32 reps = 1000, sampleReps = 100;
- mBasicTimerOverhead = 1000000.0;
- mPreciseTimerOverhead = 1000000;
- for (UINT32 tries = 0; tries < 20; tries++)
- {
- Timer timer;
- for (UINT32 i = 0; i < reps; i++)
- {
- timer.start();
- timer.stop();
- }
- double avgTime = double(timer.time)/double(reps);
- if (avgTime < mBasicTimerOverhead)
- mBasicTimerOverhead = avgTime;
- TimerPrecise timerPrecise;
- for (UINT32 i = 0; i < reps; i++)
- {
- timerPrecise.start();
- timerPrecise.stop();
- }
- UINT64 avgCycles = timerPrecise.cycles/reps;
- if (avgCycles < mPreciseTimerOverhead)
- mPreciseTimerOverhead = avgCycles;
- }
- mBasicSamplingOverheadMs = 1000000.0;
- mPreciseSamplingOverheadMs = 1000000.0;
- mBasicSamplingOverheadCycles = 1000000;
- mPreciseSamplingOverheadCycles = 1000000;
- for (UINT32 tries = 0; tries < 20; tries++)
- {
- /************************************************************************/
- /* AVERAGE TIME IN MS FOR BASIC SAMPLING */
- /************************************************************************/
- Timer timerA;
- timerA.start();
- beginThread("Main");
- // Two different cases that can effect performance, one where
- // sample already exists and other where new one needs to be created
- for (UINT32 i = 0; i < sampleReps; i++)
- {
- beginSample("TestAvg1");
- endSample("TestAvg1");
- beginSample("TestAvg2");
- endSample("TestAvg2");
- beginSample("TestAvg3");
- endSample("TestAvg3");
- beginSample("TestAvg4");
- endSample("TestAvg4");
- beginSample("TestAvg5");
- endSample("TestAvg5");
- beginSample("TestAvg6");
- endSample("TestAvg6");
- beginSample("TestAvg7");
- endSample("TestAvg7");
- beginSample("TestAvg8");
- endSample("TestAvg8");
- beginSample("TestAvg9");
- endSample("TestAvg9");
- beginSample("TestAvg10");
- endSample("TestAvg10");
- }
- for (UINT32 i = 0; i < sampleReps * 5; i++)
- {
- beginSample("TestAvg#" + toString(i));
- endSample("TestAvg#" + toString(i));
- }
- endThread();
- timerA.stop();
- reset();
- double avgTimeBasic = double(timerA.time)/double(sampleReps * 10 + sampleReps * 5) - mBasicTimerOverhead;
- if (avgTimeBasic < mBasicSamplingOverheadMs)
- mBasicSamplingOverheadMs = avgTimeBasic;
- /************************************************************************/
- /* AVERAGE CYCLES FOR BASIC SAMPLING */
- /************************************************************************/
- TimerPrecise timerPreciseA;
- timerPreciseA.start();
- beginThread("Main");
- // Two different cases that can effect performance, one where
- // sample already exists and other where new one needs to be created
- for (UINT32 i = 0; i < sampleReps; i++)
- {
- beginSample("TestAvg1");
- endSample("TestAvg1");
- beginSample("TestAvg2");
- endSample("TestAvg2");
- beginSample("TestAvg3");
- endSample("TestAvg3");
- beginSample("TestAvg4");
- endSample("TestAvg4");
- beginSample("TestAvg5");
- endSample("TestAvg5");
- beginSample("TestAvg6");
- endSample("TestAvg6");
- beginSample("TestAvg7");
- endSample("TestAvg7");
- beginSample("TestAvg8");
- endSample("TestAvg8");
- beginSample("TestAvg9");
- endSample("TestAvg9");
- beginSample("TestAvg10");
- endSample("TestAvg10");
- }
- for (UINT32 i = 0; i < sampleReps * 5; i++)
- {
- beginSample("TestAvg#" + toString(i));
- endSample("TestAvg#" + toString(i));
- }
- endThread();
- timerPreciseA.stop();
- reset();
- UINT64 avgCyclesBasic = timerPreciseA.cycles/(sampleReps * 10 + sampleReps * 5) - mPreciseTimerOverhead;
- if (avgCyclesBasic < mBasicSamplingOverheadCycles)
- mBasicSamplingOverheadCycles = avgCyclesBasic;
- /************************************************************************/
- /* AVERAGE TIME IN MS FOR PRECISE SAMPLING */
- /************************************************************************/
- Timer timerB;
- timerB.start();
- beginThread("Main");
- // Two different cases that can effect performance, one where
- // sample already exists and other where new one needs to be created
- for (UINT32 i = 0; i < sampleReps; i++)
- {
- beginSamplePrecise("TestAvg1");
- endSamplePrecise("TestAvg1");
- beginSamplePrecise("TestAvg2");
- endSamplePrecise("TestAvg2");
- beginSamplePrecise("TestAvg3");
- endSamplePrecise("TestAvg3");
- beginSamplePrecise("TestAvg4");
- endSamplePrecise("TestAvg4");
- beginSamplePrecise("TestAvg5");
- endSamplePrecise("TestAvg5");
- beginSamplePrecise("TestAvg6");
- endSamplePrecise("TestAvg6");
- beginSamplePrecise("TestAvg7");
- endSamplePrecise("TestAvg7");
- beginSamplePrecise("TestAvg8");
- endSamplePrecise("TestAvg8");
- beginSamplePrecise("TestAvg9");
- endSamplePrecise("TestAvg9");
- beginSamplePrecise("TestAvg10");
- endSamplePrecise("TestAvg10");
- }
- for (UINT32 i = 0; i < sampleReps * 5; i++)
- {
- beginSamplePrecise("TestAvg#" + toString(i));
- endSamplePrecise("TestAvg#" + toString(i));
- }
- endThread();
- timerB.stop();
- reset();
- double avgTimesPrecise = timerB.time/(sampleReps * 10 + sampleReps * 5);
- if (avgTimesPrecise < mPreciseSamplingOverheadMs)
- mPreciseSamplingOverheadMs = avgTimesPrecise;
- /************************************************************************/
- /* AVERAGE CYCLES FOR PRECISE SAMPLING */
- /************************************************************************/
- TimerPrecise timerPreciseB;
- timerPreciseB.start();
- beginThread("Main");
- // Two different cases that can effect performance, one where
- // sample already exists and other where new one needs to be created
- for (UINT32 i = 0; i < sampleReps; i++)
- {
- beginSamplePrecise("TestAvg1");
- endSamplePrecise("TestAvg1");
- beginSamplePrecise("TestAvg2");
- endSamplePrecise("TestAvg2");
- beginSamplePrecise("TestAvg3");
- endSamplePrecise("TestAvg3");
- beginSamplePrecise("TestAvg4");
- endSamplePrecise("TestAvg4");
- beginSamplePrecise("TestAvg5");
- endSamplePrecise("TestAvg5");
- beginSamplePrecise("TestAvg6");
- endSamplePrecise("TestAvg6");
- beginSamplePrecise("TestAvg7");
- endSamplePrecise("TestAvg7");
- beginSamplePrecise("TestAvg8");
- endSamplePrecise("TestAvg8");
- beginSamplePrecise("TestAvg9");
- endSamplePrecise("TestAvg9");
- beginSamplePrecise("TestAvg10");
- endSamplePrecise("TestAvg10");
- }
- for (UINT32 i = 0; i < sampleReps * 5; i++)
- {
- beginSamplePrecise("TestAvg#" + toString(i));
- endSamplePrecise("TestAvg#" + toString(i));
- }
- endThread();
- timerPreciseB.stop();
- reset();
- UINT64 avgCyclesPrecise = timerPreciseB.cycles/(sampleReps * 10 + sampleReps * 5);
- if (avgCyclesPrecise < mPreciseSamplingOverheadCycles)
- mPreciseSamplingOverheadCycles = avgCyclesPrecise;
- }
- }
- CPUProfilerBasicSamplingEntry::Data::Data()
- :numCalls(0), avgTimeMs(0.0), maxTimeMs(0.0), totalTimeMs(0.0),
- avgSelfTimeMs(0.0), totalSelfTimeMs(0.0), estimatedSelfOverheadMs(0.0),
- estimatedOverheadMs(0.0), pctOfParent(1.0f)
- { }
- CPUProfilerPreciseSamplingEntry::Data::Data()
- :numCalls(0), avgCycles(0), maxCycles(0), totalCycles(0),
- avgSelfCycles(0), totalSelfCycles(0), estimatedSelfOverhead(0),
- estimatedOverhead(0), pctOfParent(1.0f)
- { }
- CPUProfilerReport::CPUProfilerReport()
- {
- }
- }
|