Browse Source

accurate profile duration tracking, output filtering (no 0.000 second spikes), preliminary profile result aggregation

Darren Ranalli 17 years ago
parent
commit
61d0f91617
2 changed files with 129 additions and 73 deletions
  1. 84 41
      direct/src/showbase/ProfileSession.py
  2. 45 32
      direct/src/task/TaskProfiler.py

+ 84 - 41
direct/src/showbase/ProfileSession.py

@@ -26,12 +26,9 @@ class ProfileSession:
     def __init__(self, func, name):
         self._func = func
         self._name = name
-        self._filename = 'profileData-%s-%s' % (self._name, id(self))
-        self._profileSucceeded = False
-        self._wallClockDur = None
-        self._ramFile = None
+        self._filenameBase = 'profileData-%s-%s' % (self._name, id(self))
         self._refCount = 0
-        self._resultCache = {}
+        self._reset()
         self.acquire()
 
     def acquire(self):
@@ -44,28 +41,51 @@ class ProfileSession:
     def _destroy(self):
         del self._func
         del self._name
-        del self._filename
-        del self._wallClockDur
-        del self._ramFile
+        del self._filenameBase
+        del self._filenameCounter
+        del self._filenames
+        del self._duration
+        del self._filename2ramFile
         del self._resultCache
+        del self._successfulProfiles
+
+    def _reset(self):
+        self._filenameCounter = 0
+        self._filenames = []
+        # index of next file to be added to stats object
+        self._statFileCounter = 0
+        self._successfulProfiles = 0
+        self._duration = None
+        self._filename2ramFile = {}
+        self._stats = None
+        self._resultCache = {}
+
+    def _getNextFilename(self):
+        filename = '%s-%s' % (self._filenameBase, self._filenameCounter)
+        self._filenameCounter += 1
+        return filename
 
-    def run(self):
+    def run(self, aggregate=True):
         # make sure this instance doesn't get destroyed inside self._func
         self.acquire()
 
-        self._profileSucceeded = False
+        if not aggregate:
+            self._reset()
 
         # if we're already profiling, just run the func and don't profile
         if 'globalProfileFunc' in __builtin__.__dict__:
             result = self._func()
-            self._wallClockDur = 0.
+            if self._duration is None:
+                self._duration = 0.
         else:
             # put the function in the global namespace so that profile can find it
             __builtin__.globalProfileFunc = self._func
             __builtin__.globalProfileResult = [None]
 
             # set up the RAM file
-            _installProfileCustomFuncs(self._filename)
+            self._filenames.append(self._getNextFilename())
+            filename = self._filenames[-1]
+            _installProfileCustomFuncs(filename)
 
             # do the profiling
             Profile = profile.Profile
@@ -76,55 +96,87 @@ class ProfileSession:
             # this is based on profile.run, the code is replicated here to allow us to
             # eliminate a memory leak
             prof = Profile()
-            # try to get wall-clock duration that is as accurate as possible
-            startT = self.TrueClock.getShortTime()
             try:
                 prof = prof.run(statement)
             except SystemExit:
                 pass
-            # try to get wall-clock duration that is as accurate as possible
-            endT = self.TrueClock.getShortTime()
             # this has to be run immediately after profiling for the timings to be accurate
             # tell the Profile object to generate output to the RAM file
-            prof.dump_stats(self._filename)
-
-            dur = endT - startT
+            prof.dump_stats(filename)
 
             # eliminate the memory leak
             del prof.dispatcher
 
-            # and store the results
-            self._wallClockDur = dur
-
             # store the RAM file for later
-            self._ramFile = _getProfileResultFileInfo(self._filename)
+            profData = _getProfileResultFileInfo(filename)
+            self._filename2ramFile[filename] = profData
+            # calculate the duration (this is dependent on the internal Python profile data format.
+            # see profile.py and pstats.py, this was copied from pstats.Stats.strip_dirs)
+            maxTime = 0.
+            for cc, nc, tt, ct, callers in profData[1].itervalues():
+                if ct > maxTime:
+                    maxTime = ct
+            self._duration = maxTime
             # clean up the RAM file support
-            _removeProfileCustomFuncs(self._filename)
+            _removeProfileCustomFuncs(filename)
 
             # clean up the globals
             result = globalProfileResult[0]
             del __builtin__.__dict__['globalProfileFunc']
             del __builtin__.__dict__['globalProfileResult']
 
-            self._profileSucceeded = True
+            self._successfulProfiles += 1
             
         self.release()
         return result
 
-    def getWallClockDuration(self):
-        # this might not be accurate, it may include time taken up by other processes
-        return self._wallClockDur
+    def getDuration(self):
+        return self._duration
 
     def profileSucceeded(self):
-        return self._profileSucceeded
+        return self._successfulProfiles > 0
+
+    def _restoreRamFile(self, filename):
+        # set up the RAM file
+        _installProfileCustomFuncs(filename)
+        # install the stored RAM file from self.run()
+        _setProfileResultsFileInfo(filename, self._filename2ramFile[filename])
+
+    def _discardRamFile(self, filename):
+        # take down the RAM file
+        _removeProfileCustomFuncs(filename)
+        # and discard it
+        del self._filename2ramFile[filename]
     
     def getResults(self,
                    lines=80,
                    sorts=['cumulative', 'time', 'calls'],
                    callInfo=False):
-        if not self._profileSucceeded:
+        if not self.profileSucceeded():
             output = '%s: profiler already running, could not profile' % self._name
         else:
+            # make sure our stats object exists and is up-to-date
+            statsChanged = (self._statFileCounter < len(self._filenames))
+
+            if self._stats is None:
+                for filename in self._filenames:
+                    self._restoreRamFile(filename)
+                self._stats = pstats.Stats(*self._filenames)
+                self._statFileCounter = len(self._filenames)
+                for filename in self._filenames:
+                    self._discardRamFile(filename)
+            else:
+                while self._statFileCounter < len(self._filenames):
+                    filename = self._filenames[self._statFileCounter]
+                    self._restoreRamFile(filename)
+                    self._stats.add(filename)
+                    self._discardRamFile(filename)
+
+            if statsChanged:
+                self._stats.strip_dirs()
+                # throw out any cached result strings
+                self._resultCache = {}
+
             # make sure the arguments will hash efficiently if callers provide different types
             lines = int(lines)
             sorts = list(sorts)
@@ -134,19 +186,13 @@ class ProfileSession:
                 # we've already created this output string, get it from the cache
                 output = self._resultCache[k]
             else:
-                # set up the RAM file
-                _installProfileCustomFuncs(self._filename)
-                # install the stored RAM file from self.run()
-                _setProfileResultsFileInfo(self._filename, self._ramFile)
-
-                # now extract human-readable output from the RAM file
+                # now get human-readable output from the profile stats
 
                 # capture print output to a string
                 sc = StdoutCapture()
 
                 # print the info to stdout
-                s = pstats.Stats(self._filename)
-                s.strip_dirs()
+                s = self._stats
                 for sort in sorts:
                     s.sort_stats(sort)
                     s.print_stats(lines)
@@ -160,9 +206,6 @@ class ProfileSession:
                 # restore stdout to what it was before
                 sc.destroy()
 
-                # clean up the RAM file support
-                _removeProfileCustomFuncs(self._filename)
-
                 # cache this result
                 self._resultCache[k] = output
 

+ 45 - 32
direct/src/task/TaskProfiler.py

@@ -5,11 +5,18 @@ from direct.showbase.PythonUtil import Averager
 class TaskTracker:
     # call it TaskProfiler to avoid confusion for the user
     notify = directNotify.newCategory("TaskProfiler")
+    MinSamples = None
+    SpikeThreshold = None
     def __init__(self, namePrefix):
         self._namePrefix = namePrefix
         self._durationAverager = Averager('%s-durationAverager' % namePrefix)
         self._avgSession = None
         self._maxNonSpikeSession = None
+        if TaskTracker.MinSamples is None:
+            # number of samples required before spikes start getting identified
+            TaskTracker.MinSamples = config.GetInt('profile-task-spike-min-samples', 30)
+            # defines spike as longer than this multiple of avg task duration
+            TaskTracker.SpikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
     def destroy(self):
         self.flush()
         del self._namePrefix
@@ -24,14 +31,43 @@ class TaskTracker:
         self._maxNonSpikeSession = None
     def getNamePrefix(self, namePrefix):
         return self._namePrefix
-    def addProfileSession(self, session, isSpike):
-        duration = session.getWallClockDuration()
+    def _checkSpike(self, session):
+        duration = session.getDuration()
+        isSpike = False
+        # was it long enough to show up on a printout?
+        if duration >= .001:
+            # do we have enough samples?
+            if self.getNumDurationSamples() > self.MinSamples:
+                # was this a spike?
+                if duration > (self.getAvgDuration() * self.SpikeThreshold):
+                    isSpike = True
+                    avgSession = self.getAvgSession()
+                    maxNSSession = self.getMaxNonSpikeSession()
+                    self.notify.info('task CPU spike profile (%s):\n'
+                                     '== AVERAGE (%s seconds)\n%s\n'
+                                     '== LONGEST NON-SPIKE (%s seconds)\n%s\n'
+                                     '== SPIKE (%s seconds)\n%s' % (
+                        self._namePrefix,
+                        avgSession.getDuration(), avgSession.getResults(),
+                        maxNSSession.getDuration(), maxNSSession.getResults(),
+                        duration, session.getResults()))
+        return isSpike
+    def addProfileSession(self, session):
+        isSpike = self._checkSpike(session)
+        duration = session.getDuration()
         self._durationAverager.addValue(duration)
+        if self.getNumDurationSamples() == self.MinSamples:
+            # if we just collected enough samples to check for spikes, see if
+            # our max 'non-spike' session collected so far counts as a spike
+            # and if so, log it
+            if self._checkSpike(self._maxNonSpikeSession):
+                self._maxNonSpikeSession = self._avgSession
+
         storeAvg = True
         storeMaxNS = True
         if self._avgSession is not None:
             avgDur = self.getAvgDuration()
-            if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
+            if abs(self._avgSession.getDuration() - avgDur) < abs(duration - avgDur):
                 # current avg data is more average than this new sample, keep the data we've
                 # already got stored
                 storeAvg = False
@@ -39,7 +75,7 @@ class TaskTracker:
             storeMaxNS = False
         else:
             if (self._maxNonSpikeSession is not None and
-                self._maxNonSpikeSession.getWallClockDuration() > duration):
+                self._maxNonSpikeSession.getDuration() > duration):
                 storeMaxNS = False
         if storeAvg:
             if self._avgSession:
@@ -64,12 +100,12 @@ class TaskTracker:
     def log(self):
         if self._avgSession:
             self.notify.info('task CPU profile (%s):\n'
-                             '== AVERAGE (%s wall-clock seconds)\n%s\n'
-                             '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
+                             '== AVERAGE (%s seconds)\n%s\n'
+                             '== LONGEST NON-SPIKE (%s seconds)\n%s' % (
                 self._namePrefix,
-                self._avgSession.getWallClockDuration(),
+                self._avgSession.getDuration(),
                 self._avgSession.getResults(),
-                self._maxNonSpikeSession.getWallClockDuration(),
+                self._maxNonSpikeSession.getDuration(),
                 self._maxNonSpikeSession.getResults(),
                 ))
         else:
@@ -85,10 +121,6 @@ class TaskProfiler:
         # table of task name pattern to TaskTracker
         self._namePrefix2tracker = {}
         self._task = None
-        # number of samples required before spikes start getting identified
-        self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
-        # defines spike as longer than this multiple of avg task duration
-        self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
 
     def destroy(self):
         if taskMgr.getProfileTasks():
@@ -131,30 +163,11 @@ class TaskProfiler:
             session = taskMgr._getLastProfileSession()
             # if we couldn't profile, throw this result out
             if session.profileSucceeded():
-                sessionDur = session.getWallClockDuration()
                 namePrefix = self._task.getNamePrefix()
                 if namePrefix not in self._namePrefix2tracker:
                     self._namePrefix2tracker[namePrefix] = TaskTracker(namePrefix)
                 tracker = self._namePrefix2tracker[namePrefix]
-                isSpike = False
-                # do we have enough samples?
-                if tracker.getNumDurationSamples() > self._minSamples:
-                    # was this a spike?
-                    if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
-                        print 'sessionDur=%s' % sessionDur
-                        print 'avgDur=%s' % tracker.getAvgDuration()
-                        isSpike = True
-                        avgSession = tracker.getAvgSession()
-                        maxNSSession = tracker.getMaxNonSpikeSession()
-                        self.notify.info('task CPU spike profile (%s):\n'
-                                         '== AVERAGE (%s wall-clock seconds)\n%s\n'
-                                         '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
-                                         '== SPIKE (%s wall-clock seconds)\n%s' % (
-                            namePrefix,
-                            avgSession.getWallClockDuration(), avgSession.getResults(),
-                            maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
-                            sessionDur, session.getResults()))
-                tracker.addProfileSession(session, isSpike)
+                tracker.addProfileSession(session)
 
         # set up the next task
         self._task = taskMgr._getRandomTask()