Ver código fonte

added ProfileSession, don't format profile output until it's requested

Darren Ranalli 17 anos atrás
pai
commit
eaa1318450

+ 141 - 0
direct/src/showbase/ProfileSession.py

@@ -0,0 +1,141 @@
+from pandac.libpandaexpressModules import TrueClock
+from direct.showbase.PythonUtil import (
+    StdoutCapture, _installProfileCustomFuncs,_removeProfileCustomFuncs,
+    _profileWithoutGarbageLeak, _getProfileResultFileInfo, _setProfileResultsFileInfo,
+    _clearProfileResultFileInfo, )
+import __builtin__
+import profile
+import pstats
+from StringIO import StringIO
+import marshal
+
+class ProfileSession:
+    # class that encapsulates a profile of a single callable using Python's standard
+    # 'profile' module
+    #
+    # defers formatting of profile results until they are requested
+    # 
+    # implementation sidesteps memory leak in Python profile module,
+    # and redirects file output to RAM file for efficiency
+    DefaultFilename = 'profilesession'
+    DefaultLines = 80
+    DefaultSorts = ['cumulative', 'time', 'calls']
+
+    TrueClock = TrueClock.getGlobalPtr()
+    
+    def __init__(self, func, name):
+        self._func = func
+        self._name = name
+        self._filename = 'profileData-%s-%s' % (self._name, id(self))
+        self._wallClockDur = None
+        self._ramFile = None
+        self._refCount = 0
+        self.acquire()
+
+    def acquire(self):
+        self._refCount += 1
+    def release(self):
+        self._refCount -= 1
+        if not self._refCount:
+            self._destroy()
+
+    def _destroy(self):
+        del self._func
+        del self._name
+        del self._filename
+        del self._wallClockDur
+        del self._ramFile
+
+    def run(self):
+        # make sure this instance doesn't get destroyed inside self._func
+        self.acquire()
+
+        # put the function in the global namespace so that profile can find it
+        assert 'globalProfileFunc' not in __builtin__.__dict__
+        __builtin__.globalProfileFunc = self._func
+        __builtin__.globalProfileResult = [None]
+
+        # set up the RAM file
+        _installProfileCustomFuncs(self._filename)
+
+        # do the profiling
+        Profile = profile.Profile
+        statement = 'globalProfileResult[0]=globalProfileFunc()'
+        sort = -1
+        retVal = None
+
+        # this is based on profile.run, the code is replicated here to allow us to
+        # eliminate a memory leak
+        prof = Profile()
+        # try to get wall-clock duration that is as accurate as possible
+        startT = self.TrueClock.getShortTime()
+        try:
+            prof = prof.run(statement)
+        except SystemExit:
+            pass
+        # try to get wall-clock duration that is as accurate as possible
+        endT = self.TrueClock.getShortTime()
+        # this has to be run immediately after profiling for the timings to be accurate
+        # tell the Profile object to generate output to the RAM file
+        prof.dump_stats(self._filename)
+
+        dur = endT - startT
+
+        # eliminate the memory leak
+        del prof.dispatcher
+
+        # and store the results
+        self._wallClockDur = dur
+
+        # store the RAM file for later
+        self._ramFile = _getProfileResultFileInfo(self._filename)
+        # clean up the RAM file support
+        _removeProfileCustomFuncs(self._filename)
+
+        # clean up the globals
+        result = globalProfileResult[0]
+        del __builtin__.__dict__['globalProfileFunc']
+        del __builtin__.__dict__['globalProfileResult']
+
+        self.release()
+        return result
+
+    def getWallClockDuration(self):
+        # this might not be accurate, it may include time taken up by other processes
+        return self._wallClockDur
+    
+    def getResults(self,
+                   lines=80,
+                   sorts=['cumulative', 'time', 'calls'],
+                   callInfo=False):
+        # set up the RAM file
+        _installProfileCustomFuncs(self._filename)
+        # install the stored RAM file from self.run()
+        _setProfileResultsFileInfo(self._filename, self._ramFile)
+
+        # now extract human-readable output from the RAM file
+
+        # capture print output to a string
+        sc = StdoutCapture()
+
+        # print the info to stdout
+        s = pstats.Stats(self._filename)
+        s.strip_dirs()
+        for sort in sorts:
+            s.sort_stats(sort)
+            s.print_stats(lines)
+            if callInfo:
+                s.print_callees(lines)
+                s.print_callers(lines)
+        
+        # make a copy of the print output
+        output = sc.getString()
+
+        # restore stdout to what it was before
+        sc.destroy()
+
+        # clean up the RAM file support
+        _removeProfileCustomFuncs(self._filename)
+
+        return output
+    

+ 15 - 0
direct/src/showbase/PythonUtil.py

@@ -1086,6 +1086,21 @@ def _installProfileCustomFuncs(filename):
     movedLoadFuncs.append(marshal.load)
     marshal.load = _profileMarshalLoad
 
+def _getProfileResultFileInfo(filename):
+    return (profileFilename2file.get(filename, None),
+            profileFilename2marshalData.get(filename, None))
+
+def _setProfileResultsFileInfo(filename, info):
+    f, m = info
+    if f:
+        profileFilename2file[filename] = f
+    if m:
+        profileFilename2marshalData[filename] = m
+
+def _clearProfileResultFileInfo(filename):
+    profileFilename2file.pop(filename, None)
+    profileFilename2marshalData.pop(filename, None)
+
 def _removeProfileCustomFuncs(filename):
     assert profileFilenameList.top() == filename
     marshal.load = movedLoadFuncs.pop()

+ 29 - 21
direct/src/task/TaskOrig.py

@@ -16,6 +16,7 @@ from direct.directnotify.DirectNotifyGlobal import *
 from direct.showbase.PythonUtil import *
 from direct.showbase.MessengerGlobal import *
 from direct.showbase import ExceptionVarDump
+from direct.showbase.ProfileSession import ProfileSession
 import time
 import fnmatch
 import string
@@ -406,8 +407,8 @@ class TaskManager:
         self._taskProfiler = None
         self._profileInfo = ScratchPad(
             taskId = None,
-            dt = None,
-            lastProfileResultString = None,
+            profiled = False,
+            session = None,
             )
 
         # We copy this value in from __builtins__ when it gets set.
@@ -809,14 +810,20 @@ class TaskManager:
         # by the task when it runs
         profileInfo = self._profileInfo
         doProfile = (task.id == profileInfo.taskId)
+        # don't profile the same task twice in a row
+        doProfile = doProfile and (not profileInfo.profiled)
         
         if not self.taskTimerVerbose:
             startTime = self.trueClock.getShortTime()
             
             # don't record timing info
             if doProfile:
-                ret = profileFunc(Functor(task, *task.extraArgs),
-                                  'TASK_PROFILE:%s' % task.name, True, log=False)
+                profileSession = ProfileSession(Functor(task, *task.extraArgs),
+                                                'TASK_PROFILE:%s' % task.name)
+                ret = profileSession.run()
+                # set these values *after* profiling in case we're profiling the TaskProfiler
+                profileInfo.session = profileSession
+                profileInfo.profiled = True
             else:
                 ret = task(*task.extraArgs)
             endTime = self.trueClock.getShortTime()
@@ -824,9 +831,7 @@ class TaskManager:
             # Record the dt
             dt = endTime - startTime
             if doProfile:
-                # if we profiled, record the measured duration but don't pollute the task's
-                # normal duration
-                profileInfo.dt = dt
+                # if we profiled, don't pollute the task's recorded duration
                 dt = task.avgDt
             task.dt = dt
 
@@ -836,8 +841,12 @@ class TaskManager:
                 task.pstats.start()
             startTime = self.trueClock.getShortTime()
             if doProfile:
-                ret = profileFunc(Functor(task, *task.extraArgs),
-                                  'profiled-task-%s' % task.name, True, log=False)
+                profileSession = ProfileSession(Functor(task, *task.extraArgs),
+                                                'profiled-task-%s' % task.name)
+                ret = profileSession.run()
+                # set these values *after* profiling in case we're profiling the TaskProfiler
+                profileInfo.session = profileSession
+                profileInfo.profiled = True
             else:
                 ret = task(*task.extraArgs)
             endTime = self.trueClock.getShortTime()
@@ -847,9 +856,7 @@ class TaskManager:
             # Record the dt
             dt = endTime - startTime
             if doProfile:
-                # if we profiled, record the measured duration but don't pollute the task's
-                # normal duration
-                profileInfo.dt = dt
+                # if we profiled, don't pollute the task's recorded duration
                 dt = task.avgDt
             task.dt = dt
 
@@ -864,9 +871,6 @@ class TaskManager:
             else:
                 task.avgDt = 0
 
-        if doProfile:
-            profileInfo.lastProfileResultString = self._getProfileResultString()
-
         # warn if the task took too long
         if self.warnTaskDuration and self.globalClock:
             avgFrameRate = self.globalClock.getAverageFrameRate()
@@ -1020,17 +1024,21 @@ class TaskManager:
             self._taskProfiler.flush(name)
 
     def _setProfileTask(self, task):
+        if self._profileInfo.session:
+            self._profileInfo.session.release()
+            self._profileInfo.session = None
         self._profileInfo = ScratchPad(
             taskId = task.id,
-            dt = None,
-            lastProfileResultString = None,
+            profiled = False,
+            session = None,
             )
 
-    def _getTaskProfileDt(self):
-        return self._profileInfo.dt
+    def _hasProfiledDesignatedTask(self):
+        # have we run a profile of the designated task yet?
+        return self._profileInfo.profiled
 
-    def _getLastProfileResultString(self):
-        return self._profileInfo.lastProfileResultString
+    def _getLastProfileSession(self):
+        return self._profileInfo.session
 
     def _getRandomTask(self):
         numTasks = 0

+ 75 - 64
direct/src/task/TaskProfiler.py

@@ -1,6 +1,6 @@
 from direct.directnotify.DirectNotifyGlobal import directNotify
 from direct.fsm.StatePush import FunctionCall
-from direct.showbase.PythonUtil import getProfileResultString, Averager
+from direct.showbase.PythonUtil import Averager
 
 class TaskTracker:
     # call it TaskProfiler to avoid confusion for the user
@@ -8,60 +8,72 @@ class TaskTracker:
     def __init__(self, namePattern):
         self._namePattern = namePattern
         self._durationAverager = Averager('%s-durationAverager' % namePattern)
-        self._avgData = None
-        self._avgDataDur = None
-        self._maxNonSpikeData = None
-        self._maxNonSpikeDataDur = None
+        self._avgSession = None
+        self._maxNonSpikeSession = None
     def destroy(self):
+        self.flush()
         del self._namePattern
         del self._durationAverager
     def flush(self):
         self._durationAverager.reset()
-        self._avgData = None
-        self._avgDataDur = None
-        self._maxNonSpikeData = None
-        self._maxNonSpikeDataDur = None
+        if self._avgSession:
+            self._avgSession.release()
+        if self._maxNonSpikeSession:
+            self._maxNonSpikeSession.release()
+        self._avgSession = None
+        self._maxNonSpikeSession = None
     def getNamePattern(self, namePattern):
         return self._namePattern
-    def addDuration(self, duration, data, isSpike):
+    def addProfileSession(self, session, isSpike):
+        duration = session.getWallClockDuration()
         self._durationAverager.addValue(duration)
-        storeAvgData = True
-        storeMaxNSData = True
-        if self._avgDataDur is not None:
+        storeAvg = True
+        storeMaxNS = True
+        if self._avgSession is not None:
             avgDur = self.getAvgDuration()
-            if abs(self._avgDataDur - avgDur) < abs(duration - avgDur):
+            if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
                 # current avg data is more average than this new sample, keep the data we've
                 # already got stored
-                storeAvgData = False
+                storeAvg = False
         if isSpike:
-            storeMaxNSData = False
+            storeMaxNS = False
         else:
-            if self._maxNonSpikeDataDur is not None and self._maxNonSpikeDataDur > duration:
-                storeMaxNSData = False
-        if storeAvgData:
-            self._avgData = data
-            self._avgDataDur = duration
-        if storeMaxNSData:
-            self._maxNonSpikeData = data
-            self._maxNonSpikeDataDur = duration
+            if (self._maxNonSpikeSession is not None and
+                self._maxNonSpikeSession.getWallClockDuration() > duration):
+                storeMaxNS = False
+        if storeAvg:
+            if self._avgSession:
+                self._avgSession.release()
+            session.acquire()
+            self._avgSession = session
+        if storeMaxNS:
+            if self._maxNonSpikeSession:
+                self._maxNonSpikeSession.release()
+            session.acquire()
+            self._maxNonSpikeSession = session
     def getAvgDuration(self):
         return self._durationAverager.getAverage()
     def getNumDurationSamples(self):
         return self._durationAverager.getCount()
-    def getAvgData(self):
-        # returns duration, data for closest-to-average sample
-        return self._avgDataDur, self._avgData
-    def getMaxNonSpikeData(self):
-        # returns duration, data for closest-to-average sample
-        return self._maxNonSpikeDataDur, self._maxNonSpikeData
+    def getAvgSession(self):
+        # returns profile session for closest-to-average sample
+        return self._avgSession
+    def getMaxNonSpikeSession(self):
+        # returns profile session for closest-to-average sample
+        return self._maxNonSpikeSession
     def log(self):
-        self.notify.info('task CPU profile (%s):\n'
-                         '== AVERAGE (%s wall-clock seconds)\n%s\n'
-                         '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
-            self._namePattern,
-            self._avgDataDur, self._avgData,
-            self._maxNonSpikeDataDur, self._maxNonSpikeData,
-            ))
+        if self._avgSession:
+            self.notify.info('task CPU profile (%s):\n'
+                             '== AVERAGE (%s wall-clock seconds)\n%s\n'
+                             '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
+                self._namePattern,
+                self._avgSession.getWallClockDuration(),
+                self._avgSession.getResults(),
+                self._maxNonSpikeSession.getWallClockDuration(),
+                self._maxNonSpikeSession.getResults(),
+                ))
+        else:
+            self.notify.info('task CPU profile (%s): no data collected' % self._namePattern)
 
 class TaskProfiler:
     # this does intermittent profiling of tasks running on the system
@@ -77,8 +89,6 @@ class TaskProfiler:
         self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
         # defines spike as longer than this multiple of avg task duration
         self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
-        # assign getProfileResultString to the taskMgr here, since Task.py can't import PythonUtil
-        taskMgr._getProfileResultString = getProfileResultString
 
     def destroy(self):
         if taskMgr.getProfileTasks():
@@ -117,31 +127,32 @@ class TaskProfiler:
     def _doProfileTasks(self, task=None):
         # gather data from the previous frame
         # set up for the next frame
-        profileDt = taskMgr._getTaskProfileDt()
-        if (self._task is not None) and (profileDt is not None):
-            lastProfileResult = taskMgr._getLastProfileResultString()
-            if lastProfileResult:
-                namePattern = self._task.getNamePattern()
-                if namePattern not in self._namePattern2tracker:
-                    self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
-                tracker = self._namePattern2tracker[namePattern]
-                isSpike = False
-                # do we have enough samples?
-                if tracker.getNumDurationSamples() > self._minSamples:
-                    # was this a spike?
-                    if profileDt > (tracker.getAvgDuration() * self._spikeThreshold):
-                        isSpike = True
-                        avgDur, avgResult = tracker.getAvgData()
-                        maxNSDur, maxNSResult = tracker.getMaxNonSpikeData()
-                        self.notify.info('task CPU spike profile (%s):\n'
-                                         '== AVERAGE (%s wall-clock seconds)\n%s\n'
-                                         '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
-                                         '== SPIKE (%s wall-clock seconds)\n%s' % (
-                            namePattern,
-                            avgDur, avgResult,
-                            maxNSDur, maxNSResult,
-                            profileDt, lastProfileResult))
-                tracker.addDuration(profileDt, lastProfileResult, isSpike)
+        if (self._task is not None) and taskMgr._hasProfiledDesignatedTask():
+            session = taskMgr._getLastProfileSession()
+            sessionDur = session.getWallClockDuration()
+            namePattern = self._task.getNamePattern()
+            if namePattern not in self._namePattern2tracker:
+                self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
+            tracker = self._namePattern2tracker[namePattern]
+            isSpike = False
+            # do we have enough samples?
+            if tracker.getNumDurationSamples() > self._minSamples:
+                # was this a spike?
+                if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
+                    print 'sessionDur=%s' % sessionDur
+                    print 'avgDur=%s' % tracker.getAvgDuration()
+                    isSpike = True
+                    avgSession = tracker.getAvgSession()
+                    maxNSSession = tracker.getMaxNonSpikeSession()
+                    self.notify.info('task CPU spike profile (%s):\n'
+                                     '== AVERAGE (%s wall-clock seconds)\n%s\n'
+                                     '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
+                                     '== SPIKE (%s wall-clock seconds)\n%s' % (
+                        namePattern,
+                        avgSession.getWallClockDuration(), avgSession.getResults(),
+                        maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
+                        sessionDur, session.getResults()))
+            tracker.addProfileSession(session, isSpike)
 
         # set up the next task
         self._task = taskMgr._getRandomTask()