Browse Source

Add GPU profiling capabilities to PStats using OpenGL timer queries

rdb 11 years ago
parent
commit
db0fd516a0
43 changed files with 1578 additions and 452 deletions
  1. 2 0
      panda/src/display/Sources.pp
  2. 130 113
      panda/src/display/graphicsEngine.cxx
  3. 42 3
      panda/src/display/graphicsStateGuardian.I
  4. 212 39
      panda/src/display/graphicsStateGuardian.cxx
  5. 39 7
      panda/src/display/graphicsStateGuardian.h
  6. 61 0
      panda/src/display/pStatGPUTimer.I
  7. 65 0
      panda/src/display/pStatGPUTimer.h
  8. 12 12
      panda/src/glstuff/glCgShaderContext_src.cxx
  9. 26 5
      panda/src/glstuff/glGraphicsBuffer_src.cxx
  10. 14 9
      panda/src/glstuff/glGraphicsBuffer_src.h
  11. 9 5
      panda/src/glstuff/glGraphicsStateGuardian_src.I
  12. 242 135
      panda/src/glstuff/glGraphicsStateGuardian_src.cxx
  13. 52 33
      panda/src/glstuff/glGraphicsStateGuardian_src.h
  14. 14 0
      panda/src/glstuff/glLatencyQueryContext_src.I
  15. 54 0
      panda/src/glstuff/glLatencyQueryContext_src.cxx
  16. 57 0
      panda/src/glstuff/glLatencyQueryContext_src.h
  17. 28 19
      panda/src/glstuff/glShaderContext_src.cxx
  18. 28 0
      panda/src/glstuff/glTimerQueryContext_src.I
  19. 104 0
      panda/src/glstuff/glTimerQueryContext_src.cxx
  20. 68 0
      panda/src/glstuff/glTimerQueryContext_src.h
  21. 11 1
      panda/src/glstuff/glmisc_src.cxx
  22. 1 0
      panda/src/glstuff/glmisc_src.h
  23. 2 0
      panda/src/glstuff/glstuff_src.cxx
  24. 2 0
      panda/src/glstuff/glstuff_src.h
  25. 3 3
      panda/src/glstuff/panda_glext.h
  26. 3 0
      panda/src/gobj/Sources.pp
  27. 5 3
      panda/src/gobj/config_gobj.cxx
  28. 3 0
      panda/src/gobj/p3gobj_composite1.cxx
  29. 1 3
      panda/src/gobj/p3gobj_composite2.cxx
  30. 26 0
      panda/src/gobj/timerQueryContext.I
  31. 35 0
      panda/src/gobj/timerQueryContext.cxx
  32. 60 0
      panda/src/gobj/timerQueryContext.h
  33. 1 6
      panda/src/gsgbase/graphicsStateGuardianBase.h
  34. 11 0
      panda/src/pstatclient/config_pstats.cxx
  35. 1 0
      panda/src/pstatclient/config_pstats.h
  36. 72 37
      panda/src/pstatclient/pStatClient.cxx
  37. 5 2
      panda/src/pstatclient/pStatClient.h
  38. 51 9
      panda/src/pstatclient/pStatClientImpl.cxx
  39. 2 1
      panda/src/pstatclient/pStatClientImpl.h
  40. 5 3
      panda/src/pstatclient/pStatProperties.cxx
  41. 14 1
      panda/src/pstatclient/pStatThread.I
  42. 3 1
      panda/src/pstatclient/pStatThread.h
  43. 2 2
      panda/src/pstatclient/pStatTimer.h

+ 2 - 0
panda/src/display/Sources.pp

@@ -38,6 +38,7 @@
     lru.h \
     nativeWindowHandle.I nativeWindowHandle.h \
     parasiteBuffer.I parasiteBuffer.h \
+    pStatGPUTimer.I pStatGPUTimer.h \
     windowHandle.I windowHandle.h \
     windowProperties.I windowProperties.h \
     renderBuffer.h \
@@ -112,6 +113,7 @@
     lru.h \
     nativeWindowHandle.I nativeWindowHandle.h \
     parasiteBuffer.I parasiteBuffer.h \
+    pStatGPUTimer.I pStatGPUTimer.h \
     windowHandle.I windowHandle.h \
     windowProperties.I windowProperties.h \
     renderBuffer.h \

+ 130 - 113
panda/src/display/graphicsEngine.cxx

@@ -24,6 +24,7 @@
 #include "cullTraverser.h"
 #include "clockObject.h"
 #include "pStatTimer.h"
+#include "pStatGPUTimer.h"
 #include "pStatClient.h"
 #include "pStatCollector.h"
 #include "mutexHolder.h"
@@ -51,7 +52,7 @@
   #define WINDOWS_LEAN_AND_MEAN
   #include <WinSock2.h>
   #include <wtypes.h>
-  #undef WINDOWS_LEAN_AND_MEAN  
+  #undef WINDOWS_LEAN_AND_MEAN
 #else
   #include <sys/time.h>
 #endif
@@ -139,7 +140,7 @@ GraphicsEngine(Pipeline *pipeline) :
   _windows_sorted = true;
   _window_sort_index = 0;
   _needs_open_windows = false;
-  
+
   set_threading_model(GraphicsThreadingModel(threading_model));
   if (!_threading_model.is_default()) {
     display_cat.info()
@@ -187,7 +188,7 @@ set_threading_model(const GraphicsThreadingModel &threading_model) {
       return;
     }
   }
-    
+
 #ifndef THREADED_PIPELINE
   if (!threading_model.is_single_threaded()) {
     display_cat.warning()
@@ -237,7 +238,7 @@ get_threading_model() const {
 //
 //               If a null pointer is supplied for the gsg, then this
 //               routine will create a new gsg.
-//               
+//
 //               This routine is only called from the app thread.
 ////////////////////////////////////////////////////////////////////
 
@@ -282,7 +283,7 @@ make_output(GraphicsPipe *pipe,
   //  already-initialized gsg.
 
   // Simplify the input parameters.
-  
+
   int x_size = 0, y_size = 0;
   if (win_prop.has_size()) {
     x_size = win_prop.get_x_size();
@@ -358,7 +359,7 @@ make_output(GraphicsPipe *pipe,
     // an unencumbered GSG.
     return NULL;
   }
-  
+
   // Determine if a parasite buffer meets the user's specs.
 
   bool can_use_parasite = false;
@@ -380,7 +381,7 @@ make_output(GraphicsPipe *pipe,
   // Even if prefer-parasite-buffer is set, parasites are not preferred
   // if the host window is too small, or if the host window does not
   // have the requested properties.
-  
+
   if ((prefer_parasite_buffer) &&
       (can_use_parasite) &&
       (x_size <= host->get_x_size())&&
@@ -407,10 +408,10 @@ make_output(GraphicsPipe *pipe,
   }
 
   // Ask the pipe to create a window.
-  
+
   for (int retry=0; retry<10; retry++) {
     bool precertify = false;
-    PT(GraphicsOutput) window = 
+    PT(GraphicsOutput) window =
       pipe->make_output(name, fb_prop, win_prop, flags, this, gsg, host, retry, precertify);
     if (window != (GraphicsOutput *)NULL) {
       window->_sort = sort;
@@ -454,11 +455,11 @@ make_output(GraphicsPipe *pipe,
       nassertr(removed, NULL);
     }
   }
-  
+
   // Parasite buffers were not preferred, but the pipe could not
   // create a window to the user's specs.  Try a parasite as a
   // last hope.
-  
+
   if (can_use_parasite) {
     ParasiteBuffer *buffer = new ParasiteBuffer(host, name, x_size, y_size, flags);
     buffer->_sort = sort;
@@ -612,7 +613,7 @@ remove_all_windows() {
 //       Access: Published
 //  Description: Resets the framebuffer of the current window.  This
 //               is currently used by DirectX 8 only. It calls a
-//               reset_window function on each active window to 
+//               reset_window function on each active window to
 //               release/create old/new framebuffer
 ////////////////////////////////////////////////////////////////////
 void GraphicsEngine::
@@ -711,11 +712,11 @@ render_frame() {
     if (!_windows_sorted) {
       do_resort_windows();
     }
-    
+
     if (sync_flip && _flip_state != FS_flip) {
       do_flip_frame(current_thread);
     }
-    
+
     // Are any of the windows ready to be deleted?
     Windows new_windows;
     new_windows.reserve(_windows.size());
@@ -725,10 +726,10 @@ render_frame() {
       nassertv(win != NULL);
       if (win->get_delete_flag()) {
         do_remove_window(win, current_thread);
-        
+
       } else {
         new_windows.push_back(win);
-        
+
         // Let's calculate each scene's bounding volume here in App,
         // before we cycle the pipeline.  The cull traversal will
         // calculate it anyway, but if we calculate it in App first
@@ -769,11 +770,11 @@ render_frame() {
       }
       _loaded_textures.clear();
     }
-    
+
     // Now it's time to do any drawing from the main frame--after all of
     // the App code has executed, but before we begin the next frame.
     _app.do_frame(this, current_thread);
-    
+
     // Grab each thread's mutex again after all windows have flipped,
     // and wait for the thread to finish.
     {
@@ -782,32 +783,32 @@ render_frame() {
       for (ti = _threads.begin(); ti != _threads.end(); ++ti) {
         RenderThread *thread = (*ti).second;
         thread->_cv_mutex.acquire();
-        
+
         while (thread->_thread_state != TS_wait) {
           thread->_cv_done.wait();
         }
       }
     }
-    
+
 #if defined(THREADED_PIPELINE) && defined(DO_PSTATS)
     _cyclers_pcollector.set_level(_pipeline->get_num_cyclers());
     _dirty_cyclers_pcollector.set_level(_pipeline->get_num_dirty_cyclers());
-    
+
 #ifdef DEBUG_THREADS
     if (PStatClient::is_connected()) {
       _pipeline->iterate_all_cycler_types(pstats_count_cycler_type, this);
       _pipeline->iterate_dirty_cycler_types(pstats_count_dirty_cycler_type, this);
     }
 #endif  // DEBUG_THREADS
-    
+
 #endif  // THREADED_PIPELINE && DO_PSTATS
-    
+
     GeomCacheManager::flush_level();
     CullTraverser::flush_level();
     RenderState::flush_level();
     TransformState::flush_level();
     CullableObject::flush_level();
-    
+
     // Now cycle the pipeline and officially begin the next frame.
 #ifdef THREADED_PIPELINE
     {
@@ -815,15 +816,15 @@ render_frame() {
       _pipeline->cycle();
     }
 #endif  // THREADED_PIPELINE
-    
+
     global_clock->tick(current_thread);
     if (global_clock->check_errors(current_thread)) {
       throw_event("clock_error");
     }
-    
+
 #ifdef DO_PSTATS
     PStatClient::main_tick();
-    
+
     // Reset our pcollectors that track data across the frame.
     CullTraverser::_nodes_pcollector.clear_level();
     CullTraverser::_geom_nodes_pcollector.clear_level();
@@ -832,18 +833,18 @@ render_frame() {
     GeomCacheManager::_geom_cache_record_pcollector.clear_level();
     GeomCacheManager::_geom_cache_erase_pcollector.clear_level();
     GeomCacheManager::_geom_cache_evict_pcollector.clear_level();
-    
+
     GraphicsStateGuardian::init_frame_pstats();
-    
+
     _transform_states_pcollector.set_level(TransformState::get_num_states());
     _render_states_pcollector.set_level(RenderState::get_num_states());
     if (pstats_unused_states) {
       _transform_states_unused_pcollector.set_level(TransformState::get_num_unused_states());
       _render_states_unused_pcollector.set_level(RenderState::get_num_unused_states());
     }
-    
+
     _sw_sprites_pcollector.clear_level();
-    
+
     _cnode_volume_pcollector.clear_level();
     _gnode_volume_pcollector.clear_level();
     _geom_volume_pcollector.clear_level();
@@ -868,18 +869,18 @@ render_frame() {
     _occlusion_passed_pcollector.clear_level();
     _occlusion_failed_pcollector.clear_level();
     _occlusion_tests_pcollector.clear_level();
-    
+
     if (PStatClient::is_connected()) {
       size_t small_buf = GeomVertexArrayData::get_small_lru()->get_total_size();
       size_t independent = GeomVertexArrayData::get_independent_lru()->get_total_size();
       size_t resident = VertexDataPage::get_global_lru(VertexDataPage::RC_resident)->get_total_size();
       size_t compressed = VertexDataPage::get_global_lru(VertexDataPage::RC_compressed)->get_total_size();
       size_t pending = VertexDataPage::get_pending_lru()->get_total_size();
-      
+
       VertexDataSaveFile *save_file = VertexDataPage::get_save_file();
       size_t total_disk = save_file->get_total_file_size();
       size_t used_disk = save_file->get_used_file_size();
-      
+
       _vertex_data_small_pcollector.set_level(small_buf);
       _vertex_data_independent_pcollector.set_level(independent);
       _vertex_data_pending_pcollector.set_level(pending);
@@ -888,11 +889,11 @@ render_frame() {
       _vertex_data_unused_disk_pcollector.set_level(total_disk - used_disk);
       _vertex_data_used_disk_pcollector.set_level(used_disk);
     }
-    
+
 #endif  // DO_PSTATS
-    
+
     GeomVertexArrayData::lru_epoch();
-    
+
     // Now signal all of our threads to begin their next frame.
     Threads::const_iterator ti;
     for (ti = _threads.begin(); ti != _threads.end(); ++ti) {
@@ -903,24 +904,24 @@ render_frame() {
       }
       thread->_cv_mutex.release();
     }
-    
+
     // Some threads may still be drawing, so indicate that we have to
     // wait for those threads before we can flip.
     _flip_state = _auto_flip ? FS_flip : FS_draw;
   }
 
   // Now the lock is released.
-  
+
   if (yield_timeslice) {
     // Nap for a moment to yield the timeslice, to be polite to other
     // running applications.
     PStatTimer timer(_yield_pcollector, current_thread);
     Thread::force_yield();
-  } else if (!Thread::is_true_threads()) { 
+  } else if (!Thread::is_true_threads()) {
     PStatTimer timer(_yield_pcollector, current_thread);
     Thread::consider_yield();
   }
-  
+
   // Anything that happens outside of GraphicsEngine::render_frame()
   // is deemed to be App.
   _app_pcollector.start();
@@ -960,11 +961,11 @@ open_windows() {
     for (ti = _threads.begin(); ti != _threads.end(); ++ti) {
       RenderThread *thread = (*ti).second;
       thread->_cv_mutex.acquire();
-      
+
       while (thread->_thread_state != TS_wait) {
         thread->_cv_done.wait();
       }
-      
+
       thread->_thread_state = TS_do_windows;
       thread->_cv_start.notify();
       thread->_cv_mutex.release();
@@ -1003,7 +1004,7 @@ sync_frame() {
 //               we seems to return once all draw calls have been submitted.
 //               Calling 'flip_frame' after this function should immediately
 //               cause a buffer flip.  This function will only work in
-//               opengl right now, for all other graphics pipelines it will 
+//               opengl right now, for all other graphics pipelines it will
 //               simply return immediately.  In opengl it's a bit of a hack:
 //               it will attempt to read a single pixel from the frame buffer to
 //               force the graphics card to finish drawing before it returns
@@ -1081,7 +1082,7 @@ extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) {
     WindowRenderer *wr = get_window_renderer(draw_name, 0);
     RenderThread *thread = (RenderThread *)wr;
     MutexHolder holder2(thread->_cv_mutex);
-      
+
     while (thread->_thread_state != TS_wait) {
       thread->_cv_done.wait();
     }
@@ -1130,7 +1131,7 @@ dispatch_compute(const LVecBase3i &work_groups, const ShaderAttrib *sattr, Graph
     WindowRenderer *wr = get_window_renderer(draw_name, 0);
     RenderThread *thread = (RenderThread *)wr;
     MutexHolder holder2(thread->_cv_mutex);
-      
+
     while (thread->_thread_state != TS_wait) {
       thread->_cv_done.wait();
     }
@@ -1148,7 +1149,7 @@ dispatch_compute(const LVecBase3i &work_groups, const ShaderAttrib *sattr, Graph
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsEngine::get_global_ptr
 //       Access: Published, Static
-//  Description: 
+//  Description:
 ////////////////////////////////////////////////////////////////////
 GraphicsEngine *GraphicsEngine::
 get_global_ptr() {
@@ -1216,7 +1217,7 @@ do_cull(CullHandler *cull_handler, SceneSetup *scene_setup,
       local_frustum = DCAST(GeometricBoundingVolume, bv->make_copy());
 
       NodePath scene_parent = scene_setup->get_scene_root().get_parent(current_thread);
-      CPT(TransformState) cull_center_transform = 
+      CPT(TransformState) cull_center_transform =
         scene_setup->get_cull_center().get_transform(scene_parent, current_thread);
       local_frustum->xform(cull_center_transform->get_mat());
 
@@ -1337,7 +1338,7 @@ cull_and_draw_together(const GraphicsEngine::Windows &wlist,
 
       if (win->begin_frame(GraphicsOutput::FM_render, current_thread)) {
         win->clear(current_thread);
-      
+
         int num_display_regions = win->get_num_active_display_regions();
         for (int i = 0; i < num_display_regions; i++) {
           DisplayRegion *dr = win->get_active_display_region(i);
@@ -1376,7 +1377,7 @@ cull_and_draw_together(GraphicsOutput *win, DisplayRegion *dr,
   GraphicsStateGuardian *gsg = win->get_gsg();
   nassertv(gsg != (GraphicsStateGuardian *)NULL);
 
-  DisplayRegionPipelineReader *dr_reader = 
+  DisplayRegionPipelineReader *dr_reader =
     new DisplayRegionPipelineReader(dr, current_thread);
 
   win->change_scenes(dr_reader);
@@ -1409,9 +1410,9 @@ cull_and_draw_together(GraphicsOutput *win, DisplayRegion *dr,
         // Issue the cull callback on this DisplayRegion.
         DisplayRegionCullCallbackData cbdata(&cull_handler, scene_setup);
         cbobj->do_callback(&cbdata);
-        
+
         // The callback has taken care of the culling.
-        
+
       } else {
         // Perform the cull normally.
         dr->do_cull(&cull_handler, scene_setup, gsg, current_thread);
@@ -1455,7 +1456,7 @@ cull_to_bins(const GraphicsEngine::Windows &wlist, Thread *current_thread) {
       for (int i = 0; i < num_display_regions; ++i) {
         DisplayRegion *dr = win->get_active_display_region(i);
         if (dr != (DisplayRegion *)NULL) {
-          DisplayRegionPipelineReader *dr_reader = 
+          DisplayRegionPipelineReader *dr_reader =
             new DisplayRegionPipelineReader(dr, current_thread);
           NodePath camera = dr_reader->get_camera();
           AlreadyCulled::iterator aci = already_culled.insert(AlreadyCulled::value_type(camera, (DisplayRegion *)NULL)).first;
@@ -1466,7 +1467,7 @@ cull_to_bins(const GraphicsEngine::Windows &wlist, Thread *current_thread) {
             dr_reader = NULL;
             (*aci).second = dr;
             cull_to_bins(win, dr, current_thread);
-            
+
           } else {
             // We have already culled a scene using this camera in
             // this thread, and now we're being asked to cull another
@@ -1480,7 +1481,7 @@ cull_to_bins(const GraphicsEngine::Windows &wlist, Thread *current_thread) {
                                 setup_scene(win->get_gsg(), dr_reader),
                                 current_thread);
           }
-        
+
           if (dr_reader != (DisplayRegionPipelineReader *)NULL) {
             delete dr_reader;
           }
@@ -1538,7 +1539,7 @@ cull_to_bins(GraphicsOutput *win, DisplayRegion *dr, Thread *current_thread) {
     PStatTimer timer(_cull_sort_pcollector, current_thread);
     cull_result->finish_cull(scene_setup, current_thread);
   }
-  
+
   // Save the results for next frame.
   dr->set_cull_result(cull_result, scene_setup, current_thread);
 }
@@ -1559,53 +1560,73 @@ draw_bins(const GraphicsEngine::Windows &wlist, Thread *current_thread) {
   size_t wlist_size = wlist.size();
   for (size_t wi = 0; wi < wlist_size; ++wi) {
     GraphicsOutput *win = wlist[wi];
+
     if (win->is_active()) {
-      if (win->flip_ready()) {
+      GraphicsStateGuardian *gsg = win->get_gsg();
+
+      GraphicsOutput *host = win->get_host();
+      if (host->flip_ready()) {
         {
+          // We can't use a PStatGPUTimer before begin_frame, so when using GPU
+          // timing, it is advisable to set auto-flip to #t.
           PStatTimer timer(GraphicsEngine::_flip_begin_pcollector, current_thread);
-          win->begin_flip();
+          host->begin_flip();
         }
         {
           PStatTimer timer(GraphicsEngine::_flip_end_pcollector, current_thread);
-          win->end_flip();
+          host->end_flip();
         }
       }
 
-      PStatTimer timer(win->get_draw_window_pcollector(), current_thread);
       if (win->begin_frame(GraphicsOutput::FM_render, current_thread)) {
-        win->clear(current_thread);
+        // We have to start this collector only after begin_frame() has
+        // succeeded, because PStatGPUTimer needs a current context to work.
+        {
+          PStatGPUTimer timer(win->get_gsg(), win->get_draw_window_pcollector(), current_thread);
+          win->clear(current_thread);
 
-        if (display_cat.is_spam()) {
-          display_cat.spam()
-            << "Drawing window " << win->get_name() << "\n";
-        }
-        int num_display_regions = win->get_num_active_display_regions();
-        for (int i = 0; i < num_display_regions; ++i) {
-          DisplayRegion *dr = win->get_active_display_region(i);
-          if (dr != (DisplayRegion *)NULL) {
-            draw_bins(win, dr, current_thread);
+          if (display_cat.is_spam()) {
+            display_cat.spam()
+              << "Drawing window " << win->get_name() << "\n";
+          }
+          int num_display_regions = win->get_num_active_display_regions();
+          for (int i = 0; i < num_display_regions; ++i) {
+            DisplayRegion *dr = win->get_active_display_region(i);
+            if (dr != (DisplayRegion *)NULL) {
+              draw_bins(win, dr, current_thread);
+            }
           }
         }
         win->end_frame(GraphicsOutput::FM_render, current_thread);
 
         if (_auto_flip) {
+#ifdef DO_PSTATS
+          // This is a good time to perform a latency query.
+          if (win->get_gsg()->get_timer_queries_active()) {
+            win->get_gsg()->issue_timer_query(GraphicsStateGuardian::_command_latency_pcollector.get_index());
+          }
+#endif
+
           if (win->flip_ready()) {
             {
+              // begin_flip doesn't do anything interesting, so let's not waste two timer queries on it.
               PStatTimer timer(GraphicsEngine::_flip_begin_pcollector, current_thread);
               win->begin_flip();
             }
             {
-              PStatTimer timer(GraphicsEngine::_flip_end_pcollector, current_thread);
+              PStatGPUTimer timer(win->get_gsg(), GraphicsEngine::_flip_end_pcollector, current_thread);
               win->end_flip();
             }
           }
         }
+
       } else {
         if (display_cat.is_spam()) {
           display_cat.spam()
             << "Not drawing window " << win->get_name() << "\n";
         }
       }
+
     } else {
       if (display_cat.is_spam()) {
         display_cat.spam()
@@ -1722,8 +1743,6 @@ ready_flip_windows(const GraphicsEngine::Windows &wlist, Thread *current_thread)
   }
 }
 
-
-
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsEngine::do_sync_frame
 //       Access: Private
@@ -1751,7 +1770,6 @@ do_sync_frame(Thread *current_thread) {
   _flip_state = FS_sync;
 }
 
-
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsEngine::do_ready_flip
 //       Access: Private
@@ -1777,8 +1795,6 @@ do_ready_flip(Thread *current_thread) {
   }
   _app.do_ready_flip(this,current_thread);
   _flip_state = FS_sync;
-  
-
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1811,7 +1827,7 @@ do_flip_frame(Thread *current_thread) {
       }
     }
   }
-  
+
   // Now signal all of our threads to flip the windows.
   _app.do_flip(this, current_thread);
 
@@ -1895,7 +1911,7 @@ setup_scene(GraphicsStateGuardian *gsg, DisplayRegionPipelineReader *dr) {
     // There must be a singular transform over the scene.
     if (!_singular_warning_last_frame) {
       display_cat.warning()
-        << "Scene " << scene_root << " has net scale (" 
+        << "Scene " << scene_root << " has net scale ("
         << scene_root.get_scale(NodePath()) << "); cannot render.\n";
       _singular_warning_this_frame = true;
     }
@@ -1906,7 +1922,7 @@ setup_scene(GraphicsStateGuardian *gsg, DisplayRegionPipelineReader *dr) {
     // There must be a singular transform over the camera.
     if (!_singular_warning_last_frame) {
       display_cat.warning()
-        << "Camera " << camera << " has net scale (" 
+        << "Camera " << camera << " has net scale ("
         << camera.get_scale(NodePath()) << "); cannot render.\n";
     }
     _singular_warning_this_frame = true;
@@ -1950,11 +1966,12 @@ setup_scene(GraphicsStateGuardian *gsg, DisplayRegionPipelineReader *dr) {
 void GraphicsEngine::
 do_draw(CullResult *cull_result, SceneSetup *scene_setup,
         GraphicsOutput *win, DisplayRegion *dr, Thread *current_thread) {
-  // Statistics
-  PStatTimer timer(dr->get_draw_region_pcollector(), current_thread);
 
-  GraphicsStateGuardian *gsg = win->get_gsg();
   CallbackObject *cbobj;
+  GraphicsStateGuardian *gsg = win->get_gsg();
+
+  // Statistics
+  PStatGPUTimer timer(gsg, dr->get_draw_region_pcollector(), current_thread);
 
   {
     DisplayRegionPipelineReader dr_reader(dr, current_thread);
@@ -2033,20 +2050,20 @@ do_add_window(GraphicsOutput *window,
   _windows_sorted = false;
   _windows.push_back(window);
 
-  WindowRenderer *cull = 
+  WindowRenderer *cull =
     get_window_renderer(threading_model.get_cull_name(),
                         threading_model.get_cull_stage());
-  WindowRenderer *draw = 
+  WindowRenderer *draw =
     get_window_renderer(threading_model.get_draw_name(),
                         threading_model.get_draw_stage());
-  
+
   if (threading_model.get_cull_sorting()) {
     cull->add_window(cull->_cull, window);
     draw->add_window(draw->_draw, window);
   } else {
     cull->add_window(cull->_cdraw, window);
   }
-  
+
   // Ask the pipe which thread it prefers to run its windowing
   // commands in (the "window thread").  This is the thread that
   // handles the commands to open, resize, etc. the window.  X
@@ -2056,7 +2073,7 @@ do_add_window(GraphicsOutput *window,
   // has been bound in a given thread, it cannot subsequently be bound
   // in any other thread, and we have to bind a context in
   // open_window()).
-  
+
   switch (window->get_pipe()->get_preferred_window_thread()) {
   case GraphicsPipe::PWT_app:
     _app.add_window(_app._window, window);
@@ -2095,8 +2112,8 @@ do_add_gsg(GraphicsStateGuardian *gsg, GraphicsPipe *pipe,
   }
 
   auto_adjust_capabilities(gsg);
-  
-  WindowRenderer *draw = 
+
+  WindowRenderer *draw =
     get_window_renderer(threading_model.get_draw_name(),
                         threading_model.get_draw_stage());
 
@@ -2227,7 +2244,7 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       << "textures_power_2 to 'up' or 'down'.\n";
     textures_power_2 = ATS_down; // Not a fix.  Just suppresses further error messages.
   }
-  
+
   if (textures_auto_power_2 && !Texture::has_textures_power_2()) {
     if (gsg->get_supports_tex_non_pow2()) {
       Texture::set_textures_power_2(ATS_none);
@@ -2235,13 +2252,13 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       Texture::set_textures_power_2(textures_power_2);
     }
   }
-  
-  if ((Texture::get_textures_power_2() == ATS_none) && 
+
+  if ((Texture::get_textures_power_2() == ATS_none) &&
       (!gsg->get_supports_tex_non_pow2())) {
-    
+
     // Overaggressive configuration detected
-    
-    display_cat.error() 
+
+    display_cat.error()
       << "The 'textures_power_2' configuration is set to 'none', meaning \n"
       << "that non-power-of-two texture support is required, but the video \n"
       << "driver I'm trying to use does not support non-power-of-two textures.\n";
@@ -2250,7 +2267,7 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       display_cat.error()
         << "The 'none' did not come from the config file.  In other words,\n"
         << "the variable 'textures_power_2' was altered procedurally.\n";
-    
+
       if (textures_auto_power_2) {
         display_cat.error()
           << "It is possible that it was set by panda's automatic mechanisms,\n"
@@ -2263,7 +2280,7 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       }
     }
   }
-  
+
   if (shader_auto_utilization && (shader_utilization != SUT_none)) {
     display_cat.error()
       << "Invalid panda config file: if you set the config-variable\n"
@@ -2271,7 +2288,7 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       << "shader_utilization to 'none'.\n";
     shader_utilization = SUT_none; // Not a fix.  Just suppresses further error messages.
   }
-  
+
   if (shader_auto_utilization && !Shader::have_shader_utilization()) {
     if (gsg->get_supports_basic_shaders()) {
       Shader::set_shader_utilization(SUT_basic);
@@ -2279,13 +2296,13 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       Shader::set_shader_utilization(SUT_none);
     }
   }
-  
-  if ((Shader::get_shader_utilization() != SUT_none) && 
+
+  if ((Shader::get_shader_utilization() != SUT_none) &&
       (!gsg->get_supports_basic_shaders())) {
-    
+
     // Overaggressive configuration detected
-    
-    display_cat.error() 
+
+    display_cat.error()
       << "The 'shader_utilization' config variable is set, meaning\n"
       << "that panda may try to generate shaders.  However, the video \n"
       << "driver I'm trying to use does not support shaders.\n";
@@ -2294,7 +2311,7 @@ auto_adjust_capabilities(GraphicsStateGuardian *gsg) {
       display_cat.error()
         << "The 'shader_utilization' setting did not come from the config\n"
         << "file.  In other words, it was altered procedurally.\n";
-    
+
       if (shader_auto_utilization) {
         display_cat.error()
           << "It is possible that it was set by panda's automatic mechanisms,\n"
@@ -2322,7 +2339,7 @@ terminate_threads(Thread *current_thread) {
   // We spend almost our entire time in this method just waiting for
   // threads.  Time it appropriately.
   PStatTimer timer(_wait_pcollector, current_thread);
-  
+
   // First, wait for all the threads to finish their current frame.
   // Grabbing the mutex should achieve that.
   Threads::const_iterator ti;
@@ -2330,7 +2347,7 @@ terminate_threads(Thread *current_thread) {
     RenderThread *thread = (*ti).second;
     thread->_cv_mutex.acquire();
   }
-  
+
   // Now tell them to close their windows and terminate.
   for (ti = _threads.begin(); ti != _threads.end(); ++ti) {
     RenderThread *thread = (*ti).second;
@@ -2344,7 +2361,7 @@ terminate_threads(Thread *current_thread) {
     RenderThread *thread = (*ti).second;
     thread->join();
   }
-  
+
   _threads.clear();
 }
 
@@ -2450,7 +2467,7 @@ get_window_renderer(const string &name, int pipeline_stage) {
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsEngine::WindowRenderer::Constructor
 //       Access: Public
-//  Description: 
+//  Description:
 ////////////////////////////////////////////////////////////////////
 GraphicsEngine::WindowRenderer::
 WindowRenderer(const string &name) :
@@ -2588,7 +2605,7 @@ do_frame(GraphicsEngine *engine, Thread *current_thread) {
         // This one has no outstanding pointers; clean it up.
         GraphicsPipe *pipe = gsg->get_pipe();
         engine->close_gsg(pipe, gsg);
-      } else { 
+      } else {
         // This one is ok; preserve it.
         new_gsgs.insert(gsg);
       }
@@ -2667,12 +2684,12 @@ do_close(GraphicsEngine *engine, Thread *current_thread) {
       // This one has no outstanding pointers; clean it up.
       GraphicsPipe *pipe = gsg->get_pipe();
       engine->close_gsg(pipe, gsg);
-    } else { 
+    } else {
       // This one is ok; preserve it.
       new_gsgs.insert(gsg);
     }
   }
-  
+
   _gsgs.swap(new_gsgs);
 }
 
@@ -2728,10 +2745,10 @@ any_done_gsgs() const {
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsEngine::RenderThread::Constructor
 //       Access: Public
-//  Description: 
+//  Description:
 ////////////////////////////////////////////////////////////////////
 GraphicsEngine::RenderThread::
-RenderThread(const string &name, GraphicsEngine *engine) : 
+RenderThread(const string &name, GraphicsEngine *engine) :
   Thread(name, "Main"),
   WindowRenderer(name),
   _engine(engine),

+ 42 - 3
panda/src/display/graphicsStateGuardian.I

@@ -1,6 +1,6 @@
 // Filename: graphicsStateGuardian.I
 // Created by:  drose (24Sep99)
-// Updated by: fperazzi, PandaSE (29Apr10) (added 
+// Updated by: fperazzi, PandaSE (29Apr10) (added
 // get_max_2d_texture_array_layers and related)
 //
 ////////////////////////////////////////////////////////////////////
@@ -334,10 +334,10 @@ get_max_3d_texture_dimension() const {
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_max_2d_texture_array_layers
 //       Access: Published
-//  Description: Returns the largest possible number of pages, or -1 
+//  Description: Returns the largest possible number of pages, or -1
 //               if there is no particular limit. Returns 0 if 2-d
 //               texture arrays not supported.
-//               
+//
 //               The value returned may not be meaningful until after
 //               the graphics context has been fully created (e.g. the
 //               window has been opened).
@@ -713,6 +713,45 @@ get_supports_geometry_instancing() const {
   return _supports_geometry_instancing;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::get_supports_occlusion_query
+//       Access: Published
+//  Description: Returns true if this GSG supports an occlusion query.
+//               If this is true, then begin_occlusion_query() and
+//               end_occlusion_query() may be called to bracket a
+//               sequence of draw_triangles() (or whatever) calls to
+//               measure pixels that pass the depth test.
+////////////////////////////////////////////////////////////////////
+INLINE bool GraphicsStateGuardian::
+get_supports_occlusion_query() const {
+  // Defined in the .I file, so it carries the same INLINE qualifier
+  // as its declaration in the class body.
+  return _supports_occlusion_query;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::get_supports_timer_query
+//       Access: Published
+//  Description: Returns true if this GSG supports a timer query.
+//               flush_timer_queries() only turns GPU timing on
+//               when this flag is set.
+////////////////////////////////////////////////////////////////////
+INLINE bool GraphicsStateGuardian::
+get_supports_timer_query() const {
+  // Defined in the .I file, so it carries the same INLINE qualifier
+  // as its declaration in the class body.
+  return _supports_timer_query;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::get_timer_queries_active
+//       Access: Published
+//  Description: Returns true if timer queries are currently
+//               enabled on this GSG.  Always returns false when
+//               Panda is built without DO_PSTATS, since the flag
+//               does not exist in that configuration.
+////////////////////////////////////////////////////////////////////
+INLINE bool GraphicsStateGuardian::
+get_timer_queries_active() const {
+#ifdef DO_PSTATS
+  return _timer_queries_active;
+#else
+  return false;
+#endif
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_max_color_targets
 //       Access: Published

+ 212 - 39
panda/src/display/graphicsStateGuardian.cxx

@@ -28,6 +28,7 @@
 #include "throw_event.h"
 #include "clockObject.h"
 #include "pStatTimer.h"
+#include "pStatGPUTimer.h"
 #include "geomTristrips.h"
 #include "geomTrifans.h"
 #include "geomLinestrips.h"
@@ -55,6 +56,7 @@
 #include "colorScaleAttrib.h"
 #include "clipPlaneAttrib.h"
 #include "fogAttrib.h"
+#include "config_pstats.h"
 
 #include <algorithm>
 #include <limits.h>
@@ -87,9 +89,19 @@ PStatCollector GraphicsStateGuardian::_draw_primitive_pcollector("Draw:Primitive
 PStatCollector GraphicsStateGuardian::_draw_set_state_pcollector("Draw:Set State");
 PStatCollector GraphicsStateGuardian::_clear_pcollector("Draw:Clear");
 PStatCollector GraphicsStateGuardian::_flush_pcollector("Draw:Flush");
+PStatCollector GraphicsStateGuardian::_compute_dispatch_pcollector("Draw:Compute dispatch");
 
 PStatCollector GraphicsStateGuardian::_wait_occlusion_pcollector("Wait:Occlusion");
+PStatCollector GraphicsStateGuardian::_wait_timer_pcollector("Wait:Timer Queries");
+PStatCollector GraphicsStateGuardian::_timer_queries_pcollector("Timer queries");
+PStatCollector GraphicsStateGuardian::_command_latency_pcollector("Command latency");
 
+PStatCollector GraphicsStateGuardian::_prepare_pcollector("Draw:Prepare");
+PStatCollector GraphicsStateGuardian::_prepare_texture_pcollector("Draw:Prepare:Texture");
+PStatCollector GraphicsStateGuardian::_prepare_geom_pcollector("Draw:Prepare:Geom");
+PStatCollector GraphicsStateGuardian::_prepare_shader_pcollector("Draw:Prepare:Shader");
+PStatCollector GraphicsStateGuardian::_prepare_vertex_buffer_pcollector("Draw:Prepare:Vertex buffer");
+PStatCollector GraphicsStateGuardian::_prepare_index_buffer_pcollector("Draw:Prepare:Index buffer");
 
 PStatCollector GraphicsStateGuardian::_draw_set_state_transform_pcollector("Draw:Set State:Transform");
 PStatCollector GraphicsStateGuardian::_draw_set_state_alpha_test_pcollector("Draw:Set State:Alpha test");
@@ -115,7 +127,6 @@ PStatCollector GraphicsStateGuardian::_draw_set_state_stencil_pcollector("Draw:S
 PStatCollector GraphicsStateGuardian::_draw_set_state_fog_pcollector("Draw:Set State:Fog");
 PStatCollector GraphicsStateGuardian::_draw_set_state_scissor_pcollector("Draw:Set State:Scissor");
 
-
 PT(TextureStage) GraphicsStateGuardian::_alpha_scale_texture_stage = NULL;
 
 TypeHandle GraphicsStateGuardian::_type_handle;
@@ -197,6 +208,16 @@ GraphicsStateGuardian(CoordinateSystem internal_coordinate_system,
   _max_vertex_transform_indices = 0;
 
   _supports_occlusion_query = false;
+  _supports_timer_query = false;
+
+#ifdef DO_PSTATS
+  _timer_queries_active = false;
+  _last_query_frame = 0;
+  _last_num_queried = 0;
+  //_timer_delta = 0.0;
+
+  _pstats_gpu_thread = -1;
+#endif
 
   // Initially, we set this to false; a GSG that knows it has this
   // property should set it to true.
@@ -261,7 +282,7 @@ GraphicsStateGuardian::
     delete _stencil_render_states;
     _stencil_render_states = 0;
   }
-  
+
   if (_shader_generator) {
     delete _shader_generator;
     _shader_generator = 0;
@@ -322,7 +343,7 @@ get_supported_geom_rendering() const {
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_supports_cg_profile
 //       Access: Published, Virtual
-//  Description: Returns true if this particular GSG supports the 
+//  Description: Returns true if this particular GSG supports the
 //               specified Cg Shader Profile.
 ////////////////////////////////////////////////////////////////////
 bool GraphicsStateGuardian::
@@ -405,7 +426,7 @@ get_prepared_objects() {
 ////////////////////////////////////////////////////////////////////
 bool GraphicsStateGuardian::
 set_gamma(PN_stdfloat gamma) {
-  _gamma = gamma;  
+  _gamma = gamma;
 
   return false;
 }
@@ -437,7 +458,7 @@ restore_gamma() {
 //               function returns false.
 ////////////////////////////////////////////////////////////////////
 void GraphicsStateGuardian::
-traverse_prepared_textures(GraphicsStateGuardian::TextureCallback *func, 
+traverse_prepared_textures(GraphicsStateGuardian::TextureCallback *func,
                            void *callback_arg) {
   ReMutexHolder holder(_prepared_objects->_lock);
   PreparedGraphicsObjects::Textures::const_iterator ti;
@@ -702,20 +723,6 @@ void GraphicsStateGuardian::
 release_index_buffer(IndexBufferContext *) {
 }
 
-////////////////////////////////////////////////////////////////////
-//     Function: GraphicsStateGuardian::get_supports_occlusion_query
-//       Access: Public, Virtual
-//  Description: Returns true if this GSG supports an occlusion query.
-//               If this is true, then begin_occlusion_query() and
-//               end_occlusion_query() may be called to bracket a
-//               sequence of draw_triangles() (or whatever) calls to
-//               measure pixels that pass the depth test.
-////////////////////////////////////////////////////////////////////
-bool GraphicsStateGuardian::
-get_supports_occlusion_query() const {
-  return _supports_occlusion_query;
-}
-
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::begin_occlusion_query
 //       Access: Public, Virtual
@@ -754,6 +761,17 @@ end_occlusion_query() {
   return result;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::issue_timer_query
+//       Access: Public, Virtual
+//  Description: Adds a timer query to the command stream, associated with the given
+//               PStats collector index (bit 0x8000 set marks a stop event rather than a start).
+////////////////////////////////////////////////////////////////////
+PT(TimerQueryContext) GraphicsStateGuardian::
+issue_timer_query(int pstats_index) {
+  return NULL;  // Base implementation issues nothing; GSGs with timer query support override this.
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::dispatch_compute
 //       Access: Public, Virtual
@@ -888,7 +906,7 @@ compute_distance_to(const LPoint3 &point) const {
 //               the need for a separate routine to fetch these values.
 //
 //               The "altered" bits indicate what parts of the
-//               state_and_transform have changed since the last 
+//               state_and_transform have changed since the last
 //               time this particular ShaderMatSpec was evaluated.
 //               This may allow data to be cached and not reevaluated.
 //
@@ -896,7 +914,7 @@ compute_distance_to(const LPoint3 &point) const {
 const LMatrix4 *GraphicsStateGuardian::
 fetch_specified_value(Shader::ShaderMatSpec &spec, int altered) {
   LVecBase3 v;
-  
+
   if (altered & spec._dep[0]) {
     const LMatrix4 *t = fetch_specified_part(spec._part[0], spec._arg[0], spec._cache[0]);
     if (t != &spec._cache[0]) {
@@ -909,7 +927,7 @@ fetch_specified_value(Shader::ShaderMatSpec &spec, int altered) {
       spec._cache[1] = *t;
     }
   }
-  
+
   switch(spec._func) {
   case Shader::SMF_compose:
     spec._value.multiply(spec._cache[0], spec._cache[1]);
@@ -1470,6 +1488,29 @@ begin_frame(Thread *current_thread) {
   _state_rs = RenderState::make_empty();
   _state_mask.clear();
 
+#ifdef DO_PSTATS
+  // We have to do this here instead of in GraphicsEngine because
+  // we need a current context to issue timer queries.
+  int frame = ClockObject::get_global_clock()->get_frame_count();
+  if (_last_query_frame < frame) {
+    _last_query_frame = frame;
+    _timer_queries_pcollector.clear_level();
+
+    // Now is a good time to flush previous frame's queries.  We
+    // may not actually have all of the previous frame's results
+    // in yet, but that's okay; the GPU data is allowed to lag a
+    // few frames behind.
+    flush_timer_queries();
+
+    if (_timer_queries_active) {
+      // Issue a stop and start event for collector 0, marking the
+      // beginning of the new frame.
+      issue_timer_query(0x8000);
+      issue_timer_query(0x0000);
+    }
+  }
+#endif
+
   return !_needs_reset;
 }
 
@@ -1566,6 +1607,138 @@ end_frame(Thread *current_thread) {
   _prepared_objects->_graphics_memory_lru.begin_epoch();
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::flush_timer_queries
+//       Access: Public
+//  Description: Called on the draw thread (begin_frame() does so
+//               once per frame) to check the status of the pending
+//               timer queries and submit the results that are
+//               ready to the PStats server.
+//
+//               Does nothing unless the PStats client is connected
+//               and GPU timing is both requested (pstats-gpu-timing)
+//               and supported by this GSG.  Queries are consumed in
+//               the order they were issued: only the leading run of
+//               queries whose answer is ready is processed, and the
+//               remainder stays queued for a later call.
+//
+//               When built without DO_PSTATS this is a no-op.
+////////////////////////////////////////////////////////////////////
+void GraphicsStateGuardian::
+flush_timer_queries() {
+#ifdef DO_PSTATS
+  // This uses the lower-level PStats interfaces for now because
+  // of all the unnecessary overhead that would otherwise be incurred
+  // when adding such a large amount of data at once.
+
+  PStatClient *client = PStatClient::get_global_pstats();
+
+  if (!client->client_is_connected()) {
+    _timer_queries_active = false;
+    return;
+  }
+
+  if (!_timer_queries_active) {
+    // Timer queries are not running yet; switch them on if the user
+    // asked for GPU timing and the GSG is capable of it.
+    if (pstats_gpu_timing && _supports_timer_query) {
+      _timer_queries_active = true;
+    } else {
+      return;
+    }
+  }
+
+  // Currently, we use one thread per GSG, for convenience.  In the
+  // future, we may want to try and use one thread per graphics card.
+  if (_pstats_gpu_thread == -1) {
+    _pstats_gpu_thread = client->make_gpu_thread(get_driver_renderer()).get_index();
+  }
+  PStatThread gpu_thread(client, _pstats_gpu_thread);
+
+  // Get the results of all the timer queries.  "first" ends up as the
+  // index of the first query whose answer is not ready yet, which is
+  // also the number of queries we can consume.
+  int first = 0;
+  if (!_pending_timer_queries.empty()) {
+    int count = (int)_pending_timer_queries.size();
+
+    PStatGPUTimer timer(this, _wait_timer_pcollector);
+
+    if (_last_num_queried > 0) {
+      // We know how many queries were available last frame, and this
+      // usually stays fairly constant, so use this as a starting point.
+      int i = min(_last_num_queried, count) - 1;
+
+      if (_pending_timer_queries[i]->is_answer_ready()) {
+        // Walk forward until we hit a query that is not ready.  The
+        // index is advanced and bounds-checked before it is used, so
+        // we never read past the last pending query when every
+        // remaining query turns out to be ready.
+        first = count;
+        while (++i < count) {
+          if (!_pending_timer_queries[i]->is_answer_ready()) {
+            first = i;
+            break;
+          }
+        }
+      } else {
+        // Walk backward until we hit a query that is ready.
+        first = 0;
+        while (i > 0) {
+          if (_pending_timer_queries[--i]->is_answer_ready()) {
+            first = i + 1;
+            break;
+          }
+        }
+      }
+    } else {
+      // We figure out which tasks the GPU has already finished by doing
+      // a binary search for the first query that does not have an answer
+      // ready.  We know then that everything before that must be ready.
+      while (count > 0) {
+        int step = count / 2;
+        int i = first + step;
+        if (_pending_timer_queries[i]->is_answer_ready()) {
+          first += step + 1;
+          count -= step + 1;
+        } else {
+          count = step;
+        }
+      }
+    }
+
+    if (first <= 0) {
+      // Nothing has finished on the GPU yet; try again next time.
+      return;
+    }
+
+    _last_num_queried = first;
+
+    int frame_index = ClockObject::get_global_clock()->get_frame_count();
+
+    for (int i = 0; i < first; ++i) {
+      CPT(TimerQueryContext) query = _pending_timer_queries[i];
+
+      double time_data = query->get_timestamp(); //  + _timer_delta;
+
+      if (query->_pstats_index == _command_latency_pcollector.get_index()) {
+        // Special case for the latency pcollector.
+        PStatCollectorDef *cdef;
+        cdef = client->get_collector_ptr(query->_pstats_index)->get_def(client, query->_pstats_index);
+        _pstats_gpu_data.add_level(query->_pstats_index, time_data * cdef->_factor);
+
+      } else if (query->_pstats_index & 0x8000) {
+        // The high bit flags a stop event; the low 15 bits hold the
+        // collector index.
+        _pstats_gpu_data.add_stop(query->_pstats_index & 0x7fff, time_data);
+
+      } else {
+        _pstats_gpu_data.add_start(query->_pstats_index & 0x7fff, time_data);
+      }
+
+      // We found an end-frame marker (a stop event for collector 0).
+      // This means that the GPU actually caught up with that frame,
+      // and we can flush the GPU thread's frame data to the pstats server.
+      if (query->_pstats_index == 0x8000) {
+        gpu_thread.add_frame(_pstats_gpu_data);
+        _pstats_gpu_data.clear();
+      }
+    }
+  }
+
+  if (first > 0) {
+    // Do this out of the scope of _wait_timer_pcollector.
+    _pending_timer_queries.erase(
+      _pending_timer_queries.begin(),
+      _pending_timer_queries.begin() + first
+    );
+    _timer_queries_pcollector.add_level_now(first);
+  }
+#endif
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::depth_offset_decals
 //       Access: Public, Virtual
@@ -1819,7 +1992,7 @@ reset() {
     delete _stencil_render_states;
     _stencil_render_states = 0;
   }
-  _stencil_render_states = new StencilRenderStates (this);
+  _stencil_render_states = new StencilRenderStates(this);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -1935,12 +2108,12 @@ do_issue_clip_plane() {
           enable_clip_planes(true);
           _clip_planes_enabled = true;
         }
-        
+
         enable_clip_plane(num_enabled, true);
         if (num_enabled == 0) {
           begin_bind_clip_planes();
         }
-        
+
         bind_clip_plane(plane, num_enabled);
         num_enabled++;
       }
@@ -2123,7 +2296,7 @@ do_issue_light() {
           if (num_enabled == 0) {
             begin_bind_lights();
           }
-          
+
           light_obj->bind(this, light, num_enabled);
           num_enabled++;
         }
@@ -2278,24 +2451,24 @@ create_gamma_table (PN_stdfloat gamma, unsigned short *red_table, unsigned short
     // avoid divide by zero and negative exponents
     gamma = 1.0;
   }
-  
+
   for (i = 0; i < 256; i++) {
     double g;
     double x;
     PN_stdfloat gamma_correction;
-    
+
     x = ((double) i / 255.0);
-    gamma_correction = 1.0 / gamma;    
+    gamma_correction = 1.0 / gamma;
     x = pow (x, (double) gamma_correction);
     if (x > 1.00) {
       x = 1.0;
     }
 
-    g = x * 65535.0;    
+    g = x * 65535.0;
     red_table [i] = (int)g;
     green_table [i] = (int)g;
     blue_table [i] = (int)g;
-  }    
+  }
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -2638,7 +2811,7 @@ async_reload_texture(TextureContext *tc) {
 
   string task_name = string("reload:") + tc->get_texture()->get_name();
   PT(AsyncTaskManager) task_mgr = _loader->get_task_manager();
-  
+
   // See if we are already loading this task.
   AsyncTaskCollection orig_tasks = task_mgr->find_tasks(task_name);
   int num_tasks = orig_tasks.get_num_tasks();
@@ -2655,7 +2828,7 @@ async_reload_texture(TextureContext *tc) {
 
   // This texture has not yet been queued to be reloaded.  Queue it up
   // now.
-  PT(AsyncTask) request = 
+  PT(AsyncTask) request =
     new TextureReloadRequest(task_name,
                              _prepared_objects, tc->get_texture(),
                              _supports_compressed_texture);
@@ -2762,20 +2935,20 @@ make_shadow_buffer(const NodePath &light_np, GraphicsOutputBase *host) {
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_driver_vendor
 //       Access: Public, Virtual
-//  Description: Returns the vendor of the video card driver 
+//  Description: Returns the vendor of the video card driver
 ////////////////////////////////////////////////////////////////////
 string GraphicsStateGuardian::
 get_driver_vendor() {
-  return string("0");
+  return string();
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: GraphicsStateGuardian::get_driver_vendor
+//     Function: GraphicsStateGuardian::get_driver_renderer
 //       Access: Public, Virtual
 //  Description: Returns GL_Renderer
 ////////////////////////////////////////////////////////////////////
 string GraphicsStateGuardian::get_driver_renderer() {
-  return string("0");
+  return string();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -2783,12 +2956,12 @@ string GraphicsStateGuardian::get_driver_renderer() {
 //       Access: Public, Virtual
 //  Description: Returns driver version
 //               This has an implementation-defined meaning, and may
-//               be "0" if the particular graphics implementation
+//               be an empty string if the particular graphics implementation
 //               does not provide a way to query this information.
 ////////////////////////////////////////////////////////////////////
 string GraphicsStateGuardian::
 get_driver_version() {
-  return string("0");
+  return string();
 }
 
 ////////////////////////////////////////////////////////////////////

+ 39 - 7
panda/src/display/graphicsStateGuardian.h

@@ -44,6 +44,7 @@
 #include "bitMask.h"
 #include "texture.h"
 #include "occlusionQueryContext.h"
+#include "timerQueryContext.h"
 #include "stencilRenderStates.h"
 #include "loader.h"
 #include "shaderAttrib.h"
@@ -151,6 +152,10 @@ PUBLISHED:
   INLINE bool get_supports_two_sided_stencil() const;
   INLINE bool get_supports_geometry_instancing() const;
 
+  INLINE bool get_supports_occlusion_query() const;
+  INLINE bool get_supports_timer_query() const;
+  INLINE bool get_timer_queries_active() const;
+
   INLINE int get_max_color_targets() const;
   INLINE int get_maximum_simultaneous_render_targets() const;
 
@@ -200,7 +205,7 @@ PUBLISHED:
   virtual int get_driver_version_minor();
   virtual int get_driver_shader_version_major();
   virtual int get_driver_shader_version_minor();
-  
+
   bool set_scene(SceneSetup *scene_setup);
   virtual SceneSetup *get_scene() const;
 
@@ -222,10 +227,11 @@ public:
   virtual IndexBufferContext *prepare_index_buffer(GeomPrimitive *data);
   virtual void release_index_buffer(IndexBufferContext *ibc);
 
-  virtual bool get_supports_occlusion_query() const;
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
 
+  virtual PT(TimerQueryContext) issue_timer_query(int pstats_index);
+
   virtual void dispatch_compute(int size_x, int size_y, int size_z);
 
   virtual PT(GeomMunger) get_geom_munger(const RenderState *state,
@@ -239,7 +245,7 @@ public:
   virtual PN_stdfloat compute_distance_to(const LPoint3 &point) const;
 
   virtual void clear(DrawableRegion *clearable);
-  
+
   const LMatrix4 *fetch_specified_value(Shader::ShaderMatSpec &spec, int altered);
   const LMatrix4 *fetch_specified_part(Shader::ShaderMatInput input, InternalName *name, LMatrix4 &t);
   const Shader::ShaderPtrData *fetch_ptr_parameter(const Shader::ShaderPtrSpec& spec);
@@ -260,6 +266,8 @@ PUBLISHED:
 public:
   virtual void end_frame(Thread *current_thread);
 
+  void flush_timer_queries();
+
   void set_current_properties(const FrameBufferProperties *properties);
 
   virtual bool depth_offset_decals();
@@ -375,7 +383,7 @@ protected:
   // This bitmask contains a 1 bit everywhere that _state_rs has a
   // known value.  If a bit is 0, the corresponding state must be
   // re-sent.
-  // 
+  //
   // Derived GSGs should initialize _inv_state_mask in reset() as a mask of
   // 1's where they don't care, and 0's where they do care, about the state.
   RenderState::SlotMask _state_mask;
@@ -406,7 +414,7 @@ protected:
 
   unsigned int _color_write_mask;
 
-  CPT(DisplayRegion) _current_display_region;
+  PT(DisplayRegion) _current_display_region;
   Lens::StereoChannel _current_stereo_channel;
   int _current_tex_view_offset;
   CPT(Lens) _current_lens;
@@ -481,6 +489,19 @@ protected:
   bool _supports_occlusion_query;
   PT(OcclusionQueryContext) _current_occlusion_query;
 
+  bool _supports_timer_query;
+#ifdef DO_PSTATS
+  int _pstats_gpu_thread;
+  bool _timer_queries_active;
+  PStatFrameData _pstats_gpu_data;
+
+  int _last_query_frame;
+  int _last_num_queried;
+  //double _timer_delta;
+  typedef pdeque<PT(TimerQueryContext)> TimerQueryQueue;
+  TimerQueryQueue _pending_timer_queries;
+#endif
+
   bool _copy_texture_inverted;
   bool _supports_multisample;
   bool _supports_generate_mipmap;
@@ -520,13 +541,13 @@ protected:
 
   PN_stdfloat _gamma;
   Texture::QualityLevel _texture_quality_override;
-  
+
   ShaderGenerator* _shader_generator;
 
 #ifndef NDEBUG
   PT(Texture) _flash_texture;
 #endif
-  
+
 public:
   // Statistics
   static PStatCollector _vertex_buffer_switch_pcollector;
@@ -558,7 +579,18 @@ public:
   static PStatCollector _draw_set_state_pcollector;
   static PStatCollector _clear_pcollector;
   static PStatCollector _flush_pcollector;
+  static PStatCollector _compute_dispatch_pcollector;
   static PStatCollector _wait_occlusion_pcollector;
+  static PStatCollector _wait_timer_pcollector;
+  static PStatCollector _timer_queries_pcollector;
+  static PStatCollector _command_latency_pcollector;
+
+  static PStatCollector _prepare_pcollector;
+  static PStatCollector _prepare_texture_pcollector;
+  static PStatCollector _prepare_geom_pcollector;
+  static PStatCollector _prepare_shader_pcollector;
+  static PStatCollector _prepare_vertex_buffer_pcollector;
+  static PStatCollector _prepare_index_buffer_pcollector;
 
   // A whole slew of collectors to measure the cost of individual
   // state changes.  These are disabled by default.

+ 61 - 0
panda/src/display/pStatGPUTimer.I

@@ -0,0 +1,61 @@
+// Filename: pStatGPUTimer.I
+// Created by:  rdb (21Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+
+#ifdef DO_PSTATS
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatGPUTimer::Constructor
+//       Access: Public
+//  Description: Constructs the base PStatTimer and, if timer queries are active on the GSG, issues a start query.
+////////////////////////////////////////////////////////////////////
+INLINE PStatGPUTimer::
+PStatGPUTimer(GraphicsStateGuardian *gsg, PStatCollector &collector) :
+  PStatTimer(collector),
+  _gsg(gsg)
+{
+  if (gsg->get_timer_queries_active()) {
+    gsg->issue_timer_query(collector.get_index());  // Plain index (no 0x8000 bit) is recorded as a start event.
+    //cerr << "issuing " << collector << " active " << collector.is_active() << "\n";
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatGPUTimer::Constructor
+//       Access: Public
+//  Description: As above, but passes the given current_thread through to the PStatTimer base constructor.
+////////////////////////////////////////////////////////////////////
+INLINE PStatGPUTimer::
+PStatGPUTimer(GraphicsStateGuardian *gsg, PStatCollector &collector, Thread *current_thread) :
+  PStatTimer(collector, current_thread),
+  _gsg(gsg)
+{
+  if (gsg->get_timer_queries_active()) {
+    gsg->issue_timer_query(collector.get_index());  // Plain index (no 0x8000 bit) is recorded as a start event.
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatGPUTimer::Destructor
+//       Access: Public
+//  Description: If timer queries are active on the GSG, issues the stop query matching the constructor's start.
+////////////////////////////////////////////////////////////////////
+INLINE PStatGPUTimer::
+~PStatGPUTimer() {
+  if (_gsg->get_timer_queries_active()) {
+    _gsg->issue_timer_query(_collector.get_index() | 0x8000);  // 0x8000 flags this as a stop event.
+  }
+}
+
+#endif

+ 65 - 0
panda/src/display/pStatGPUTimer.h

@@ -0,0 +1,65 @@
+// Filename: pStatGPUTimer.h
+// Created by:  rdb (21Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef PSTATGPUTIMER_H
+#define PSTATGPUTIMER_H
+
+#include "pandabase.h"
+#include "pStatTimer.h"
+#include "pStatCollector.h"
+#include "config_pstats.h"
+#include "timerQueryContext.h"
+
+class Thread;
+class GraphicsStateGuardian;
+
+////////////////////////////////////////////////////////////////////
+//       Class : PStatGPUTimer
+// Description : This is a special type of PStatTimer that also
+//               uses a timer query on the GSG to measure how long
+//               a task actually takes to execute on the GPU, rather
+//               than how long it took for the API commands to be
+//               queued up.
+//
+//               This class may only be used on the draw thread.
+//
+//               At present, it tracks both the CPU time (like a
+//               regular PStatTimer does) and the GPU time, which
+//               is recorded using a special PStatThread.
+////////////////////////////////////////////////////////////////////
+class EXPCL_PANDA_DISPLAY PStatGPUTimer : public PStatTimer {
+public:
+#ifdef DO_PSTATS
+  INLINE PStatGPUTimer(GraphicsStateGuardian *gsg,
+                       PStatCollector &collector);
+  INLINE PStatGPUTimer(GraphicsStateGuardian *gsg,
+                       PStatCollector &collector,
+                       Thread *current_thread);
+  INLINE ~PStatGPUTimer();
+
+  GraphicsStateGuardian *_gsg;  // GSG on which the start/stop timer queries are issued.
+
+private:  // Nothing is declared private at present.
+#else // DO_PSTATS
+  // NOTE(review): these stubs do not initialize the PStatTimer base explicitly; confirm it is default-constructible here.
+  INLINE PStatGPUTimer(GraphicsStateGuardian *, PStatCollector &) { }
+  INLINE PStatGPUTimer(GraphicsStateGuardian *, PStatCollector &, Thread *) { }
+  INLINE ~PStatGPUTimer() { }
+
+#endif  // DO_PSTATS
+};
+
+#include "pStatGPUTimer.I"
+
+#endif

+ 12 - 12
panda/src/glstuff/glCgShaderContext_src.cxx

@@ -18,7 +18,7 @@
 
 #include "Cg/cgGL.h"
 
-#include "pStatTimer.h"
+#include "pStatGPUTimer.h"
 
 TypeHandle CLP(CgShaderContext)::_type_handle;
 
@@ -50,7 +50,7 @@ CLP(CgShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderConte
 
   nassertv(s->get_language() == Shader::SL_Cg);
 
-  // Ask the shader to compile itself for us and 
+  // Ask the shader to compile itself for us and
   // to give us the resulting Cg program objects.
   if (!s->cg_compile_for(_glgsg->_shader_caps,
                          _cg_context,
@@ -223,7 +223,7 @@ unbind() {
 ////////////////////////////////////////////////////////////////////
 void CLP(CgShaderContext)::
 issue_parameters(int altered) {
-  PStatTimer timer(_glgsg->_draw_set_state_shader_parameters_pcollector);
+  //PStatGPUTimer timer(_glgsg, _glgsg->_draw_set_state_shader_parameters_pcollector);
 
   if (!valid()) {
     return;
@@ -233,9 +233,9 @@ issue_parameters(int altered) {
   for (int i=0; i<(int)_shader->_ptr_spec.size(); i++) {
     if (altered & (_shader->_ptr_spec[i]._dep[0] | _shader->_ptr_spec[i]._dep[1])) {
       const Shader::ShaderPtrSpec& _ptr = _shader->_ptr_spec[i];
-      Shader::ShaderPtrData* ptr_data = 
+      Shader::ShaderPtrData* ptr_data =
         const_cast< Shader::ShaderPtrData*>(_glgsg->fetch_ptr_parameter(_ptr));
-      
+
       if (ptr_data == NULL){ //the input is not contained in ShaderPtrData
         release_resources();
         return;
@@ -249,14 +249,14 @@ issue_parameters(int altered) {
       int input_size = _ptr._dim[0] * _ptr._dim[1] * _ptr._dim[2];
 
       // dimension is negative only if the parameter had the (deprecated)k_ prefix.
-      if ((input_size > ptr_data->_size) && (_ptr._dim[0] > 0)) { 
-        GLCAT.error() << _ptr._id._name << ": incorrect number of elements, expected " 
+      if ((input_size > ptr_data->_size) && (_ptr._dim[0] > 0)) {
+        GLCAT.error() << _ptr._id._name << ": incorrect number of elements, expected "
                       <<  input_size <<" got " <<  ptr_data->_size << "\n";
         release_resources();
         return;
       }
       CGparameter p = _cg_parameter_map[_ptr._id._seqno];
-      
+
       switch (ptr_data->_type) {
       case Shader::SPT_float:
         switch(_ptr._info._class) {
@@ -271,7 +271,7 @@ issue_parameters(int altered) {
         case Shader::SAC_matrix: cgGLSetMatrixParameterfc(p,(float*)ptr_data->_ptr); continue;
         case Shader::SAC_array: {
           switch(_ptr._info._subclass) {
-          case Shader::SAC_scalar: 
+          case Shader::SAC_scalar:
             cgGLSetParameterArray1f(p,0,_ptr._dim[0],(float*)ptr_data->_ptr); continue;
           case Shader::SAC_vector:
             switch(_ptr._dim[2]) {
@@ -298,7 +298,7 @@ issue_parameters(int altered) {
         case Shader::SAC_matrix: cgGLSetMatrixParameterdc(p,(double*)ptr_data->_ptr); continue;
         case Shader::SAC_array: {
           switch(_ptr._info._subclass) {
-          case Shader::SAC_scalar: 
+          case Shader::SAC_scalar:
             cgGLSetParameterArray1d(p,0,_ptr._dim[0],(double*)ptr_data->_ptr); continue;
           case Shader::SAC_vector:
             switch(_ptr._dim[2]) {
@@ -323,8 +323,8 @@ issue_parameters(int altered) {
           case Shader::SAT_vec4: cgSetParameter4iv(p,(int*)ptr_data->_ptr); continue;
           }
         }
-      default: GLCAT.error() << _ptr._id._name << ":" << "unrecognized parameter type\n"; 
-        release_resources(); 
+      default: GLCAT.error() << _ptr._id._name << ":" << "unrecognized parameter type\n";
+        release_resources();
         return;
       }
     }

+ 26 - 5
panda/src/glstuff/glGraphicsBuffer_src.cxx

@@ -27,7 +27,10 @@ CLP(GraphicsBuffer)(GraphicsEngine *engine, GraphicsPipe *pipe,
                     int flags,
                     GraphicsStateGuardian *gsg,
                     GraphicsOutput *host) :
-  GraphicsBuffer(engine, pipe, name, fb_prop, win_prop, flags, gsg, host)
+  GraphicsBuffer(engine, pipe, name, fb_prop, win_prop, flags, gsg, host),
+  _bind_texture_pcollector(_draw_window_pcollector, "Bind textures"),
+  _generate_mipmap_pcollector(_draw_window_pcollector, "Generate mipmaps"),
+  _resolve_multisample_pcollector(_draw_window_pcollector, "Resolve multisamples")
 {
   CLP(GraphicsStateGuardian) *glgsg;
 
@@ -175,7 +178,7 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       CLP(GraphicsStateGuardian) *glgsg;
       DCAST_INTO_R(glgsg, _gsg, false);
 
-      pvector<CLP(TextureContext)*>::iterator it;
+      TextureContexts::iterator it;
       for (it = _texture_contexts.begin(); it != _texture_contexts.end(); ++it) {
         CLP(TextureContext) *gtc = *it;
 
@@ -274,6 +277,8 @@ rebuild_bitplanes() {
     return;
   }
 
+  PStatGPUTimer timer(glgsg, _bind_texture_pcollector);
+
   // Calculate bitplane size.  This can be larger than the buffer.
   if (_creation_flags & GraphicsPipe::BF_size_track_host) {
     if (_host->get_size() != _size) {
@@ -437,6 +442,17 @@ rebuild_bitplanes() {
     // Bind the FBO
     if (_fbo[layer] == 0) {
       glgsg->_glGenFramebuffers(1, &_fbo[layer]);
+
+      if (glgsg->_use_object_labels) {
+        if (num_fbos > 1) {
+          GLchar name[128];
+          GLsizei len = snprintf(name, 128, "%s[%d]", _name.c_str(), layer);
+          glgsg->_glObjectLabel(GL_FRAMEBUFFER, _fbo[layer], len, name);
+        } else {
+          glgsg->_glObjectLabel(GL_FRAMEBUFFER, _fbo[layer], _name.size(), _name.data());
+        }
+      }
+
       if (_fbo[layer] == 0) {
         report_my_gl_errors();
         return;
@@ -1131,6 +1147,8 @@ generate_mipmaps() {
   CLP(GraphicsStateGuardian) *glgsg;
   DCAST_INTO_V(glgsg, _gsg);
 
+  //PStatGPUTimer timer(glgsg, _generate_mipmap_pcollector);
+
   pvector<CLP(TextureContext)*>::iterator it;
   for (it = _texture_contexts.begin(); it != _texture_contexts.end(); ++it) {
     CLP(TextureContext) *gtc = *it;
@@ -1168,7 +1186,8 @@ end_frame(FrameMode mode, Thread *current_thread) {
     copy_to_textures();
   }
 
-  // Unbind the FBO
+  // Unbind the FBO.  TODO: calling bind_fbo is slow, so we should
+  // probably move this to begin_frame to prevent unnecessary calls.
   CLP(GraphicsStateGuardian) *glgsg;
   DCAST_INTO_V(glgsg, _gsg);
   glgsg->bind_fbo(0);
@@ -1604,10 +1623,12 @@ check_host_valid() {
 ////////////////////////////////////////////////////////////////////
 void CLP(GraphicsBuffer)::
 resolve_multisamples() {
+  nassertv(_fbo.size() > 0);
+
   CLP(GraphicsStateGuardian) *glgsg;
   DCAST_INTO_V(glgsg, _gsg);
 
-  nassertv(_fbo.size() > 0);
+  PStatGPUTimer timer(glgsg, _resolve_multisample_pcollector);
 
   if (gl_enable_memory_barriers) {
     // Issue memory barriers as necessary to make sure that the
@@ -1631,7 +1652,7 @@ resolve_multisamples() {
   }
   glgsg->_glBindFramebuffer(GL_DRAW_FRAMEBUFFER_EXT, fbo);
   glgsg->_glBindFramebuffer(GL_READ_FRAMEBUFFER_EXT, _fbo_multisample);
-  
+
   // If the depth buffer is shared, resolve it only on the last to render FBO.
   bool do_depth_blit = false;
   if (_rbm[RTP_depth_stencil] != 0 || _rbm[RTP_depth] != 0) {

+ 14 - 9
panda/src/glstuff/glGraphicsBuffer_src.h

@@ -34,7 +34,7 @@
 //               * Can render onto a texture without clearing it first.
 //               * Supports multisample antialiased rendering.
 //
-//               Some of these deserve a little explanation. 
+//               Some of these deserve a little explanation.
 //               Auxiliary bitplanes are additional bitplanes above
 //               and beyond the normal depth,stencil,color.  One can
 //               use them to render out multiple textures in a single
@@ -43,14 +43,14 @@
 //               buffer will be equal to the texture's previous
 //               contents.  This also means you can meaningfully
 //               share a bitplane between two buffers by binding
-//               the same texture to both buffers. 
+//               the same texture to both buffers.
 //
 //               If either of the necessary OpenGL extensions is not
 //               available, then the glGraphicsBuffer will not be
 //               available (although it may still be possible to
 //               create a wglGraphicsBuffer or glxGraphicsBuffer).
 //
-//               This class now also uses the extensions 
+//               This class now also uses the extensions
 //               EXT_framebuffer_multisample and EXT_framebuffer_blit
 //               to allow for multisample antialiasing these offscreen
 //               render targets.  If these extensions are unavailable
@@ -87,13 +87,13 @@ public:
 protected:
   virtual void close_buffer();
   virtual bool open_buffer();
-  
+
   void check_host_valid();
-  
+
   void report_my_errors(int line, const char *file);
 
 private:
-  
+
   void bind_slot(int layer, bool rb_resize, Texture **attach,
                  RenderTexturePlane plane, GLenum attachpoint);
   void bind_slot_multisample(bool rb_resize, Texture **attach,
@@ -127,7 +127,8 @@ private:
 
   // List of textures for which we might have to generate mipmaps
   // after rendering one frame.
-  pvector<CLP(TextureContext)*> _texture_contexts;
+  typedef pvector<CLP(TextureContext)*> TextureContexts;
+  TextureContexts _texture_contexts;
 
   // The cube map face we are currently drawing to or have just
   // finished drawing to, or -1 if we are not drawing to a cube map.
@@ -136,10 +137,14 @@ private:
   bool _initial_clear;
   bool _needs_rebuild;
   UpdateSeq _last_textures_seq;
-  
+
   CLP(GraphicsBuffer) *_shared_depth_buffer;
   list <CLP(GraphicsBuffer) *> _shared_depth_buffer_list;
-  
+
+  PStatCollector _bind_texture_pcollector;
+  PStatCollector _generate_mipmap_pcollector;
+  PStatCollector _resolve_multisample_pcollector;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;

+ 9 - 5
panda/src/glstuff/glGraphicsStateGuardian_src.I

@@ -27,6 +27,7 @@
 INLINE bool CLP(GraphicsStateGuardian)::
 report_errors(int line, const char *source_file) {
 #ifndef NDEBUG
+  PStatTimer timer(_check_error_pcollector);
   GLenum error_code = glGetError();
   if (error_code != GL_NO_ERROR) {
     int error_count = 0;
@@ -46,6 +47,7 @@ INLINE void CLP(GraphicsStateGuardian)::
 report_my_errors(int line, const char *source_file) {
 #ifndef NDEBUG
   if (_check_errors) {
+    PStatTimer timer(_check_error_pcollector);
     GLenum error_code = glGetError();
     if (error_code != GL_NO_ERROR) {
       if (!report_errors_loop(line, source_file, error_code, _error_count)) {
@@ -69,6 +71,7 @@ report_my_errors(int line, const char *source_file) {
 ////////////////////////////////////////////////////////////////////
 INLINE bool CLP(GraphicsStateGuardian)::
 clear_errors(int line, const char *source_file) {
+  PStatTimer timer(_check_error_pcollector);
   GLenum error_code = glGetError();
   if (error_code != GL_NO_ERROR) {
     int error_count = 0;
@@ -92,6 +95,7 @@ clear_errors(int line, const char *source_file) {
 INLINE void CLP(GraphicsStateGuardian)::
 clear_my_errors(int line, const char *source_file) {
   if (_check_errors) {
+    PStatTimer timer(_check_error_pcollector);
     GLenum error_code = glGetError();
     if (error_code != GL_NO_ERROR) {
       int error_count = 0;
@@ -743,7 +747,7 @@ get_clip_plane_id(int index) const {
 ////////////////////////////////////////////////////////////////////
 //     Function: CLP(GraphicsStateGuardian)::get_supports_framebuffer_multisample
 //       Access: Public
-//  Description: Returns if this glGsg supports multisample 
+//  Description: Returns if this glGsg supports multisample
 //               antialiasing for framebuffer objects.
 ////////////////////////////////////////////////////////////////////
 INLINE bool CLP(GraphicsStateGuardian)::
@@ -754,7 +758,7 @@ get_supports_framebuffer_multisample() {
 ////////////////////////////////////////////////////////////////////
 //     Function: CLP(GraphicsStateGuardian)::get_supports_framebuffer_multisample_coverage_nv
 //       Access: Public
-//  Description: Returns if this glGsg supports multisample 
+//  Description: Returns if this glGsg supports multisample
 //               antialiasing for framebuffer objects.
 ////////////////////////////////////////////////////////////////////
 INLINE bool CLP(GraphicsStateGuardian)::
@@ -766,7 +770,7 @@ get_supports_framebuffer_multisample_coverage_nv() {
 ////////////////////////////////////////////////////////////////////
 //     Function: CLP(GraphicsStateGuardian)::get_supports_framebuffer_blit
 //       Access: Public
-//  Description: Returns if this glGsg supports multisample 
+//  Description: Returns if this glGsg supports multisample
 //               antialiasing for framebuffer objects.
 ////////////////////////////////////////////////////////////////////
 INLINE bool CLP(GraphicsStateGuardian)::
@@ -778,7 +782,7 @@ get_supports_framebuffer_blit() {
 ////////////////////////////////////////////////////////////////////
 //     Function: GLGraphicsStateGuardian::UsageTextureKey::Constructor
 //       Access: Public
-//  Description: 
+//  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE CLP(GraphicsStateGuardian)::UsageTextureKey::
 UsageTextureKey(int x_size, int y_size) :
@@ -792,7 +796,7 @@ UsageTextureKey(int x_size, int y_size) :
 ////////////////////////////////////////////////////////////////////
 //     Function: GLGraphicsStateGuardian::UsageTextureKey::operator <
 //       Access: Public
-//  Description: 
+//  Description:
 ////////////////////////////////////////////////////////////////////
 INLINE bool CLP(GraphicsStateGuardian)::UsageTextureKey::
 operator < (const CLP(GraphicsStateGuardian)::UsageTextureKey &other) const {

File diff suppressed because it is too large
+ 242 - 135
panda/src/glstuff/glGraphicsStateGuardian_src.cxx


+ 52 - 33
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -1,6 +1,6 @@
 // Filename: glGraphicsStateGuardian_src.h
 // Created by:  drose (02Feb99)
-// Updated by: fperazzi, PandaSE (05May10) (added 
+// Updated by: fperazzi, PandaSE (05May10) (added
 //   get_supports_cg_profile)
 //
 ////////////////////////////////////////////////////////////////////
@@ -37,6 +37,7 @@
 #include "pmap.h"
 #include "geomVertexArrayData.h"
 #include "lightMutex.h"
+#include "pStatGPUTimer.h"
 
 class PlaneNode;
 class Light;
@@ -58,6 +59,7 @@ typedef double GLdouble;
 typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,GLvoid *userParam);
 typedef void (APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam);
 typedef void (APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled);
+typedef void (APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label);
 typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint level, GLvoid *img);
 typedef void (APIENTRYP PFNGLGENQUERIESPROC) (GLsizei n, GLuint *ids);
 typedef void (APIENTRYP PFNGLBEGINQUERYPROC) (GLenum target, GLuint id);
@@ -65,6 +67,8 @@ typedef void (APIENTRYP PFNGLENDQUERYPROC) (GLenum target);
 typedef void (APIENTRYP PFNGLDELETEQUERIESPROC) (GLsizei n, const GLuint *ids);
 typedef void (APIENTRYP PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint *params);
 typedef void (APIENTRYP PFNGLGETQUERYOBJECTUIVPROC) (GLuint id, GLenum pname, GLuint *params);
+typedef void (APIENTRYP PFNGLGETQUERYOBJECTUI64VPROC) (GLuint id, GLenum pname, GLuint64 *params);
+typedef void (APIENTRYP PFNGLGETINTEGER64VPROC) (GLenum pname, GLint64 *params);
 typedef void (APIENTRYP PFNGLPOINTPARAMETERFVPROC) (GLenum pname, const GLfloat *params);
 typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices);
 // There is some trivial disagreement between different gl.h headers about this one, so we use our own typename.
@@ -98,32 +102,32 @@ typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) (GLenum target, GLint
 typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data);
 typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *data);
 typedef void (APIENTRYP PFNGLACTIVESTENCILFACEEXTPROC) (GLenum face);
-typedef void (APIENTRYP PFNGLWEIGHTPOINTERARBPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); 
-typedef void (APIENTRYP PFNGLVERTEXBLENDARBPROC) (GLint count); 
-typedef void (APIENTRYP PFNGLWEIGHTFVARBPROC) (GLint size, const GLfloat *weights); 
-typedef void (APIENTRYP PFNGLWEIGHTDVARBPROC) (GLint size, const GLdouble *weights); 
-typedef GLboolean (APIENTRYP PFNGLISRENDERBUFFEREXTPROC) (GLuint renderbuffer); 
-typedef void (APIENTRYP PFNGLBINDRENDERBUFFEREXTPROC) (GLenum target, GLuint renderbuffer); 
-typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSEXTPROC) (GLsizei n, const GLuint *renderbuffers); 
-typedef void (APIENTRYP PFNGLGENRENDERBUFFERSEXTPROC) (GLsizei n, GLuint *renderbuffers); 
-typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); 
-typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params); 
-typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFEREXTPROC) (GLuint framebuffer); 
-typedef void (APIENTRYP PFNGLBINDFRAMEBUFFEREXTPROC) (GLenum target, GLuint framebuffer); 
-typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSEXTPROC) (GLsizei n, const GLuint *framebuffers); 
-typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSEXTPROC) (GLsizei n, GLuint *framebuffers); 
-typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC) (GLenum target); 
-typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); 
-typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); 
+typedef void (APIENTRYP PFNGLWEIGHTPOINTERARBPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
+typedef void (APIENTRYP PFNGLVERTEXBLENDARBPROC) (GLint count);
+typedef void (APIENTRYP PFNGLWEIGHTFVARBPROC) (GLint size, const GLfloat *weights);
+typedef void (APIENTRYP PFNGLWEIGHTDVARBPROC) (GLint size, const GLdouble *weights);
+typedef GLboolean (APIENTRYP PFNGLISRENDERBUFFEREXTPROC) (GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLBINDRENDERBUFFEREXTPROC) (GLenum target, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSEXTPROC) (GLsizei n, const GLuint *renderbuffers);
+typedef void (APIENTRYP PFNGLGENRENDERBUFFERSEXTPROC) (GLsizei n, GLuint *renderbuffers);
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params);
+typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFEREXTPROC) (GLuint framebuffer);
+typedef void (APIENTRYP PFNGLBINDFRAMEBUFFEREXTPROC) (GLenum target, GLuint framebuffer);
+typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSEXTPROC) (GLsizei n, const GLuint *framebuffers);
+typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSEXTPROC) (GLsizei n, GLuint *framebuffers);
+typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC) (GLenum target);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
 #ifdef OPENGLES_2
 typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DOES) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
 #else
-typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); 
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
 #endif
 typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level);
 typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
-typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); 
-typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC) (GLenum target, GLenum attachment, GLenum pname, GLint *params); 
+typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
+typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC) (GLenum target, GLenum attachment, GLenum pname, GLint *params);
 typedef void (APIENTRYP PFNGLGENERATEMIPMAPEXTPROC) (GLenum target);
 typedef void (APIENTRYP PFNGLCURRENTPALETTEMATRIXARBPROC) (GLint index);
 typedef void (APIENTRYP PFNGLMATRIXINDEXUIVARBPROC) (GLint size, const GLuint *indices);
@@ -189,7 +193,7 @@ typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count
 typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
 typedef void (APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers);
 typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufsize, GLsizei *length, GLenum *binaryFormat, void *binary);
-typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); 
+typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
 typedef GLuint64 (APIENTRYP PFNGLGETTEXTUREHANDLEPROC) (GLuint texture);
 typedef GLuint64 (APIENTRYP PFNGLGETTEXTURESAMPLERHANDLEPROC) (GLuint texture, GLuint sampler);
 typedef void (APIENTRYP PFNGLMAKETEXTUREHANDLERESIDENTPROC) (GLuint64 handle);
@@ -228,7 +232,7 @@ public:
   virtual int get_driver_version_minor();
   virtual int get_driver_shader_version_major();
   virtual int get_driver_shader_version_minor();
-  
+
 #ifndef OPENGLES_1
   static void debug_callback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, GLvoid *userParam);
 #endif
@@ -302,13 +306,15 @@ public:
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
 
+  virtual PT(TimerQueryContext) issue_timer_query(int pstats_index);
+
   virtual void dispatch_compute(int size_x, int size_y, int size_z);
 
   virtual PT(GeomMunger) make_geom_munger(const RenderState *state,
                                           Thread *current_thread);
 
   virtual void clear(DrawableRegion *region);
-  
+
   virtual bool framebuffer_copy_to_texture
     (Texture *tex, int view, int z, const DisplayRegion *dr, const RenderBuffer &rb);
   virtual bool framebuffer_copy_to_ram
@@ -332,9 +338,9 @@ public:
   void draw_immediate_composite_primitives(const GeomPrimitivePipelineReader *reader, GLenum mode);
 #endif  // SUPPORT_IMMEDIATE_MODE
 
-  INLINE static bool report_errors(int line, const char *source_file);
+  INLINE bool report_errors(int line, const char *source_file);
   INLINE void report_my_errors(int line, const char *source_file);
-  INLINE static bool clear_errors(int line, const char *source_file);
+  INLINE bool clear_errors(int line, const char *source_file);
   INLINE void clear_my_errors(int line, const char *source_file);
 
   INLINE const string &get_gl_vendor() const;
@@ -475,8 +481,8 @@ protected:
   bool upload_texture(CLP(TextureContext) *gtc, bool force);
   bool upload_texture_image(CLP(TextureContext) *gtc, bool needs_reload,
                             bool uses_mipmaps, int mipmap_bias,
-                            GLenum texture_target, GLenum page_target, 
-                            GLint internal_format, GLint external_format, 
+                            GLenum texture_target, GLenum page_target,
+                            GLint internal_format, GLint external_format,
                             GLenum component_type,
                             bool one_page_only, int z,
                             Texture::CompressionMode image_compression);
@@ -590,7 +596,6 @@ protected:
 public:
   bool _supports_point_parameters;
   PFNGLPOINTPARAMETERFVPROC _glPointParameterfv;
-
   bool _supports_point_sprite;
 
   bool _supports_vertex_blend;
@@ -703,6 +708,12 @@ public:
   PFNGLGETQUERYIVPROC _glGetQueryiv;
   PFNGLGETQUERYOBJECTUIVPROC _glGetQueryObjectuiv;
 
+  PFNGLQUERYCOUNTERPROC _glQueryCounter;
+  PFNGLGETQUERYOBJECTI64VPROC _glGetQueryObjecti64v;
+  PFNGLGETQUERYOBJECTUI64VPROC _glGetQueryObjectui64v;
+
+  PFNGLGETINTEGER64VPROC _glGetInteger64v;
+
   PFNGLACTIVESTENCILFACEEXTPROC _glActiveStencilFaceEXT;
 
 #ifndef OPENGLES_1
@@ -778,9 +789,9 @@ public:
 #endif
 
   LightMutex _lock;
-  typedef pvector<GLuint> DeletedDisplayLists;
-  DeletedDisplayLists _deleted_display_lists;
-  DeletedDisplayLists _deleted_queries;
+  typedef pvector<GLuint> DeletedNames;
+  DeletedNames _deleted_display_lists;
+  DeletedNames _deleted_queries;
 
 #ifndef OPENGLES
   // Stores textures for which memory barriers should be issued.
@@ -797,6 +808,9 @@ public:
   bool _force_flush;
   bool _supports_debug;
 
+  bool _use_object_labels;
+  PFNGLOBJECTLABELPROC _glObjectLabel;
+
 #ifndef NDEBUG
   bool _show_texture_usage;
   int _show_texture_usage_max_size;
@@ -818,7 +832,11 @@ public:
   static PStatCollector _primitive_batches_display_list_pcollector;
   static PStatCollector _vertices_display_list_pcollector;
   static PStatCollector _vertices_immediate_pcollector;
-  static PStatCollector _compute_dispatch_pcollector;
+  static PStatCollector _memory_barrier_pcollector;
+  static PStatCollector _vertex_array_update_pcollector;
+  static PStatCollector _texture_update_pcollector;
+  static PStatCollector _fbo_bind_pcollector;
+  static PStatCollector _check_error_pcollector;
 
 public:
   virtual TypeHandle get_type() const {
@@ -846,6 +864,7 @@ private:
   friend class CLP(CgShaderContext);
   friend class CLP(GraphicsBuffer);
   friend class CLP(OcclusionQueryContext);
+  friend class CLP(TimerQueryContext);
 };
 
 #include "glGraphicsStateGuardian_src.I"

+ 14 - 0
panda/src/glstuff/glLatencyQueryContext_src.I

@@ -0,0 +1,14 @@
+// Filename: glLatencyQueryContext_src.I
+// Created by:  rdb (24Sep14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+

+ 54 - 0
panda/src/glstuff/glLatencyQueryContext_src.cxx

@@ -0,0 +1,54 @@
+// Filename: glLatencyQueryContext_src.cxx
+// Created by:  rdb (24Sep14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef OPENGLES  // Timer queries not supported by OpenGL ES.
+
+TypeHandle CLP(LatencyQueryContext)::_type_handle;
+
+////////////////////////////////////////////////////////////////////
+//     Function: CLP(LatencyQueryContext)::Constructor
+//       Access: Public
+//  Description: Stores the current GL_TIMESTAMP value in _timestamp.
+////////////////////////////////////////////////////////////////////
+CLP(LatencyQueryContext)::
+CLP(LatencyQueryContext)(CLP(GraphicsStateGuardian) *glgsg,
+                         int pstats_index) :
+  CLP(TimerQueryContext)(glgsg, pstats_index),
+  _timestamp(0)
+{
+  glgsg->_glGetInteger64v(GL_TIMESTAMP, &_timestamp);  // reference time subtracted in get_timestamp()
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: CLP(LatencyQueryContext)::get_timestamp
+//       Access: Public, Virtual
+//  Description: Returns the measured latency rather than an
+//               absolute timestamp: the result of the query
+//               object minus the GL_TIMESTAMP value read in the
+//               constructor, converted from nanoseconds to
+//               seconds.
+//               If is_answer_ready() did not return true, this
+//               function may block before it returns.
+//
+//               It is only valid to call this from the draw thread.
+////////////////////////////////////////////////////////////////////
+double CLP(LatencyQueryContext)::
+get_timestamp() const {
+  GLint64 time_ns;
+  _glgsg->_glGetQueryObjecti64v(_index, GL_QUERY_RESULT, &time_ns);
+
+  return (time_ns - _timestamp) * 0.000000001;  // nanoseconds -> seconds
+}
+
+#endif  // OPENGLES

+ 57 - 0
panda/src/glstuff/glLatencyQueryContext_src.h

@@ -0,0 +1,57 @@
+// Filename: glLatencyQueryContext_src.h
+// Created by:  rdb (24Sep14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+class GraphicsStateGuardian;
+
+#ifndef OPENGLES  // Timer queries not supported by OpenGL ES.
+
+////////////////////////////////////////////////////////////////////
+//       Class : GLLatencyQueryContext
+// Description : This is a special variant of GLTimerQueryContext
+//               that measures the command latency, i.e. the time
+//               it takes for the GPU to actually get to the commands
+//               we are issuing right now.
+////////////////////////////////////////////////////////////////////
+class EXPCL_GL CLP(LatencyQueryContext) : public CLP(TimerQueryContext) {
+public:
+  CLP(LatencyQueryContext)(CLP(GraphicsStateGuardian) *glgsg, int pstats_index);
+
+  ALLOC_DELETED_CHAIN(CLP(LatencyQueryContext));
+
+  virtual double get_timestamp() const;
+
+  GLint64 _timestamp;  // GL_TIMESTAMP value read by the constructor
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    CLP(TimerQueryContext)::init_type();
+    register_type(_type_handle, CLASSPREFIX_QUOTED "LatencyQueryContext",
+                  CLP(TimerQueryContext)::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "glLatencyQueryContext_src.I"
+
+#endif  // OPENGLES
+

+ 28 - 19
panda/src/glstuff/glShaderContext_src.cxx

@@ -16,7 +16,7 @@
 
 #ifndef OPENGLES_1
 
-#include "pStatTimer.h"
+#include "pStatGPUTimer.h"
 
 TypeHandle CLP(ShaderContext)::_type_handle;
 
@@ -35,19 +35,19 @@ TypeHandle CLP(ShaderContext)::_type_handle;
 //       Access: Public
 //  Description: The Panda CG shader syntax defines a useful set of shorthand notations for setting nodepath
 //               properties as shaderinputs. For example, float4 mspos_XXX refers to nodepath XXX's position
-//               in model space. This function is a rough attempt to reimplement some of the shorthand 
+//               in model space. This function is a rough attempt to reimplement some of the shorthand
 //               notations for GLSL. The code is ~99% composed of excerpts dealing with matrix shaderinputs
-//               from Shader::compile_parameter.  
-//               
-//               Given a uniform variable name queried from the compiled shader passed in via arg_id, 
+//               from Shader::compile_parameter.
+//
+//               Given a uniform variable name queried from the compiled shader passed in via arg_id,
 //                  1) parse the name
 //                  2a) if the name refers to a Panda shorthand notation
 //                        push the appropriate matrix into shader._mat_spec
 //                        returns True
 //                  2b) If the name doesn't refer to a Panda shorthand notation
 //                        returns False
-//               
-//               The boolean return is used to notify down-river processing whether the shader var/parm was 
+//
+//               The boolean return is used to notify down-river processing whether the shader var/parm was
 //               actually picked up and the appropriate ShaderMatSpec pushed onto _mat_spec.
 ////////////////////////////////////////////////////////////////////
 bool CLP(ShaderContext)::
@@ -700,7 +700,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext
         }
       }
     }
-    
+
     // Now we've processed the uniforms, we'll process the attribs.
     _glgsg->_glGetProgramiv(_glsl_program, GL_ACTIVE_ATTRIBUTES, &param_count);
     _glgsg->_glGetProgramiv(_glsl_program, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &param_maxlength);
@@ -836,7 +836,7 @@ release_resources() {
   }
 
   _glsl_shaders.clear();
-  
+
   _glgsg->report_my_gl_errors();
 }
 
@@ -890,7 +890,7 @@ unbind() {
 ////////////////////////////////////////////////////////////////////
 void CLP(ShaderContext)::
 issue_parameters(int altered) {
-  PStatTimer timer(_glgsg->_draw_set_state_shader_parameters_pcollector);
+  //PStatGPUTimer timer(_glgsg, _glgsg->_draw_set_state_shader_parameters_pcollector);
 
   if (!valid()) {
     return;
@@ -1374,12 +1374,11 @@ glsl_report_shader_errors(GLuint shader) {
   _glgsg->_glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length);
 
   if (length > 1) {
-    info_log = (char *) malloc(length);
+    info_log = (char *) alloca(length);
     _glgsg->_glGetShaderInfoLog(shader, length, &num_chars, info_log);
     if (strcmp(info_log, "Success.\n") != 0 && strcmp(info_log, "No errors.\n") != 0) {
       GLCAT.error(false) << info_log << "\n";
     }
-    free(info_log);
   }
 }
 
@@ -1397,19 +1396,18 @@ glsl_report_program_errors(GLuint program) {
   _glgsg->_glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);
 
   if (length > 1) {
-    info_log = (char *) malloc(length);
+    info_log = (char *) alloca(length);
     _glgsg->_glGetProgramInfoLog(program, length, &num_chars, info_log);
     if (strcmp(info_log, "Success.\n") != 0 && strcmp(info_log, "No errors.\n") != 0) {
       GLCAT.error(false) << info_log << "\n";
     }
-    free(info_log);
   }
 }
 
 ////////////////////////////////////////////////////////////////////
 //     Function: CLP(ShaderContext)::glsl_compile_shader
 //       Access: Private
-//  Description: 
+//  Description:
 ////////////////////////////////////////////////////////////////////
 bool CLP(ShaderContext)::
 glsl_compile_shader(Shader::ShaderType type) {
@@ -1451,16 +1449,21 @@ glsl_compile_shader(Shader::ShaderType type) {
     return false;
   }
 
+  if (_glgsg->_use_object_labels) {
+    string name = _shader->get_filename(type);
+    _glgsg->_glObjectLabel(GL_SHADER, handle, name.size(), name.data());
+  }
+
   string text_str = _shader->get_text(type);
   const char* text = text_str.c_str();
   _glgsg->_glShaderSource(handle, 1, &text, NULL);
   _glgsg->_glCompileShader(handle);
   GLint status;
   _glgsg->_glGetShaderiv(handle, GL_COMPILE_STATUS, &status);
-  
+
   if (status != GL_TRUE) {
-    GLCAT.error() 
-      << "An error occurred while compiling shader " 
+    GLCAT.error()
+      << "An error occurred while compiling shader "
       << _shader->get_filename(type) << "\n";
     glsl_report_shader_errors(handle);
     _glgsg->_glDeleteShader(handle);
@@ -1485,6 +1488,12 @@ glsl_compile_and_link() {
   if (!_glsl_program) {
     return false;
   }
+
+  if (_glgsg->_use_object_labels) {
+    string name = _shader->get_filename();
+    _glgsg->_glObjectLabel(GL_PROGRAM, _glsl_program, name.size(), name.data());
+  }
+
   bool valid = true;
 
   if (!_shader->get_text(Shader::ST_vertex).empty()) {
@@ -1507,7 +1516,7 @@ glsl_compile_and_link() {
     nassertr(_glgsg->_glProgramParameteri != NULL, false);
     GLint max_vertices;
     glGetIntegerv(GL_MAX_GEOMETRY_OUTPUT_VERTICES, &max_vertices);
-    _glgsg->_glProgramParameteri(_glsl_program, GL_GEOMETRY_VERTICES_OUT_ARB, max_vertices); 
+    _glgsg->_glProgramParameteri(_glsl_program, GL_GEOMETRY_VERTICES_OUT_ARB, max_vertices);
   }
 #endif
 

+ 28 - 0
panda/src/glstuff/glTimerQueryContext_src.I

@@ -0,0 +1,28 @@
+// Filename: glTimerQueryContext_src.I
+// Created by:  rdb (22Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: CLP(TimerQueryContext)::Constructor
+//       Access: Public
+//  Description: Creates a timer query context associated with the
+//               given GSG.  The GL query name (_index) starts out
+//               as 0; this constructor does not allocate a GL query
+//               object itself.
+////////////////////////////////////////////////////////////////////
+INLINE CLP(TimerQueryContext)::
+CLP(TimerQueryContext)(CLP(GraphicsStateGuardian) *glgsg,
+                       int pstats_index) :
+  TimerQueryContext(pstats_index),
+  // Members are listed in their declaration order (_index, then
+  // _glgsg), which is the order they are actually initialized in;
+  // this avoids a -Wreorder warning.
+  _index(0),
+  _glgsg(glgsg)
+{
+}

+ 104 - 0
panda/src/glstuff/glTimerQueryContext_src.cxx

@@ -0,0 +1,104 @@
+// Filename: glTimerQueryContext_src.cxx
+// Created by:  rdb (22Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#include "pnotify.h"
+#include "dcast.h"
+#include "lightMutexHolder.h"
+#include "pStatTimer.h"
+
+#ifndef OPENGLES  // Timer queries not supported by OpenGL ES.
+
+TypeHandle CLP(TimerQueryContext)::_type_handle;
+
+////////////////////////////////////////////////////////////////////
+//     Function: GLTimerQueryContext::Destructor
+//       Access: Public, Virtual
+//  Description: Hands the GL query name held by this context (if
+//               any) back to the GSG by appending it to the GSG's
+//               _deleted_queries list, so it can be recycled later.
+////////////////////////////////////////////////////////////////////
+CLP(TimerQueryContext)::
+~CLP(TimerQueryContext)() {
+  if (_index == 0) {
+    // No query name is held; there is nothing to give back.
+    return;
+  }
+
+  // The deleted-queries list is modified under the GSG's lock; the GSG
+  // recycles the index whenever it gets around to it.
+  LightMutexHolder holder(_glgsg->_lock);
+  _glgsg->_deleted_queries.push_back(_index);
+  _index = 0;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GLTimerQueryContext::is_answer_ready
+//       Access: Public, Virtual
+//  Description: Returns true if the query's answer is ready, false
+//               otherwise.  If this returns false, the application
+//               must continue to poll until it returns true.
+//
+//               It is only valid to call this from the draw thread.
+////////////////////////////////////////////////////////////////////
+bool CLP(TimerQueryContext)::
+is_answer_ready() const {
+  // Start from "not available".  If the GL call raises an error (for
+  // instance because _index is not a valid query name), GL leaves the
+  // output parameter untouched, and we must not return garbage.
+  GLuint result = 0;
+  _glgsg->_glGetQueryObjectuiv(_index, GL_QUERY_RESULT_AVAILABLE, &result);
+
+  return (result != 0);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GLTimerQueryContext::waiting_for_answer
+//       Access: Public, Virtual
+//  Description: Requests the graphics engine to expedite the pending
+//               answer--the application is now waiting until the
+//               answer is ready.
+//
+//               It is only valid to call this from the draw thread.
+////////////////////////////////////////////////////////////////////
+void CLP(TimerQueryContext)::
+waiting_for_answer() {
+  // The time spent in the flush is charged to the GSG's wait collector.
+  PStatTimer timer(GraphicsStateGuardian::_wait_timer_pcollector);
+  // glFlush() submits the queued commands to the driver; it does not by
+  // itself wait for the query result, so callers still need to poll
+  // is_answer_ready().
+  glFlush();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: GLTimerQueryContext::get_timestamp
+//       Access: Public, Virtual
+//  Description: Returns the timestamp that is the result of this
+//               timer query.  There's no guarantee about which
+//               clock this uses, the only guarantee is that
+//               subtracting a start time from an end time should
+//               yield a time in seconds.
+//               If is_answer_ready() did not return true, this
+//               function may block before it returns.
+//
+//               It is only valid to call this from the draw thread.
+////////////////////////////////////////////////////////////////////
+double CLP(TimerQueryContext)::
+get_timestamp() const {
+  // Initialized so that a failed GL call (which leaves the output
+  // untouched) yields 0.0 instead of an indeterminate value.
+  GLuint64 time_ns = 0;
+
+  // Fetching GL_QUERY_RESULT may block if the result is not yet
+  // available.
+  _glgsg->_glGetQueryObjectui64v(_index, GL_QUERY_RESULT, &time_ns);
+
+  // The query result is in nanoseconds; convert it to seconds.
+  return time_ns * 0.000000001;
+}
+
+#endif  // OPENGLES

+ 68 - 0
panda/src/glstuff/glTimerQueryContext_src.h

@@ -0,0 +1,68 @@
+// Filename: glTimerQueryContext_src.h
+// Created by:  rdb (22Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#include "pandabase.h"
+#include "timerQueryContext.h"
+#include "deletedChain.h"
+#include "clockObject.h"
+
+class GraphicsStateGuardian;
+
+#ifndef OPENGLES  // Timer queries not supported by OpenGL ES.
+
+////////////////////////////////////////////////////////////////////
+//       Class : GLTimerQueryContext
+// Description : This class manages a timer query that can be used
+//               by a PStatGPUTimer to measure the time a task takes
+//               to execute on the GPU.
+//               This records the current timestamp; a pair of these
+//               is usually used to get the elapsed time.
+////////////////////////////////////////////////////////////////////
+class EXPCL_GL CLP(TimerQueryContext) : public TimerQueryContext {
+public:
+  INLINE CLP(TimerQueryContext)(CLP(GraphicsStateGuardian) *glgsg,
+                                int pstats_index);
+  virtual ~CLP(TimerQueryContext)();
+
+  ALLOC_DELETED_CHAIN(CLP(TimerQueryContext));
+
+  // Query interface; the implementations state that these may only be
+  // called from the draw thread.
+  virtual bool is_answer_ready() const;
+  virtual void waiting_for_answer();
+  virtual double get_timestamp() const;
+
+  // GL query object name.  The constructor sets it to 0, and the
+  // destructor only returns it to the GSG when it is nonzero.
+  GLuint _index;
+  // The GSG whose GL entry points, lock and _deleted_queries list are
+  // used by this object.
+  CLP(GraphicsStateGuardian) *_glgsg;
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    TimerQueryContext::init_type();
+    register_type(_type_handle, CLASSPREFIX_QUOTED "TimerQueryContext",
+                  TimerQueryContext::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "glTimerQueryContext_src.I"
+
+#endif  // OPENGLES
+

+ 11 - 1
panda/src/glstuff/glmisc_src.cxx

@@ -151,6 +151,13 @@ ConfigVariableEnum<NotifySeverity> gl_debug_abort_level
             "that triggered the error message.  "
             "This feature is not available when NDEBUG has been defined."));
 
+// Controls whether GL objects are labelled with their names for debug
+// output; enabled by default.
+// NOTE(review): presumably this feeds the GSG's _use_object_labels
+// flag -- confirm in the GSG source.
+ConfigVariableBool gl_debug_object_labels
+  ("gl-debug-object-labels", true,
+   PRC_DESC("When gl-debug is set to true, this will tell OpenGL the "
+            "name of textures, shaders, and other objects, so that OpenGL "
+            "can display those in error messages.  There's usually no "
+            "reason to disable this."));
+
 ConfigVariableBool gl_debug_buffers
   ("gl-debug-buffers", false,
    PRC_DESC("Set this true, in addition to enabling debug notify for "
@@ -162,7 +169,8 @@ ConfigVariableBool gl_finish
    PRC_DESC("Set this true to force a call to glFinish() after every major "
             "graphics operation.  This is likely to slow down rendering "
             "performance substantially, but it will make PStats graphs "
-            "more accurately reflect where the graphics bottlenecks are.  "
+            "more accurately reflect where the graphics bottlenecks are, "
+            "although it is better to use timer queries when available. "
             "This variable is enabled only if PStats is compiled in."));
 
 ConfigVariableBool gl_force_depth_stencil
@@ -252,6 +260,8 @@ void CLP(init_classes)() {
 
 #ifndef OPENGLES
   CLP(OcclusionQueryContext)::init_type();
+  CLP(TimerQueryContext)::init_type();
+  CLP(LatencyQueryContext)::init_type();
 #endif
 
   PandaSystem *ps = PandaSystem::get_global_ptr();

+ 1 - 0
panda/src/glstuff/glmisc_src.h

@@ -58,6 +58,7 @@ extern ConfigVariableEnum<GeomEnums::UsageHint> gl_min_buffer_usage_hint;
 extern ConfigVariableBool gl_debug;
 extern ConfigVariableBool gl_debug_synchronous;
 extern ConfigVariableEnum<NotifySeverity> gl_debug_abort_level;
+extern ConfigVariableBool gl_debug_object_labels;
 extern ConfigVariableBool gl_debug_buffers;
 extern ConfigVariableBool gl_finish;
 extern ConfigVariableBool gl_force_depth_stencil;

+ 2 - 0
panda/src/glstuff/glstuff_src.cxx

@@ -22,6 +22,8 @@
 #include "glVertexBufferContext_src.cxx"
 #include "glIndexBufferContext_src.cxx"
 #include "glOcclusionQueryContext_src.cxx"
+#include "glTimerQueryContext_src.cxx"
+#include "glLatencyQueryContext_src.cxx"
 #include "glGeomContext_src.cxx"
 #include "glGeomMunger_src.cxx"
 #include "glShaderContext_src.cxx"

+ 2 - 0
panda/src/glstuff/glstuff_src.h

@@ -36,6 +36,8 @@
 #include "glVertexBufferContext_src.h"
 #include "glIndexBufferContext_src.h"
 #include "glOcclusionQueryContext_src.h"
+#include "glTimerQueryContext_src.h"
+#include "glLatencyQueryContext_src.h"
 #include "glGeomContext_src.h"
 #include "glGeomMunger_src.h"
 #include "glShaderContext_src.h"

+ 3 - 3
panda/src/glstuff/panda_glext.h

@@ -6,7 +6,7 @@ extern "C" {
 
 /*
 ** Copyright (c) 2007-2012 The Khronos Group Inc.
-** 
+**
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
 ** "Materials"), to deal in the Materials without restriction, including
@@ -14,10 +14,10 @@ extern "C" {
 ** distribute, sublicense, and/or sell copies of the Materials, and to
 ** permit persons to whom the Materials are furnished to do so, subject to
 ** the following conditions:
-** 
+**
 ** The above copyright notice and this permission notice shall be included
 ** in all copies or substantial portions of the Materials.
-** 
+**
 ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

+ 3 - 0
panda/src/gobj/Sources.pp

@@ -66,6 +66,7 @@
     textureReloadRequest.I textureReloadRequest.h \
     textureStage.I textureStage.h \
     textureStagePool.I textureStagePool.h \
+    timerQueryContext.I timerQueryContext.h \
     transformBlend.I transformBlend.h \
     transformBlendTable.I transformBlendTable.h \
     transformTable.I transformTable.h \
@@ -136,6 +137,7 @@
     textureReloadRequest.cxx \
     textureStage.cxx \
     textureStagePool.cxx \
+    timerQueryContext.cxx \
     transformBlend.cxx \
     transformBlendTable.cxx \
     transformTable.cxx \
@@ -208,6 +210,7 @@
     textureReloadRequest.I textureReloadRequest.h \
     textureStage.I textureStage.h \
     textureStagePool.I textureStagePool.h \
+    timerQueryContext.I timerQueryContext.h \
     transformBlend.I transformBlend.h \
     transformBlendTable.I transformBlendTable.h \
     transformTable.I transformTable.h \

+ 5 - 3
panda/src/gobj/config_gobj.cxx

@@ -44,6 +44,7 @@
 #include "textureReloadRequest.h"
 #include "textureStage.h"
 #include "textureContext.h"
+#include "timerQueryContext.h"
 #include "shader.h"
 #include "shaderContext.h"
 #include "transformBlend.h"
@@ -92,7 +93,7 @@ ConfigVariableInt texture_scale_limit
           "to both X and Y."));
 
 ConfigVariableList exclude_texture_scale
-("exclude-texture-scale", 
+("exclude-texture-scale",
  PRC_DESC("This is a list of glob patterns for texture filenames "
           "(excluding the directory part of the filename, but including "
           "the extension); for instance, 'digits_*.png'.  Any texture "
@@ -287,7 +288,7 @@ ConfigVariableInt vertex_column_alignment
           "this alignment for the vertex animation columns only."));
 
 ConfigVariableBool vertex_animation_align_16
-("vertex-animation-align-16", 
+("vertex-animation-align-16",
 #ifdef LINMATH_ALIGN
  true,
 #else
@@ -343,7 +344,7 @@ ConfigVariableInt simple_image_size
 
 ConfigVariableDouble simple_image_threshold
 ("simple-image-threshold", 0.1,
- PRC_DESC("This is a value that indicates how closely a texture's " 
+ PRC_DESC("This is a value that indicates how closely a texture's "
           "generated simple "
           "image should approximate the original image.  The smaller the "
           "number, the closer the match; small numbers will result in "
@@ -568,6 +569,7 @@ ConfigureFn(config_gobj) {
   TexturePoolFilter::init_type();
   TextureReloadRequest::init_type();
   TextureStage::init_type();
+  TimerQueryContext::init_type();
   TransformBlend::init_type();
   TransformBlendTable::init_type();
   TransformTable::init_type();

+ 3 - 0
panda/src/gobj/p3gobj_composite1.cxx

@@ -30,3 +30,6 @@
 #include "indexBufferContext.cxx"
 #include "internalName.cxx"
 #include "lens.cxx"
+#include "material.cxx"
+#include "materialPool.cxx"
+#include "matrixLens.cxx"

+ 1 - 3
panda/src/gobj/p3gobj_composite2.cxx

@@ -1,6 +1,3 @@
-#include "material.cxx"
-#include "materialPool.cxx"
-#include "matrixLens.cxx"
 #include "occlusionQueryContext.cxx"
 #include "orthographicLens.cxx"
 #include "perspectiveLens.cxx"
@@ -21,6 +18,7 @@
 #include "textureReloadRequest.cxx"
 #include "textureStage.cxx"
 #include "textureStagePool.cxx"
+#include "timerQueryContext.cxx"
 #include "transformBlend.cxx"
 #include "transformBlendTable.cxx"
 #include "transformTable.cxx"

+ 26 - 0
panda/src/gobj/timerQueryContext.I

@@ -0,0 +1,26 @@
+// Filename: timerQueryContext.I
+// Created by:  rdb (22Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: TimerQueryContext::Constructor
+//       Access: Public
+//  Description: Stores the given PStats index and stamps the query
+//               with the global clock's frame count at the time of
+//               construction.
+////////////////////////////////////////////////////////////////////
+INLINE TimerQueryContext::
+TimerQueryContext(int pstats_index) :
+  // Listed in declaration order (_frame_index, then _pstats_index),
+  // which is the order the members are actually initialized in; this
+  // avoids a -Wreorder warning.
+  _frame_index(ClockObject::get_global_clock()->get_frame_count()),
+  _pstats_index(pstats_index)
+{
+}

+ 35 - 0
panda/src/gobj/timerQueryContext.cxx

@@ -0,0 +1,35 @@
+// Filename: timerQueryContext.cxx
+// Created by:  rdb (22Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#include "timerQueryContext.h"
+
+TypeHandle TimerQueryContext::_type_handle;
+
+////////////////////////////////////////////////////////////////////
+//     Function: TimerQueryContext::get_timestamp
+//       Access: Public, Virtual
+//  Description: Returns the timestamp that is the result of this
+//               timer query.  There's no guarantee about which
+//               clock this uses, the only guarantee is that
+//               subtracting a start time from an end time should
+//               yield a time in seconds.
+//               If is_answer_ready() did not return true, this
+//               function may block before it returns.
+//
+//               It is only valid to call this from the draw thread.
+////////////////////////////////////////////////////////////////////
+double TimerQueryContext::
+get_timestamp() const {
+  // get_timestamp() is declared pure virtual in timerQueryContext.h, so
+  // derived classes must override it.  This base body is only reachable
+  // through an explicitly qualified call and reports 0.0 rather than a
+  // real timestamp.
+  return 0.0;
+}

+ 60 - 0
panda/src/gobj/timerQueryContext.h

@@ -0,0 +1,60 @@
+// Filename: timerQueryContext.h
+// Created by:  rdb (22Aug14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef TIMERQUERYCONTEXT_H
+#define TIMERQUERYCONTEXT_H
+
+#include "pandabase.h"
+#include "queryContext.h"
+#include "clockObject.h"
+#include "pStatCollector.h"
+
+////////////////////////////////////////////////////////////////////
+//       Class : TimerQueryContext
+// Description : Abstract base class for a timer query.  Derived
+//               classes implement get_timestamp(), which returns the
+//               timestamp recorded by the query such that the
+//               difference of two timestamps is a time in seconds.
+////////////////////////////////////////////////////////////////////
+class EXPCL_PANDA_GOBJ TimerQueryContext : public QueryContext {
+public:
+  INLINE TimerQueryContext(int pstats_index);
+
+  ALLOC_DELETED_CHAIN(TimerQueryContext);
+
+  // Pure virtual; timerQueryContext.cxx also provides a base body that
+  // returns 0.0.
+  virtual double get_timestamp() const=0;
+
+  // Frame count of the global clock at the time this object was
+  // constructed.
+  int _frame_index;
+  // The PStats index passed to the constructor.
+  // NOTE(review): presumably a PStats collector index -- confirm
+  // against the code that creates these queries.
+  int _pstats_index;
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    QueryContext::init_type();
+    register_type(_type_handle, "TimerQueryContext",
+                  QueryContext::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;
+
+  friend class PreparedGraphicsObjects;
+};
+
+#include "timerQueryContext.I"
+
+#endif

+ 1 - 6
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -32,7 +32,6 @@ class GraphicsOutputBase;
 
 class VertexBufferContext;
 class IndexBufferContext;
-class OcclusionQueryContext;
 class GeomContext;
 class GeomNode;
 class Geom;
@@ -123,7 +122,6 @@ PUBLISHED:
 
   virtual bool get_supports_multisample() const=0;
   virtual int get_supported_geom_rendering() const=0;
-  virtual bool get_supports_occlusion_query() const=0;
   virtual bool get_supports_shadow_filter() const=0;
 
 public:
@@ -153,16 +151,13 @@ public:
 
   virtual ShaderContext *prepare_shader(Shader *shader)=0;
   virtual void release_shader(ShaderContext *sc)=0;
-  
+
   virtual VertexBufferContext *prepare_vertex_buffer(GeomVertexArrayData *data)=0;
   virtual void release_vertex_buffer(VertexBufferContext *vbc)=0;
 
   virtual IndexBufferContext *prepare_index_buffer(GeomPrimitive *data)=0;
   virtual void release_index_buffer(IndexBufferContext *ibc)=0;
 
-  virtual void begin_occlusion_query()=0;
-  virtual PT(OcclusionQueryContext) end_occlusion_query()=0;
-
   virtual void dispatch_compute(int size_x, int size_y, int size_z)=0;
 
   virtual PT(GeomMunger) get_geom_munger(const RenderState *state,

+ 11 - 0
panda/src/pstatclient/config_pstats.cxx

@@ -68,6 +68,17 @@ ConfigVariableDouble pstats_target_frame_rate
           "This frame rate is marked with a different-colored line; "
           "otherwise, this setting has no effect."));
 
+// Opt-in switch for GPU-side timing in PStats; off by default.
+ConfigVariableBool pstats_gpu_timing
+("pstats-gpu-timing", false,
+ PRC_DESC("Set this true to query the graphics library for the actual time "
+          "that graphics operations take to execute on the video card.  "
+          "Enabling this will harm performance, but this information can "
+          "be more useful than the regular Draw information in tracking "
+          "down bottlenecks, because the CPU-based Draw collectors only "
+          "measure how long it takes for the API call to complete, which "
+          "is not usually an accurate reflection of how long the actual "
+          "operation takes on the video card."));
+
 // The rest are different in that they directly control the server,
 // not the client.
 ConfigVariableBool pstats_scroll_mode

+ 1 - 0
panda/src/pstatclient/config_pstats.h

@@ -38,6 +38,7 @@ extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_tcp_ratio;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableString pstats_host;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableInt pstats_port;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_target_frame_rate;
+extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_gpu_timing;
 
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableBool pstats_scroll_mode;
 extern EXPCL_PANDA_PSTATCLIENT ConfigVariableDouble pstats_history;

+ 72 - 37
panda/src/pstatclient/pStatClient.cxx

@@ -152,7 +152,7 @@ get_collector_fullname(int index) const {
   if (parent_index == 0) {
     return collector->get_name();
   } else {
-    return get_collector_fullname(parent_index) + ":" + 
+    return get_collector_fullname(parent_index) + ":" +
       collector->get_name();
   }
 }
@@ -180,7 +180,6 @@ get_main_thread() const {
   return PStatThread((PStatClient *)this, 0);
 }
 
-
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatClient::get_current_thread
 //       Access: Published
@@ -191,7 +190,7 @@ get_main_thread() const {
 PStatThread PStatClient::
 get_current_thread() const {
   if (!client_is_connected()) {
-    // No need to make the relatively expensive call to 
+    // No need to make the relatively expensive call to
     // Thread::get_current_thread() if we're not even connected.
     return get_main_thread();
   }
@@ -222,10 +221,10 @@ main_tick() {
 
 
     _mmap_size_pcollector.set_level(MemoryUsage::get_panda_mmap_size());
-    
+
     TypeRegistry *type_reg = TypeRegistry::ptr();
     int num_typehandles = type_reg->get_num_typehandles();
-    
+
     while ((int)type_handle_cols.size() < num_typehandles) {
       type_handle_cols.push_back(TypeHandleCollector());
     }
@@ -261,7 +260,7 @@ main_tick() {
         case TypeHandle::MC_limit:
           // Not used.
           break;
-        }          
+        }
       }
     }
     size_t min_usage = (single_total_usage + array_total_usage + dc_active_total_usage + dc_inactive_total_usage) / 1024;
@@ -288,7 +287,7 @@ main_tick() {
             case TypeHandle::MC_singleton:
               category = "Heap:Single";
               break;
-              
+
             case TypeHandle::MC_array:
               category = "Heap:Array";
               break;
@@ -315,11 +314,11 @@ main_tick() {
           case TypeHandle::MC_singleton:
             single_other_usage -= usage;
             break;
-            
+
           case TypeHandle::MC_array:
             array_other_usage -= usage;
             break;
-            
+
           case TypeHandle::MC_deleted_chain_active:
             dc_active_other_usage -= usage;
             break;
@@ -348,7 +347,7 @@ main_tick() {
 #endif  // DO_MEMORY_USAGE
 
   get_global_pstats()->client_main_tick();
-}  
+}
 
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatClient::thread_tick
@@ -359,7 +358,7 @@ main_tick() {
 void PStatClient::
 thread_tick(const string &sync_name) {
   get_global_pstats()->client_thread_tick(sync_name);
-}  
+}
 
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatClient::client_main_tick
@@ -383,8 +382,8 @@ client_main_tick() {
       _threads_by_sync_name.find("Main");
     if (ni != _threads_by_sync_name.end()) {
       const vector_int &indices = (*ni).second;
-      for (vector_int::const_iterator vi = indices.begin(); 
-           vi != indices.end(); 
+      for (vector_int::const_iterator vi = indices.begin();
+           vi != indices.end();
            ++vi) {
         _impl->new_frame(*vi);
       }
@@ -407,8 +406,8 @@ client_thread_tick(const string &sync_name) {
       _threads_by_sync_name.find(sync_name);
     if (ni != _threads_by_sync_name.end()) {
       const vector_int &indices = (*ni).second;
-      for (vector_int::const_iterator vi = indices.begin(); 
-           vi != indices.end(); 
+      for (vector_int::const_iterator vi = indices.begin();
+           vi != indices.end();
            ++vi) {
         _impl->new_frame(*vi);
       }
@@ -463,7 +462,7 @@ PStatClient *PStatClient::
 get_global_pstats() {
   if (_global_pstats == (PStatClient *)NULL) {
     _global_pstats = new PStatClient;
-    
+
     ClockObject::_start_clock_wait = start_clock_wait;
     ClockObject::_start_clock_busy_wait = start_clock_busy_wait;
     ClockObject::_stop_clock_wait = stop_clock_wait;
@@ -616,8 +615,8 @@ do_make_thread(Thread *thread) {
     // We have seen a thread with this name before.  Can we re-use any
     // of them?
     const vector_int &indices = (*ni).second;
-    for (vector_int::const_iterator vi = indices.begin(); 
-         vi != indices.end(); 
+    for (vector_int::const_iterator vi = indices.begin();
+         vi != indices.end();
          ++vi) {
       int index = (*vi);
       nassertr(index >= 0 && index < _num_threads, PStatThread());
@@ -637,20 +636,26 @@ do_make_thread(Thread *thread) {
   int new_index = _num_threads;
   thread->set_pstats_index(new_index);
   thread->set_pstats_callback(this);
-  _threads_by_name[thread->get_name()].push_back(new_index);
-  _threads_by_sync_name[thread->get_sync_name()].push_back(new_index);
-        
+
   InternalThread *pthread = new InternalThread(thread);
   add_thread(pthread);
 
-  // We need an additional PerThreadData for this thread in all of the
-  // collectors.
-  CollectorPointer *collectors = (CollectorPointer *)_collectors;
-  for (int ci = 0; ci < _num_collectors; ++ci) {
-    Collector *collector = collectors[ci];
-    collector->_per_thread.push_back(PerThreadData());
-    nassertr((int)collector->_per_thread.size() == _num_threads, PStatThread());
-  }
+  return PStatThread(this, new_index);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatClient::make_gpu_thread
+//       Access: Private
+//  Description: Returns a PStatThread representing the GPU.
+//               This is normally called by the GSG only.
+//
+//               The new thread record is created with the given
+//               name and the sync name "GPU", and is not backed by
+//               a Thread object.
+////////////////////////////////////////////////////////////////////
+PStatThread PStatClient::
+make_gpu_thread(const string &name) {
+  ReMutexHolder holder(_lock);
+  // Capture the index before add_thread() increments _num_threads.
+  int new_index = _num_threads;
+
+  InternalThread *pthread = new InternalThread(name, "GPU");
+  add_thread(pthread);
 
   return PStatThread(this, new_index);
 }
@@ -960,7 +965,7 @@ get_level(int collector_index, int thread_index) const {
 //  Description: This function is added as a hook into ClockObject, so
 //               that we may time the delay for
 //               ClockObject::wait_until(), used for certain special
-//               clock modes.  
+//               clock modes.
 //
 //               This callback is a hack around the fact that we can't
 //               let the ClockObject directly create a PStatCollector,
@@ -977,7 +982,7 @@ start_clock_wait() {
 //  Description: This function is added as a hook into ClockObject, so
 //               that we may time the delay for
 //               ClockObject::wait_until(), used for certain special
-//               clock modes.  
+//               clock modes.
 //
 //               This callback is a hack around the fact that we can't
 //               let the ClockObject directly create a PStatCollector,
@@ -995,7 +1000,7 @@ start_clock_busy_wait() {
 //  Description: This function is added as a hook into ClockObject, so
 //               that we may time the delay for
 //               ClockObject::wait_until(), used for certain special
-//               clock modes.  
+//               clock modes.
 //
 //               This callback is a hack around the fact that we can't
 //               let the ClockObject directly create a PStatCollector,
@@ -1051,6 +1056,9 @@ add_collector(PStatClient::Collector *collector) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 add_thread(PStatClient::InternalThread *thread) {
+  _threads_by_name[thread->_name].push_back(_num_threads);
+  _threads_by_sync_name[thread->_sync_name].push_back(_num_threads);
+
   if (_num_threads >= _threads_size) {
     // We need to grow the array.  We have to be careful here, because
     // there might be clients accessing the array right now who are
@@ -1071,12 +1079,21 @@ add_thread(PStatClient::InternalThread *thread) {
     // and then no more.)
 
     new_threads[_num_threads] = thread;
-    AtomicAdjust::inc(_num_threads);
 
   } else {
     ThreadPointer *threads = (ThreadPointer *)_threads;
     threads[_num_threads] = thread;
-    AtomicAdjust::inc(_num_threads);
+  }
+
+  AtomicAdjust::inc(_num_threads);
+
+  // We need an additional PerThreadData for this thread in all of the
+  // collectors.
+  CollectorPointer *collectors = (CollectorPointer *)_collectors;
+  for (int ci = 0; ci < _num_collectors; ++ci) {
+    Collector *collector = collectors[ci];
+    collector->_per_thread.push_back(PerThreadData());
+    nassertv((int)collector->_per_thread.size() == _num_threads);
   }
 }
 
@@ -1148,7 +1165,7 @@ make_def(const PStatClient *client, int this_index) {
   if (_def == (PStatCollectorDef *)NULL) {
     _def = new PStatCollectorDef(this_index, _name);
     if (_parent_index != this_index) {
-      const PStatCollectorDef *parent_def = 
+      const PStatCollectorDef *parent_def =
         client->get_collector_def(_parent_index);
       _def->set_parent(*parent_def);
     }
@@ -1157,9 +1174,9 @@ make_def(const PStatClient *client, int this_index) {
 }
 
 ////////////////////////////////////////////////////////////////////
-//     Function: PStatClient::Collector::make_def
+//     Function: PStatClient::InternalThread::Constructor
 //       Access: Private
-//  Description: Creates the new PStatCollectorDef for this collector.
+//  Description: Creates the InternalThread record for the given Thread.
 ////////////////////////////////////////////////////////////////////
 PStatClient::InternalThread::
 InternalThread(Thread *thread) :
@@ -1174,4 +1191,22 @@ InternalThread(Thread *thread) :
 {
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: PStatClient::InternalThread::Constructor
+//       Access: Private
+//  Description: Creates a thread record that has no Thread object.
+////////////////////////////////////////////////////////////////////
+PStatClient::InternalThread::
+InternalThread(const string &name, const string &sync_name) :
+  _thread(NULL),  // this record is identified by name only
+  _name(name),
+  _sync_name(sync_name),
+  _is_active(false),
+  _frame_number(0),
+  _next_packet(0.0),
+  _thread_active(true),
+  _thread_lock(string("PStatClient::InternalThread ") + name)
+{
+}
+
 #endif // DO_PSTATS

+ 5 - 2
panda/src/pstatclient/pStatClient.h

@@ -35,6 +35,7 @@
 class PStatCollector;
 class PStatCollectorDef;
 class PStatThread;
+class GraphicsStateGuardian;
 
 ////////////////////////////////////////////////////////////////////
 //       Class : PStatClient
@@ -113,6 +114,7 @@ private:
   PStatThread do_get_current_thread() const;
   PStatThread make_thread(Thread *thread);
   PStatThread do_make_thread(Thread *thread);
+  PStatThread make_gpu_thread(const string &name);
 
   bool is_active(int collector_index, int thread_index) const;
   bool is_started(int collector_index, int thread_index) const;
@@ -171,7 +173,7 @@ private:
     INLINE const string &get_name() const;
     INLINE bool is_active() const;
     INLINE PStatCollectorDef *get_def(const PStatClient *client, int this_index) const;
-      
+
   private:
     void make_def(const PStatClient *client, int this_index);
 
@@ -201,6 +203,7 @@ private:
   class InternalThread {
   public:
     InternalThread(Thread *thread);
+    InternalThread(const string &name, const string &sync_name = "Main");
 
     WPT(Thread) _thread;
     string _name;
@@ -248,6 +251,7 @@ private:
   friend class PStatCollector;
   friend class PStatThread;
   friend class PStatClientImpl;
+  friend class GraphicsStateGuardian;
 };
 
 #include "pStatClient.I"
@@ -272,4 +276,3 @@ PUBLISHED:
 #endif  // DO_PSTATS
 
 #endif
-

+ 51 - 9
panda/src/pstatclient/pStatClientImpl.cxx

@@ -47,7 +47,7 @@ PStatClientImpl(PStatClient *client) :
   _reader(this, 0),
   _writer(this, pstats_threaded_write ? 1 : 0)
 {
-  _writer.set_max_queue_size(pstats_max_queue_size); 
+  _writer.set_max_queue_size(pstats_max_queue_size);
   _reader.set_tcp_header_size(4);
   _writer.set_tcp_header_size(4);
   _is_connected = false;
@@ -201,10 +201,10 @@ new_frame(int thread_index) {
     // Fill up the level data for all the collectors who have level
     // data for this pthread.
     int num_collectors = _client->_num_collectors;
-    PStatClient::CollectorPointer *collectors = 
+    PStatClient::CollectorPointer *collectors =
       (PStatClient::CollectorPointer *)_client->_collectors;
     for (int i = 0; i < num_collectors; i++) {
-      const PStatClient::PerThreadData &ptd = 
+      const PStatClient::PerThreadData &ptd =
         collectors[i]->_per_thread[thread_index];
       if (ptd._has_level) {
         pthread->_frame_data.add_level(i, ptd._level);
@@ -219,13 +219,55 @@ new_frame(int thread_index) {
   _client->start(0, thread_index, frame_start);
 
   // Also record the time for the PStats operation itself.
+  int current_thread_index = Thread::get_current_thread()->get_pstats_index();
   int pstats_index = PStatClient::_pstats_pcollector.get_index();
-  _client->start(pstats_index, thread_index, frame_start);
+  _client->start(pstats_index, current_thread_index, frame_start);
 
   if (frame_number != -1) {
     transmit_frame_data(thread_index, frame_number, frame_data);
   }
-  _client->stop(pstats_index, thread_index, get_real_time());
+  _client->stop(pstats_index, current_thread_index, get_real_time());
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: PStatClientImpl::add_frame
+//       Access: Public
+//  Description: Lower-level variant of new_frame that transmits the
+//               caller-supplied frame_data for thread_index.
+////////////////////////////////////////////////////////////////////
+void PStatClientImpl::
+add_frame(int thread_index, const PStatFrameData &frame_data) {
+  nassertv(thread_index >= 0 && thread_index < _client->_num_threads);
+
+  PStatClient::InternalThread *pthread = _client->get_thread_ptr(thread_index);
+
+  // If we're the main thread, we should exchange control packets with
+  // the server.
+  if (thread_index == 0) {
+    transmit_control_data();
+  }
+
+  // If we've got the UDP port by the time the frame starts, it's
+  // time to become active and start actually tracking data.
+  if (_got_udp_port) {
+    pthread->_is_active = true;
+  }
+
+  if (!pthread->_is_active) {
+    return;  // not active yet: nothing is sent, frame number is unchanged
+  }
+
+  int frame_number = pthread->_frame_number++;  // this frame uses the old value
+
+  // Time the PStats transmission itself against the calling thread.
+  int current_thread_index = Thread::get_current_thread()->get_pstats_index();
+  int pstats_index = PStatClient::_pstats_pcollector.get_index();
+  _client->start(pstats_index, current_thread_index);
+
+  if (frame_number != -1) {
+    transmit_frame_data(thread_index, frame_number, frame_data);
+  }
+  _client->stop(pstats_index, current_thread_index);
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -235,7 +277,7 @@ new_frame(int thread_index) {
 //               transmit the latest data to the PStatServer.
 ////////////////////////////////////////////////////////////////////
 void PStatClientImpl::
-transmit_frame_data(int thread_index, int frame_number, 
+transmit_frame_data(int thread_index, int frame_number,
                     const PStatFrameData &frame_data) {
   nassertv(thread_index >= 0 && thread_index < _client->_num_threads);
   PStatClient::InternalThread *thread = _client->get_thread_ptr(thread_index);
@@ -397,7 +439,7 @@ report_new_collectors() {
   // single datagram.  So we limit ourselves here to sending only
   // half that many.
   static const int max_collectors_at_once = 700;
-  
+
   while (_is_connected && _collectors_reported < _client->_num_collectors) {
     PStatClientControlMessage message;
     message._type = PStatClientControlMessage::T_define_collectors;
@@ -408,7 +450,7 @@ report_new_collectors() {
       _collectors_reported++;
       i++;
     }
-    
+
     Datagram datagram;
     message.encode(datagram);
     _writer.send(datagram, _tcp_connection, true);
@@ -427,7 +469,7 @@ report_new_threads() {
     PStatClientControlMessage message;
     message._type = PStatClientControlMessage::T_define_threads;
     message._first_thread_index = _threads_reported;
-    PStatClient::ThreadPointer *threads = 
+    PStatClient::ThreadPointer *threads =
       (PStatClient::ThreadPointer *)_client->_threads;
     while (_threads_reported < _client->_num_threads) {
       message._names.push_back(threads[_threads_reported]->_name);

+ 2 - 1
panda/src/pstatclient/pStatClientImpl.h

@@ -72,6 +72,7 @@ public:
   INLINE void client_resume_after_pause();
 
   void new_frame(int thread_index);
+  void add_frame(int thread_index, const PStatFrameData &frame_data);
 
 private:
   void transmit_frame_data(int thread_index, int frame_number,
@@ -90,7 +91,7 @@ private:
   void report_new_threads();
   void handle_server_control_message(const PStatServerControlMessage &message);
 
-  virtual void connection_reset(const PT(Connection) &connection, 
+  virtual void connection_reset(const PT(Connection) &connection,
                                 bool okflag);
 
   PStatClient *_client;

+ 5 - 3
panda/src/pstatclient/pStatProperties.cxx

@@ -152,9 +152,10 @@ static TimeCollectorProperties time_properties[] = {
   { 1, "Draw:Flush",                       { 0.9, 0.2, 0.7 } },
   { 1, "Draw:Sync",                        { 0.5, 0.7, 0.7 } },
   { 0, "Draw:Transform",                   { 0.0, 0.5, 0.0 } },
-  { 0, "Draw:Primitive",                   { 0.0, 0.0, 0.5 } },
-  { 0, "Draw:Set State",                   { 0.2, 0.6, 0.8 } },
+  { 1, "Draw:Primitive",                   { 0.0, 0.0, 0.5 } },
+  { 1, "Draw:Set State",                   { 0.2, 0.6, 0.8 } },
   { 1, "Draw:Wait occlusion",              { 1.0, 0.5, 0.0 } },
+  { 1, "Draw:Bind FBO",                    { 0.0, 0.8, 0.8 } },
   { 0, NULL }
 };
 
@@ -226,6 +227,7 @@ static LevelCollectorProperties level_properties[] = {
   { 1, "Dirty PipelineCyclers",            { 0.2, 0.2, 0.2 },  "", 5000 },
   { 1, "Collision Volumes",                { 1.0, 0.8, 0.5 },  "", 500 },
   { 1, "Collision Tests",                  { 0.5, 0.8, 1.0 },  "", 100 },
+  { 1, "Command latency",                  { 0.8, 0.2, 0.0 },  "ms", 10, 1.0 / 1000.0 },
   { 0, NULL }
 };
 
@@ -339,7 +341,7 @@ initialize_collector_def(const PStatClient *client, PStatCollectorDef *def) {
     ("pstats-factor-" + config_name, 1.0, "", ConfigVariable::F_dynamic);
   ConfigVariableDouble pstats_color
     ("pstats-color-" + config_name, 0.0, "", ConfigVariable::F_dynamic);
-  
+
   if (pstats_active.has_value()) {
     def->_is_active = pstats_active;
     def->_active_explicitly_set = true;

+ 14 - 1
panda/src/pstatclient/pStatThread.I

@@ -26,7 +26,7 @@ PStatThread() {
 
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatThread::Constructor
-//       Access: Private
+//       Access: Published
 //  Description: Normally, this constructor is called only from
 //               PStatClient.  Use one of the constructors below to
 //               create your own Thread.
@@ -109,6 +109,19 @@ new_frame() {
 #endif
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: PStatThread::add_frame
+//       Access: Published
+//  Description: This is a slightly lower-level version of new_frame
+//               that also specifies the data to send for this frame.
+////////////////////////////////////////////////////////////////////
+INLINE void PStatThread::
+add_frame(const PStatFrameData &frame_data) {
+#ifdef DO_PSTATS
+  _client->get_impl()->add_frame(_index, frame_data);  // pass along this thread's index
+#endif  // DO_PSTATS
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: PStatThread::get_index
 //       Access: Published

+ 3 - 1
panda/src/pstatclient/pStatThread.h

@@ -18,6 +18,7 @@
 #include "pandabase.h"
 
 #include "pStatClient.h"
+#include "pStatFrameData.h"
 
 class Thread;
 
@@ -30,15 +31,16 @@ class Thread;
 class EXPCL_PANDA_PSTATCLIENT PStatThread {
 private:
   INLINE PStatThread();
-  INLINE PStatThread(PStatClient *client, int index);
 
 PUBLISHED:
+  INLINE PStatThread(PStatClient *client, int index);
   INLINE PStatThread(Thread *thread, PStatClient *client = NULL);
 
   INLINE PStatThread(const PStatThread &copy);
   INLINE void operator = (const PStatThread &copy);
 
   INLINE void new_frame();
+  INLINE void add_frame(const PStatFrameData &frame_data);
 
   Thread *get_thread() const;
   INLINE int get_index() const;

+ 2 - 2
panda/src/pstatclient/pStatTimer.h

@@ -38,12 +38,12 @@ public:
   INLINE PStatTimer(PStatCollector &collector, Thread *current_thread);
   INLINE ~PStatTimer();
 
-private:
+protected:
   PStatCollector &_collector;
   PStatThread _thread;
 #else // DO_PSTATS
 
-  INLINE PStatTimer(PStatCollector &) { } 
+  INLINE PStatTimer(PStatCollector &) { }
   INLINE PStatTimer(PStatCollector &, Thread *) { }
   INLINE ~PStatTimer() { }
 

Some files were not shown because too many files changed in this diff