Browse Source

pipeline: Avoid dynamic allocation in PipelineCycler when using 1 stage

Eliminates a level of indirection at the cost of 8 bytes per cycler
rdb 3 years ago
parent
commit
f02a3156ca

+ 16 - 16
panda/src/pipeline/pipeline.cxx

@@ -157,7 +157,7 @@ cycle() {
           saved_cdatas.push_back(cycler->cycle_2());
 
           // cycle_2() won't leave a cycler dirty.  Add it to the clean list.
-          nassertd(!cycler->_dirty) break;
+          nassertd(!cycler->is_dirty()) break;
           cycler->insert_before(&_clean);
 #ifdef DEBUG_THREADS
           inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
@@ -193,11 +193,11 @@ cycle() {
 
           saved_cdatas.push_back(cycler->cycle_3());
 
-          if (cycler->_dirty) {
+          if (cycler->is_dirty()) {
             // The cycler is still dirty.  Add it back to the dirty list.
-            nassertd(cycler->_dirty == prev_seq) break;
+            nassertd(!cycler->is_dirty(prev_seq)) break;
             cycler->insert_before(&_dirty);
-            cycler->_dirty = next_seq;
+            cycler->mark_dirty(next_seq);
             ++_num_dirty_cyclers;
           } else {
             // The cycler is now clean.  Add it back to the clean list.
@@ -237,11 +237,11 @@ cycle() {
 
           saved_cdatas.push_back(cycler->cycle());
 
-          if (cycler->_dirty) {
+          if (cycler->is_dirty()) {
             // The cycler is still dirty.  Add it back to the dirty list.
-            nassertd(cycler->_dirty == prev_seq) break;
+            nassertd(!cycler->is_dirty(prev_seq)) break;
             cycler->insert_before(&_dirty);
-            cycler->_dirty = next_seq;
+            cycler->mark_dirty(next_seq);
             ++_num_dirty_cyclers;
           } else {
             // The cycler is now clean.  Add it back to the clean list.
@@ -346,7 +346,7 @@ add_cycler(PipelineCyclerTrueImpl *cycler) {
   // It's safe to add it to the list while cycling, since the _clean list is
   // not touched during the cycle loop.
   MutexHolder holder(_lock);
-  nassertv(!cycler->_dirty);
+  nassertv(!cycler->is_dirty());
 
   cycler->insert_before(&_clean);
   ++_num_cyclers;
@@ -370,7 +370,7 @@ add_cycler(PipelineCyclerTrueImpl *cycler, bool dirty) {
   // It's safe to add it to the list while cycling, since the _clean list is
   // not touched during the cycle loop.
   MutexHolder holder(_lock);
-  nassertv(!cycler->_dirty);
+  nassertv(!cycler->is_dirty());
 
   if (!dirty) {
     cycler->insert_before(&_clean);
@@ -378,7 +378,7 @@ add_cycler(PipelineCyclerTrueImpl *cycler, bool dirty) {
   else {
     nassertv(_num_stages != 1);
     cycler->insert_before(&_dirty);
-    cycler->_dirty = _next_cycle_seq;
+    cycler->mark_dirty(_next_cycle_seq);
     ++_num_dirty_cyclers;
 
 #ifdef DEBUG_THREADS
@@ -407,13 +407,13 @@ add_dirty_cycler(PipelineCyclerTrueImpl *cycler) {
   // It's safe to add it to the list while cycling, since it's not currently
   // on the dirty list.
   MutexHolder holder(_lock);
-  nassertv(!cycler->_dirty);
+  nassertv(!cycler->is_dirty());
   nassertv(_num_stages != 1);
 
   // Remove it from the "clean" list and add it to the "dirty" list.
   cycler->remove_from_list();
   cycler->insert_before(&_dirty);
-  cycler->_dirty = _next_cycle_seq;
+  cycler->mark_dirty(_next_cycle_seq);
   ++_num_dirty_cyclers;
 
 #ifdef DEBUG_THREADS
@@ -437,7 +437,7 @@ remove_cycler(PipelineCyclerTrueImpl *cycler) {
   // careful not to cause a race condition.  It's safe for us to remove it
   // during cycle only if it's 0 (clean) or _next_cycle_seq (scheduled for the
   // next cycle, so not owned by the current one).
-  while (cycler->_dirty != 0 && cycler->_dirty != _next_cycle_seq) {
+  while (cycler->is_dirty(_next_cycle_seq)) {
     if (_cycle_lock.try_lock()) {
       // OK, great, we got the lock, so it finished cycling already.
       nassertv(!_cycling);
@@ -445,7 +445,7 @@ remove_cycler(PipelineCyclerTrueImpl *cycler) {
       --_num_cyclers;
       cycler->remove_from_list();
 
-      cycler->_dirty = false;
+      cycler->clear_dirty();
       --_num_dirty_cyclers;
 
   #ifdef DEBUG_THREADS
@@ -474,8 +474,8 @@ remove_cycler(PipelineCyclerTrueImpl *cycler) {
   inc_cycler_type(_all_cycler_types, cycler->get_parent_type(), -1);
 #endif
 
-  if (cycler->_dirty) {
-    cycler->_dirty = 0;
+  if (cycler->is_dirty()) {
+    cycler->clear_dirty();
     --_num_dirty_cyclers;
 #ifdef DEBUG_THREADS
     inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);

+ 62 - 31
panda/src/pipeline/pipelineCyclerTrueImpl.I

@@ -54,7 +54,7 @@ read_unlocked(Thread *current_thread) const {
   TAU_PROFILE("const CycleData *PipelineCyclerTrueImpl::read_unlocked(Thread *)", " ", TAU_USER);
   int pipeline_stage = current_thread->get_pipeline_stage();
 #ifdef _DEBUG
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
 #endif
   return _data[pipeline_stage]._cdata;
 }
@@ -72,7 +72,7 @@ read(Thread *current_thread) const {
   TAU_PROFILE("const CycleData *PipelineCyclerTrueImpl::read(Thread *)", " ", TAU_USER);
   int pipeline_stage = current_thread->get_pipeline_stage();
 #ifdef _DEBUG
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
 #endif
   _lock.acquire(current_thread);
   return _data[pipeline_stage]._cdata;
@@ -87,7 +87,7 @@ increment_read(const CycleData *pointer) const {
   TAU_PROFILE("void PipelineCyclerTrueImpl::increment_read(const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
   int pipeline_stage = Thread::get_current_pipeline_stage();
-  nassertv(pipeline_stage >= 0 && pipeline_stage < _num_stages);
+  nassertv(pipeline_stage >= 0 && pipeline_stage < get_num_stages());
   nassertv(_data[pipeline_stage]._cdata == pointer);
 #endif
   _lock.elevate_lock();
@@ -101,7 +101,7 @@ release_read(const CycleData *pointer) const {
   TAU_PROFILE("void PipelineCyclerTrueImpl::release_read(const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
   int pipeline_stage = Thread::get_current_pipeline_stage();
-  nassertv(pipeline_stage >= 0 && pipeline_stage < _num_stages);
+  nassertv(pipeline_stage >= 0 && pipeline_stage < get_num_stages());
   nassertv(_data[pipeline_stage]._cdata == pointer);
 #endif
   _lock.release();
@@ -161,7 +161,7 @@ elevate_read(const CycleData *pointer, Thread *current_thread) {
   TAU_PROFILE("CycleData *PipelineCyclerTrueImpl::elevate_read(const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
   int pipeline_stage = current_thread->get_pipeline_stage();
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
   nassertr(_data[pipeline_stage]._cdata == pointer, nullptr);
 #endif
   CycleData *new_pointer = write(current_thread);
@@ -179,7 +179,7 @@ elevate_read_upstream(const CycleData *pointer, bool force_to_0, Thread *current
   TAU_PROFILE("CycleData *PipelineCyclerTrueImpl::elevate_read_upstream(const CycleData *, bool)", " ", TAU_USER);
 #ifdef _DEBUG
   int pipeline_stage = current_thread->get_pipeline_stage();
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
   nassertr(_data[pipeline_stage]._cdata == pointer, nullptr);
 #endif
   CycleData *new_pointer = write_upstream(force_to_0, current_thread);
@@ -196,7 +196,7 @@ increment_write(CycleData *pointer) const {
   TAU_PROFILE("void PipelineCyclerTrueImpl::increment_write(CycleData *)", " ", TAU_USER);
   int pipeline_stage = Thread::get_current_pipeline_stage();
 #ifdef _DEBUG
-  nassertv(pipeline_stage >= 0 && pipeline_stage < _num_stages);
+  nassertv(pipeline_stage >= 0 && pipeline_stage < get_num_stages());
   nassertv(_data[pipeline_stage]._cdata == pointer);
 #endif
   ++(_data[pipeline_stage]._writes_outstanding);
@@ -213,12 +213,44 @@ release_write(CycleData *pointer) {
   return release_write_stage(pipeline_stage, pointer);
 }
 
+/**
+ *
+ */
+ALWAYS_INLINE bool PipelineCyclerTrueImpl::
+is_dirty() const {
+  return _single_data._dirty != 0;
+}
+
+/**
+ *
+ */
+INLINE bool PipelineCyclerTrueImpl::
+is_dirty(unsigned int seq) const {
+  return _single_data._dirty != 0 && _single_data._dirty != seq;
+}
+
+/**
+ *
+ */
+INLINE void PipelineCyclerTrueImpl::
+mark_dirty(unsigned int seq) {
+  _single_data._dirty = seq;
+}
+
+/**
+ *
+ */
+INLINE void PipelineCyclerTrueImpl::
+clear_dirty() {
+  _single_data._dirty = 0;
+}
+
 /**
  * Returns the number of stages in the pipeline.
  */
 INLINE int PipelineCyclerTrueImpl::
-get_num_stages() {
-  return _num_stages;
+get_num_stages() const {
+  return (_data == &_single_data) ? 1 : _data[0]._num_stages;
 }
 
 /**
@@ -230,7 +262,7 @@ INLINE const CycleData *PipelineCyclerTrueImpl::
 read_stage_unlocked(int pipeline_stage) const {
   TAU_PROFILE("const CycleData *PipelineCyclerTrueImpl::read_stage_unlocked(int)", " ", TAU_USER);
 #ifdef _DEBUG
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
 #elif defined(__has_builtin) && __has_builtin(__builtin_assume)
   __builtin_assume(pipeline_stage >= 0);
 #endif
@@ -249,7 +281,7 @@ INLINE const CycleData *PipelineCyclerTrueImpl::
 read_stage(int pipeline_stage, Thread *current_thread) const {
   TAU_PROFILE("const CycleData *PipelineCyclerTrueImpl::read_stage(int, Thread *)", " ", TAU_USER);
 #ifdef _DEBUG
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
 #elif defined(__has_builtin) && __has_builtin(__builtin_assume)
   __builtin_assume(pipeline_stage >= 0);
 #endif
@@ -264,7 +296,7 @@ INLINE void PipelineCyclerTrueImpl::
 release_read_stage(int pipeline_stage, const CycleData *pointer) const {
   TAU_PROFILE("void PipelineCyclerTrueImpl::release_read_stage(int, const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
-  nassertv(pipeline_stage >= 0 && pipeline_stage < _num_stages);
+  nassertv(pipeline_stage >= 0 && pipeline_stage < get_num_stages());
   nassertv(_data[pipeline_stage]._cdata == pointer);
 #endif
   _lock.release();
@@ -280,7 +312,7 @@ elevate_read_stage(int pipeline_stage, const CycleData *pointer,
                    Thread *current_thread) {
   TAU_PROFILE("CycleData *PipelineCyclerTrueImpl::elevate_read_stage(int, const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
   nassertr(_data[pipeline_stage]._cdata == pointer, nullptr);
 #elif defined(__has_builtin) && __has_builtin(__builtin_assume)
   __builtin_assume(pipeline_stage >= 0);
@@ -300,7 +332,7 @@ elevate_read_stage_upstream(int pipeline_stage, const CycleData *pointer,
                             bool force_to_0, Thread *current_thread) {
   TAU_PROFILE("CycleData *PipelineCyclerTrueImpl::elevate_read_stage(int, const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
   nassertr(_data[pipeline_stage]._cdata == pointer, nullptr);
 #elif defined(__has_builtin) && __has_builtin(__builtin_assume)
   __builtin_assume(pipeline_stage >= 0);
@@ -318,7 +350,7 @@ INLINE void PipelineCyclerTrueImpl::
 release_write_stage(int pipeline_stage, CycleData *pointer) {
   TAU_PROFILE("void PipelineCyclerTrueImpl::release_write_stage(int, const CycleData *)", " ", TAU_USER);
 #ifdef _DEBUG
-  nassertv(pipeline_stage >= 0 && pipeline_stage < _num_stages);
+  nassertv(pipeline_stage >= 0 && pipeline_stage < get_num_stages());
   nassertv(_data[pipeline_stage]._cdata == pointer);
   nassertv(_data[pipeline_stage]._writes_outstanding > 0);
 #elif defined(__has_builtin) && __has_builtin(__builtin_assume)
@@ -347,7 +379,9 @@ INLINE CycleData *PipelineCyclerTrueImpl::
 cheat() const {
   TAU_PROFILE("CycleData *PipelineCyclerTrueImpl::cheat()", " ", TAU_USER);
   int pipeline_stage = Thread::get_current_pipeline_stage();
-  nassertr(pipeline_stage >= 0 && pipeline_stage < _num_stages, nullptr);
+#ifdef _DEBUG
+  nassertr(pipeline_stage >= 0 && pipeline_stage < get_num_stages(), nullptr);
+#endif
   return _data[pipeline_stage]._cdata;
 }
 
@@ -386,14 +420,17 @@ cycle_2() {
   last_val->node_unref_only();
 
   nassertr(_lock.debug_is_locked(), last_val);
-  nassertr(_dirty, last_val);
-  nassertr(_num_stages == 2, last_val);
+  nassertr(is_dirty(), last_val);
+
+#ifdef _DEBUG
+  nassertr(get_num_stages() == 2, last_val);
+#endif
 
   nassertr(_data[1]._writes_outstanding == 0, last_val);
   _data[1]._cdata = _data[0]._cdata;
 
   // No longer dirty.
-  _dirty = 0;
+  clear_dirty();
   return last_val;
 }
 
@@ -413,8 +450,11 @@ cycle_3() {
   last_val->node_unref_only();
 
   nassertr(_lock.debug_is_locked(), last_val);
-  nassertr(_dirty, last_val);
-  nassertr(_num_stages == 3, last_val);
+  nassertr(is_dirty(), last_val);
+
+#ifdef _DEBUG
+  nassertr(get_num_stages() == 3, last_val);
+#endif
 
   nassertr(_data[2]._writes_outstanding == 0, last_val);
   nassertr(_data[1]._writes_outstanding == 0, last_val);
@@ -423,7 +463,7 @@ cycle_3() {
 
   if (_data[2]._cdata == _data[1]._cdata) {
     // No longer dirty.
-    _dirty = 0;
+    clear_dirty();
   }
 
   return last_val;
@@ -439,15 +479,6 @@ CyclerMutex(PipelineCyclerTrueImpl *cycler) {
 #endif
 }
 
-/**
- *
- */
-INLINE PipelineCyclerTrueImpl::CycleDataNode::
-CycleDataNode() :
-  _writes_outstanding(0)
-{
-}
-
 /**
  *
  */

+ 101 - 52
panda/src/pipeline/pipelineCyclerTrueImpl.cxx

@@ -24,17 +24,26 @@
 PipelineCyclerTrueImpl::
 PipelineCyclerTrueImpl(CycleData *initial_data, Pipeline *pipeline) :
   _pipeline(pipeline),
-  _dirty(0),
   _lock(this)
 {
+  clear_dirty();
+
   if (_pipeline == nullptr) {
     _pipeline = Pipeline::get_render_pipeline();
   }
 
-  _num_stages = _pipeline->get_num_stages();
-  _data = new CycleDataNode[_num_stages];
-  for (int i = 0; i < _num_stages; ++i) {
-    _data[i]._cdata = initial_data;
+  int num_stages = _pipeline->get_num_stages();
+  if (num_stages == 1) {
+    _single_data._cdata = initial_data;
+    _data = &_single_data;
+  }
+  else {
+    _data = new CycleDataNode[num_stages];
+    _data[0]._num_stages = num_stages;
+
+    for (int i = 0; i < num_stages; ++i) {
+      _data[i]._cdata = initial_data;
+    }
   }
 
   _pipeline->add_cycler(this);
@@ -46,27 +55,31 @@ PipelineCyclerTrueImpl(CycleData *initial_data, Pipeline *pipeline) :
 PipelineCyclerTrueImpl::
 PipelineCyclerTrueImpl(const PipelineCyclerTrueImpl &copy) :
   _pipeline(copy._pipeline),
-  _dirty(0),
   _lock(this)
 {
+  clear_dirty();
+
   ReMutexHolder holder(_lock);
   ReMutexHolder holder2(copy._lock);
 
-  _num_stages = _pipeline->get_num_stages();
-  nassertv(_num_stages == copy._num_stages);
-  _data = new CycleDataNode[_num_stages];
+  int num_stages = _pipeline->get_num_stages();
+  nassertv(num_stages == copy.get_num_stages());
 
-  if (_num_stages == 1) {
-    _data[0]._cdata = copy._data[0]._cdata->make_copy();
+  if (num_stages == 1) {
+    _single_data._cdata = copy._single_data._cdata->make_copy();
+    _data = &_single_data;
   }
   else {
+    _data = new CycleDataNode[num_stages];
+    _data[0]._num_stages = num_stages;
+
     // It's no longer critically important that we preserve pointerwise
     // equivalence between different stages in the copy, but it doesn't cost
     // much and might be a little more efficient, so we do it anyway.
     typedef pmap<CycleData *, PT(CycleData) > Pointers;
     Pointers pointers;
 
-    for (int i = 0; i < _num_stages; ++i) {
+    for (int i = 0; i < num_stages; ++i) {
       PT(CycleData) &new_pt = pointers[copy._data[i]._cdata];
       if (new_pt == nullptr) {
         new_pt = copy._data[i]._cdata->make_copy();
@@ -75,7 +88,7 @@ PipelineCyclerTrueImpl(const PipelineCyclerTrueImpl &copy) :
     }
   }
 
-  _pipeline->add_cycler(this, copy._dirty != 0);
+  _pipeline->add_cycler(this, copy.is_dirty());
 }
 
 /**
@@ -87,19 +100,27 @@ operator = (const PipelineCyclerTrueImpl &copy) {
   ReMutexHolder holder2(copy._lock);
   nassertv(get_parent_type() == copy.get_parent_type());
 
-  typedef pmap<CycleData *, PT(CycleData) > Pointers;
-  Pointers pointers;
+  if (_data == &_single_data) {
+    _single_data._cdata = copy._single_data._cdata->make_copy();
+    nassertv(!copy.is_dirty());
+  }
+  else {
+    int num_stages = _data[0]._num_stages;
+
+    typedef pmap<CycleData *, PT(CycleData) > Pointers;
+    Pointers pointers;
 
-  for (int i = 0; i < _num_stages; ++i) {
-    PT(CycleData) &new_pt = pointers[copy._data[i]._cdata];
-    if (new_pt == nullptr) {
-      new_pt = copy._data[i]._cdata->make_copy();
+    for (int i = 0; i < num_stages; ++i) {
+      PT(CycleData) &new_pt = pointers[copy._data[i]._cdata];
+      if (new_pt == nullptr) {
+        new_pt = copy._data[i]._cdata->make_copy();
+      }
+      _data[i]._cdata = new_pt.p();
     }
-    _data[i]._cdata = new_pt.p();
-  }
 
-  if (copy._dirty && !_dirty) {
-    _pipeline->add_dirty_cycler(this);
+    if (copy.is_dirty() && !is_dirty()) {
+      _pipeline->add_dirty_cycler(this);
+    }
   }
 }
 
@@ -112,9 +133,10 @@ PipelineCyclerTrueImpl::
 
   _pipeline->remove_cycler(this);
 
-  delete[] _data;
+  if (_data != &_single_data) {
+    delete[] _data;
+  }
   _data = nullptr;
-  _num_stages = 0;
 }
 
 /**
@@ -127,12 +149,12 @@ CycleData *PipelineCyclerTrueImpl::
 write_stage(int pipeline_stage, Thread *current_thread) {
   _lock.acquire(current_thread);
 
-#ifndef NDEBUG
-  nassertd(pipeline_stage >= 0 && pipeline_stage < _num_stages) {
+#ifdef _DEBUG
+  nassertd(pipeline_stage >= 0 && pipeline_stage < get_num_stages()) {
     _lock.release();
     return nullptr;
   }
-#endif  // NDEBUG
+#endif
 
   CycleData *old_data = _data[pipeline_stage]._cdata;
 
@@ -157,7 +179,7 @@ write_stage(int pipeline_stage, Thread *current_thread) {
 
       // Now we have differences between some of the data pointers, so we're
       // "dirty".  Mark it so.
-      if (!_dirty && _num_stages != 1) {
+      if (!is_dirty() && _data != &_single_data) {
         _pipeline->add_dirty_cycler(this);
       }
     }
@@ -175,12 +197,12 @@ CycleData *PipelineCyclerTrueImpl::
 write_stage_upstream(int pipeline_stage, bool force_to_0, Thread *current_thread) {
   _lock.acquire(current_thread);
 
-#ifndef NDEBUG
-  nassertd(pipeline_stage >= 0 && pipeline_stage < _num_stages) {
+#ifdef _DEBUG
+  nassertd(pipeline_stage >= 0 && pipeline_stage < get_num_stages()) {
     _lock.release();
     return nullptr;
   }
-#endif  // NDEBUG
+#endif
 
   CycleData *old_data = _data[pipeline_stage]._cdata;
 
@@ -218,15 +240,15 @@ write_stage_upstream(int pipeline_stage, bool force_to_0, Thread *current_thread
 
       _data[pipeline_stage]._cdata = new_data;
 
-      if (k >= 0 || pipeline_stage + 1 < _num_stages) {
+      if (k >= 0 || pipeline_stage + 1 < get_num_stages()) {
         // Now we have differences between some of the data pointers, which
         // makes us "dirty".
-        if (!_dirty) {
+        if (!is_dirty()) {
           _pipeline->add_dirty_cycler(this);
         }
       }
-
-    } else if (k >= 0 && force_to_0) {
+    }
+    else if (k >= 0 && force_to_0) {
       // There are no external pointers, so no need to copy-on-write, but the
       // current pointer doesn't go all the way back.  Make it do so.
       while (k >= 0) {
@@ -259,20 +281,21 @@ PT(CycleData) PipelineCyclerTrueImpl::
 cycle() {
   // This trick moves an NPT into a PT without unnecessarily incrementing and
   // subsequently decrementing the regular reference count.
+  int num_stages = get_num_stages();
   PT(CycleData) last_val;
-  last_val.swap(_data[_num_stages - 1]._cdata);
+  last_val.swap(_data[num_stages - 1]._cdata);
   last_val->node_unref_only();
 
   nassertr(_lock.debug_is_locked(), last_val);
-  nassertr(_dirty, last_val);
+  nassertr(is_dirty(), last_val);
 
   int i;
-  for (i = _num_stages - 1; i > 0; --i) {
+  for (i = num_stages - 1; i > 0; --i) {
     nassertr(_data[i]._writes_outstanding == 0, last_val);
     _data[i]._cdata = _data[i - 1]._cdata;
   }
 
-  for (i = 1; i < _num_stages; ++i) {
+  for (i = 1; i < num_stages; ++i) {
     if (_data[i]._cdata != _data[i - 1]._cdata) {
       // Still dirty.
       return last_val;
@@ -280,7 +303,7 @@ cycle() {
   }
 
   // No longer dirty.
-  _dirty = 0;
+  clear_dirty();
   return last_val;
 }
 
@@ -292,31 +315,57 @@ void PipelineCyclerTrueImpl::
 set_num_stages(int num_stages) {
   nassertv(_lock.debug_is_locked());
 
-  if (num_stages <= _num_stages) {
+  if (_data == &_single_data) {
+    // We've got only 1 stage.  Allocate an array.
+    if (num_stages > 1) {
+      nassertv(_single_data._writes_outstanding == 0);
+
+      CycleDataNode *new_data = new CycleDataNode[num_stages];
+      new_data[0]._num_stages = num_stages;
+      new_data[0]._cdata = std::move(_single_data._cdata);
+      _single_data._cdata.clear();
+
+      for (int i = 1; i < num_stages; ++i) {
+        new_data[i]._cdata = new_data[0]._cdata;
+      }
+      _data = new_data;
+    }
+  }
+  else if (num_stages == 1) {
+    // Deallocate the array, since we're back to one stage.
+    if (_data != &_single_data) {
+      nassertv(_data[0]._writes_outstanding == 0);
+      _single_data._cdata = std::move(_data[0]._cdata);
+      delete[] _data;
+      _data = &_single_data;
+    }
+  }
+  else if (num_stages <= _data[0]._num_stages) {
     // Don't bother to reallocate the array smaller; we just won't use the
     // rest of the array.
-    for (int i = num_stages; i < _num_stages; ++i) {
+    int old_stages = _data[0]._num_stages;
+    for (int i = num_stages; i < old_stages; ++i) {
       nassertv(_data[i]._writes_outstanding == 0);
       _data[i]._cdata.clear();
     }
 
-    _num_stages = num_stages;
-
-
-  } else {
+    _data[0]._num_stages = num_stages;
+  }
+  else {
     // To increase the array, we must reallocate it larger.
+    int old_stages = _data[0]._num_stages;
     CycleDataNode *new_data = new CycleDataNode[num_stages];
+    new_data[0]._num_stages = num_stages;
+
     int i;
-    for (i = 0; i < _num_stages; ++i) {
+    for (i = 0; i < old_stages; ++i) {
       nassertv(_data[i]._writes_outstanding == 0);
       new_data[i]._cdata = _data[i]._cdata;
     }
-    for (i = _num_stages; i < num_stages; ++i) {
-      new_data[i]._cdata = _data[_num_stages - 1]._cdata;
+    for (i = old_stages; i < num_stages; ++i) {
+      new_data[i]._cdata = _data[old_stages - 1]._cdata;
     }
     delete[] _data;
-
-    _num_stages = num_stages;
     _data = new_data;
   }
 }

+ 24 - 9
panda/src/pipeline/pipelineCyclerTrueImpl.h

@@ -66,7 +66,12 @@ public:
   INLINE void increment_write(CycleData *pointer) const;
   INLINE void release_write(CycleData *pointer);
 
-  INLINE int get_num_stages();
+  ALWAYS_INLINE bool is_dirty() const;
+  INLINE bool is_dirty(unsigned int seq) const;
+  INLINE void mark_dirty(unsigned int seq);
+  INLINE void clear_dirty();
+
+  INLINE int get_num_stages() const;
   INLINE const CycleData *read_stage_unlocked(int pipeline_stage) const;
   INLINE const CycleData *read_stage(int pipeline_stage, Thread *current_thread) const;
   INLINE void release_read_stage(int pipeline_stage, const CycleData *pointer) const;
@@ -110,22 +115,32 @@ private:
 
   // An array of PT(CycleData) objects representing the different copies of
   // the cycled data, one for each stage.
-  class CycleDataNode : public MemoryBase {
+  class CycleDataNode {
   public:
-    INLINE CycleDataNode();
+    CycleDataNode() = default;
     INLINE CycleDataNode(const CycleDataNode &copy);
     INLINE ~CycleDataNode();
     INLINE void operator = (const CycleDataNode &copy);
 
     NPT(CycleData) _cdata;
-    int _writes_outstanding;
+    int _writes_outstanding = 0;
+
+    // Take advantage of the extra padding space.
+    // If used as part of _single_cdata, stores the dirty flag.
+    // If used as part of _data[0], stores the number of stages.
+    // Otherwise, this field is unused.
+    union {
+      // This is 0 if it's clean, or set to Pipeline::_next_cycle_seq if it's
+      // scheduled to be cycled during the next cycle() call.
+      unsigned int _dirty;
+
+      int _num_stages;
+    };
   };
+  // Store a single copy on the class, to optimize the common case of only one
+  // stage.
+  CycleDataNode _single_data;
   CycleDataNode *_data;
-  int _num_stages;
-
-  // This is 0 if it's clean, or set to Pipeline::_next_cycle_seq if it's
-  // scheduled to be cycled during the next cycle() call.
-  unsigned int _dirty;
 
   CyclerMutex _lock;