
pipeline: fix multithreaded render pipeline deadlock

This deadlock happens when another thread holds a cycler lock and then attempts to call Pipeline::remove_cycler() while Pipeline::cycle() is running on the main thread.
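
To illustrate, the lock-order inversion can be boiled down to the following standalone program. This is not Panda3D code: std::timed_mutex stands in for the pipeline and cycler locks, and fake_cycle()/fake_remove_cycler() are made-up names; try_lock_for() is used only so the demo reports the inversion instead of hanging the way the real deadlock does.

    // One thread takes the pipeline lock and then a cycler lock (as the old
    // cycle() did); the other takes a cycler lock and then the pipeline lock
    // (as remove_cycler() does).  Run together, neither can make progress.
    #include <chrono>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    std::timed_mutex pipeline_lock;  // stands in for Pipeline::_lock
    std::timed_mutex cycler_lock;    // stands in for a cycler's _lock

    using namespace std::chrono_literals;

    void fake_cycle() {  // cycling thread: pipeline lock, then cycler lock
      std::lock_guard<std::timed_mutex> hold(pipeline_lock);
      std::this_thread::sleep_for(100ms);  // let the other thread take its lock
      if (!cycler_lock.try_lock_for(1s)) {
        std::puts("cycle(): stuck waiting for the cycler lock");
        return;
      }
      cycler_lock.unlock();
    }

    void fake_remove_cycler() {  // writer thread: cycler lock, then pipeline lock
      std::lock_guard<std::timed_mutex> hold(cycler_lock);
      std::this_thread::sleep_for(100ms);
      if (!pipeline_lock.try_lock_for(1s)) {
        std::puts("remove_cycler(): stuck waiting for the pipeline lock");
        return;
      }
      pipeline_lock.unlock();
    }

    int main() {
      std::thread a(fake_cycle), b(fake_remove_cycler);
      a.join();
      b.join();
      return 0;
    }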

The fix for this problem is threefold (a simplified sketch of the resulting scheme follows the list):
* Allow remove_cycler() (and add_cycler()) to be called while a cycle is in progress, by introducing a second lock
* Make cycle() avoid blocking when a dirty cycler can't be locked, trying the other cyclers first instead
* Add a way for remove_cycler() to check whether a cycler is currently in use by the main thread, and to yield if so
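
To make the interplay of the three changes easier to follow, here is a minimal self-contained sketch of the scheme. It is not the actual Panda3D code: std::mutex, std::recursive_mutex and std::list stand in for Panda3D's Mutex, ReMutex, MutexHolder and the intrusive PipelineCyclerLinks list, Thread::force_yield() becomes std::this_thread::yield(), and MiniPipeline/Cycler are made-up names. The point is the shape of the logic: cycle() skips over cyclers it cannot lock right away, and remove_cycler() backs off and yields while the cycler it wants to unlink may still be owned by the cycle in progress.

    #include <list>
    #include <mutex>
    #include <thread>

    struct Cycler {
      std::mutex lock;         // stands in for PipelineCyclerTrueImpl::_lock
      unsigned int dirty = 0;  // 0 = clean, otherwise a cycle sequence number
    };

    class MiniPipeline {
    public:
      // Caller is assumed to hold cycler->lock, as in Pipeline::add_dirty_cycler().
      void add_dirty_cycler(Cycler *cycler) {
        std::lock_guard<std::mutex> holder(_lock);
        cycler->dirty = _next_cycle_seq;
        _dirty.push_back(cycler);
      }

      void cycle() {
        std::lock_guard<std::recursive_mutex> cycle_holder(_cycle_lock);
        std::list<Cycler *> prev_dirty;
        {
          // Hold the list lock only long enough to grab the dirty list and
          // bump the sequence number; holding it for the whole cycle is what
          // used to make the deadlock possible.
          std::lock_guard<std::mutex> holder(_lock);
          prev_dirty.swap(_dirty);
          if (++_next_cycle_seq == 0) {
            ++_next_cycle_seq;  // 0 is reserved to mean "clean"
          }
        }
        while (!prev_dirty.empty()) {
          for (auto it = prev_dirty.begin(); it != prev_dirty.end();) {
            Cycler *cycler = *it;
            if (!cycler->lock.try_lock()) {
              if (prev_dirty.size() > 1) {
                ++it;  // don't block; try the other cyclers first
                continue;
              }
              cycler->lock.lock();  // last one left, so we might as well wait
            }
            // ... the CycleData pointers would be cycled here ...
            cycler->dirty = 0;  // this simplified version always ends up clean
            cycler->lock.unlock();
            it = prev_dirty.erase(it);
          }
        }
      }

      // Caller is assumed to hold cycler->lock, as in Pipeline::remove_cycler().
      void remove_cycler(Cycler *cycler) {
        std::unique_lock<std::mutex> holder(_lock);
        // Unlinking a dirty cycler is only safe if it is clean (0) or tagged
        // for the *next* cycle; otherwise the running cycle() may still own it.
        while (cycler->dirty != 0 && cycler->dirty != _next_cycle_seq) {
          if (_cycle_lock.try_lock()) {
            _cycle_lock.unlock();  // no cycle in progress after all
            break;
          }
          // cycle() is probably using this cycler right now: drop both locks,
          // yield, and check again once it has finished with it.
          holder.unlock();
          cycler->lock.unlock();
          std::this_thread::yield();
          cycler->lock.lock();
          holder.lock();
        }
        _dirty.remove(cycler);
        cycler->dirty = 0;
      }

    private:
      std::recursive_mutex _cycle_lock;  // held for the whole of cycle()
      std::mutex _lock;                  // protects _dirty and _next_cycle_seq
      std::list<Cycler *> _dirty;
      unsigned int _next_cycle_seq = 1;
    };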

More information is at https://github.com/panda3d/panda3d/issues/217

Fixes #217 (also see LP 1186880)
rdb, 8 years ago (commit e04ddbef9a)

panda/src/pipeline/pipeline.I (+2 -2)

@@ -45,7 +45,7 @@ get_num_stages() const {
  */
 INLINE int Pipeline::
 get_num_cyclers() const {
-  ReMutexHolder holder(_lock);
+  MutexHolder holder(_lock);
   return _num_cyclers;
 }
 #endif  // THREADED_PIPELINE
@@ -58,7 +58,7 @@ get_num_cyclers() const {
  */
 INLINE int Pipeline::
 get_num_dirty_cyclers() const {
-  ReMutexHolder holder(_lock);
+  MutexHolder holder(_lock);
   return _num_dirty_cyclers;
 }
 #endif  // THREADED_PIPELINE

panda/src/pipeline/pipeline.cxx (+189 -67)

@@ -27,7 +27,9 @@ Pipeline(const string &name, int num_stages) :
   Namable(name),
 #ifdef THREADED_PIPELINE
   _num_stages(num_stages),
-  _lock("Pipeline")
+  _cycle_lock("Pipeline cycle"),
+  _lock("Pipeline"),
+  _next_cycle_seq(1)
 #else
   _num_stages(1)
 #endif
@@ -91,87 +93,166 @@ cycle() {
   }
 
   pvector< PT(CycleData) > saved_cdatas;
-  saved_cdatas.reserve(_num_dirty_cyclers);
   {
-    ReMutexHolder holder(_lock);
-    if (_num_stages == 1) {
-      // No need to cycle if there's only one stage.
-      nassertv(_dirty._next == &_dirty);
-      return;
-    }
+    ReMutexHolder cycle_holder(_cycle_lock);
+    int prev_seq, next_seq;
+    PipelineCyclerLinks prev_dirty;
+    {
+      // We can't hold the lock protecting the linked lists during the cycling
+      // itself, since it could cause a deadlock.
+      MutexHolder holder(_lock);
+      if (_num_stages == 1) {
+        // No need to cycle if there's only one stage.
+        nassertv(_dirty._next == &_dirty);
+        return;
+      }
 
-    nassertv(!_cycling);
-    _cycling = true;
+      nassertv(!_cycling);
+      _cycling = true;
 
-    // Move the dirty list to prev_dirty, for processing.
-    PipelineCyclerLinks prev_dirty;
-    prev_dirty.make_head();
-    prev_dirty.take_list(_dirty);
-    _num_dirty_cyclers = 0;
+      // Increment the cycle sequence number, which is used by this method to
+      // communicate with remove_cycler() about the status of dirty cyclers.
+      prev_seq = next_seq = _next_cycle_seq;
+      if (++next_seq == 0) {
+        // Skip 0, which is a reserved number used to indicate a clean cycler.
+        ++next_seq;
+      }
+      _next_cycle_seq = next_seq;
+
+      // Move the dirty list to prev_dirty, for processing.
+      prev_dirty.make_head();
+      prev_dirty.take_list(_dirty);
+
+      saved_cdatas.reserve(_num_dirty_cyclers);
+      _num_dirty_cyclers = 0;
+    }
 
+    // This is duplicated for different number of stages, as an optimization.
     switch (_num_stages) {
     case 2:
       while (prev_dirty._next != &prev_dirty) {
-        PipelineCyclerTrueImpl *cycler = (PipelineCyclerTrueImpl *)prev_dirty._next;
-        cycler->remove_from_list();
-        ReMutexHolder holder2(cycler->_lock);
-
-        // We save the result of cycle(), so that we can defer the side-
-        // effects that might occur when CycleDatas destruct, at least until
-        // the end of this loop.
-        saved_cdatas.push_back(cycler->cycle_2());
-
-        if (cycler->_dirty) {
-          // The cycler is still dirty after cycling.  Keep it on the dirty
-          // list for next time.
-          cycler->insert_before(&_dirty);
-          ++_num_dirty_cyclers;
-        } else {
-          // The cycler is now clean.  Add it back to the clean list.
+        PipelineCyclerLinks *link = prev_dirty._next;
+        while (link != &prev_dirty) {
+          PipelineCyclerTrueImpl *cycler = (PipelineCyclerTrueImpl *)link;
+
+          if (!cycler->_lock.try_acquire()) {
+            // No big deal, just move on to the next one for now, and we'll
+            // come back around to it.  It's important not to block here in
+            // order to prevent one cycler from deadlocking another.
+            if (link->_prev != &prev_dirty || link->_next != &prev_dirty) {
+              link = cycler->_next;
+              continue;
+            } else {
+              // Well, we are the last cycler left, so we might as well wait.
+              // This is necessary to trigger the deadlock detection code.
+              cycler->_lock.acquire();
+            }
+          }
+
+          MutexHolder holder(_lock);
+          cycler->remove_from_list();
+
+          // We save the result of cycle(), so that we can defer the side-
+          // effects that might occur when CycleDatas destruct, at least until
+          // the end of this loop.
+          saved_cdatas.push_back(cycler->cycle_2());
+
+          // cycle_2() won't leave a cycler dirty.  Add it to the clean list.
+          nassertd(!cycler->_dirty) break;
           cycler->insert_before(&_clean);
 #ifdef DEBUG_THREADS
           inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
 #endif
+          cycler->_lock.release();
+          break;
         }
       }
       break;
 
     case 3:
       while (prev_dirty._next != &prev_dirty) {
-        PipelineCyclerTrueImpl *cycler = (PipelineCyclerTrueImpl *)prev_dirty._next;
-        cycler->remove_from_list();
-        ReMutexHolder holder2(cycler->_lock);
-
-        saved_cdatas.push_back(cycler->cycle_3());
-
-        if (cycler->_dirty) {
-          cycler->insert_before(&_dirty);
-          ++_num_dirty_cyclers;
-        } else {
-          cycler->insert_before(&_clean);
+        PipelineCyclerLinks *link = prev_dirty._next;
+        while (link != &prev_dirty) {
+          PipelineCyclerTrueImpl *cycler = (PipelineCyclerTrueImpl *)link;
+
+          if (!cycler->_lock.try_acquire()) {
+            // No big deal, just move on to the next one for now, and we'll
+            // come back around to it.  It's important not to block here in
+            // order to prevent one cycler from deadlocking another.
+            if (link->_prev != &prev_dirty || link->_next != &prev_dirty) {
+              link = cycler->_next;
+              continue;
+            } else {
+              // Well, we are the last cycler left, so we might as well wait.
+              // This is necessary to trigger the deadlock detection code.
+              cycler->_lock.acquire();
+            }
+          }
+
+          MutexHolder holder(_lock);
+          cycler->remove_from_list();
+
+          saved_cdatas.push_back(cycler->cycle_3());
+
+          if (cycler->_dirty) {
+            // The cycler is still dirty.  Add it back to the dirty list.
+            nassertd(cycler->_dirty == prev_seq) break;
+            cycler->insert_before(&_dirty);
+            cycler->_dirty = next_seq;
+            ++_num_dirty_cyclers;
+          } else {
+            // The cycler is now clean.  Add it back to the clean list.
+            cycler->insert_before(&_clean);
 #ifdef DEBUG_THREADS
-          inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
+            inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
 #endif
+          }
+          cycler->_lock.release();
+          break;
         }
       }
       break;
 
     default:
       while (prev_dirty._next != &prev_dirty) {
-        PipelineCyclerTrueImpl *cycler = (PipelineCyclerTrueImpl *)prev_dirty._next;
-        cycler->remove_from_list();
-        ReMutexHolder holder2(cycler->_lock);
-
-        saved_cdatas.push_back(cycler->cycle());
-
-        if (cycler->_dirty) {
-          cycler->insert_before(&_dirty);
-          ++_num_dirty_cyclers;
-        } else {
-          cycler->insert_before(&_clean);
+        PipelineCyclerLinks *link = prev_dirty._next;
+        while (link != &prev_dirty) {
+          PipelineCyclerTrueImpl *cycler = (PipelineCyclerTrueImpl *)link;
+
+          if (!cycler->_lock.try_acquire()) {
+            // No big deal, just move on to the next one for now, and we'll
+            // come back around to it.  It's important not to block here in
+            // order to prevent one cycler from deadlocking another.
+            if (link->_prev != &prev_dirty || link->_next != &prev_dirty) {
+              link = cycler->_next;
+              continue;
+            } else {
+              // Well, we are the last cycler left, so we might as well wait.
+              // This is necessary to trigger the deadlock detection code.
+              cycler->_lock.acquire();
+            }
+          }
+
+          MutexHolder holder(_lock);
+          cycler->remove_from_list();
+
+          saved_cdatas.push_back(cycler->cycle());
+
+          if (cycler->_dirty) {
+            // The cycler is still dirty.  Add it back to the dirty list.
+            nassertd(cycler->_dirty == prev_seq) break;
+            cycler->insert_before(&_dirty);
+            cycler->_dirty = next_seq;
+            ++_num_dirty_cyclers;
+          } else {
+            // The cycler is now clean.  Add it back to the clean list.
+            cycler->insert_before(&_clean);
 #ifdef DEBUG_THREADS
-          inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
+            inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
 #endif
+          }
+          cycler->_lock.release();
+          break;
         }
       }
       break;
@@ -203,7 +284,9 @@ void Pipeline::
 set_num_stages(int num_stages) {
   nassertv(num_stages >= 1);
 #ifdef THREADED_PIPELINE
-  ReMutexHolder holder(_lock);
+  // Make sure it's not currently cycling.
+  ReMutexHolder cycle_holder(_cycle_lock);
+  MutexHolder holder(_lock);
   if (num_stages != _num_stages) {
 
     // We need to lock every PipelineCycler object attached to this pipeline
@@ -261,9 +344,10 @@ set_num_stages(int num_stages) {
  */
 void Pipeline::
 add_cycler(PipelineCyclerTrueImpl *cycler) {
-  ReMutexHolder holder(_lock);
+  // It's safe to add it to the list while cycling, since the _clean list is
+  // not touched during the cycle loop.
+  MutexHolder holder(_lock);
   nassertv(!cycler->_dirty);
-  nassertv(!_cycling);
 
   cycler->insert_before(&_clean);
   ++_num_cyclers;
@@ -285,15 +369,16 @@ void Pipeline::
 add_dirty_cycler(PipelineCyclerTrueImpl *cycler) {
   nassertv(cycler->_lock.debug_is_locked());
 
-  ReMutexHolder holder(_lock);
-  nassertv(_num_stages != 1);
-  nassertv(!_cycling);
+  // It's safe to add it to the list while cycling, since it's not currently
+  // on the dirty list.
+  MutexHolder holder(_lock);
   nassertv(!cycler->_dirty);
+  nassertv(_num_stages != 1);
 
   // Remove it from the "clean" list and add it to the "dirty" list.
   cycler->remove_from_list();
   cycler->insert_before(&_dirty);
-  cycler->_dirty = true;
+  cycler->_dirty = _next_cycle_seq;
   ++_num_dirty_cyclers;
 
 #ifdef DEBUG_THREADS
@@ -311,9 +396,42 @@ void Pipeline::
 remove_cycler(PipelineCyclerTrueImpl *cycler) {
   nassertv(cycler->_lock.debug_is_locked());
 
-  ReMutexHolder holder(_lock);
-  nassertv(!_cycling);
+  MutexHolder holder(_lock);
+
+  // If it's dirty, it may currently be processed by cycle(), so we need to be
+  // careful not to cause a race condition.  It's safe for us to remove it
+  // during cycle only if it's 0 (clean) or _next_cycle_seq (scheduled for the
+  // next cycle, so not owned by the current one).
+  while (cycler->_dirty != 0 && cycler->_dirty != _next_cycle_seq) {
+    if (_cycle_lock.try_acquire()) {
+      // OK, great, we got the lock, so it finished cycling already.
+      nassertv(!_cycling);
+
+      --_num_cyclers;
+      cycler->remove_from_list();
+
+      cycler->_dirty = false;
+      --_num_dirty_cyclers;
+
+  #ifdef DEBUG_THREADS
+      inc_cycler_type(_all_cycler_types, cycler->get_parent_type(), -1);
+      inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
+  #endif
+
+      _cycle_lock.release();
+      return;
+    } else {
+      // It's possibly currently being cycled.  We will wait for the cycler
+      // to be done with it, so that we can safely remove it.
+      _lock.release();
+      cycler->_lock.release();
+      Thread::force_yield();
+      cycler->_lock.acquire();
+      _lock.acquire();
+    }
+  }
 
+  // It's not being owned by a cycle operation, so it's fair game.
   --_num_cyclers;
   cycler->remove_from_list();
 
@@ -322,7 +440,7 @@ remove_cycler(PipelineCyclerTrueImpl *cycler) {
 #endif
 
   if (cycler->_dirty) {
-    cycler->_dirty = false;
+    cycler->_dirty = 0;
     --_num_dirty_cyclers;
 #ifdef DEBUG_THREADS
     inc_cycler_type(_dirty_cycler_types, cycler->get_parent_type(), -1);
@@ -341,7 +459,9 @@ remove_cycler(PipelineCyclerTrueImpl *cycler) {
  */
 void Pipeline::
 iterate_all_cycler_types(CallbackFunc *func, void *data) const {
-  ReMutexHolder holder(_lock);
+  // Make sure it's not currently cycling.
+  ReMutexHolder cycle_holder(_cycle_lock);
+  MutexHolder holder(_lock);
   TypeCount::const_iterator ci;
   for (ci = _all_cycler_types.begin(); ci != _all_cycler_types.end(); ++ci) {
     func((*ci).first, (*ci).second, data);
@@ -356,7 +476,9 @@ iterate_all_cycler_types(CallbackFunc *func, void *data) const {
  */
 void Pipeline::
 iterate_dirty_cycler_types(CallbackFunc *func, void *data) const {
-  ReMutexHolder holder(_lock);
+  // Make sure it's not currently cycling.
+  ReMutexHolder cycle_holder(_cycle_lock);
+  MutexHolder holder(_lock);
   TypeCount::const_iterator ci;
   for (ci = _dirty_cycler_types.begin(); ci != _dirty_cycler_types.end(); ++ci) {
     func((*ci).first, (*ci).second, data);

panda/src/pipeline/pipeline.h (+10 -1)

@@ -85,7 +85,16 @@ private:
   // This is true only during cycle().
   bool _cycling;
 
-  ReMutex _lock;
+  // This increases with every cycle run.  If the _dirty field of a cycler is
+  // set to the same value as this, it indicates that it is scheduled for the
+  // next cycle.
+  unsigned int _next_cycle_seq;
+
+  // This lock is always held during cycle().
+  ReMutex _cycle_lock;
+
+  // This lock protects the data stored on this Pipeline.
+  Mutex _lock;
 #endif  // THREADED_PIPELINE
 };
 

panda/src/pipeline/pipelineCyclerTrueImpl.I (+2 -2)

@@ -393,7 +393,7 @@ cycle_2() {
   _data[1]._cdata = _data[0]._cdata;
 
   // No longer dirty.
-  _dirty = false;
+  _dirty = 0;
   return last_val;
 }
 
@@ -423,7 +423,7 @@ cycle_3() {
 
   if (_data[2]._cdata == _data[1]._cdata) {
     // No longer dirty.
-    _dirty = false;
+    _dirty = 0;
   }
 
   return last_val;

panda/src/pipeline/pipelineCyclerTrueImpl.cxx (+3 -3)

@@ -24,7 +24,7 @@
 PipelineCyclerTrueImpl::
 PipelineCyclerTrueImpl(CycleData *initial_data, Pipeline *pipeline) :
   _pipeline(pipeline),
-  _dirty(false),
+  _dirty(0),
   _lock(this)
 {
   if (_pipeline == (Pipeline *)NULL) {
@@ -46,7 +46,7 @@ PipelineCyclerTrueImpl(CycleData *initial_data, Pipeline *pipeline) :
 PipelineCyclerTrueImpl::
 PipelineCyclerTrueImpl(const PipelineCyclerTrueImpl &copy) :
   _pipeline(copy._pipeline),
-  _dirty(false),
+  _dirty(0),
   _lock(this)
 {
   ReMutexHolder holder(_lock);
@@ -278,7 +278,7 @@ cycle() {
   }
 
   // No longer dirty.
-  _dirty = false;
+  _dirty = 0;
   return last_val;
 }
 

panda/src/pipeline/pipelineCyclerTrueImpl.h (+4 -1)

@@ -122,7 +122,10 @@ private:
   };
   CycleDataNode *_data;
   int _num_stages;
-  bool _dirty;
+
+  // This is 0 if it's clean, or set to Pipeline::_next_cycle_seq if it's
+  // scheduled to be cycled during the next cycle() call.
+  unsigned int _dirty;
 
   CyclerMutex _lock;