Przeglądaj źródła

Feature: Animation system now even more multi-threaded
- Each individual animation now executes as a separate task, ensuring multiple animations can be evaluated in parallel
- Additionally, all animation tasks can now execute in parallel with the rendering thread (at the cost of one frame of latency). This can be disabled if latency is critical.

BearishSun 7 lat temu
rodzic
commit
feaed12a13

+ 320 - 319
Source/BansheeCore/Animation/BsAnimationManager.cpp

@@ -15,14 +15,8 @@ namespace bs
 {
 	AnimationManager::AnimationManager()
 		: mNextId(1), mUpdateRate(1.0f / 60.0f), mAnimationTime(0.0f), mLastAnimationUpdateTime(0.0f)
-		, mNextAnimationUpdateTime(0.0f), mPaused(false), mWorkerStarted(false), mPoseReadBufferIdx(1)
-		, mPoseWriteBufferIdx(0), mDataReady(false)
+		, mNextAnimationUpdateTime(0.0f), mPaused(false), mPoseReadBufferIdx(1), mPoseWriteBufferIdx(0)
 	{
-		mAnimationWorker = Task::create("Animation", std::bind(&AnimationManager::evaluateAnimation, this));
-
-		mDataReadyCount.store(0, std::memory_order_release);
-		mWorkerState.store(WorkerState::Inactive, std::memory_order_release);
-
 		mBlendShapeVertexDesc = VertexDataDesc::create();
 		mBlendShapeVertexDesc->addVertElem(VET_FLOAT3, VES_POSITION, 1, 1);
 		mBlendShapeVertexDesc->addVertElem(VET_UBYTE4_NORM, VES_NORMAL, 1, 1);
@@ -41,41 +35,48 @@ namespace bs
 		mUpdateRate = 1.0f / fps;
 	}
 
-	void AnimationManager::preUpdate()
+	const EvaluatedAnimationData* AnimationManager::update(bool async)
 	{
-		if (mPaused || !mWorkerStarted)
-			return;
+		// Wait for any workers to complete
+		{
+			Lock lock(mMutex);
 
-		mAnimationWorker->wait();
-		
-		WorkerState state = mWorkerState.load(std::memory_order_acquire);
-		assert(state == WorkerState::DataReady);
+			while (mNumActiveWorkers > 0)
+				mWorkerDoneSignal.wait(lock);
 
-		// Trigger events
-		for (auto& anim : mAnimations)
-		{
-			anim.second->updateFromProxy();
-			anim.second->triggerEvents(mAnimationTime, gTime().getFrameDelta());
+			// Advance the buffers (last write buffer becomes read buffer)
+			if(mSwapBuffers)
+			{
+				mPoseReadBufferIdx = (mPoseReadBufferIdx + 1) % (CoreThread::NUM_SYNC_BUFFERS + 1);
+				mPoseWriteBufferIdx = (mPoseWriteBufferIdx + 1) % (CoreThread::NUM_SYNC_BUFFERS + 1);
+
+				mSwapBuffers = false;
+			}
 		}
-	}
 
-	void AnimationManager::postUpdate()
-	{
-		if (mPaused)
-			return;
+		if(mPaused)
+			return &mAnimData[mPoseReadBufferIdx];
 
 		mAnimationTime += gTime().getFrameDelta();
 		if (mAnimationTime < mNextAnimationUpdateTime)
-			return;
+			return &mAnimData[mPoseReadBufferIdx];
 
 		mNextAnimationUpdateTime = Math::floor(mAnimationTime / mUpdateRate) * mUpdateRate + mUpdateRate;
 
 		float timeDelta = mAnimationTime - mLastAnimationUpdateTime;
 		mLastAnimationUpdateTime = mAnimationTime;
 
-		// Update poses in the currently active buffer. Multi-buffering allows the core thread to safely read the
-		// poses without worrying about them being overwritten by another call to postUpdate, as long as the simulation
-		// thread doesn't go more than (CoreThread::NUM_SYNC_BUFFERS - 1) frames ahead.
+		// Trigger events and update attachments (for the data from the last frame)
+		if(async)
+		{
+			for (auto& anim : mAnimations)
+			{
+				anim.second->updateFromProxy();
+				anim.second->triggerEvents(mAnimationTime, gTime().getFrameDelta());
+			}
+		}
+
+		// Update animation proxies from the latest data
 		mProxies.clear();
 		for (auto& anim : mAnimations)
 		{
@@ -83,6 +84,7 @@ namespace bs
 			mProxies.push_back(anim.second->mAnimProxy);
 		}
 
+		// Build frustums for culling
 		mCullFrustums.clear();
 
 		auto& allCameras = gSceneManager().getAllCameras();
@@ -97,26 +99,7 @@ namespace bs
 			mCullFrustums.push_back(entry.second->getWorldFrustum());
 		}
 
-		// Make sure thread finishes writing all changes to the anim proxies as they will be read by the animation thread
-		mWorkerStarted = true;
-		mWorkerState.store(WorkerState::Started, std::memory_order_release);
-
-		// Note: Animation thread will trigger about the same time as the core thread. The core thread will need to wait
-		// until animation thread finishes, which might end up blocking it (and losing the multi-threading performance). 
-		// Consider delaying displayed animation for a single frame or pre-calculating animations (by advancing time the
-		// previous frame) for non-dirty animations.
-		TaskScheduler::instance().addTask(mAnimationWorker);
-	}
-
-	void AnimationManager::evaluateAnimation()
-	{
-		// Make sure we don't load obsolete anim proxy data written by the simulation thread
-		WorkerState state = mWorkerState.load(std::memory_order_acquire);
-		assert(state == WorkerState::Started);
-
-		// No need for locking, as we are sure that only postUpdate() writes to the proxy buffer, and increments the write
-		// buffer index. And it's called sequentially ensuring previous call to evaluate finishes.
-
+		// Count the total number of bones across all proxies (used to size the write buffer)
 		UINT32 totalNumBones = 0;
 		for (auto& anim : mProxies)
 		{
@@ -124,192 +107,272 @@ namespace bs
 				totalNumBones += anim->skeleton->getNumBones();
 		}
 
-		RendererAnimationData& renderData = mAnimData[mPoseWriteBufferIdx];
-		
-		UINT32 prevPoseBufferIdx = (mPoseWriteBufferIdx + CoreThread::NUM_SYNC_BUFFERS - 1) % CoreThread::NUM_SYNC_BUFFERS;
-		RendererAnimationData& prevRenderData = mAnimData[prevPoseBufferIdx];
-		
-		mPoseWriteBufferIdx = (mPoseWriteBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
-
+		// Prepare the write buffer
+		EvaluatedAnimationData& renderData = mAnimData[mPoseWriteBufferIdx];
 		renderData.transforms.resize(totalNumBones);
 		renderData.infos.clear();
 
+		// Queue animation evaluation tasks
+		{
+			Lock lock(mMutex);
+			mNumActiveWorkers = (UINT32)mProxies.size();
+		}
+
 		UINT32 curBoneIdx = 0;
-		for(auto& anim : mProxies)
+		for (auto& anim : mProxies)
 		{
-			if(anim->mCullEnabled)
+			auto evaluateAnimWorker = [this, anim, curBoneIdx]()
 			{
-				bool isVisible = false;
-				for(auto& frustum : mCullFrustums)
+				UINT32 boneIdx = curBoneIdx;
+				evaluateAnimation(anim.get(), boneIdx);
+
+				Lock lock(mMutex);
 				{
-					if(frustum.intersects(anim->mBounds))
-					{
-						isVisible = true;
-						break;
-					}
+					assert(mNumActiveWorkers > 0);
+					mNumActiveWorkers--;
 				}
 
-				if (!isVisible)
-					continue;
-			}
+				mWorkerDoneSignal.notify_one();
+			};
 
-			RendererAnimationData::AnimInfo animInfo;
-			bool hasAnimInfo = false;
+			SPtr<Task> task = Task::create("AnimWorker", evaluateAnimWorker);
+			TaskScheduler::instance().addTask(task);
 
-			// Evaluate skeletal animation
 			if (anim->skeleton != nullptr)
+				curBoneIdx += anim->skeleton->getNumBones();
+		}
+
+		// Wait for tasks to complete
+		if(!async)
+		{
 			{
-				UINT32 numBones = anim->skeleton->getNumBones();
+				Lock lock(mMutex);
 
-				RendererAnimationData::PoseInfo& poseInfo = animInfo.poseInfo;
-				poseInfo.animId = anim->id;
-				poseInfo.startIdx = curBoneIdx;
-				poseInfo.numBones = numBones;
+				while (mNumActiveWorkers > 0)
+					mWorkerDoneSignal.wait(lock);
+			}
 
-				memset(anim->skeletonPose.hasOverride, 0, sizeof(bool) * anim->skeletonPose.numBones);
-				Matrix4* boneDst = renderData.transforms.data() + curBoneIdx;
+			// Trigger events and update attachments (for the data we just evaluated)
+			for (auto& anim : mAnimations)
+			{
+				anim.second->updateFromProxy();
+				anim.second->triggerEvents(mAnimationTime, gTime().getFrameDelta());
+			}
+		}
 
-				// Copy transforms from mapped scene objects
-				UINT32 boneTfrmIdx = 0;
-				for(UINT32 i = 0; i < anim->numSceneObjects; i++)
-				{
-					const AnimatedSceneObjectInfo& soInfo = anim->sceneObjectInfos[i];
+		mSwapBuffers = true;
 
-					if (soInfo.boneIdx == -1)
-						continue;
+		return &mAnimData[mPoseReadBufferIdx];
+	}
 
-					boneDst[soInfo.boneIdx] = anim->sceneObjectTransforms[boneTfrmIdx];
-					anim->skeletonPose.hasOverride[soInfo.boneIdx] = true;
-					boneTfrmIdx++;
+	void AnimationManager::evaluateAnimation(AnimationProxy* anim, UINT32& curBoneIdx)
+	{
+		if (anim->mCullEnabled)
+		{
+			bool isVisible = false;
+			for (auto& frustum : mCullFrustums)
+			{
+				if (frustum.intersects(anim->mBounds))
+				{
+					isVisible = true;
+					break;
 				}
+			}
 
-				// Animate bones
-				anim->skeleton->getPose(boneDst, anim->skeletonPose, anim->skeletonMask, anim->layers, anim->numLayers);
+			if (!isVisible)
+				return;
+		}
 
-				curBoneIdx += numBones;
-				hasAnimInfo = true;
-			}
-			else
-			{
-				RendererAnimationData::PoseInfo& poseInfo = animInfo.poseInfo;
-				poseInfo.animId = anim->id;
-				poseInfo.startIdx = 0;
-				poseInfo.numBones = 0;
-			}
+		EvaluatedAnimationData& renderData = mAnimData[mPoseWriteBufferIdx];
+		
+		UINT32 prevPoseBufferIdx = (mPoseWriteBufferIdx + CoreThread::NUM_SYNC_BUFFERS) % (CoreThread::NUM_SYNC_BUFFERS + 1);
+		EvaluatedAnimationData& prevRenderData = mAnimData[prevPoseBufferIdx];
 
-			// Reset mapped SO transform
-			for (UINT32 i = 0; i < anim->sceneObjectPose.numBones; i++)
-			{
-				anim->sceneObjectPose.positions[i] = Vector3::ZERO;
-				anim->sceneObjectPose.rotations[i] = Quaternion::IDENTITY;
-				anim->sceneObjectPose.scales[i] = Vector3::ONE;
-			}
+		EvaluatedAnimationData::AnimInfo animInfo;
+		bool hasAnimInfo = false;
+
+		// Evaluate skeletal animation
+		if (anim->skeleton != nullptr)
+		{
+			UINT32 numBones = anim->skeleton->getNumBones();
 
-			// Update mapped scene objects
-			memset(anim->sceneObjectPose.hasOverride, 1, sizeof(bool) * anim->numSceneObjects);
+			EvaluatedAnimationData::PoseInfo& poseInfo = animInfo.poseInfo;
+			poseInfo.animId = anim->id;
+			poseInfo.startIdx = curBoneIdx;
+			poseInfo.numBones = numBones;
 
-			// Update scene object transforms
-			for(UINT32 i = 0; i < anim->numSceneObjects; i++)
+			memset(anim->skeletonPose.hasOverride, 0, sizeof(bool) * anim->skeletonPose.numBones);
+			Matrix4* boneDst = renderData.transforms.data() + curBoneIdx;
+
+			// Copy transforms from mapped scene objects
+			UINT32 boneTfrmIdx = 0;
+			for (UINT32 i = 0; i < anim->numSceneObjects; i++)
 			{
 				const AnimatedSceneObjectInfo& soInfo = anim->sceneObjectInfos[i];
 
-				// We already evaluated bones
-				if (soInfo.boneIdx != -1)
+				if (soInfo.boneIdx == -1)
 					continue;
 
-				if (soInfo.layerIdx == -1 || soInfo.stateIdx == -1)
-					continue;
+				boneDst[soInfo.boneIdx] = anim->sceneObjectTransforms[boneTfrmIdx];
+				anim->skeletonPose.hasOverride[soInfo.boneIdx] = true;
+				boneTfrmIdx++;
+			}
 
-				const AnimationState& state = anim->layers[soInfo.layerIdx].states[soInfo.stateIdx];
-				if (state.disabled)
-					continue;
+			// Animate bones
+			anim->skeleton->getPose(boneDst, anim->skeletonPose, anim->skeletonMask, anim->layers, anim->numLayers);
+
+			curBoneIdx += numBones;
+			hasAnimInfo = true;
+		}
+		else
+		{
+			EvaluatedAnimationData::PoseInfo& poseInfo = animInfo.poseInfo;
+			poseInfo.animId = anim->id;
+			poseInfo.startIdx = 0;
+			poseInfo.numBones = 0;
+		}
 
+		// Reset mapped SO transform
+		for (UINT32 i = 0; i < anim->sceneObjectPose.numBones; i++)
+		{
+			anim->sceneObjectPose.positions[i] = Vector3::ZERO;
+			anim->sceneObjectPose.rotations[i] = Quaternion::IDENTITY;
+			anim->sceneObjectPose.scales[i] = Vector3::ONE;
+		}
+
+		// Update mapped scene objects
+		memset(anim->sceneObjectPose.hasOverride, 1, sizeof(bool) * anim->numSceneObjects);
+
+		// Update scene object transforms
+		for (UINT32 i = 0; i < anim->numSceneObjects; i++)
+		{
+			const AnimatedSceneObjectInfo& soInfo = anim->sceneObjectInfos[i];
+
+			// We already evaluated bones
+			if (soInfo.boneIdx != -1)
+				continue;
+
+			if (soInfo.layerIdx == -1 || soInfo.stateIdx == -1)
+				continue;
+
+			const AnimationState& state = anim->layers[soInfo.layerIdx].states[soInfo.stateIdx];
+			if (state.disabled)
+				continue;
+
+			{
+				UINT32 curveIdx = soInfo.curveIndices.position;
+				if (curveIdx != (UINT32)-1)
 				{
-					UINT32 curveIdx = soInfo.curveIndices.position;
-					if (curveIdx != (UINT32)-1)
-					{
-						const TAnimationCurve<Vector3>& curve = state.curves->position[curveIdx].curve;
-						anim->sceneObjectPose.positions[curveIdx] = curve.evaluate(state.time, state.positionCaches[curveIdx], state.loop);
-						anim->sceneObjectPose.hasOverride[curveIdx] = false;
-					}
+					const TAnimationCurve<Vector3>& curve = state.curves->position[curveIdx].curve;
+					anim->sceneObjectPose.positions[curveIdx] = curve.evaluate(state.time, state.positionCaches[curveIdx], state.loop);
+					anim->sceneObjectPose.hasOverride[curveIdx] = false;
 				}
+			}
 
+			{
+				UINT32 curveIdx = soInfo.curveIndices.rotation;
+				if (curveIdx != (UINT32)-1)
 				{
-					UINT32 curveIdx = soInfo.curveIndices.rotation;
-					if (curveIdx != (UINT32)-1)
-					{
-						const TAnimationCurve<Quaternion>& curve = state.curves->rotation[curveIdx].curve;
-						anim->sceneObjectPose.rotations[curveIdx] = curve.evaluate(state.time, state.rotationCaches[curveIdx], state.loop);
-						anim->sceneObjectPose.rotations[curveIdx].normalize();
-						anim->sceneObjectPose.hasOverride[curveIdx] = false;
-					}
+					const TAnimationCurve<Quaternion>& curve = state.curves->rotation[curveIdx].curve;
+					anim->sceneObjectPose.rotations[curveIdx] = curve.evaluate(state.time, state.rotationCaches[curveIdx], state.loop);
+					anim->sceneObjectPose.rotations[curveIdx].normalize();
+					anim->sceneObjectPose.hasOverride[curveIdx] = false;
 				}
+			}
 
+			{
+				UINT32 curveIdx = soInfo.curveIndices.scale;
+				if (curveIdx != (UINT32)-1)
 				{
-					UINT32 curveIdx = soInfo.curveIndices.scale;
-					if (curveIdx != (UINT32)-1)
-					{
-						const TAnimationCurve<Vector3>& curve = state.curves->scale[curveIdx].curve;
-						anim->sceneObjectPose.scales[curveIdx] = curve.evaluate(state.time, state.scaleCaches[curveIdx], state.loop);
-						anim->sceneObjectPose.hasOverride[curveIdx] = false;
-					}
+					const TAnimationCurve<Vector3>& curve = state.curves->scale[curveIdx].curve;
+					anim->sceneObjectPose.scales[curveIdx] = curve.evaluate(state.time, state.scaleCaches[curveIdx], state.loop);
+					anim->sceneObjectPose.hasOverride[curveIdx] = false;
 				}
 			}
+		}
 
-			// Update generic curves
-			// Note: No blending for generic animations, just use first animation
-			if (anim->numLayers > 0 && anim->layers[0].numStates > 0)
+		// Update generic curves
+		// Note: No blending for generic animations, just use first animation
+		if (anim->numLayers > 0 && anim->layers[0].numStates > 0)
+		{
+			const AnimationState& state = anim->layers[0].states[0];
+			if (!state.disabled)
 			{
-				const AnimationState& state = anim->layers[0].states[0];
-				if (!state.disabled)
+				UINT32 numCurves = (UINT32)state.curves->generic.size();
+				for (UINT32 i = 0; i < numCurves; i++)
 				{
-					UINT32 numCurves = (UINT32)state.curves->generic.size();
-					for (UINT32 i = 0; i < numCurves; i++)
-					{
-						const TAnimationCurve<float>& curve = state.curves->generic[i].curve;
-						anim->genericCurveOutputs[i] = curve.evaluate(state.time, state.genericCaches[i], state.loop);
-					}
+					const TAnimationCurve<float>& curve = state.curves->generic[i].curve;
+					anim->genericCurveOutputs[i] = curve.evaluate(state.time, state.genericCaches[i], state.loop);
 				}
 			}
+		}
 
-			// Update morph shapes
-			if(anim->numMorphShapes > 0)
+		// Update morph shapes
+		if (anim->numMorphShapes > 0)
+		{
+			auto iterFind = prevRenderData.infos.find(anim->id);
+			if (iterFind != prevRenderData.infos.end())
+				animInfo.morphShapeInfo = iterFind->second.morphShapeInfo;
+			else
+				animInfo.morphShapeInfo.version = 1; // 0 is considered invalid version
+
+			// Recalculate weights if curves are present
+			bool hasMorphCurves = false;
+			for (UINT32 i = 0; i < anim->numMorphChannels; i++)
 			{
-				auto iterFind = prevRenderData.infos.find(anim->id);
-				if (iterFind != prevRenderData.infos.end())
-					animInfo.morphShapeInfo = iterFind->second.morphShapeInfo;
+				MorphChannelInfo& channelInfo = anim->morphChannelInfos[i];
+				if (channelInfo.weightCurveIdx != (UINT32)-1)
+				{
+					channelInfo.weight = Math::clamp01(anim->genericCurveOutputs[channelInfo.weightCurveIdx]);
+					hasMorphCurves = true;
+				}
+
+				float frameWeight;
+				if (channelInfo.frameCurveIdx != (UINT32)-1)
+				{
+					frameWeight = Math::clamp01(anim->genericCurveOutputs[channelInfo.frameCurveIdx]);
+					hasMorphCurves = true;
+				}
 				else
-					animInfo.morphShapeInfo.version = 1; // 0 is considered invalid version
+					frameWeight = 0.0f;
 
-				// Recalculate weights if curves are present
-				bool hasMorphCurves = false;
-				for(UINT32 i = 0; i < anim->numMorphChannels; i++)
+				if (channelInfo.shapeCount == 1)
 				{
-					MorphChannelInfo& channelInfo = anim->morphChannelInfos[i];
-					if(channelInfo.weightCurveIdx != (UINT32)-1)
-					{
-						channelInfo.weight = Math::clamp01(anim->genericCurveOutputs[channelInfo.weightCurveIdx]);
-						hasMorphCurves = true;
-					}
+					MorphShapeInfo& shapeInfo = anim->morphShapeInfos[channelInfo.shapeStart];
 
-					float frameWeight;
-					if (channelInfo.frameCurveIdx != (UINT32)-1)
+					// Blend between base shape and the only available frame
+					float relative = frameWeight - shapeInfo.frameWeight;
+					if (relative <= 0.0f)
 					{
-						frameWeight = Math::clamp01(anim->genericCurveOutputs[channelInfo.frameCurveIdx]);
-						hasMorphCurves = true;
+						float diff = shapeInfo.frameWeight;
+						if (diff > 0.0f)
+						{
+							float t = -relative / diff;
+							shapeInfo.finalWeight = 1.0f - std::min(t, 1.0f);
+						}
+						else
+							shapeInfo.finalWeight = 1.0f;
 					}
-					else
-						frameWeight = 0.0f;
-
-					if(channelInfo.shapeCount == 1)
+					else // If past the final frame we clamp
+						shapeInfo.finalWeight = 1.0f;
+				}
+				else if (channelInfo.shapeCount > 1)
+				{
+					for (UINT32 j = 0; j < channelInfo.shapeCount - 1; j++)
 					{
-						MorphShapeInfo& shapeInfo = anim->morphShapeInfos[channelInfo.shapeStart];
+						float prevShapeWeight;
+						if (j > 0)
+							prevShapeWeight = anim->morphShapeInfos[j - 1].frameWeight;
+						else
+							prevShapeWeight = 0.0f; // Base shape, blend between it and the first frame
+
+						float nextShapeWeight = anim->morphShapeInfos[j + 1].frameWeight;
+						MorphShapeInfo& shapeInfo = anim->morphShapeInfos[j];
 
-						// Blend between base shape and the only available frame
 						float relative = frameWeight - shapeInfo.frameWeight;
 						if (relative <= 0.0f)
 						{
-							float diff = shapeInfo.frameWeight;
+							float diff = shapeInfo.frameWeight - prevShapeWeight;
 							if (diff > 0.0f)
 							{
 								float t = -relative / diff;
@@ -318,189 +381,127 @@ namespace bs
 							else
 								shapeInfo.finalWeight = 1.0f;
 						}
-						else // If past the final frame we clamp
-							shapeInfo.finalWeight = 1.0f;
-					}
-					else if(channelInfo.shapeCount > 1)
-					{
-						for(UINT32 j = 0; j < channelInfo.shapeCount - 1; j++)
+						else
 						{
-							float prevShapeWeight;
-							if (j > 0)
-								prevShapeWeight = anim->morphShapeInfos[j - 1].frameWeight;
-							else
-								prevShapeWeight = 0.0f; // Base shape, blend between it and the first frame
-
-							float nextShapeWeight = anim->morphShapeInfos[j + 1].frameWeight;
-							MorphShapeInfo& shapeInfo = anim->morphShapeInfos[j];
-
-							float relative = frameWeight - shapeInfo.frameWeight;
-							if (relative <= 0.0f)
+							float diff = nextShapeWeight - shapeInfo.frameWeight;
+							if (diff > 0.0f)
 							{
-								float diff = shapeInfo.frameWeight - prevShapeWeight;
-								if (diff > 0.0f)
-								{
-									float t = -relative / diff;
-									shapeInfo.finalWeight = 1.0f - std::min(t, 1.0f);
-								}
-								else
-									shapeInfo.finalWeight = 1.0f;
+								float t = relative / diff;
+								shapeInfo.finalWeight = std::min(t, 1.0f);
 							}
 							else
-							{
-								float diff = nextShapeWeight - shapeInfo.frameWeight;
-								if (diff > 0.0f)
-								{
-									float t = relative / diff;
-									shapeInfo.finalWeight = std::min(t, 1.0f);
-								}
-								else
-									shapeInfo.finalWeight = 0.0f;
-							}
+								shapeInfo.finalWeight = 0.0f;
 						}
+					}
 
-						// Last frame
-						{
-							UINT32 lastFrame = channelInfo.shapeStart + channelInfo.shapeCount - 1;
-							MorphShapeInfo& prevShapeInfo = anim->morphShapeInfos[lastFrame - 1];
-							MorphShapeInfo& shapeInfo = anim->morphShapeInfos[lastFrame];
+					// Last frame
+					{
+						UINT32 lastFrame = channelInfo.shapeStart + channelInfo.shapeCount - 1;
+						MorphShapeInfo& prevShapeInfo = anim->morphShapeInfos[lastFrame - 1];
+						MorphShapeInfo& shapeInfo = anim->morphShapeInfos[lastFrame];
 
-							float relative = frameWeight - shapeInfo.frameWeight;
-							if (relative <= 0.0f)
+						float relative = frameWeight - shapeInfo.frameWeight;
+						if (relative <= 0.0f)
+						{
+							float diff = shapeInfo.frameWeight - prevShapeInfo.frameWeight;
+							if (diff > 0.0f)
 							{
-								float diff = shapeInfo.frameWeight - prevShapeInfo.frameWeight;
-								if (diff > 0.0f)
-								{
-									float t = -relative / diff;
-									shapeInfo.finalWeight = 1.0f - std::min(t, 1.0f);
-								}
-								else
-									shapeInfo.finalWeight = 1.0f;
+								float t = -relative / diff;
+								shapeInfo.finalWeight = 1.0f - std::min(t, 1.0f);
 							}
-							else // If past the final frame we clamp
+							else
 								shapeInfo.finalWeight = 1.0f;
 						}
-					}
-
-					for(UINT32 j = 0; j < channelInfo.shapeCount; j++)
-					{
-						MorphShapeInfo& shapeInfo = anim->morphShapeInfos[channelInfo.shapeStart + j];
-						shapeInfo.finalWeight *= channelInfo.weight;
+						else // If past the final frame we clamp
+							shapeInfo.finalWeight = 1.0f;
 					}
 				}
 
-				// Generate morph shape vertices
-				if(anim->morphChannelWeightsDirty || hasMorphCurves)
+				for (UINT32 j = 0; j < channelInfo.shapeCount; j++)
 				{
-					SPtr<MeshData> meshData = bs_shared_ptr_new<MeshData>(anim->numMorphVertices, 0, mBlendShapeVertexDesc);
-
-					UINT8* bufferData = meshData->getData();
-					memset(bufferData, 0, meshData->getSize());
-
-					UINT32 tempDataSize = (sizeof(Vector3) + sizeof(float)) * anim->numMorphVertices;
-					UINT8* tempData = (UINT8*)bs_stack_alloc(tempDataSize);
-					memset(tempData, 0, tempDataSize);
+					MorphShapeInfo& shapeInfo = anim->morphShapeInfos[channelInfo.shapeStart + j];
+					shapeInfo.finalWeight *= channelInfo.weight;
+				}
+			}
 
-					Vector3* tempNormals = (Vector3*)tempData;
-					float* accumulatedWeight = (float*)(tempData + sizeof(Vector3) * anim->numMorphVertices);
+			// Generate morph shape vertices
+			if (anim->morphChannelWeightsDirty || hasMorphCurves)
+			{
+				SPtr<MeshData> meshData = bs_shared_ptr_new<MeshData>(anim->numMorphVertices, 0, mBlendShapeVertexDesc);
 
-					UINT8* positions = meshData->getElementData(VES_POSITION, 1, 1);
-					UINT8* normals = meshData->getElementData(VES_NORMAL, 1, 1);
+				UINT8* bufferData = meshData->getData();
+				memset(bufferData, 0, meshData->getSize());
 
-					UINT32 stride = mBlendShapeVertexDesc->getVertexStride(1);
+				UINT32 tempDataSize = (sizeof(Vector3) + sizeof(float)) * anim->numMorphVertices;
+				UINT8* tempData = (UINT8*)bs_stack_alloc(tempDataSize);
+				memset(tempData, 0, tempDataSize);
 
-					for(UINT32 i = 0; i < anim->numMorphShapes; i++)
-					{
-						const MorphShapeInfo& info = anim->morphShapeInfos[i];
-						float absWeight = Math::abs(info.finalWeight);
+				Vector3* tempNormals = (Vector3*)tempData;
+				float* accumulatedWeight = (float*)(tempData + sizeof(Vector3) * anim->numMorphVertices);
 
-						if (absWeight < 0.0001f)
-							continue;
+				UINT8* positions = meshData->getElementData(VES_POSITION, 1, 1);
+				UINT8* normals = meshData->getElementData(VES_NORMAL, 1, 1);
 
-						const Vector<MorphVertex>& morphVertices = info.shape->getVertices();
-						UINT32 numVertices = (UINT32)morphVertices.size();
-						for(UINT32 j = 0; j < numVertices; j++)
-						{
-							const MorphVertex& vertex = morphVertices[j];
+				UINT32 stride = mBlendShapeVertexDesc->getVertexStride(1);
 
-							Vector3* destPos = (Vector3*)(positions + vertex.sourceIdx * stride);
-							*destPos += vertex.deltaPosition * info.finalWeight;
+				for (UINT32 i = 0; i < anim->numMorphShapes; i++)
+				{
+					const MorphShapeInfo& info = anim->morphShapeInfos[i];
+					float absWeight = Math::abs(info.finalWeight);
 
-							tempNormals[vertex.sourceIdx] += vertex.deltaNormal * info.finalWeight;
-							accumulatedWeight[vertex.sourceIdx] += absWeight;
-						}
-					}
+					if (absWeight < 0.0001f)
+						continue;
 
-					for(UINT32 i = 0; i < anim->numMorphVertices; i++)
+					const Vector<MorphVertex>& morphVertices = info.shape->getVertices();
+					UINT32 numVertices = (UINT32)morphVertices.size();
+					for (UINT32 j = 0; j < numVertices; j++)
 					{
-						PackedNormal* destNrm = (PackedNormal*)(normals + i * stride);
+						const MorphVertex& vertex = morphVertices[j];
 
-						if (accumulatedWeight[i] > 0.0001f)
-						{
-							Vector3 normal = tempNormals[i] / accumulatedWeight[i];
-							normal /= 2.0f; // Accumulated normal is in range [-2, 2] but our normal packing method assumes [-1, 1] range
+						Vector3* destPos = (Vector3*)(positions + vertex.sourceIdx * stride);
+						*destPos += vertex.deltaPosition * info.finalWeight;
 
-							MeshUtility::packNormals(&normal, (UINT8*)destNrm, 1, sizeof(Vector3), stride);
-							destNrm->w = (UINT8)(std::min(1.0f, accumulatedWeight[i]) * 255.999f);
-						}
-						else
-						{
-							*destNrm = {{ 127, 127, 127, 0 }};
-						}
+						tempNormals[vertex.sourceIdx] += vertex.deltaNormal * info.finalWeight;
+						accumulatedWeight[vertex.sourceIdx] += absWeight;
 					}
+				}
 
-					bs_stack_free(tempData);
+				for (UINT32 i = 0; i < anim->numMorphVertices; i++)
+				{
+					PackedNormal* destNrm = (PackedNormal*)(normals + i * stride);
 
-					animInfo.morphShapeInfo.meshData = meshData;
+					if (accumulatedWeight[i] > 0.0001f)
+					{
+						Vector3 normal = tempNormals[i] / accumulatedWeight[i];
+						normal /= 2.0f; // Accumulated normal is in range [-2, 2] but our normal packing method assumes [-1, 1] range
 
-					animInfo.morphShapeInfo.version++;
-					anim->morphChannelWeightsDirty = false;
+						MeshUtility::packNormals(&normal, (UINT8*)destNrm, 1, sizeof(Vector3), stride);
+						destNrm->w = (UINT8)(std::min(1.0f, accumulatedWeight[i]) * 255.999f);
+					}
+					else
+					{
+						*destNrm = { { 127, 127, 127, 0 } };
+					}
 				}
 
-				hasAnimInfo = true;
-			}
-			else
-				animInfo.morphShapeInfo.version = 1;
-
-			if (hasAnimInfo)
-				renderData.infos[anim->id] = animInfo;
-		}
-
-		// Increments counter and ensures all writes are recorded
-		mWorkerState.store(WorkerState::DataReady, std::memory_order_release);
-		mDataReadyCount.fetch_add(1, std::memory_order_acq_rel);
-	}
+				bs_stack_free(tempData);
 
-	void AnimationManager::waitUntilComplete()
-	{
-		mAnimationWorker->wait();
+				animInfo.morphShapeInfo.meshData = meshData;
 
-		// Read counter, and ensure all reads are done after writes on anim thread complete
-		INT32 dataReadyCount = mDataReadyCount.load(std::memory_order_acquire);
+				animInfo.morphShapeInfo.version++;
+				anim->morphChannelWeightsDirty = false;
+			}
 
-		if (dataReadyCount > CoreThread::NUM_SYNC_BUFFERS)
-		{
-			LOGERR("Animation manager threading issue. Too many entries in queue: " + toString(dataReadyCount));
-			assert(dataReadyCount <= CoreThread::NUM_SYNC_BUFFERS);
+			hasAnimInfo = true;
 		}
+		else
+			animInfo.morphShapeInfo.version = 1;
 
-		mDataReady = dataReadyCount > 0;
-		if (!mDataReady)
-			return;
-
-		mDataReadyCount.fetch_sub(1, std::memory_order_release);
-		mPoseReadBufferIdx = (mPoseReadBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
-	}
-
-	const RendererAnimationData& AnimationManager::getRendererData()
-	{
-		if (!mDataReady)
+		if (hasAnimInfo)
 		{
-			static RendererAnimationData dummy;
-			return dummy;
+			Lock lock(mMutex);
+			renderData.infos[anim->id] = animInfo;
 		}
-
-		return mAnimData[mPoseReadBufferIdx];
 	}
 
 	UINT64 AnimationManager::registerAnimation(Animation* anim)

+ 30 - 33
Source/BansheeCore/Animation/BsAnimationManager.h

@@ -17,7 +17,7 @@ namespace bs
 	 */
 	
 	/** Contains skeleton poses for all animations evaluated on a single frame. */
-	struct RendererAnimationData
+	struct EvaluatedAnimationData
 	{
 		/** Contains meta-data about a calculated skeleton pose. Actual data maps to the @p transforms buffer. */
 		struct PoseInfo
@@ -70,34 +70,21 @@ namespace bs
 		 */
 		void setUpdateRate(UINT32 fps);
 
-		/** 
-		 * Synchronizes animation data from the animation thread with the scene objects. Should be called before component
-		 * updates are sent. 
-		 */
-		void preUpdate();
-
 		/**
-		 * Synchronizes animation data to the animation thread, advances animation time and queues new animation evaluation
-		 * task.
+		 * Evaluates animations for all animated objects, and returns the evaluated skeleton bone poses and morph shape
+		 * meshes that can be passed along to the renderer.
+		 * 
+		 * @param[in]		async		If true the method returns immediately while the animation gets evaluated in the
+		 *								background. The returned evaluated data will be the data from the previous frame.
+		 *								Therefore note that this introduces a one frame latency on the animation. If the
+		 *								latency is not acceptable set this to false, at a potential performance impact.
+		 * @return						Evaluated animation data for this frame (if @p async is false), or the previous
+		 *								frame (if @p async is true). Note that the system re-uses the returned buffers,
+		 *								and the returned buffer should stop being used after every second call to update().
+		 *								This is enough to have one buffer be processed by the core thread, one queued
+		 *								for future rendering and one that's being written to.
 		 */
-		void postUpdate();
-
-		/** 
-		 * Blocks the animation thread until it has finished evaluating animation, and it advances the read buffer index, 
-		 * meaning this shouldn't be called more than once per frame. It must be called before calling getRendererData().
-		 *
-		 * @note	Core thread only.
-		 */
-		void waitUntilComplete();
-
-		/** 
-		 * Gets skeleton poses required by the renderer to display all the animations. The returned data can be referenced, 
-		 * and is guaranteed to be valid for a single core-thread frame. Before invoking, caller must ensure data is
-		 * available by first calling waitUntilComplete().
-		 *
-		 * @note	Core thread only.
-		 */
-		const RendererAnimationData& getRendererData();
+		const EvaluatedAnimationData* update(bool async = true);
 
 	private:
 		friend class Animation;
@@ -121,6 +108,15 @@ namespace bs
 		/** Worker method ran on the animation thread that evaluates all animation at the provided time. */
 		void evaluateAnimation();
 
+		/** 
+		 * Evaluates animation for a single object and writes the result in the currently active write buffer. 
+		 *
+		 * @param[in]	anim		Proxy representing the animation to evaluate.
+		 * @param[in]	boneIdx		Index in the output buffer in which to write evaluated bone information. This will be
+		 *							automatically advanced by the number of written bone transforms.
+		 */
+		void evaluateAnimation(AnimationProxy* anim, UINT32& boneIdx);
+
 		UINT64 mNextId;
 		UnorderedMap<UINT64, Animation*> mAnimations;
 		
@@ -130,20 +126,21 @@ namespace bs
 		float mNextAnimationUpdateTime;
 		bool mPaused;
 
-		bool mWorkerStarted;
-		SPtr<Task> mAnimationWorker;
 		SPtr<VertexDataDesc> mBlendShapeVertexDesc;
 
 		// Animation thread
 		Vector<SPtr<AnimationProxy>> mProxies;
 		Vector<ConvexVolume> mCullFrustums;
-		RendererAnimationData mAnimData[CoreThread::NUM_SYNC_BUFFERS];
+		EvaluatedAnimationData mAnimData[CoreThread::NUM_SYNC_BUFFERS + 1];
 
 		UINT32 mPoseReadBufferIdx;
 		UINT32 mPoseWriteBufferIdx;
-		std::atomic<INT32> mDataReadyCount; // Anim <-> Core thread sync
-		std::atomic<WorkerState> mWorkerState; // Anim <-> Sim thread sync
-		bool mDataReady;
+		
+		Signal mWorkerDoneSignal;
+		Mutex mMutex;
+
+		UINT32 mNumActiveWorkers = 0;
+		bool mSwapBuffers = false;
 	};
 
 	/** Provides easier access to AnimationManager. */

+ 5 - 3
Source/BansheeCore/BsCoreApplication.cpp

@@ -223,14 +223,12 @@ namespace bs
 			RenderWindowManager::instance()._update(); 
 			gInput()._triggerCallbacks();
 			gDebug()._triggerCallbacks();
-			AnimationManager::instance().preUpdate();
 
 			preUpdate();
 
 			PROFILE_CALL(gSceneManager()._update(), "SceneManager");
 			gAudio()._update();
 			gPhysics().update();
-			AnimationManager::instance().postUpdate();
 
 			// Update plugins
 			for (auto& pluginUpdateFunc : mPluginUpdateFunctions)
@@ -238,6 +236,10 @@ namespace bs
 
 			postUpdate();
 
+			// Evaluate animation after scene and plugin updates because the renderer will just now be displaying the
+			// animation we sent on the previous frame, and we want the scene information to match what is displayed.
+			const EvaluatedAnimationData* animData = AnimationManager::instance().update();
+
 			// Send out resource events in case any were loaded/destroyed/modified
 			ResourceListenerManager::instance().update();
 
@@ -246,7 +248,7 @@ namespace bs
 			RendererManager::instance().getActive()->update();
 
 			gSceneManager()._updateCoreObjectTransforms();
-			PROFILE_CALL(RendererManager::instance().getActive()->renderAll(), "Render");
+			PROFILE_CALL(RendererManager::instance().getActive()->renderAll(animData), "Render");
 
 			// Core and sim thread run in lockstep. This will result in a larger input latency than if I was 
 			// running just a single thread. Latency becomes worse if the core thread takes longer than sim 

+ 3 - 3
Source/BansheeCore/Renderer/BsRenderable.cpp

@@ -532,12 +532,12 @@ namespace bs
 		mMorphShapeVersion = 0;
 	}
 
-	void Renderable::updateAnimationBuffers(const RendererAnimationData& animData)
+	void Renderable::updateAnimationBuffers(const EvaluatedAnimationData& animData)
 	{
 		if (mAnimationId == (UINT64)-1)
 			return;
 
-		const RendererAnimationData::AnimInfo* animInfo = nullptr;
+		const EvaluatedAnimationData::AnimInfo* animInfo = nullptr;
 
 		auto iterFind = animData.infos.find(mAnimationId);
 		if (iterFind != animData.infos.end())
@@ -548,7 +548,7 @@ namespace bs
 
 		if (mAnimType == RenderableAnimType::Skinned || mAnimType == RenderableAnimType::SkinnedMorph)
 		{
-			const RendererAnimationData::PoseInfo& poseInfo = animInfo->poseInfo;
+			const EvaluatedAnimationData::PoseInfo& poseInfo = animInfo->poseInfo;
 
 			// Note: If multiple elements are using the same animation (not possible atm), this buffer should be shared by
 			// all such elements

+ 2 - 2
Source/BansheeCore/Renderer/BsRenderable.h

@@ -12,7 +12,7 @@
 
 namespace bs
 {
-	struct RendererAnimationData;
+	struct EvaluatedAnimationData;
 
 	/** @addtogroup Implementation
 	 *  @{
@@ -249,7 +249,7 @@ namespace bs
 		 * Updates internal animation buffers from the contents of the provided animation data object. Does nothing if
 		 * renderable is not affected by animation.
 		 */
-		void updateAnimationBuffers(const RendererAnimationData& animData);
+		void updateAnimationBuffers(const EvaluatedAnimationData& animData);
 
 		/** Returns the GPU buffer containing element's bone matrices, if it has any. */
 		const SPtr<GpuBuffer>& getBoneMatrixBuffer() const { return mBoneMatrixBuffer; }

+ 2 - 1
Source/BansheeCore/Renderer/BsRenderer.h

@@ -12,6 +12,7 @@ namespace bs
 	class RendererExtension;
 	class LightProbeVolume;
 	struct RenderSettings;
+	struct EvaluatedAnimationData;
 
 	namespace ct
 	{
@@ -113,7 +114,7 @@ namespace bs
 		virtual const StringID& getName() const = 0;
 
 		/** Called in order to render all currently active cameras. */
-		virtual void renderAll() = 0;
+		virtual void renderAll(const EvaluatedAnimationData* animData) = 0;
 
 		/**
 		 * Called whenever a new camera is created.

+ 1 - 1
Source/RenderBeast/BsObjectRendering.h

@@ -12,7 +12,7 @@
 
 namespace bs 
 { 
-	struct RendererAnimationData;
+	struct EvaluatedAnimationData;
 
 	namespace ct
 	{

+ 4 - 7
Source/RenderBeast/BsRenderBeast.cpp

@@ -254,7 +254,7 @@ namespace bs { namespace ct
 		shadowRenderer.setShadowMapSize(mCoreOptions->shadowMapSize);
 	}
 
-	void RenderBeast::renderAll() 
+	void RenderBeast::renderAll(const EvaluatedAnimationData* animData) 
 	{
 		// Sync all dirty sim thread CoreObject data to core thread
 		CoreObjectManager::instance().syncToCore();
@@ -270,10 +270,10 @@ namespace bs { namespace ct
 		timings.timeDelta = gTime().getFrameDelta();
 		timings.frameIdx = gTime().getFrameIdx();
 		
-		gCoreThread().queueCommand(std::bind(&RenderBeast::renderAllCore, this, timings));
+		gCoreThread().queueCommand(std::bind(&RenderBeast::renderAllCore, this, timings, animData));
 	}
 
-	void RenderBeast::renderAllCore(FrameTimings timings)
+	void RenderBeast::renderAllCore(FrameTimings timings, const EvaluatedAnimationData* animData)
 	{
 		THROW_IF_NOT_CORE_THREAD;
 
@@ -291,13 +291,10 @@ namespace bs { namespace ct
 		mObjectRenderer->setParamFrameParams(timings.time);
 
 		// Retrieve animation data
-		AnimationManager::instance().waitUntilComplete();
-		const RendererAnimationData& animData = AnimationManager::instance().getRendererData();
-		
 		sceneInfo.renderableReady.resize(sceneInfo.renderables.size(), false);
 		sceneInfo.renderableReady.assign(sceneInfo.renderables.size(), false);
 		
-		FrameInfo frameInfo(timings, &animData);
+		FrameInfo frameInfo(timings, animData);
 
 		// Make sure any renderer tasks finish first, as rendering might depend on them
 		processTasks(false);

+ 6 - 5
Source/RenderBeast/BsRenderBeast.h

@@ -13,7 +13,7 @@
 
 namespace bs 
 { 
-	struct RendererAnimationData;
+	struct EvaluatedAnimationData;
 
 	namespace ct
 	{
@@ -34,13 +34,13 @@ namespace bs
 	/** Contains information global to an entire frame. */
 	struct FrameInfo
 	{
-		FrameInfo(const FrameTimings& timings, const RendererAnimationData* animData = nullptr)
+		FrameInfo(const FrameTimings& timings, const EvaluatedAnimationData* animData = nullptr)
 			:timeDelta(timings.timeDelta), frameIdx(timings.frameIdx), animData(animData)
 		{ }
 
 		float timeDelta;
 		UINT64 frameIdx;
-		const RendererAnimationData* animData;
+		const EvaluatedAnimationData* animData;
 	};
 
 	/**
@@ -64,7 +64,7 @@ namespace bs
 		const StringID& getName() const override;
 
 		/** @copydoc Renderer::renderAll */
-		void renderAll() override;
+		void renderAll(const EvaluatedAnimationData* animData) override;
 
 		/**	Sets options used for controlling the rendering. */
 		void setOptions(const SPtr<RendererOptions>& options) override;
@@ -148,10 +148,11 @@ namespace bs
 		 * Performs rendering over all camera proxies.
 		 *
 		 * @param[in]	timings		Information about frame time and frame index.
+		 * @param[in]	animData	Data used for rendering animated renderables.
 		 *
 		 * @note	Core thread only.
 		 */
-		void renderAllCore(FrameTimings timings);
+		void renderAllCore(FrameTimings timings, const EvaluatedAnimationData* animData);
 
 		/**
 		 * Renders all views in the provided view group.

+ 1 - 1
Source/RenderBeast/BsRendererScene.h

@@ -12,7 +12,7 @@
 
 namespace bs 
 { 
-	struct RendererAnimationData;
+	struct EvaluatedAnimationData;
 
 	namespace ct
 	{