Explorar o código

Animation playback (WIP)

BearishSun hai 9 anos
pai
achega
e7f45a5e0e

+ 1 - 1
Source/BansheeCore/CMakeSources.cmake

@@ -503,7 +503,7 @@ set(BS_BANSHEECORE_INC_ANIMATION
 	"Include/BsSkeleton.h"
 	"Include/BsAnimation.h"
 	"Include/BsAnimationManager.h"
-	"Include/BsCurveEvaluator.h"
+	"Include/BsCurveCache.h"
 )
 
 set(BS_BANSHEECORE_SRC_ANIMATION

+ 6 - 2
Source/BansheeCore/Include/BsAnimation.h

@@ -74,7 +74,7 @@ namespace BansheeEngine
 	/** Represents a copy of the Animation data for use specifically on the animation thread. */
 	struct AnimationProxy
 	{
-		AnimationProxy();
+		AnimationProxy(UINT64 id);
 		~AnimationProxy();
 
 		/** 
@@ -118,10 +118,14 @@ namespace BansheeEngine
 		 */
 		void updateTime(const Vector<PlayingClipInfo>& clipInfos);
 
+		UINT64 id;
 		AnimationStateLayer* layers;
 		UINT32 numLayers;
 		SPtr<Skeleton> skeleton;
-		SkeletonPose pose;
+
+		// Evaluation results
+		LocalSkeletonPose localPose;
+		float* genericCurveOutputs;
 	};
 
 	/**

+ 11 - 10
Source/BansheeCore/Include/BsAnimationCurve.h

@@ -3,7 +3,7 @@
 #pragma once
 
 #include "BsCorePrerequisites.h"
-#include "BsCurveEvaluator.h"
+#include "BsCurveCache.h"
 
 namespace BansheeEngine
 {
@@ -39,21 +39,21 @@ namespace BansheeEngine
 		 * happens sequential order (which should be true for most curves). If evaluation is not happening in sequential
 		 * order using the non-caching version of evaluate() might yield better performance.
 		 *
-		 * @param[i]	animInstance	Animation instance data holding the time to evaluate the curve at, and any cached
-		 *								data from previous requests. Caller should ensure to maintain a persistent instance
-		 *								of this data for every animation using this curve in order to ensure cache is
-		 *								maintained.
+		 * @param[in]	time			Time to evaluate the curve at.
+		 * @param[in]	cache			Cached data from previous requests that can be used for speeding up sequential calls
+		 *								to this method. Caller should ensure to maintain a persistent instance of this data
+		 *								for every animation using this curve in order to ensure cache is maintained.
 		 * @param[in]	loop			If true the curve will loop when it goes past the end or beginning. Otherwise the
 		 *								curve value will be clamped.
 		 * @return						Interpolated value from the curve at provided time.
 		 */
-		T evaluate(const TCurveEvaluatorData<T>& animInstance, bool loop = true) const;
+		T evaluate(float time, const TCurveCache<T>& cache, bool loop = true) const;
 
 		/**
 		 * Evaluate the animation curve at the specified time. If evaluating multiple values in a sequential order consider
 		 * using the cached version of evaluate() for better performance.
 		 *
-		 * @param[i]	time	Time to evaluate the curve at.		
+		 * @param[in]	time	Time to evaluate the curve at.		
 		 * @param[in]	loop	If true the curve will loop when it goes past the end or beginning. Otherwise the curve 
 		 *						value will be clamped.
 		 * @return				Interpolated value from the curve at provided time.
@@ -91,13 +91,13 @@ namespace BansheeEngine
 		 *
 		 * @param[in]	time			Time for which to find the relevant keys from. It is expected to be clamped to a
 		 *								valid range within the curve.
-		 * @param[in]	animInstance	Animation instance data holding the time to evaluate the curve at, and any cached
+		 * @param[in]	cache			Animation instance data holding the time to evaluate the curve at, and any cached
 		 *								data from previous requests. Time is expected to be clamped to a valid range
 		 *								within the curve.
 		 * @param[out]	leftKey			Index of the key to interpolate from.
 		 * @param[out]	rightKey		Index of the key to interpolate to.
 		 */
-		void findKeys(float time, const TCurveEvaluatorData<T>& animInstance, UINT32& leftKey, UINT32& rightKey) const;
+		void findKeys(float time, const TCurveCache<T>& cache, UINT32& leftKey, UINT32& rightKey) const;
 
 		/** 
 		 * Returns a pair of keys that can be used for interpolating to find the value at the provided time. 
@@ -125,11 +125,12 @@ namespace BansheeEngine
 		/** 
 		 * Evaluates a value at the cached curve. Caller must ensure the request time falls within the cached curve range.
 		 *
+		 * @param[in]	time			Time to evaluate the curve at.	
 		 * @param[in]	animInstance	Animation instance data holding the time to evaluate the curve at, and any cached
 		 *								data from previous requests.
 		 * @return						Interpolated value from the curve at provided time.
 		 */
-		T evaluateCache(const TCurveEvaluatorData<T>& animInstance) const;
+		T evaluateCache(float time, const TCurveCache<T>& animInstance) const;
 
 		static const UINT32 CACHE_LOOKAHEAD;
 

+ 31 - 0
Source/BansheeCore/Include/BsAnimationManager.h

@@ -4,6 +4,7 @@
 
 #include "BsCorePrerequisites.h"
 #include "BsModule.h"
+#include "BsCoreThread.h"
 
 namespace BansheeEngine
 {
@@ -13,6 +14,21 @@ namespace BansheeEngine
 	 *  @{
 	 */
 	
+	/** Contains skeleton poses for all animations evaluated on a single frame. */
+	struct RendererAnimationData
+	{
+		/** Contains data about a calculated skeleton pose. */
+		struct PoseInfo
+		{
+			UINT64 animId;
+			UINT32 startIdx;
+			UINT32 numBones;
+		};
+
+		UnorderedMap<UINT64, PoseInfo> poseInfos;
+		Vector<Matrix4> transforms;
+	};
+
 	/** 
 	 * Keeps track of all active animations, queues animation thread tasks and synchronizes data between simulation, core
 	 * and animation threads.
@@ -45,6 +61,15 @@ namespace BansheeEngine
 		 */
 		void postUpdate();
 
+		/** 
+		 * Gets skeleton poses required by the renderer to display all the animations. This will block the animation thread
+		 * if it has not yet finished, and it will also advance the read buffer index, meaning this shouldn't be called more
+		 * than once per frame.
+		 *
+		 * @note	Core thread only.
+		 */
+		const RendererAnimationData& getRendererData();
+
 	private:
 		friend class Animation;
 
@@ -71,7 +96,13 @@ namespace BansheeEngine
 		bool mWorkerRunning;
 		SPtr<Task> mAnimationWorker;
 
+		// Animation thread
 		Vector<SPtr<AnimationProxy>> mProxies;
+		RendererAnimationData mAnimData[CoreThread::NUM_SYNC_BUFFERS];
+
+		UINT32 mPoseReadBufferIdx;
+		UINT32 mPoseWriteBufferIdx;
+		std::atomic<INT32> mDataReadyCount;
 	};
 
 	/** @} */

+ 19 - 3
Source/BansheeCore/Include/BsCoreThread.h

@@ -122,14 +122,30 @@ public:
 	 * @note	Sim thread only.
 	 */
 	FrameAlloc* getFrameAlloc() const;
-private:
-	static const int NUM_FRAME_ALLOCS = 2;
 
+	/** 
+	 * Returns number of buffers needed to sync data between core and sim thread. Currently the sim thread can be one frame
+	 * ahead of the core thread, meaning we need two buffers. If this situation changes increase this number.
+	 *
+	 * For example:
+	 *  - Sim thread frame starts, it writes some data to buffer 0.
+	 *  - Core thread frame starts, it reads some data from buffer 0.
+	 *  - Sim thread frame finishes
+	 *  - New sim thread frame starts, it writes some data to buffer 1.
+	 *  - Core thread still working, reading from buffer 0. (If we were using just one buffer at this point core thread 
+	 *	  would be reading wrong data).
+	 *  - Sim thread waiting for core thread (application defined that it cannot go ahead more than one frame)
+	 *  - Core thread frame finishes.
+	 *  - New core thread frame starts, it reads some data from buffer 1.
+	 *  - ...
+	 */
+	static const int NUM_SYNC_BUFFERS = 2;
+private:
 	/**
 	 * Double buffered frame allocators. Means sim thread cannot be more than 1 frame ahead of core thread (If that changes
 	 * you should be able to easily add more).
 	 */
-	FrameAlloc* mFrameAllocs[NUM_FRAME_ALLOCS];
+	FrameAlloc* mFrameAllocs[NUM_SYNC_BUFFERS];
 	UINT32 mActiveFrameAlloc;
 
 	static AccessorData mAccessor;

+ 5 - 6
Source/BansheeCore/Include/BsCurveEvaluator.h → Source/BansheeCore/Include/BsCurveCache.h

@@ -11,19 +11,18 @@ namespace BansheeEngine
 	 */
 
 	/** 
-	 * Data used for evaluating an AnimationCurve. Holds cached information so that sequential evaluations can be sped up.
+	 * Holds cached information used for animation curve evaluation so that sequential evaluations can be sped up.
 	 * You should not use the same instance of this object for evaluating multiple different animation curves.
 	 */
 	template <class T>
-	struct TCurveEvaluatorData
+	struct TCurveCache
 	{
 	public:
-		TCurveEvaluatorData()
-			: time(0.0f), cachedKey((UINT32)-1), cachedCurveStart(std::numeric_limits<float>::infinity())
-			, cachedCurveEnd(0.0f), cachedCubicCoefficients()
+		TCurveCache()
+			: cachedKey((UINT32)-1), cachedCurveStart(std::numeric_limits<float>::infinity()), cachedCurveEnd(0.0f)
+			, cachedCubicCoefficients()
 		{ }
 
-		float time; /**< Time at which to evaluate the curve. */
 	private:
 		friend class TAnimationCurve<T>;
 

+ 33 - 27
Source/BansheeCore/Include/BsSkeleton.h

@@ -7,7 +7,7 @@
 #include "BsMatrix4.h"
 #include "BsVector3.h"
 #include "BsQuaternion.h"
-#include "BsCurveEvaluator.h"
+#include "BsCurveCache.h"
 
 namespace BansheeEngine
 {
@@ -15,10 +15,10 @@ namespace BansheeEngine
 	 *  @{
 	 */
 
-	/** 
-	 * Contains indices for position/rotation/scale animation curves. Used for quick mapping of bones in a skeleton to 
-	 * relevant animation curves. 
-	 */
+	 /**
+	  * Contains indices for position/rotation/scale animation curves. Used for quick mapping of bones in a skeleton to
+	  * relevant animation curves.
+	  */
 	struct AnimationCurveMapping
 	{
 		UINT32 position;
@@ -41,10 +41,12 @@ namespace BansheeEngine
 		SPtr<AnimationCurves> curves; /**< All curves in the animation clip. */
 		AnimationCurveMapping* boneToCurveMapping; /**< Mapping of bone indices to curve indices for quick lookup .*/
 
-		TCurveEvaluatorData<Vector3> positionEval; /**< Time value and cache used for evaluating position curves. */
-		TCurveEvaluatorData<Quaternion> rotationEval; /**< Time value and cache used for evaluating rotation curves. */
-		TCurveEvaluatorData<Vector3> scaleEval; /**< Time value and cache used for evaluating scale curves. */
+		TCurveCache<Vector3>* positionCaches; /**< Cache used for evaluating position curves. */
+		TCurveCache<Quaternion>* rotationCaches; /**< Cache used for evaluating rotation curves. */
+		TCurveCache<Vector3>* scaleCaches; /**< Cache used for evaluating scale curves. */
+		TCurveCache<float>* genericCaches; /**< Cache used for evaluating generic curves. */
 
+		float time; /**< Time to evaluate the curve at. */
 		float weight; /**< Determines how much of an influence will this clip have in regard to others in the same layer. */
 		bool loop; /**< Determines should the animation loop (wrap) once ending or beginning frames are passed. */
 	};
@@ -57,25 +59,25 @@ namespace BansheeEngine
 
 		UINT8 index; /**< Unique index of the animation layer. */
 
-		/** 
-		 * Determines should weights of individual states be normalized or kept as is. Non-normalized weights allow the 
-		 * total contribution of all states to be less than one. 
+		/**
+		 * Determines should weights of individual states be normalized or kept as is. Non-normalized weights allow the
+		 * total contribution of all states to be less than one.
 		 */
-		bool normalizeWeights; 
+		bool normalizeWeights;
 	};
 
 	/** 
-	 * Contains information about translation, rotation and scale for every skeleton bone, after being evaluated at a
-	 * a specific time. All values are stored in the same order as the bones in the skeleton they were created by.
+	 * Contains local translation, rotation and scale values for each bone in a skeleton, after being evaluated at a
+	 * specific time of an animation.  All values are stored in the same order as the bones in the skeleton they were
+	 * created by.
 	 */
-	struct SkeletonPose
+	struct LocalSkeletonPose
 	{
-		SkeletonPose();
-		SkeletonPose(UINT32 numBones);
-		~SkeletonPose();
+		LocalSkeletonPose();
+		LocalSkeletonPose(UINT32 numBones);
+		LocalSkeletonPose(UINT32 numPos, UINT32 numRot, UINT32 numScale);
+		~LocalSkeletonPose();
 
-		/**< Global matrices transforming vectors from bind pose space to model space at specific animation time. */
-		Matrix4* bonePoses; 
 		Vector3* positions; /**< Local bone positions at specific animation time. */
 		Quaternion* rotations; /**< Local bone rotations at specific animation time. */
 		Vector3* scales; /**< Local bone scales at specific animation time. */
@@ -102,27 +104,31 @@ namespace BansheeEngine
 		 * Outputs a skeleton pose containing required transforms for transforming the skeleton to the values specified by
 		 * the provided animation clip evaluated at the specified time.
 		 *
-		 * @param[out]	pose	Output pose containing the requested transforms. Must be pre-allocated with enough space
-		 *						to hold all the bone data of this skeleton.
-		 * @param[in]	clip	Clip to evaluate.
-		 * @param[in]	time	Time to evaluate the clip with.
-		 * @param[in]	loop	Determines should the time be looped (wrapped) if it goes past the clip start/end.
+		 * @param[out]	pose		Output pose containing the requested transforms. Must be pre-allocated with enough space
+		 *							to hold all the bone matrices of this skeleton.
+		 * @param[out]	localPose	Output pose containing the local transforms. Must be pre-allocated with enough space
+		 *							to hold all the bone data of this skeleton.
+		 * @param[in]	clip		Clip to evaluate.
+		 * @param[in]	time		Time to evaluate the clip with.
+		 * @param[in]	loop		Determines should the time be looped (wrapped) if it goes past the clip start/end.
 		 *
 		 * @note	It is more efficient to use the other getPose overload as sequential calls can benefit from animation
 		 *			evaluator cache.
 		 */
-		void getPose(SkeletonPose& pose, const AnimationClip& clip, float time, bool loop = true);
+		void getPose(Matrix4* pose, LocalSkeletonPose& localPose, const AnimationClip& clip, float time, bool loop = true);
 
 		/** 
 		 * Outputs a skeleton pose containing required transforms for transforming the skeleton to the values specified by
 		 * the provided set of animation curves.
 		 *
 		 * @param[out]	pose		Output pose containing the requested transforms. Must be pre-allocated with enough space
+		 *							to hold all the bone matrices of this skeleton.
+		 * @param[out]	localPose	Output pose containing the local transforms. Must be pre-allocated with enough space
 		 *							to hold all the bone data of this skeleton.
 		 * @param[in]	layers		One or multiple layers, containing one or multiple animation states to evaluate.
 		 * @param[in]	numLayers	Number of layers in the @p layers array.
 		 */
-		void getPose(SkeletonPose& pose, const AnimationStateLayer* layers, UINT32 numLayers);
+		void getPose(Matrix4* pose, LocalSkeletonPose& localPose, const AnimationStateLayer* layers, UINT32 numLayers);
 
 		/** Returns the total number of bones in the skeleton. */
 		UINT32 getNumBones() const { return mNumBones; }

+ 77 - 20
Source/BansheeCore/Source/BsAnimation.cpp

@@ -7,19 +7,20 @@
 namespace BansheeEngine
 {
 	PlayingClipInfo::PlayingClipInfo()
-		:layerIdx(0), stateIdx(0)
+		:layerIdx(0), curveVersion(0), stateIdx(0)
 	{ }
 
 	PlayingClipInfo::PlayingClipInfo(const HAnimationClip& clip)
-		:clip(clip), layerIdx(0), stateIdx(0)
+		:clip(clip), curveVersion(0), layerIdx(0), stateIdx(0)
 	{ }
 
-	AnimationProxy::AnimationProxy()
-		:layers(nullptr), numLayers(0)
+	AnimationProxy::AnimationProxy(UINT64 id)
+		:id(id), layers(nullptr), numLayers(0), genericCurveOutputs(nullptr)
 	{ }
 
 	AnimationProxy::~AnimationProxy()
 	{
+		// All the memory is part of the same buffer, so we only need to free the first element
 		if (layers != nullptr)
 			bs_free(layers);
 	}
@@ -28,10 +29,27 @@ namespace BansheeEngine
 	{
 		this->skeleton = skeleton;
 
+		// Note: I could avoid having a separate allocation for LocalSkeletonPose and use the same buffer as the rest
+		// of AnimationProxy
 		if (skeleton != nullptr)
-			pose = SkeletonPose(skeleton->getNumBones());
+			localPose = LocalSkeletonPose(skeleton->getNumBones());
 		else
-			pose = SkeletonPose();
+		{
+			UINT32 numPosCurves = 0;
+			UINT32 numRotCurves = 0;
+			UINT32 numScaleCurves = 0;
+
+			// Note: I'm recalculating this both here and in follow-up rebuild() call, it could be avoided.
+			for (auto& clipInfo : clipInfos)
+			{
+				SPtr<AnimationCurves> curves = clipInfo.clip->getCurves();
+				numPosCurves += (UINT32)curves->position.size();
+				numRotCurves += (UINT32)curves->rotation.size();
+				numScaleCurves += (UINT32)curves->scale.size();
+			}
+
+			localPose = LocalSkeletonPose(numPosCurves, numRotCurves, numScaleCurves);
+		}
 
 		rebuild(clipInfos);
 	}
@@ -77,10 +95,31 @@ namespace BansheeEngine
 			else
 				numBones = 0;
 
+			UINT32 numPosCurves = 0;
+			UINT32 numRotCurves = 0;
+			UINT32 numScaleCurves = 0;
+			UINT32 numGenCurves = 0;
+
+			for (auto& clipInfo : clipInfos)
+			{
+				SPtr<AnimationCurves> curves = clipInfo.clip->getCurves();
+				numPosCurves += (UINT32)curves->position.size();
+				numRotCurves += (UINT32)curves->rotation.size();
+				numScaleCurves += (UINT32)curves->scale.size();
+				numGenCurves += (UINT32)curves->generic.size();
+			}
+
 			UINT32 layersSize = sizeof(AnimationStateLayer) * numLayers;
 			UINT32 clipsSize = sizeof(AnimationState) * numClips;
 			UINT32 boneMappingSize = numBones * numClips * sizeof(AnimationCurveMapping);
-			UINT8* data = (UINT8*)bs_alloc(layersSize + clipsSize + boneMappingSize);
+			UINT32 posCacheSize = numPosCurves * sizeof(TCurveCache<Vector3>);
+			UINT32 rotCacheSize = numRotCurves * sizeof(TCurveCache<Quaternion>);
+			UINT32 scaleCacheSize = numScaleCurves * sizeof(TCurveCache<Vector3>);
+			UINT32 genCacheSize = numGenCurves * sizeof(TCurveCache<float>);
+			UINT32 genericCurveOutputSize = numGenCurves * sizeof(float);
+
+			UINT8* data = (UINT8*)bs_alloc(layersSize + clipsSize + boneMappingSize + posCacheSize + rotCacheSize + 
+				scaleCacheSize + genCacheSize + genericCurveOutputSize);
 
 			layers = (AnimationStateLayer*)data;
 			memcpy(layers, tempLayers.data(), layersSize);
@@ -90,7 +129,22 @@ namespace BansheeEngine
 			data += clipsSize;
 
 			AnimationCurveMapping* boneMappings = (AnimationCurveMapping*)data;
-			
+			data += boneMappingSize;
+
+			TCurveCache<Vector3>* posCache = (TCurveCache<Vector3>*)data;
+			data += posCacheSize;
+
+			TCurveCache<Quaternion>* rotCache = (TCurveCache<Quaternion>*)data;
+			data += rotCacheSize;
+
+			TCurveCache<Vector3>* scaleCache = (TCurveCache<Vector3>*)data;
+			data += scaleCacheSize;
+
+			TCurveCache<float>* genCache = (TCurveCache<float>*)data;
+			data += genCacheSize;
+
+			genericCurveOutputs = (float*)data;
+
 			UINT32 curLayerIdx = 0;
 			UINT32 curStateIdx = 0;
 
@@ -110,10 +164,19 @@ namespace BansheeEngine
 					state.curves = clipInfo.clip->getCurves();
 					state.weight = clipInfo.state.weight;
 					state.loop = clipInfo.state.wrapMode == AnimWrapMode::Loop;
+					state.time = clipInfo.state.time;
+
+					state.positionCaches = posCache;
+					posCache += state.curves->position.size();
+
+					state.rotationCaches = rotCache;
+					rotCache += state.curves->rotation.size();
 
-					state.positionEval.time = clipInfo.state.time;
-					state.rotationEval.time = clipInfo.state.time;
-					state.scaleEval.time = clipInfo.state.time;
+					state.scaleCaches = scaleCache;
+					scaleCache += state.curves->scale.size();
+
+					state.genericCaches = genCache;
+					genCache += state.curves->generic.size();
 
 					clipInfo.layerIdx = curLayerIdx;
 					clipInfo.stateIdx = localStateIdx;
@@ -149,10 +212,7 @@ namespace BansheeEngine
 
 			state.loop = clipInfo.state.wrapMode == AnimWrapMode::Loop;
 			state.weight = clipInfo.state.weight;
-
-			state.positionEval.time = clipInfo.state.time;
-			state.rotationEval.time = clipInfo.state.time;
-			state.scaleEval.time = clipInfo.state.time;
+			state.time = clipInfo.state.time;
 		}
 	}
 
@@ -161,18 +221,15 @@ namespace BansheeEngine
 		for (auto& clipInfo : clipInfos)
 		{
 			AnimationState& state = layers[clipInfo.layerIdx].states[clipInfo.stateIdx];
-
-			state.positionEval.time = clipInfo.state.time;
-			state.rotationEval.time = clipInfo.state.time;
-			state.scaleEval.time = clipInfo.state.time;
+			state.time = clipInfo.state.time;
 		}
 	}
 
 	Animation::Animation()
 		:mDefaultWrapMode(AnimWrapMode::Loop), mDefaultSpeed(1.0f), mDirty(AnimDirtyStateFlag::Skeleton)
 	{
-		mAnimProxy = bs_shared_ptr_new<AnimationProxy>();
 		mId = AnimationManager::instance().registerAnimation(this);
+		mAnimProxy = bs_shared_ptr_new<AnimationProxy>(mId);
 	}
 
 	Animation::~Animation()

+ 27 - 29
Source/BansheeCore/Source/BsAnimationCurve.cpp

@@ -164,13 +164,11 @@ namespace BansheeEngine
 	}
 
 	template <class T>
-	T TAnimationCurve<T>::evaluate(const TCurveEvaluatorData<T>& animInstance, bool loop) const
+	T TAnimationCurve<T>::evaluate(float time, const TCurveCache<T>& cache, bool loop) const
 	{
 		if (mKeyframes.size() == 0)
 			return T();
 
-		float time = animInstance.time;
-
 		// Wrap time if looping
 		if(loop)
 		{
@@ -181,19 +179,19 @@ namespace BansheeEngine
 		}
 
 		// If time is within cache, evaluate it directly
-		if (time >= animInstance.cachedCurveStart && time < animInstance.cachedCurveEnd)
-			return evaluateCache(animInstance);
+		if (time >= cache.cachedCurveStart && time < cache.cachedCurveEnd)
+			return evaluateCache(time, cache);
 
 		// Clamp to start, cache constant of the first key and return
 		if(time < mStart)
 		{
-			animInstance.cachedCurveStart = -std::numeric_limits<float>::infinity();
-			animInstance.cachedCurveEnd = mStart;
-			animInstance.cachedKey = 0;
-			animInstance.cachedCubicCoefficients[0] = T();
-			animInstance.cachedCubicCoefficients[1] = T();
-			animInstance.cachedCubicCoefficients[2] = T();
-			animInstance.cachedCubicCoefficients[3] = mKeyframes[0].value;
+			cache.cachedCurveStart = -std::numeric_limits<float>::infinity();
+			cache.cachedCurveEnd = mStart;
+			cache.cachedKey = 0;
+			cache.cachedCubicCoefficients[0] = T();
+			cache.cachedCubicCoefficients[1] = T();
+			cache.cachedCubicCoefficients[2] = T();
+			cache.cachedCubicCoefficients[3] = mKeyframes[0].value;
 
 			return mKeyframes[0].value;
 		}
@@ -202,13 +200,13 @@ namespace BansheeEngine
 		{
 			UINT32 lastKey = (UINT32)mKeyframes.size() - 1;
 
-			animInstance.cachedCurveStart = mEnd;
-			animInstance.cachedCurveEnd = std::numeric_limits<float>::infinity();
-			animInstance.cachedKey = lastKey;
-			animInstance.cachedCubicCoefficients[0] = T();
-			animInstance.cachedCubicCoefficients[1] = T();
-			animInstance.cachedCubicCoefficients[2] = T();
-			animInstance.cachedCubicCoefficients[3] = mKeyframes[lastKey].value;
+			cache.cachedCurveStart = mEnd;
+			cache.cachedCurveEnd = std::numeric_limits<float>::infinity();
+			cache.cachedKey = lastKey;
+			cache.cachedCubicCoefficients[0] = T();
+			cache.cachedCubicCoefficients[1] = T();
+			cache.cachedCubicCoefficients[2] = T();
+			cache.cachedCubicCoefficients[3] = mKeyframes[lastKey].value;
 
 			return mKeyframes[lastKey].value;
 		}
@@ -217,22 +215,22 @@ namespace BansheeEngine
 		UINT32 leftKeyIdx;
 		UINT32 rightKeyIdx;
 
-		findKeys(time, animInstance, leftKeyIdx, rightKeyIdx);
+		findKeys(time, cache, leftKeyIdx, rightKeyIdx);
 
 		// Calculate cubic hermite curve coefficients so we can store them in cache
 		const KeyFrame& leftKey = mKeyframes[leftKeyIdx];
 		const KeyFrame& rightKey = mKeyframes[rightKeyIdx];
 
-		animInstance.cachedCurveStart = leftKey.time;
-		animInstance.cachedCurveEnd = rightKey.time;
+		cache.cachedCurveStart = leftKey.time;
+		cache.cachedCurveEnd = rightKey.time;
 
 		float length = rightKey.time - leftKey.time;
 		Math::cubicHermiteCoefficients(leftKey.value, rightKey.value, leftKey.outTangent, rightKey.inTangent, length,
-			animInstance.cachedCubicCoefficients);
+			cache.cachedCubicCoefficients);
 
-		setStepCoefficients(leftKey, rightKey, animInstance.cachedCubicCoefficients);
+		setStepCoefficients(leftKey, rightKey, cache.cachedCubicCoefficients);
 
-		T output = evaluateCache(animInstance);
+		T output = evaluateCache(time, cache);
 		return output;
 	}
 
@@ -295,16 +293,16 @@ namespace BansheeEngine
 	}
 
 	template <class T>
-	T TAnimationCurve<T>::evaluateCache(const TCurveEvaluatorData<T>& animInstance) const
+	T TAnimationCurve<T>::evaluateCache(float time, const TCurveCache<T>& cache) const
 	{
-		float t = animInstance.time - animInstance.cachedCurveStart;
+		float t = time - cache.cachedCurveStart;
 
-		const T* coeffs = animInstance.cachedCubicCoefficients;
+		const T* coeffs = cache.cachedCubicCoefficients;
 		return t * (t * (t * coeffs[0] + coeffs[1]) + coeffs[2]) + coeffs[3];
 	}
 
 	template <class T>
-	void TAnimationCurve<T>::findKeys(float time, const TCurveEvaluatorData<T>& animInstance, UINT32& leftKey, UINT32& rightKey) const
+	void TAnimationCurve<T>::findKeys(float time, const TCurveCache<T>& animInstance, UINT32& leftKey, UINT32& rightKey) const
 	{
 		// Check nearby keys first if there is cached data
 		if (animInstance.cachedKey != (UINT32)-1)

+ 120 - 6
Source/BansheeCore/Source/BsAnimationManager.cpp

@@ -2,6 +2,7 @@
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "BsAnimationManager.h"
 #include "BsAnimation.h"
+#include "BsAnimationClip.h"
 #include "BsTaskScheduler.h"
 #include "BsTime.h"
 
@@ -9,7 +10,8 @@ namespace BansheeEngine
 {
 	AnimationManager::AnimationManager()
 		: mNextId(1), mUpdateRate(1.0f / 60.0f), mAnimationTime(0.0f), mLastAnimationUpdateTime(0.0f)
-		, mNextAnimationUpdateTime(0.0f), mPaused(false), mWorkerRunning(false)
+		, mNextAnimationUpdateTime(0.0f), mPaused(false), mWorkerRunning(false), mPoseReadBufferIdx(0)
+		, mPoseWriteBufferIdx(0), mDataReadyCount(0)
 	{
 		mAnimationWorker = Task::create("Animation", std::bind(&AnimationManager::evaluateAnimation, this));
 	}
@@ -57,10 +59,15 @@ namespace BansheeEngine
 		float timeDelta = mAnimationTime - mLastAnimationUpdateTime;
 		mLastAnimationUpdateTime = mAnimationTime;
 
+		// Update poses in the currently active buffer. Multi-buffering allows the core thread to safely read the
+		// poses without worrying about them being overwritten by another call to postUpdate, as long as the simulation
+		// thread doesn't go more than (CoreThread::NUM_SYNC_BUFFERS - 1) frames ahead.
+		mProxies.clear();
 		for (auto& anim : mAnimations)
+		{
 			anim.second->updateAnimProxy(timeDelta);
-
-		// TODO - Gather a list of all anim proxies?
+			mProxies[anim.second->mId] = anim.second->mAnimProxy;
+		}
 
 		// Make sure thread finishes writing all changes to the anim proxies as they will be read by the animation thread
 		std::atomic_thread_fence(std::memory_order_release);
@@ -78,15 +85,122 @@ namespace BansheeEngine
 		// Make sure we don't load obsolete anim proxy data written by the simulation thread
 		std::atomic_thread_fence(std::memory_order_acquire);
 
-		// TODO - Evaluate skeletal poses for clips that have a skeleton
-		// TODO - Evaluate TRS for individual scene objects that have no skeleton
-		// TODO - Evaluate generic curves
+		// No need for locking, as we are sure that only postUpdate() writes to the proxy buffer, and increments the write
+		// buffer index. And it's called sequentially ensuring previous call to evaluate finishes.
+
+		UINT32 totalNumBones = 0;
+		for (auto& anim : mProxies)
+		{
+			if (anim->skeleton != nullptr)
+				totalNumBones += anim->skeleton->getNumBones();
+		}
+
+		RendererAnimationData& renderData = mAnimData[mPoseWriteBufferIdx];
+		mPoseWriteBufferIdx = (mPoseWriteBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
+
+		renderData.poseInfos.clear();
+		renderData.transforms.resize(totalNumBones);
+
+		UINT32 curBoneIdx = 0;
+		for(auto& anim : mProxies)
+		{
+			if (anim->skeleton != nullptr)
+			{
+				UINT32 numBones = anim->skeleton->getNumBones();
+
+				RendererAnimationData::PoseInfo info;
+				info.animId = anim->id;
+				info.startIdx = curBoneIdx;
+				info.numBones = numBones;
+
+				Matrix4* boneDst = renderData.transforms.data() + curBoneIdx;
+				anim->skeleton->getPose(boneDst, anim->localPose, anim->layers, anim->numLayers);
+
+				curBoneIdx += numBones;
+			}
+			else
+			{
+				// Note: No blending for non-skeletal animations, just use first animation
+				if(anim->numLayers > 0 && anim->layers[0].numStates > 0)
+				{
+					const AnimationState& state = anim->layers[0].states[0];
+
+					{
+						UINT32 numCurves = (UINT32)state.curves->position.size();
+						for(UINT32 i = 0; i < numCurves; i++)
+						{
+							const TAnimationCurve<Vector3>& curve = state.curves->position[i].curve;
+							anim->localPose.positions[i] = curve.evaluate(state.time, state.positionCaches[i], state.loop);
+						}
+					}
+
+					{
+						UINT32 numCurves = (UINT32)state.curves->rotation.size();
+						for (UINT32 i = 0; i < numCurves; i++)
+						{
+							const TAnimationCurve<Quaternion>& curve = state.curves->rotation[i].curve;
+							anim->localPose.rotations[i] = curve.evaluate(state.time, state.rotationCaches[i], state.loop);
+						}
+					}
+
+					{
+						UINT32 numCurves = (UINT32)state.curves->scale.size();
+						for (UINT32 i = 0; i < numCurves; i++)
+						{
+							const TAnimationCurve<Vector3>& curve = state.curves->scale[i].curve;
+							anim->localPose.scales[i] = curve.evaluate(state.time, state.scaleCaches[i], state.loop);
+						}
+					}
+				}
+			}
+
+			// Note: No blending for generic animations, just use first animation
+			if (anim->numLayers > 0 && anim->layers[0].numStates > 0)
+			{
+				const AnimationState& state = anim->layers[0].states[0];
+
+				{
+					UINT32 numCurves = (UINT32)state.curves->generic.size();
+					for (UINT32 i = 0; i < numCurves; i++)
+					{
+						const TAnimationCurve<float>& curve = state.curves->generic[i].curve;
+						anim->genericCurveOutputs[i] = curve.evaluate(state.time, state.genericCaches[i], state.loop);
+					}
+				}
+			}
+		}
+
+		mDataReadyCount.fetch_add(1, std::memory_order_relaxed);
 
 		// Make sure the thread finishes writing skeletal pose and other evaluation outputs as they will be read by sim and
 		// core threads
 		std::atomic_thread_fence(std::memory_order_release);
 	}
 
+	const RendererAnimationData& AnimationManager::getRendererData()
+	{
+		mAnimationWorker->wait();
+
+		// Make sure we don't load obsolete skeletal pose and other evaluation outputs written by the animation thread
+		std::atomic_thread_fence(std::memory_order_acquire);
+
+		INT32 dataReadyCount = mDataReadyCount.load(std::memory_order_relaxed);
+		assert(dataReadyCount <= CoreThread::NUM_SYNC_BUFFERS);
+
+		if (dataReadyCount <= 0)
+		{
+			static RendererAnimationData dummy;
+			return dummy;
+		}
+
+		const RendererAnimationData& output = mAnimData[mPoseReadBufferIdx];
+
+		mPoseReadBufferIdx = (mPoseReadBufferIdx + 1) % CoreThread::NUM_SYNC_BUFFERS;
+		mDataReadyCount.fetch_add(-1, std::memory_order_relaxed);
+
+		return output;
+	}
+
 	UINT64 AnimationManager::registerAnimation(Animation* anim)
 	{
 		mAnimations[mNextId] = anim;

+ 3 - 3
Source/BansheeCore/Source/BsCoreThread.cpp

@@ -21,7 +21,7 @@ namespace BansheeEngine
 		, mMaxCommandNotifyId(0)
 		, mSyncedCoreAccessor(nullptr)
 	{
-		for (UINT32 i = 0; i < NUM_FRAME_ALLOCS; i++)
+		for (UINT32 i = 0; i < NUM_SYNC_BUFFERS; i++)
 		{
 			mFrameAllocs[i] = bs_new<FrameAlloc>();
 			mFrameAllocs[i]->setOwnerThread(BS_THREAD_CURRENT_ID); // Sim thread
@@ -56,7 +56,7 @@ namespace BansheeEngine
 			mCommandQueue = nullptr;
 		}
 
-		for (UINT32 i = 0; i < NUM_FRAME_ALLOCS; i++)
+		for (UINT32 i = 0; i < NUM_SYNC_BUFFERS; i++)
 		{
 			mFrameAllocs[i]->setOwnerThread(BS_THREAD_CURRENT_ID); // Sim thread
 			bs_delete(mFrameAllocs[i]);
@@ -243,7 +243,7 @@ namespace BansheeEngine
 
 	void CoreThread::update()
 	{
-		for (UINT32 i = 0; i < NUM_FRAME_ALLOCS; i++)
+		for (UINT32 i = 0; i < NUM_SYNC_BUFFERS; i++)
 			mFrameAllocs[i]->setOwnerThread(mCoreThreadId);
 
 		mActiveFrameAlloc = (mActiveFrameAlloc + 1) % 2;

+ 67 - 51
Source/BansheeCore/Source/BsSkeleton.cpp

@@ -6,19 +6,16 @@
 
 namespace BansheeEngine
 {
-	SkeletonPose::SkeletonPose()
-		: bonePoses(nullptr), positions(nullptr), rotations(nullptr), scales(nullptr), numBones(0)
+	/** Constructs a pose with no bones; all channel pointers start out null. */
+	LocalSkeletonPose::LocalSkeletonPose()
+		: positions(nullptr)
+		, rotations(nullptr)
+		, scales(nullptr)
+		, numBones(0)
 	{ }
 
-	SkeletonPose::SkeletonPose(UINT32 numBones)
+	/**
+	 * Constructs a pose with room for @p numBones positions, rotations and scales, carved out of one allocation. The
+	 * entries are left uninitialized.
+	 *
+	 * @param[in]	numBones	Number of bones to allocate storage for.
+	 */
+	LocalSkeletonPose::LocalSkeletonPose(UINT32 numBones)
 		: numBones(numBones)
 	{
-		UINT32 elementSize = sizeof(Matrix4) + sizeof(Vector3) * 2 + sizeof(Quaternion);
-		UINT8* buffer = (UINT8*)bs_alloc(elementSize * sizeof(numBones));
+		// Bytes needed per bone: one position and one scale (Vector3 each) plus one rotation (Quaternion)
+		UINT32 elementSize = sizeof(Vector3) * 2 + sizeof(Quaternion);
+
+		// Scale by the bone count itself; sizeof(numBones) is only the size of the counter's type
+		UINT8* buffer = (UINT8*)bs_alloc(elementSize * numBones);
 
-		bonePoses = (Matrix4*)buffer;
-		buffer += sizeof(Matrix4) * numBones;
-
 		positions = (Vector3*)buffer;
 		buffer += sizeof(Vector3) * numBones;
 
@@ -26,20 +23,27 @@ namespace BansheeEngine
 		buffer += sizeof(Quaternion) * numBones;
 
 		scales = (Vector3*)buffer;
+	}
 
-		for (UINT32 i = 0; i < numBones; i++)
-		{
-			bonePoses[i] = Matrix4::IDENTITY;
-			positions[i] = Vector3::ZERO;
-			rotations[i] = Quaternion::IDENTITY;
-			scales[i] = Vector3::ONE;
-		}
+	/**
+	 * Constructs a pose whose position, rotation and scale arrays have independent lengths, all carved out of one
+	 * allocation. The bone count is set to zero and the entries are left uninitialized.
+	 *
+	 * @param[in]	numPos		Number of position entries to allocate.
+	 * @param[in]	numRot		Number of rotation entries to allocate.
+	 * @param[in]	numScale	Number of scale entries to allocate.
+	 */
+	LocalSkeletonPose::LocalSkeletonPose(UINT32 numPos, UINT32 numRot, UINT32 numScale)
+		: numBones(0)
+	{
+		const size_t positionBytes = sizeof(Vector3) * numPos;
+		const size_t rotationBytes = sizeof(Quaternion) * numRot;
+		const size_t scaleBytes = sizeof(Vector3) * numScale;
+
+		UINT32 totalBytes = positionBytes + rotationBytes + scaleBytes;
+		UINT8* block = (UINT8*)bs_alloc(totalBytes);
+
+		// Layout inside the block: [positions | rotations | scales]
+		positions = (Vector3*)block;
+		rotations = (Quaternion*)(block + positionBytes);
+		scales = (Vector3*)(block + positionBytes + rotationBytes);
 	}
 
-	SkeletonPose::~SkeletonPose()
+	/** Releases the pose storage, if any was allocated. */
+	LocalSkeletonPose::~LocalSkeletonPose()
 	{
-		if(bonePoses != nullptr)
-			bs_free(bonePoses);
+		// The allocating constructors point `positions` at the start of the buffer, so it doubles as the handle to free
+		if (positions == nullptr)
+			return;
+
+		bs_free(positions);
 	}
 
 	Skeleton::Skeleton()
@@ -73,41 +77,54 @@ namespace BansheeEngine
 		return bs_shared_ptr<Skeleton>(rawPtr);
 	}
 
-	void Skeleton::getPose(SkeletonPose& pose, const AnimationClip& clip, float time, bool loop)
+	/**
+	 * Evaluates a single clip by building a one-state, one-layer setup (weight 1, given time and loop flag) and
+	 * forwarding it to the layered getPose() overload.
+	 *
+	 * @param[out]	pose		Forwarded unchanged to the layered overload.
+	 * @param[out]	localPose	Forwarded unchanged to the layered overload.
+	 * @param[in]	clip		Clip providing the curves and the bone-to-curve mapping.
+	 * @param[in]	time		Time stored in the temporary animation state.
+	 * @param[in]	loop		Loop flag stored in the temporary animation state.
+	 */
+	void Skeleton::getPose(Matrix4* pose, LocalSkeletonPose& localPose, const AnimationClip& clip, float time,
+		bool loop)
 	{
-		Vector<AnimationCurveMapping> boneToCurveMapping(mNumBones);
-
-		AnimationState state;
-		state.curves = clip.getCurves();
-		state.boneToCurveMapping = boneToCurveMapping.data();
-		state.loop = loop;
-		state.weight = 1.0f;
-		state.positionEval.time = time;
-		state.rotationEval.time = time;
-		state.scaleEval.time = time;
-
-		AnimationStateLayer layer;
-		layer.index = 0;
-		layer.normalizeWeights = false;
-		layer.states = &state;
-		layer.numStates = 1;
-
-		clip.getBoneMapping(*this, state.boneToCurveMapping);
-
-		getPose(pose, &layer, 1);
+		// NOTE(review): the FrameVector scratch storage below is presumably served by the allocator bracketed by
+		// bs_frame_mark()/bs_frame_clear() - confirm
+		bs_frame_mark();
+		{
+			// One curve mapping slot per bone; populated by the clip just before evaluation
+			FrameVector<AnimationCurveMapping> curveMappings(mNumBones);
+
+			// A single full-weight state sampling the clip at the requested time
+			AnimationState clipState;
+			clipState.time = time;
+			clipState.loop = loop;
+			clipState.weight = 1.0f;
+			clipState.curves = clip.getCurves();
+			clipState.boneToCurveMapping = curveMappings.data();
+
+			// Temporary caches, one per position/rotation/scale curve in the clip
+			FrameVector<TCurveCache<Vector3>> posCaches(clipState.curves->position.size());
+			FrameVector<TCurveCache<Quaternion>> rotCaches(clipState.curves->rotation.size());
+			FrameVector<TCurveCache<Vector3>> sclCaches(clipState.curves->scale.size());
+
+			clipState.genericCaches = nullptr;
+			clipState.positionCaches = posCaches.data();
+			clipState.rotationCaches = rotCaches.data();
+			clipState.scaleCaches = sclCaches.data();
+
+			// Wrap the state in a lone layer so the layered overload can do the actual work
+			AnimationStateLayer singleLayer;
+			singleLayer.states = &clipState;
+			singleLayer.numStates = 1;
+			singleLayer.index = 0;
+			singleLayer.normalizeWeights = false;
+
+			clip.getBoneMapping(*this, clipState.boneToCurveMapping);
+
+			getPose(pose, localPose, &singleLayer, 1);
+		}
+		bs_frame_clear();
 	}
 
-	void Skeleton::getPose(SkeletonPose& pose, const AnimationStateLayer* layers, UINT32 numLayers)
+	void Skeleton::getPose(Matrix4* pose, LocalSkeletonPose& localPose, const AnimationStateLayer* layers,
+		UINT32 numLayers)
 	{
 		// Note: If more performance is required this method could be optimized with vector instructions
 
-		assert(pose.numBones == mNumBones);
+		assert(localPose.numBones == mNumBones);
 
 		for(UINT32 i = 0; i < mNumBones; i++)
 		{
-			pose.positions[i] = Vector3::ZERO;
-			pose.rotations[i] = Quaternion::IDENTITY;
-			pose.scales[i] = Vector3::ONE;
+			localPose.positions[i] = Vector3::ZERO;
+			localPose.rotations[i] = Quaternion::IDENTITY;
+			localPose.scales[i] = Vector3::ONE;
 		}
 
 		for(UINT32 i = 0; i < numLayers; i++)
@@ -126,7 +143,6 @@ namespace BansheeEngine
 			else
 				invLayerWeight = 1.0f;
 
-
 			for (UINT32 j = 0; j < layer.numStates; j++)
 			{
 				const AnimationState& state = layer.states[i];
@@ -139,19 +155,19 @@ namespace BansheeEngine
 					if (mapping.position != (UINT32)-1)
 					{
 						const TAnimationCurve<Vector3>& curve = state.curves->position[mapping.position].curve;
-						pose.positions[k] += curve.evaluate(state.positionEval, state.loop) * normWeight;
+						localPose.positions[k] += curve.evaluate(state.time, state.positionCaches[k], state.loop) * normWeight;
 					}
 
 					if (mapping.rotation != (UINT32)-1)
 					{
 						const TAnimationCurve<Quaternion>& curve = state.curves->rotation[mapping.rotation].curve;
-						pose.rotations[k] += curve.evaluate(state.rotationEval, state.loop) * normWeight;
+						localPose.rotations[k] += curve.evaluate(state.time, state.rotationCaches[k], state.loop) * normWeight;
 					}
 
 					if (mapping.scale != (UINT32)-1)
 					{
 						const TAnimationCurve<Vector3>& curve = state.curves->scale[mapping.scale].curve;
-						pose.scales[k] += curve.evaluate(state.scaleEval, state.loop) * normWeight;
+						localPose.scales[k] += curve.evaluate(state.time, state.scaleCaches[k], state.loop) * normWeight;
 					}
 				}
 			}
@@ -160,10 +176,10 @@ namespace BansheeEngine
 		// Calculate local pose matrices
 		for(UINT32 i = 0; i < mNumBones; i++)
 		{
-			pose.rotations[i].normalize();
+			localPose.rotations[i].normalize();
 
-			pose.bonePoses[i] = Matrix4::TRS(pose.positions[i], pose.rotations[i], pose.scales[i]);
-			pose.bonePoses[i] = pose.bonePoses[i] * mInvBindPoses[i];
+			pose[i] = Matrix4::TRS(localPose.positions[i], localPose.rotations[i], localPose.scales[i]);
+			pose[i] = pose[i] * mInvBindPoses[i];
 		}
 
 		// Calculate global poses
@@ -183,7 +199,7 @@ namespace BansheeEngine
 			if (!isGlobal[parentBoneIdx])
 				calcGlobal(parentBoneIdx);
 
-			pose.bonePoses[boneIdx] = pose.bonePoses[parentBoneIdx] * pose.bonePoses[boneIdx];
+			pose[boneIdx] = pose[parentBoneIdx] * pose[boneIdx];
 			isGlobal[boneIdx] = true;
 		};