Răsfoiți Sursa

Plenty of more work on HLSL programs

Marko Pintera 13 ani în urmă
părinte
comite
ead3f972c8

+ 81 - 81
CamelotD3D11RenderSystem/Include/CmD3D11GpuProgram.h

@@ -5,93 +5,93 @@
 
 namespace CamelotEngine
 {
-	class CM_D3D11_EXPORT D3D11GpuProgram : public GpuProgram
-	{
-	public:
-		D3D11GpuProgram(GpuProgramType type, GpuProgramProfile profile);
-
-	protected:
-		void loadImpl(void);
-
+	class CM_D3D11_EXPORT D3D11GpuProgram : public GpuProgram
+	{
+	public:
+		D3D11GpuProgram(GpuProgramType type);
+
+	protected:
+		void loadImpl(void);
+
 		/**
 		 * @brief	Loads shader from source. Not used as DX11 doesn't support
 		 * 			assembly shaders.
 		 */
-		void loadFromSource(void);
-
+		void loadFromSource(void);
+
 		/**
 		 * @brief	Loads shader from microcode.
 		 */
-		virtual void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode) = 0;
-	};
-
-	class CM_D3D11_EXPORT D3D11GpuVertexProgram : public D3D11GpuProgram
-	{
-	protected:
-		ID3D11VertexShader* mVertexShader;
-	public:
-		D3D11GpuVertexProgram(GpuProgramProfile profile);
-		~D3D11GpuVertexProgram();
-
-		ID3D11VertexShader* getVertexShader(void) const;
-	protected:
-		void unloadImpl(void);
-		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
-	};
-
-	class CM_D3D11_EXPORT D3D11GpuFragmentProgram : public D3D11GpuProgram
-	{
-	protected:
-		ID3D11PixelShader* mPixelShader;
-	public:
-		D3D11GpuFragmentProgram(GpuProgramProfile profile);
-		~D3D11GpuFragmentProgram();
-
-		ID3D11PixelShader* getPixelShader(void) const;
-	protected:
-		void unloadImpl(void);
-		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
-	};
-
-	class D3D11GpuDomainProgram : public D3D11GpuProgram
-	{
-	protected:
-		ID3D11DomainShader* mDomainShader;
-	public:
-		D3D11GpuDomainProgram(GpuProgramProfile profile);
-		~D3D11GpuDomainProgram();
-
-		ID3D11DomainShader* getDomainShader(void) const;
-	protected:
-		void unloadImpl(void);
-		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
-	};
-
-	class D3D11GpuHullProgram : public D3D11GpuProgram
-	{
-	protected:
-		ID3D11HullShader* mHullShader;
-	public:
-		D3D11GpuHullProgram(GpuProgramProfile profile);
-		~D3D11GpuHullProgram();
-
-		ID3D11HullShader* getHullShader() const;
-	protected:
-		void unloadImpl(void);
-		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
-	};
-
-	class D3D11GpuGeometryProgram : public D3D11GpuProgram
-	{
-	protected:
-		ID3D11GeometryShader* mGeometryShader;
-	public:
-		D3D11GpuGeometryProgram(GpuProgramProfile profile);
-		~D3D11GpuGeometryProgram();
-
-		ID3D11GeometryShader* getGeometryShader(void) const;
-	protected:
-		void unloadImpl(void);
-		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
+		virtual void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode) = 0;
+	};
+
+	class CM_D3D11_EXPORT D3D11GpuVertexProgram : public D3D11GpuProgram
+	{
+	protected:
+		ID3D11VertexShader* mVertexShader;
+	public:
+		D3D11GpuVertexProgram();
+		~D3D11GpuVertexProgram();
+
+		ID3D11VertexShader* getVertexShader(void) const;
+	protected:
+		void unloadImpl(void);
+		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
+	};
+
+	class CM_D3D11_EXPORT D3D11GpuFragmentProgram : public D3D11GpuProgram
+	{
+	protected:
+		ID3D11PixelShader* mPixelShader;
+	public:
+		D3D11GpuFragmentProgram();
+		~D3D11GpuFragmentProgram();
+
+		ID3D11PixelShader* getPixelShader(void) const;
+	protected:
+		void unloadImpl(void);
+		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
+	};
+
+	class D3D11GpuDomainProgram : public D3D11GpuProgram
+	{
+	protected:
+		ID3D11DomainShader* mDomainShader;
+	public:
+		D3D11GpuDomainProgram();
+		~D3D11GpuDomainProgram();
+
+		ID3D11DomainShader* getDomainShader(void) const;
+	protected:
+		void unloadImpl(void);
+		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
+	};
+
+	class D3D11GpuHullProgram : public D3D11GpuProgram
+	{
+	protected:
+		ID3D11HullShader* mHullShader;
+	public:
+		D3D11GpuHullProgram();
+		~D3D11GpuHullProgram();
+
+		ID3D11HullShader* getHullShader() const;
+	protected:
+		void unloadImpl(void);
+		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
+	};
+
+	class D3D11GpuGeometryProgram : public D3D11GpuProgram
+	{
+	protected:
+		ID3D11GeometryShader* mGeometryShader;
+	public:
+		D3D11GpuGeometryProgram();
+		~D3D11GpuGeometryProgram();
+
+		ID3D11GeometryShader* getGeometryShader(void) const;
+	protected:
+		void unloadImpl(void);
+		void loadFromMicrocode(D3D11Device& device, ID3D10Blob* microcode);
 	};
 }

+ 10 - 10
CamelotD3D11RenderSystem/Include/CmD3D11GpuProgramManager.h

@@ -5,15 +5,15 @@
 
 namespace CamelotEngine
 {
-	class D3D11GpuProgramManager : public GpuProgramManager
-	{
-	public:
-		D3D11GpuProgramManager(D3D11Device& device);
-		~D3D11GpuProgramManager();
-
-	protected:
-		D3D11Device& mDevice;
-		
-		GpuProgram* create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype, GpuProgramProfile profile);
+	class D3D11GpuProgramManager : public GpuProgramManager
+	{
+	public:
+		D3D11GpuProgramManager(D3D11Device& device);
+		~D3D11GpuProgramManager();
+
+	protected:
+		D3D11Device& mDevice;
+		
+		GpuProgram* create(const String& source, const String& entryPoint, const String& language, GpuProgramType gptype, GpuProgramProfile profile);
 	};
 }

+ 58 - 14
CamelotD3D11RenderSystem/Include/CmD3D11HLSLProgram.h

@@ -22,12 +22,12 @@ namespace CamelotEngine
 		/** Gets whether backwards compatibility is enabled. */
 		bool getEnableBackwardsCompatibility() const { return mEnableBackwardsCompatibility; }
 
-		const HLSLMicroCode& getMicroCode() const;
-		unsigned int getNumInputs() const;
-		unsigned int getNumOutputs() const;
+		const HLSLMicroCode& getMicroCode() const { return mMicrocode; }
+		UINT32 getNumInputs() const { return (UINT32)mInputParameters.size(); }
+		UINT32 getNumOutputs() const { return (UINT32)mOutputParameters.size(); }
 
-		const D3D11_SIGNATURE_PARAMETER_DESC& getInputParamDesc(unsigned int index) const;
-		const D3D11_SIGNATURE_PARAMETER_DESC& getOutputParamDesc(unsigned int index) const;
+		const D3D11_SIGNATURE_PARAMETER_DESC& getInputParamDesc(unsigned int index) const { return mInputParameters.at(index); }
+		const D3D11_SIGNATURE_PARAMETER_DESC& getOutputParamDesc(unsigned int index) const { return mOutputParameters.at(index); }
 
 	protected:
 		friend class D3D11HLSLProgramFactory;
@@ -35,19 +35,63 @@ namespace CamelotEngine
 		D3D11HLSLProgram(const String& source, const String& entryPoint, const String& language, 
 			GpuProgramType gptype, GpuProgramProfile profile, bool isAdjacencyInfoRequired = false);
 
-        ///** Internal load implementation, must be implemented by subclasses.
-        //*/
-        //void loadFromSource(void);
-        ///** Internal method for creating an appropriate low-level program from this
-        //high-level program, must be implemented by subclasses. */
-        //void createLowLevelImpl(void);
-        ///// Internal unload implementation, must be implemented by subclasses
-        //void unloadHighLevelImpl(void);
+        /**
+         * @copydoc GpuProgram::loadFromSource()
+         */
+        void loadFromSource();
 
-		ID3DBlob* compileMicrocode();
+        /**
+         * @copydoc GpuProgram::unload_internal()
+         */
+        void unload_internal();
 
+		/**
+         * @copydoc HighLevelGpuProgram::buildConstantDefinitions()
+         */
+		void buildConstantDefinitions() const;
 	private:
 		bool mColumnMajorMatrices;
 		bool mEnableBackwardsCompatibility;
+
+		HLSLMicroCode mMicrocode;
+
+		struct D3D11_VariableDesc
+		{
+			String name;
+			D3D11_SHADER_TYPE_DESC desc;
+		};
+
+		struct D3D11_ShaderBufferDesc
+		{
+			D3D11_SHADER_BUFFER_DESC desc;
+			vector<D3D11_SHADER_VARIABLE_DESC>::type variables;
+			vector<D3D11_SHADER_TYPE_DESC>::type variableTypes;
+		};
+
+		vector<D3D11_ShaderBufferDesc>::type mShaderBuffers;
+		vector<D3D11_SIGNATURE_PARAMETER_DESC>::type mInputParameters;
+		vector<D3D11_SIGNATURE_PARAMETER_DESC>::type mOutputParameters;
+
+		vector<HardwareConstantBufferPtr>::type mConstantBuffers;
+
+		/**
+		 * @brief	Compiles the shader from source and generates the microcode.
+		 */
+		ID3DBlob* compileMicrocode();
+
+		/**
+		 * @brief	Reflects the microcode and extracts input/output parameters, and constant
+		 * 			buffer structures used by the program.
+		 */
+		void populateParametersAndConstants(ID3DBlob* microcode);
+
+		void populateConstantBufferParameters(ID3D11ShaderReflectionConstantBuffer* bufferReflection);
+
+		void populateParameterDefinition(const D3D11_SHADER_VARIABLE_DESC& paramDesc, const D3D11_SHADER_TYPE_DESC& d3dDesc, GpuConstantDefinition& def) const;
+
+		/**
+		 * @brief	Creates constant buffers based on available parameter and constant data.
+		 */
+		void createConstantBuffers();
 	};
 }

+ 13 - 2
CamelotD3D11RenderSystem/Include/CmD3D11Prerequisites.h

@@ -44,14 +44,25 @@ namespace CamelotEngine
 	class D3D11Device;
 	class D3D11HardwareBuffer;
 	class D3D11HardwarePixelBuffer;
+	class D3D11GpuVertexProgram;
+	class D3D11GpuFragmentProgram;
+	class D3D11GpuGeometryProgram;
+	class D3D11GpuHullProgram;
+	class D3D11GpuDomainProgram;
 
-	enum TypeID_D3D9
+	enum TypeID_D3D11
 	{
-		TID_D3D9_HLSLProgram = 12000
+		TID_D3D11_HLSLProgram = 12000
 	};
 
 	typedef vector<char*>::type HLSLMicroCode;
 
+	typedef std::shared_ptr<D3D11GpuVertexProgram> D3D11GpuVertexProgramPtr;
+	typedef std::shared_ptr<D3D11GpuFragmentProgram> D3D11GpuFragmentProgramPtr;
+	typedef std::shared_ptr<D3D11GpuGeometryProgram> D3D11GpuGeometryProgramPtr;
+	typedef std::shared_ptr<D3D11GpuHullProgram> D3D11GpuHullProgramPtr;
+	typedef std::shared_ptr<D3D11GpuDomainProgram> D3D11GpuDomainProgramPtr;
+
 // Should we ask D3D to manage vertex/index buffers automatically?
 // Doing so avoids lost devices, but also has a performance impact
 // which is unacceptably bad when using very large buffers

+ 17 - 17
CamelotD3D11RenderSystem/Source/CmD3D11GpuProgram.cpp

@@ -5,8 +5,8 @@
 
 namespace CamelotEngine
 {
-	D3D11GpuProgram::D3D11GpuProgram(GpuProgramType type, GpuProgramProfile profile) 
-		: GpuProgram("", "", "", type, profile)
+	D3D11GpuProgram::D3D11GpuProgram(GpuProgramType type) 
+		: GpuProgram("", "", "", type, GPP_NONE)
 	{
 
 	}
@@ -22,8 +22,8 @@ namespace CamelotEngine
 		CM_EXCEPT(RenderingAPIException, "DirectX 11 doesn't support assembly shaders.");
 	}
 
-	D3D11GpuVertexProgram::D3D11GpuVertexProgram(GpuProgramProfile profile) 
-		: D3D11GpuProgram(GPT_VERTEX_PROGRAM, profile)
+	D3D11GpuVertexProgram::D3D11GpuVertexProgram() 
+		: D3D11GpuProgram(GPT_VERTEX_PROGRAM)
 		, mVertexShader(nullptr)
 	{ }
 
@@ -42,7 +42,7 @@ namespace CamelotEngine
 			HRESULT hr = device.getD3D11Device()->CreateVertexShader( 
 				static_cast<DWORD*>(microcode->GetBufferPointer()), 
 				microcode->GetBufferSize(),
-				NULL,
+				device.getClassLinkage(),
 				&mVertexShader);
 
 			if (FAILED(hr) || device.hasError())
@@ -69,8 +69,8 @@ namespace CamelotEngine
 		return mVertexShader;
 	}
 
-	D3D11GpuFragmentProgram::D3D11GpuFragmentProgram(GpuProgramProfile profile) 
-		: D3D11GpuProgram(GPT_FRAGMENT_PROGRAM, profile)
+	D3D11GpuFragmentProgram::D3D11GpuFragmentProgram() 
+		: D3D11GpuProgram(GPT_FRAGMENT_PROGRAM)
 		, mPixelShader(nullptr)
 	{ }
 
@@ -87,7 +87,7 @@ namespace CamelotEngine
 			HRESULT hr = device.getD3D11Device()->CreatePixelShader(
 				static_cast<DWORD*>(microcode->GetBufferPointer()), 
 				microcode->GetBufferSize(),
-				NULL,
+				device.getClassLinkage(),
 				&mPixelShader);
 
 			if (FAILED(hr) || device.hasError())
@@ -113,8 +113,8 @@ namespace CamelotEngine
 		return mPixelShader;
 	}
 
-	D3D11GpuGeometryProgram::D3D11GpuGeometryProgram(GpuProgramProfile profile) 
-		: D3D11GpuProgram(GPT_GEOMETRY_PROGRAM, profile)
+	D3D11GpuGeometryProgram::D3D11GpuGeometryProgram() 
+		: D3D11GpuProgram(GPT_GEOMETRY_PROGRAM)
 		, mGeometryShader(nullptr)
 	{ }
 
@@ -131,7 +131,7 @@ namespace CamelotEngine
 			HRESULT hr = device.getD3D11Device()->CreateGeometryShader(
 				static_cast<DWORD*>(microcode->GetBufferPointer()), 
 				microcode->GetBufferSize(),
-				NULL,
+				device.getClassLinkage(),
 				&mGeometryShader);
 
 			if (FAILED(hr) || device.hasError())
@@ -157,8 +157,8 @@ namespace CamelotEngine
 		return mGeometryShader;
 	}
 
-	D3D11GpuDomainProgram::D3D11GpuDomainProgram(GpuProgramProfile profile) 
-		: D3D11GpuProgram(GPT_DOMAIN_PROGRAM, profile)
+	D3D11GpuDomainProgram::D3D11GpuDomainProgram() 
+		: D3D11GpuProgram(GPT_DOMAIN_PROGRAM)
 		, mDomainShader(nullptr)
 	{ }
 
@@ -175,7 +175,7 @@ namespace CamelotEngine
 			HRESULT hr = device.getD3D11Device()->CreateDomainShader(
 				static_cast<DWORD*>(microcode->GetBufferPointer()), 
 				microcode->GetBufferSize(),
-				NULL,
+				device.getClassLinkage(),
 				&mDomainShader);
 
 			if (FAILED(hr) || device.hasError())
@@ -201,8 +201,8 @@ namespace CamelotEngine
 		return mDomainShader;
 	}
 
-	D3D11GpuHullProgram::D3D11GpuHullProgram(GpuProgramProfile profile) 
-		: D3D11GpuProgram(GPT_HULL_PROGRAM, profile)
+	D3D11GpuHullProgram::D3D11GpuHullProgram() 
+		: D3D11GpuProgram(GPT_HULL_PROGRAM)
 		, mHullShader(nullptr)
 	{ }
 
@@ -219,7 +219,7 @@ namespace CamelotEngine
 			HRESULT hr = device.getD3D11Device()->CreateHullShader(
 				static_cast<DWORD*>(microcode->GetBufferPointer()), 
 				microcode->GetBufferSize(),
-				NULL,
+				device.getClassLinkage(),
 				&mHullShader);
 
 			if (FAILED(hr) || device.hasError())

+ 5 - 5
CamelotD3D11RenderSystem/Source/CmD3D11GpuProgramManager.cpp

@@ -16,15 +16,15 @@ namespace CamelotEngine
 		switch(gptype)
 		{
 		case GPT_VERTEX_PROGRAM:
-			return new D3D11GpuVertexProgram(profile);
+			return new D3D11GpuVertexProgram();
 		case GPT_FRAGMENT_PROGRAM:
-			return new D3D11GpuFragmentProgram(profile);
+			return new D3D11GpuFragmentProgram();
 		case GPT_HULL_PROGRAM:
-			return new D3D11GpuHullProgram(profile);
+			return new D3D11GpuHullProgram();
 		case GPT_DOMAIN_PROGRAM:
-			return new D3D11GpuDomainProgram(profile);
+			return new D3D11GpuDomainProgram();
 		case GPT_GEOMETRY_PROGRAM:
-			return new D3D11GpuGeometryProgram(profile);
+			return new D3D11GpuGeometryProgram();
 		}
 		
 		return nullptr;

+ 365 - 21
CamelotD3D11RenderSystem/Source/CmD3D11HLSLProgram.cpp

@@ -1,7 +1,12 @@
 #include "CmD3D11HLSLProgram.h"
 #include "CmRenderSystemManager.h"
 #include "CmRenderSystem.h"
+#include "CmGpuProgramManager.h"
+#include "CmD3D11GpuProgram.h"
+#include "CmHardwareBufferManager.h"
+#include "CmHardwareConstantBuffer.h"
 #include "CmException.h"
+#include "CmDebug.h"
 
 namespace CamelotEngine
 {
@@ -17,6 +22,56 @@ namespace CamelotEngine
 		unload_internal();
 	}
 
+    /**
+     * @brief	Compiles the HLSL source, caches the resulting bytecode, reflects its
+     * 			parameters and constant buffers, creates the constant buffers and finally
+     * 			creates the low-level GPU program matching mType from the bytecode.
+     */
+    void D3D11HLSLProgram::loadFromSource()
+	{
+		ID3DBlob* microcode = compileMicrocode();
+		if (microcode == nullptr)
+			CM_EXCEPT(RenderingAPIException, "HLSL compilation did not produce any microcode.");
+
+		// NOTE(review): HLSLMicroCode is declared as a vector of char*, so this resize allocates
+		// one pointer per bytecode byte. The copy stays in bounds, but the typedef probably
+		// should be a vector of char - confirm and fix in CmD3D11Prerequisites.h.
+		mMicrocode.resize(microcode->GetBufferSize());
+		if (!mMicrocode.empty()) // &mMicrocode[0] is invalid on an empty vector
+			memcpy(&mMicrocode[0], microcode->GetBufferPointer(), microcode->GetBufferSize());
+
+		populateParametersAndConstants(microcode);
+
+		createConstantBuffers();
+
+		mAssemblerProgram = GpuProgramManager::instance().createProgram("", "", "", mType, GPP_NONE); // We load it from microcode, so none of this matters
+
+		// Each case gets its own scope: a declaration with an initializer directly under a
+		// case label would be bypassed by the jump to any later label, which is ill-formed.
+		// NOTE(review): if any call below throws, 'microcode' is not released.
+		switch(mType)
+		{
+		case GPT_VERTEX_PROGRAM:
+			{
+				D3D11GpuVertexProgramPtr vertProgram = std::static_pointer_cast<D3D11GpuVertexProgram>(mAssemblerProgram);
+				vertProgram->loadFromMicrocode(D3D11RenderSystem::getPrimaryDevice(), microcode);
+			}
+			break;
+		case GPT_FRAGMENT_PROGRAM:
+			{
+				D3D11GpuFragmentProgramPtr fragProgram = std::static_pointer_cast<D3D11GpuFragmentProgram>(mAssemblerProgram);
+				fragProgram->loadFromMicrocode(D3D11RenderSystem::getPrimaryDevice(), microcode);
+			}
+			break;
+		case GPT_GEOMETRY_PROGRAM:
+			{
+				D3D11GpuGeometryProgramPtr geomProgram = std::static_pointer_cast<D3D11GpuGeometryProgram>(mAssemblerProgram);
+				geomProgram->loadFromMicrocode(D3D11RenderSystem::getPrimaryDevice(), microcode);
+			}
+			break;
+		case GPT_HULL_PROGRAM:
+			{
+				D3D11GpuHullProgramPtr hullProgram = std::static_pointer_cast<D3D11GpuHullProgram>(mAssemblerProgram);
+				hullProgram->loadFromMicrocode(D3D11RenderSystem::getPrimaryDevice(), microcode);
+			}
+			break;
+		case GPT_DOMAIN_PROGRAM:
+			{
+				D3D11GpuDomainProgramPtr domainProgram = std::static_pointer_cast<D3D11GpuDomainProgram>(mAssemblerProgram);
+				domainProgram->loadFromMicrocode(D3D11RenderSystem::getPrimaryDevice(), microcode);
+			}
+			break;
+		default:
+			// Other program types have no low-level program created here.
+			break;
+		}
+
+		SAFE_RELEASE(microcode);
+	}
+
+    /**
+     * @brief	Drops the low-level program reference and empties every container
+     * 			filled while loading (reflection data, constant buffers, bytecode).
+     */
+    void D3D11HLSLProgram::unload_internal()
+	{
+		// Let go of the low-level program first.
+		mAssemblerProgram = nullptr;
+
+		// Reflection results.
+		mOutputParameters.clear();
+		mInputParameters.clear();
+		mShaderBuffers.clear();
+
+		// Constant buffers and the cached bytecode.
+		mConstantBuffers.clear();
+		mMicrocode.clear();
+	}
+
 	const String& D3D11HLSLProgram::getLanguage() const
 	{
 		static String name = "hlsl";
@@ -80,28 +135,317 @@ namespace CamelotEngine
 		return microCode;
 	}
 
-	const HLSLMicroCode& D3D11HLSLProgram::getMicroCode() const
-	{
-		CM_EXCEPT(NotImplementedException, "Not implemented");
+	/**
+	 * @brief	Reflects compiled shader bytecode and refills mInputParameters,
+	 * 			mOutputParameters and mShaderBuffers from it.
+	 *
+	 * @param	microcode	Compiled shader blob. Must not be null.
+	 *
+	 * @note	Raises RenderingAPIException (via CM_EXCEPT) if the bytecode cannot be
+	 * 			disassembled or reflected.
+	 */
+	void D3D11HLSLProgram::populateParametersAndConstants(ID3DBlob* microcode)
+	{
+		assert(microcode != nullptr);
+
+		mShaderBuffers.clear();
+		mInputParameters.clear();
+		mOutputParameters.clear();
+
+		// The disassembly text itself is not consumed here; the call is kept so bytecode that
+		// cannot be disassembled is still rejected. The blob is released right away.
+		ID3DBlob* disassembly = nullptr;
+		HRESULT hr = D3DDisassemble((UINT*)microcode->GetBufferPointer(), 
+			microcode->GetBufferSize(), D3D_DISASM_ENABLE_COLOR_CODE, nullptr, &disassembly);
+
+		// Check the result before touching the output blob - it is not valid on failure.
+		if (FAILED(hr))
+			CM_EXCEPT(RenderingAPIException, "Unable to disassemble shader.");
+
+		if (disassembly != nullptr)
+			disassembly->Release();
+
+		ID3D11ShaderReflection* shaderReflection = nullptr;
+		hr = D3DReflect((void*)microcode->GetBufferPointer(), microcode->GetBufferSize(),
+			IID_ID3D11ShaderReflection, (void**)&shaderReflection);
+
+		if (FAILED(hr))
+			CM_EXCEPT(RenderingAPIException, "Cannot reflect D3D11 high-level shader.");
+
+		// From here on the reflection interface must be released on every exit path.
+		try
+		{
+			D3D11_SHADER_DESC shaderDesc;
+			hr = shaderReflection->GetDesc(&shaderDesc);
+
+			if (FAILED(hr))
+				CM_EXCEPT(RenderingAPIException, "Cannot reflect D3D11 high-level shader.");
+
+			mInputParameters.resize(shaderDesc.InputParameters);
+			for (UINT32 i = 0; i < shaderDesc.InputParameters; i++)
+				shaderReflection->GetInputParameterDesc(i, &(mInputParameters[i]));
+
+			mOutputParameters.resize(shaderDesc.OutputParameters);
+			for (UINT32 i = 0; i < shaderDesc.OutputParameters; i++)
+				shaderReflection->GetOutputParameterDesc(i, &(mOutputParameters[i]));
+
+			// Only reserve: populateConstantBufferParameters() appends one entry per buffer it
+			// accepts, so resizing here would leave empty entries in front of the real ones.
+			mShaderBuffers.reserve(shaderDesc.ConstantBuffers);
+			for(UINT32 i = 0; i < shaderDesc.ConstantBuffers; i++)
+			{
+				ID3D11ShaderReflectionConstantBuffer* shaderReflectionConstantBuffer;
+				shaderReflectionConstantBuffer = shaderReflection->GetConstantBufferByIndex(i);
+
+				populateConstantBufferParameters(shaderReflectionConstantBuffer);
+			}
+		}
+		catch(...)
+		{
+			shaderReflection->Release();
+			throw;
+		}
+
+		shaderReflection->Release();
 	}
 
-	unsigned int D3D11HLSLProgram::getNumInputs() const
-	{
-		CM_EXCEPT(NotImplementedException, "Not implemented");
-	}
-
-	unsigned int D3D11HLSLProgram::getNumOutputs() const
-	{
-		CM_EXCEPT(NotImplementedException, "Not implemented");
-	}
-
-	const D3D11_SIGNATURE_PARAMETER_DESC& D3D11HLSLProgram::getInputParamDesc(unsigned int index) const
-	{
-		CM_EXCEPT(NotImplementedException, "Not implemented");
-	}
-
-	const D3D11_SIGNATURE_PARAMETER_DESC& D3D11HLSLProgram::getOutputParamDesc(unsigned int index) const
-	{
-		CM_EXCEPT(NotImplementedException, "Not implemented");
+	/**
+	 * @brief	Builds named constant definitions and the logical-to-physical index maps
+	 * 			from the variables collected in mShaderBuffers.
+	 *
+	 * 			Only float, int, bool and 1D/2D/3D/cube sampler variables are processed;
+	 * 			every other variable type is skipped.
+	 */
+	void D3D11HLSLProgram::buildConstantDefinitions() const
+	{
+		createParameterMappingStructures(true);
+
+		for(auto shaderBufferIter = mShaderBuffers.begin(); shaderBufferIter != mShaderBuffers.end(); ++shaderBufferIter)
+		{
+			for(size_t i = 0; i < shaderBufferIter->variables.size(); i++)
+			{
+				const D3D11_SHADER_VARIABLE_DESC& variableDesc = shaderBufferIter->variables[i];
+				const D3D11_SHADER_TYPE_DESC& variableType = shaderBufferIter->variableTypes[i];
+
+				// Strip a leading '$'; guard against an empty name, which at(0) would reject
+				String name = variableDesc.Name;
+				if (!name.empty() && name.at(0) == '$')
+					name.erase(name.begin());
+
+				// Also trim the '[0]' suffix if it exists, we will add our own indexing later
+				if (StringUtil::endsWith(name, "[0]", false))
+					name.erase(name.size() - 3);
+
+				// NOTE(review): the logical index restarts at 0 for every buffer, so variables of
+				// different buffers share indices and later map inserts may be ignored - confirm.
+				UINT32 paramIndex = (UINT32)i;
+
+				// TODO - Need to add support for more types. ESPECIALLY TEXTURES & STRUCTS!
+				if(variableType.Type == D3D_SVT_FLOAT || variableType.Type == D3D_SVT_INT || variableType.Type == D3D_SVT_BOOL || variableType.Type == D3D_SVT_SAMPLER1D || 
+					variableType.Type == D3D_SVT_SAMPLER2D || variableType.Type == D3D_SVT_SAMPLER3D || variableType.Type == D3D_SVT_SAMPLERCUBE)
+				{
+					GpuConstantDefinition def;
+					def.logicalIndex = paramIndex;
+					// populate type, array size & element size
+					populateParameterDefinition(variableDesc, variableType, def);
+
+					if(def.isSampler())
+					{
+						def.physicalIndex = variableDesc.StartSampler;
+						CM_LOCK_MUTEX(mSamplerLogicalToPhysical->mutex)
+							mSamplerLogicalToPhysical->map.insert(
+							GpuLogicalIndexUseMap::value_type(paramIndex, 
+							GpuLogicalIndexUse(def.physicalIndex, def.arraySize, GPV_GLOBAL)));
+						mSamplerLogicalToPhysical->bufferSize = std::max(mSamplerLogicalToPhysical->bufferSize, def.physicalIndex + def.arraySize);
+						mConstantDefs->samplerCount = mSamplerLogicalToPhysical->bufferSize;
+					}
+					else
+					{
+						// NOTE(review): the map entries below record arraySize * elementSize as the
+						// use size, while bufferSize only grows by arraySize - confirm which is intended.
+						if (def.isFloat())
+						{
+							def.physicalIndex = variableDesc.StartOffset;
+							CM_LOCK_MUTEX(mFloatLogicalToPhysical->mutex)
+								mFloatLogicalToPhysical->map.insert(
+								GpuLogicalIndexUseMap::value_type(paramIndex, 
+								GpuLogicalIndexUse(def.physicalIndex, def.arraySize * def.elementSize, GPV_GLOBAL)));
+							mFloatLogicalToPhysical->bufferSize = std::max(mFloatLogicalToPhysical->bufferSize, def.physicalIndex + def.arraySize);
+							mConstantDefs->floatBufferSize = mFloatLogicalToPhysical->bufferSize;
+						}
+						else
+						{
+							def.physicalIndex = variableDesc.StartOffset;
+							CM_LOCK_MUTEX(mIntLogicalToPhysical->mutex)
+								mIntLogicalToPhysical->map.insert(
+								GpuLogicalIndexUseMap::value_type(paramIndex, 
+								GpuLogicalIndexUse(def.physicalIndex, def.arraySize * def.elementSize, GPV_GLOBAL)));
+							mIntLogicalToPhysical->bufferSize = std::max(mIntLogicalToPhysical->bufferSize, def.physicalIndex + def.arraySize);
+							mConstantDefs->intBufferSize = mIntLogicalToPhysical->bufferSize;
+						}
+					}
+
+					mConstantDefs->map.insert(GpuConstantDefinitionMap::value_type(name, def));
+
+					// Now deal with arrays
+					mConstantDefs->generateConstantDefinitionArrayEntries(name, def);
+				}
+			}
+		}
+	}
+
+	/**
+	 * @brief	Appends one entry to mShaderBuffers describing the given constant buffer
+	 * 			and the variables it contains.
+	 *
+	 * 			Buffers that are neither cbuffers nor tbuffers are logged and skipped.
+	 *
+	 * @param	bufferReflection	Reflection interface of the constant buffer to read.
+	 *
+	 * @note	Raises RenderingAPIException (via CM_EXCEPT) if a description cannot be
+	 * 			retrieved or a variable has an unsupported type.
+	 */
+	void D3D11HLSLProgram::populateConstantBufferParameters(ID3D11ShaderReflectionConstantBuffer* bufferReflection)
+	{
+		D3D11_SHADER_BUFFER_DESC constantBufferDesc;
+		HRESULT hr = bufferReflection->GetDesc(&constantBufferDesc);
+		if (FAILED(hr))
+			CM_EXCEPT(RenderingAPIException, "Failed to retrieve HLSL constant buffer description.");
+
+		if(constantBufferDesc.Type != D3D_CBUFFER_TYPE::D3D_CT_CBUFFER && constantBufferDesc.Type != D3D_CBUFFER_TYPE::D3D_CT_TBUFFER)
+		{
+			LOGDBG("D3D11 HLSL parsing: Unsupported constant buffer type, skipping. Type: " + toString(constantBufferDesc.Type));
+			return;
+		}
+
+		mShaderBuffers.push_back(D3D11_ShaderBufferDesc());
+		// back() is the element just added; end() is one past it and must not be dereferenced
+		D3D11_ShaderBufferDesc& newShaderBufferDesc = mShaderBuffers.back();
+		// Keep the buffer description - createConstantBuffers() reads desc.Size from it
+		newShaderBufferDesc.desc = constantBufferDesc;
+
+		for(UINT32 j = 0; j < constantBufferDesc.Variables; j++)
+		{
+			ID3D11ShaderReflectionVariable* varRef;
+			varRef = bufferReflection->GetVariableByIndex(j);
+			D3D11_SHADER_VARIABLE_DESC varDesc;
+			hr = varRef->GetDesc(&varDesc);
+
+			if (FAILED(hr))
+				CM_EXCEPT(RenderingAPIException, "Failed to retrieve HLSL constant buffer variable description.");
+
+			ID3D11ShaderReflectionType* varRefType;
+			varRefType = varRef->GetType();
+			D3D11_SHADER_TYPE_DESC varTypeDesc;
+			hr = varRefType->GetDesc(&varTypeDesc);
+
+			if (FAILED(hr))
+				CM_EXCEPT(RenderingAPIException, "Failed to retrieve HLSL constant buffer variable description.");
+
+			switch(varTypeDesc.Type)
+			{
+			case D3D_SVT_FLOAT:
+			case D3D_SVT_INT:
+			case D3D_SVT_SAMPLER1D:
+			case D3D_SVT_SAMPLER2D: 
+			case D3D_SVT_SAMPLER3D:
+			case D3D_SVT_SAMPLERCUBE: // TODO - Need to add support for other types!
+				newShaderBufferDesc.variables.push_back(varDesc);
+				newShaderBufferDesc.variableTypes.push_back(varTypeDesc);
+				break; // without this, supported types fell through into the error below
+			default:
+				CM_EXCEPT(RenderingAPIException, "Unsupported shader variable type!");
+			}
+		}
+	}
+
+	/**
+	 * @brief	Fills the constant type, array size and element size of a constant
+	 * 			definition from the reflected description of a shader variable.
+	 *
+	 * @param	paramDesc	Reflected variable description (supplies Size / SamplerSize).
+	 * @param	paramType	Reflected type description (supplies Type, Rows, Columns, Elements).
+	 * @param	def			Definition to fill. arraySize is always written; constType and
+	 * 						elementSize are written only for supported type/shape combinations.
+	 */
+	void D3D11HLSLProgram::populateParameterDefinition(const D3D11_SHADER_VARIABLE_DESC& paramDesc, const D3D11_SHADER_TYPE_DESC& paramType, GpuConstantDefinition& def) const
+	{
+		// Elements is 0 for non-array variables and the element count for arrays, so a
+		// non-array counts as one element (Elements + 1 over-counted every array by one).
+		def.arraySize = paramType.Elements > 0 ? paramType.Elements : 1;
+
+		switch(paramType.Type)
+		{
+		case D3D_SVT_SAMPLER1D:
+			def.constType = GCT_SAMPLER1D;
+			def.elementSize = paramDesc.SamplerSize / def.arraySize;
+			break;
+		case D3D_SVT_SAMPLER2D:
+			// Deliberate development trap left by the author; presumably CM_EXCEPT raises,
+			// which makes the assignments below unreachable until it is removed.
+			CM_EXCEPT(NotImplementedException, "Break here because I want to check what is the elementSize of the sampler. It has to be 1.");
+
+			def.constType = GCT_SAMPLER2D;
+			def.elementSize = paramDesc.SamplerSize / def.arraySize;
+			break;
+		case D3D_SVT_SAMPLER3D:
+			def.constType = GCT_SAMPLER3D;
+			def.elementSize = paramDesc.SamplerSize / def.arraySize;
+			break;
+		case D3D_SVT_SAMPLERCUBE:
+			def.constType = GCT_SAMPLERCUBE;
+			def.elementSize = paramDesc.SamplerSize / def.arraySize;
+			break;
+		case D3D_SVT_INT:
+			switch(paramType.Columns)
+			{
+			case 1: def.constType = GCT_INT1; break;
+			case 2: def.constType = GCT_INT2; break;
+			case 3: def.constType = GCT_INT3; break;
+			case 4: def.constType = GCT_INT4; break;
+			default: return; // unsupported width: leave type and element size untouched
+			} // columns
+
+			def.elementSize = paramDesc.Size / def.arraySize;
+			break;
+		case D3D_SVT_FLOAT:
+			// Deliberate development trap left by the author (see the message).
+			CM_EXCEPT(NotImplementedException, "Break here because I want to check if paramDesc.Size is size per element or total size of the array.");
+
+			switch(paramType.Rows)
+			{
+			case 1:
+				switch(paramType.Columns)
+				{
+				case 1: def.constType = GCT_FLOAT1; break;
+				case 2: def.constType = GCT_FLOAT2; break;
+				case 3: def.constType = GCT_FLOAT3; break;
+				case 4: def.constType = GCT_FLOAT4; break;
+				default: return; // unsupported shape: leave type and element size untouched
+				} // columns
+				break;
+			case 2:
+				switch(paramType.Columns)
+				{
+				case 2: def.constType = GCT_MATRIX_2X2; break;
+				case 3: def.constType = GCT_MATRIX_2X3; break;
+				case 4: def.constType = GCT_MATRIX_2X4; break;
+				default: return;
+				} // columns
+				break;
+			case 3:
+				switch(paramType.Columns)
+				{
+				case 2: def.constType = GCT_MATRIX_3X2; break;
+				case 3: def.constType = GCT_MATRIX_3X3; break;
+				case 4: def.constType = GCT_MATRIX_3X4; break;
+				default: return;
+				} // columns
+				break;
+			case 4:
+				switch(paramType.Columns)
+				{
+				case 2: def.constType = GCT_MATRIX_4X2; break;
+				case 3: def.constType = GCT_MATRIX_4X3; break;
+				case 4: def.constType = GCT_MATRIX_4X4; break;
+				default: return;
+				} // columns
+				break;
+			default:
+				return; // unsupported row count
+			} // rows
+
+			def.elementSize = paramDesc.Size / def.arraySize;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/**
+	 * @brief	Recreates mConstantBuffers: one dynamic write-only hardware constant buffer
+	 * 			per entry of mShaderBuffers, sized from that entry's reflected description.
+	 */
+	void D3D11HLSLProgram::createConstantBuffers()
+	{
+		mConstantBuffers.clear();
+		mConstantBuffers.reserve(mShaderBuffers.size());
+
+		// end() must be called - comparing an iterator against the bare member name does not compile
+		for(auto shaderBufferIter = mShaderBuffers.begin(); shaderBufferIter != mShaderBuffers.end(); ++shaderBufferIter)
+		{
+			HardwareConstantBufferPtr constantBuffer = HardwareBufferManager::instance().createConstantBuffer(shaderBufferIter->desc.Size, HardwareBuffer::HBU_DYNAMIC_WRITE_ONLY);
+			mConstantBuffers.push_back(constantBuffer);
+		}
+	}
 }

+ 4 - 3
CamelotRenderer/Include/CmGpuProgramParams.h

@@ -169,22 +169,23 @@ namespace CamelotEngine {
 			{
 				switch(ctype)
 				{
-				case GCT_FLOAT1:
-				case GCT_INT1:
 				case GCT_SAMPLER1D:
 				case GCT_SAMPLER2D:
 				case GCT_SAMPLER3D:
 				case GCT_SAMPLERCUBE:
 				case GCT_SAMPLER1DSHADOW:
 				case GCT_SAMPLER2DSHADOW:
+					return 1; // Samplers aren't like other variables so they won't be padded
+				case GCT_FLOAT1:
+				case GCT_INT1:
 				case GCT_FLOAT2:
 				case GCT_INT2:
 				case GCT_FLOAT3:
 				case GCT_INT3:
 				case GCT_FLOAT4:
 				case GCT_INT4:
-					return 4;
 				case GCT_MATRIX_2X2:
+					return 4;
 				case GCT_MATRIX_2X3:
 				case GCT_MATRIX_2X4:
 					return 8; // 2 float4s

+ 32 - 66
CamelotRenderer/TODO.txt

@@ -1,72 +1,47 @@
 
-High-level TODO:
+-----------------------LONGTERM TODO----------------------------------------------------------------
  - Debug tools 
    - Camera controls + world grid
  - Renderable (contains mesh/material)
  - RenderManager (iterated through Renderables and displays them on screen)
    - And its plugin implementation ForwardRenderingManager
- - Material/Pass
- - Resource loading thread safety and background loading
-   - Make sure all resources have default resource that will be used before actual resource is loaded
  - Integrate with Camelot Editor
  - SceneManager plugin
    - Frustum culling and octree (or some other) acceleration structure
    - Render queue and sorting
- - DX11 render system
-
- DX11 specific features:
-  - Deferred render contexts and multithreaded rendering
-  - Tesselation (hull/domain) shader
-  - Compute pipeline
-  - Stream out (write vertex buffers)
-  - Instancing
-  - Dynamic shader linkage (Interfaces and similar)
-  - Append/Consume buffer
-  - 1D/2D/Cube texture arrays
-  - What's missing:
-    - No proper support for texture arrays, or multisampled texture resources
-	- Staging and immutable buffers. See D3D11Mapping, DWORD D3D11Mappings::get(HardwareBuffer::Usage usage)
-	- Multiple adapters (multi gpu)
-  - GL equivalents that are missing:
+
+-----------------------IMMEDIATE TODO---------------------------------------------------------------
+
+RenderSystem needed modifications
+ - HLSL specifies Texture & Sampler separately
+   - Will probably need a rendersystem specific sampler object implementation
+   - Right now HLSL11 CANNOT SPECIFY TEXTURE PARAMETERS! (Pass needs to be extended)
+ - Generic buffers (Normal/Structured/Raw/Append/Consume/Indirect)
+ - Ability to bind buffers and texture with different type of view (SHADER_RESOURCE & UNORDERED_ACCESS primarily, major others too)
+ - Pass needs to be modified
+  - Texture & Sampler needs to be specified separately
+  - SamplerState object needs to be created, to be used by all render systems. It needs to have a render specific implementation
+  - Needs to support bool/double/texture1D/texture3D/textureCUBE/texture arrays/MS textures/all types of buffers/structs
+ - Deferred render contexts and multithreaded rendering
+ - TextureBuffer support? (Use for bones when skinning for example, as constant buffers are too slow)
+ - RW buffers?
+ - Tessellation (hull/domain) shader
+ - Compute pipeline
+ - Stream out (write vertex buffers)
+ - Instancing
+ - Dynamic shader linkage (Interfaces and similar)
+ - Append/Consume buffer
+ - 1D/2D/Cube texture arrays
+ - Multisampled texture resources
+ - Multiple adapters (multi gpu)
+ - Process OpenGL and make sure to add equivalents of DX11 features
+  - Exact features that are missing (not an exhaustive list):
     - Vertex buffer stream out (Transform Feedback)
 	- GL constant buffers
-
---HARDWARE BUFFERS--
-New set of HardwareBuffer usage flags:
- IMMUTABLE
-  - DX11: IMMUTABLE
-  - DX9/GL: STATIC (D3DUSAGE_WRITEONLY)
-  - ensures and read/write operations fail after initialization
- DEFAULT,
-   - DX11: DEFAULT
-   - DX9/GL: STATIC (D3DUSAGE_WRITEONLY) & STATIC_DISCARDABLE
- CPUWRITE
-   - DX11: DYNAMIC
-   - DX9/GL: DYNAMIC (D3DUSAGE_DYNAMIC)
- CPUREADWRITE
-   - DX11: STAGING
-   - DX9/GL: DYNAMIC (D3DUSAGE_DYNAMIC)
- DISCARDABLE
-   - Flag used in addition to the others. Ignored with DX11 and OpenGL
- Revisit hardware buffers (especially system memory & shadow buffers)
-
-DX11 HLSL:
- - Preprocessor defines & includes
-
-DX11 classes to port (in that order):
- VideoMode
-Driver
-VideoModeList
-DriverList
-GpuProgramManager
-HardwareBuffer
-HardwareIndexBuffer
-HardwareVertexBuffer
-VertexDeclaration/HLSLProgram
-HLSLProgramFactory
+	- HLSL11 shader equivalents (domain/hull/compute shaders), and advanced shader parameters
+ - HLSL9/HLSL11/GLSL/Cg shaders need preprocessor defines & includes
 
 Command buffer TODO:
- - Redo OpenGL shaders as they seem to be using pretty old methods (glCreateShaderObjectARB seems to be deprectated for example)
  - When importing a resource, and registering it with Resources I don't think it properly gets added to the loaded resources array? For some reason shaders get created twice.
  - My current approach doesn't allow multiple threads to use the RenderSystem (contexts should be handled differently)
    - Instead of requiring the user to constantly call setActiveContext, make the call peristent per thread. 
@@ -77,13 +52,6 @@ Command buffer TODO:
  - Doing setPixels_async in the texture doesn't make sure that the user doesn't actually modify the provided PixelData after
     that call.
 
- Texture updates:
-   - Make this per texture and apply when texture is applied:
-	 filterMode
-	 anisoLevel
-	 wrapMode
-	 mipMapBias
-
 Mesh
  - Make sure to queue up mesh deletion to make sure it gets destroyed on the render thread
 
@@ -102,12 +70,13 @@ When saving a resource, make sure resource is properly loaded before saving
   - Add documentation that tells the user that reading a resource non-async will block the thread and execute all queued render commands first
   - Remove Response handlers from Resources
 
+-----------------------BACKLOG TODO---------------------------------------------------------------
+
 HIGH PRIORITY TODO:
  - Issue with deserialization and value types:
   - Value types are only set at the end of deserialization, because I want to be sure all of their fields are initialized. However there is nothing stopping a custom RTTI method from accessing a (yet uninitialized) value in a ptr field. (See CmMaterialRTTI, setTexParam). I need to initialize fields in a better order.
    - Solution?: Set (empty) ptr values immediately, and only load them later
  - GetRenderOperation doesn't consider sub-meshes
- - HLSL & Cg don't handle include files yet
 
 Mid priority TODO:
  - Add multithreaded version of RenderTarget::getCustomAttribute_internal
@@ -157,9 +126,6 @@ Optional TODO:
  -----------------------------------------------------------------------------------------------
 
 After everything is polished
- - DX11 render system
-   - Make sure to add OpenGL equivalents of DX11 features (tesselation, stream out, compute, etc.)
- - SLI / multiple device support
  - How do I handle multiple mesh formats? Some files need animation, others don't. Some would maybe like to use QTangent, others the proper tangent frame.
  - Load texture mips separately so we can unload HQ textures from far away objects (like UE3)
  - Add Unified shader so I can easily switch between HLSL and GLSL shaders (they need same parameters usually, just different code)