Browse Source

Gradient descent functional

Signed-off-by: kberg-amzn <[email protected]>
kberg-amzn 2 years ago
Parent
Commit
22df176c0d
22 changed files with 671 additions and 50 deletions
  1. + 47 - 5     Gems/MachineLearning/Code/Include/MachineLearning/INeuralNetwork.h
  2. + 42 - 0     Gems/MachineLearning/Code/Source/Algorithms/Activations.h
  3. + 121 - 0    Gems/MachineLearning/Code/Source/Algorithms/Activations.inl
  4. + 38 - 0     Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.h
  5. + 75 - 0     Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.inl
  6. + 53 - 0     Gems/MachineLearning/Code/Source/MachineLearningSystemComponent.cpp
  7. + 56 - 5     Gems/MachineLearning/Code/Source/Models/Layer.cpp
  8. + 20 - 2     Gems/MachineLearning/Code/Source/Models/Layer.h
  9. + 20 - 18    Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.cpp
  10. + 10 - 8    Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.h
  11. + 1 - 1     Gems/MachineLearning/Code/Source/Nodes/ComputeCost.ScriptCanvasNodeable.xml
  12. + 1 - 1     Gems/MachineLearning/Code/Source/Nodes/ComputeCost.cpp
  13. + 1 - 1     Gems/MachineLearning/Code/Source/Nodes/CreateModel.ScriptCanvasNodeable.xml
  14. + 4 - 4     Gems/MachineLearning/Code/Source/Nodes/CreateModel.cpp
  15. + 1 - 0     Gems/MachineLearning/Code/Source/Nodes/CreateModel.h
  16. + 1 - 1     Gems/MachineLearning/Code/Source/Nodes/FeedForward.cpp
  17. + 33 - 0    Gems/MachineLearning/Code/Tests/Algorithms/ActivationTests.cpp
  18. + 20 - 0    Gems/MachineLearning/Code/Tests/Algorithms/LossFunctionTests.cpp
  19. + 24 - 1    Gems/MachineLearning/Code/Tests/Models/LayerTests.cpp
  20. + 97 - 0    Gems/MachineLearning/Code/Tests/Models/MultilayerPerceptronTests.cpp
  21. + 4 - 3     Gems/MachineLearning/Code/machinelearning_private_files.cmake
  22. + 2 - 0     Gems/MachineLearning/Code/machinelearning_tests_files.cmake

+ 47 - 5
Gems/MachineLearning/Code/Include/MachineLearning/INeuralNetwork.h

@@ -12,11 +12,21 @@
 
 namespace MachineLearning
 {
-    enum class CostFunctions
+    enum class LossFunctions
     {
-        Quadratic
+        MeanSquaredError, 
+        CrossEntropyLoss
     };
 
+    enum class ActivationFunctions
+    {
+        ReLU,
+        Sigmoid,
+        Linear
+    };
+
+    class Layer;
+
     class INeuralNetwork
     {
     public:
@@ -25,16 +35,48 @@ namespace MachineLearning
 
         virtual ~INeuralNetwork() = default;
 
+        //! Adds a new layer to the network.
+        virtual void AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction = ActivationFunctions::Linear) = 0;
+
+        //! Returns the total number of layers in the network.
+        virtual AZStd::size_t GetLayerCount() const = 0;
+
+        //! Retrieves the layer at the given layerIndex from the network.
+        virtual Layer& GetLayer(AZStd::size_t layerIndex) = 0;
+
         //! Returns the total number of parameters in the neural network.
         virtual AZStd::size_t GetParameterCount() const = 0;
 
         //! Performs a basic feed-forward operation to compute the output from a set of activation values.
-        virtual const AZ::VectorN& FeedForward(const AZ::VectorN& activations) = 0;
+        virtual const AZ::VectorN& Forward(const AZ::VectorN& activations) = 0;
+
+        //! Accumulates the loss gradients given a loss function, an activation vector and a corresponding label vector.
+        virtual void Reverse(LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected) = 0;
+
+        //! Performs a gradient descent step and resets all gradient accumulators to zero.
+        virtual void GradientDescent(float learningRate) = 0;
+    };
+
+    struct LayerParams
+    {
+        AZ_TYPE_INFO(LayerParams, "{DD9A7E7C-8D11-4805-83CF-6A5262B4580C}");
+
+        //! AzCore Reflection.
+        //! @param context reflection context
+        static void Reflect(class AZ::ReflectContext* context);
+
+        LayerParams() = default;
+        inline LayerParams(AZStd::size_t size, ActivationFunctions activationFunction)
+            : m_layerSize(size)
+            , m_activationFunction(activationFunction)
+        {
+        }
 
-        //! Given a set of activations and an expected output, computes the cost of the 
-        virtual float ComputeCost(const AZ::VectorN& activations, const AZ::VectorN& expectedOutput, CostFunctions costFunction) = 0;
+        AZStd::size_t m_layerSize = 0;
+        ActivationFunctions m_activationFunction = ActivationFunctions::ReLU;
     };
 
     using INeuralNetworkPtr = AZStd::shared_ptr<INeuralNetwork>;
+    //using HiddenLayerParams = AZStd::vector<LayerParams>;
     using HiddenLayerParams = AZStd::vector<AZStd::size_t>;
 }
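
To show how the revised Forward/Reverse/GradientDescent flow is meant to compose, here is a minimal training-loop sketch (hypothetical driver code, not part of this commit; it mirrors the flow exercised by MultilayerPerceptronTests.cpp further down):

#include <Models/MultilayerPerceptron.h>

// Hypothetical example: fit a tiny 2-2-2 model to a single training sample.
void TrainOneSample(const AZ::VectorN& input, const AZ::VectorN& label)
{
    MachineLearning::MultilayerPerceptron model(2); // two input neurons
    model.AddLayer(2, MachineLearning::ActivationFunctions::Sigmoid); // hidden layer
    model.AddLayer(2, MachineLearning::ActivationFunctions::Sigmoid); // output layer

    for (int epoch = 0; epoch < 10000; ++epoch)
    {
        // Forward pass plus gradient accumulation, then one descent step
        // (GradientDescent also resets the gradient accumulators)
        model.Reverse(MachineLearning::LossFunctions::MeanSquaredError, input, label);
        model.GradientDescent(0.5f);
    }
}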

+ 42 - 0
Gems/MachineLearning/Code/Source/Algorithms/Activations.h

@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <AzCore/Math/VectorN.h>
+#include <MachineLearning/INeuralNetwork.h>
+
+namespace MachineLearning
+{
+    //! Computes the requested activation function applied to all elements of the source vector.
+    void Activate(ActivationFunctions activationFunction, const AZ::VectorN& sourceVector, AZ::VectorN& output);
+
+    //! Computes the rectified linear unit function (ReLU) applied to all elements of the source vector.
+    void ReLU(const AZ::VectorN& sourceVector, AZ::VectorN& output);
+
+    //! Computes the sigmoid function applied to all elements of the source vector, returning values in the range 0..1.
+    void Sigmoid(const AZ::VectorN& sourceVector, AZ::VectorN& output);
+
+    //! Computes the linear activation function applied to all elements of the source vector.
+    void Linear(const AZ::VectorN& sourceVector, AZ::VectorN& output);
+
+    //! Computes the derivative of the requested activation function applied to all elements of the provided vector.
+    //! The activationOutput input here is simply the output of calling Activate on the original source vector.
+    void Activate_Derivative(ActivationFunctions activationFunction, const AZ::VectorN& activationOutput, AZ::VectorN& output);
+
+    //! Computes the derivative of the rectified linear unit function (ReLU) applied to all elements of the original source vector.
+    void ReLU_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output);
+
+    //! Computes the derivative of the sigmoid activation function applied to all elements of the original source vector.
+    void Sigmoid_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output);
+
+    //! Computes the derivative of the linear activation function applied to all elements of the original source vector.
+    void Linear_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output);
+}
+
+#include <Algorithms/Activations.inl>
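
A note on the _Derivative signatures above: they take the activation output rather than the original input because, for every function supported here, the derivative is recoverable from the output alone:

    \sigma(x) = \frac{1}{1 + e^{-x}}, \qquad \sigma'(x) = \sigma(x)\,(1 - \sigma(x)), \qquad
    \mathrm{ReLU}'(x) = \begin{cases} 1 & x > 0 \\ 0 & \text{otherwise} \end{cases}, \qquad
    \mathrm{Linear}'(x) = 1

For ReLU this works because activation clamps negatives to zero, so the output is positive exactly where the input was positive (hence the strict comparison in ReLU_Derivative below).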

+ 121 - 0
Gems/MachineLearning/Code/Source/Algorithms/Activations.inl

@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <Algorithms/Activations.h>
+#include <AzCore/Math/SimdMath.h>
+
+namespace MachineLearning
+{
+    inline void Activate(ActivationFunctions activationFunction, const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        output.Resize(sourceVector.GetDimensionality());
+        switch (activationFunction)
+        {
+        case ActivationFunctions::ReLU:
+            ReLU(sourceVector, output);
+            break;
+        case ActivationFunctions::Sigmoid:
+            Sigmoid(sourceVector, output);
+            break;
+        case ActivationFunctions::Linear:
+            Linear(sourceVector, output);
+            break;
+        }
+    }
+
+    inline void ReLU(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        const AZStd::size_t numElements = sourceVector.GetVectorValues().size();
+        const AZ::Simd::Vec4::FloatType zero = AZ::Simd::Vec4::ZeroFloat();
+        output.Resize(sourceVector.GetDimensionality());
+        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
+        {
+            const AZ::Vector4& sourceElement = sourceVector.GetVectorValues()[iter];
+            const AZ::Simd::Vec4::FloatType mask = AZ::Simd::Vec4::CmpGtEq(sourceElement.GetSimdValue(), zero); // 1's if >= 0, 0's otherwise
+            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
+            outputElement.SetSimdValue(AZ::Simd::Vec4::And(sourceElement.GetSimdValue(), mask)); // Zeros out negative elements
+        }
+        output.FixLastVectorElement();
+    }
+
+    inline void Sigmoid(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        const AZStd::size_t numElements = sourceVector.GetVectorValues().size();
+        const AZ::Vector4 vecOne = AZ::Vector4::CreateOne();
+        output.Resize(sourceVector.GetDimensionality());
+        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
+        {
+            const AZ::Vector4& sourceElement = sourceVector.GetVectorValues()[iter];
+            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
+            outputElement = vecOne / (vecOne + (-sourceElement).GetExpEstimate());
+        }
+        output.FixLastVectorElement();
+    }
+
+    inline void Linear(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        if (&output != &sourceVector)
+        {
+            output = sourceVector;
+        }
+    }
+
+    inline void Activate_Derivative(ActivationFunctions activationFunction, const AZ::VectorN& activationOutput, AZ::VectorN& output)
+    {
+        output.Resize(activationOutput.GetDimensionality());
+        switch (activationFunction)
+        {
+        case ActivationFunctions::ReLU:
+            ReLU_Derivative(activationOutput, output);
+            break;
+        case ActivationFunctions::Sigmoid:
+            Sigmoid_Derivative(activationOutput, output);
+            break;
+        case ActivationFunctions::Linear:
+            Linear_Derivative(activationOutput, output);
+            break;
+        }
+    }
+
+    inline void ReLU_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output)
+    {
+        const AZStd::size_t numElements = activationOutput.GetVectorValues().size();
+        const AZ::Simd::Vec4::FloatType zero = AZ::Simd::Vec4::ZeroFloat();
+        const AZ::Simd::Vec4::FloatType one = AZ::Simd::Vec4::Splat(1.0f);
+        output.Resize(activationOutput.GetDimensionality());
+        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
+        {
+            const AZ::Vector4& sourceElement = activationOutput.GetVectorValues()[iter];
+            // 1's if > 0, 0's otherwise
+            // Strictly greater than is required as any negative inputs in the original source vector will have been clamped to zero by activation
+            const AZ::Simd::Vec4::FloatType mask = AZ::Simd::Vec4::CmpGt(sourceElement.GetSimdValue(), zero);
+            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
+            outputElement.SetSimdValue(AZ::Simd::Vec4::And(one, mask)); // Returns one if mask is non-zero, returns zero otherwise
+        }
+        output.FixLastVectorElement();
+    }
+
+    inline void Sigmoid_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output)
+    {
+        const AZStd::size_t numElements = activationOutput.GetVectorValues().size();
+        const AZ::Vector4 vecOne = AZ::Vector4::CreateOne();
+        output.Resize(activationOutput.GetDimensionality());
+        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
+        {
+            const AZ::Vector4& activationElement = activationOutput.GetVectorValues()[iter];
+            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
+            outputElement = activationElement * (vecOne - activationElement);
+        }
+        output.FixLastVectorElement();
+    }
+
+    inline void Linear_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output)
+    {
+        output = AZ::VectorN(activationOutput.GetDimensionality(), 1.0f);
+    }
+}

+ 38 - 0
Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.h

@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <AzCore/Math/VectorN.h>
+#include <MachineLearning/INeuralNetwork.h>
+
+namespace MachineLearning
+{
+    //! A helper that computes the total cost given a loss function and the expected and actual output vectors.
+    float ComputeTotalCost(LossFunctions lossFunction, const AZ::VectorN& expected, const AZ::VectorN& actual);
+
+    //! Computes the loss across all elements of the source vectors using the requested loss function.
+    void ComputeLoss(LossFunctions lossFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
+
+    //! Computes the mean squared error between the expected and actual vectors.
+    void MeanSquaredError(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
+
+    //! Computes the cross-entropy loss between the expected and actual vectors.
+    void CrossEntropyLoss(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
+
+    //! Computes the gradient of the loss across all elements of the source vectors using the requested loss function.
+    void ComputeLoss_Derivative(LossFunctions lossFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
+
+    //! Computes the derivative of the mean squared error with respect to the actual output.
+    void MeanSquaredError_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
+
+    //! Computes the derivative of the cross-entropy loss with respect to the actual output.
+    void CrossEntropyLoss_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
+}
+
+#include <Algorithms/LossFunctions.inl>
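
For reference, with expected value e and actual output a, the per-element quantities these helpers are built around are:

    C_{\mathrm{MSE}} = \tfrac{1}{2}(a - e)^2, \qquad \frac{\partial C_{\mathrm{MSE}}}{\partial a} = a - e

    C_{\mathrm{CE}} = -\big(e \ln a + (1 - e)\ln(1 - a)\big), \qquad \frac{\partial C_{\mathrm{CE}}}{\partial a} = -\frac{e}{a} + \frac{1 - e}{1 - a}

Note that in the .inl below, CrossEntropyLoss currently shares its body with CrossEntropyLoss_Derivative, i.e. it evaluates the derivative form rather than the logarithmic loss itself.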

+ 75 - 0
Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.inl

@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <Algorithms/LossFunctions.h>
+#include <AzCore/Math/SimdMath.h>
+
+namespace MachineLearning
+{
+    inline float ComputeTotalCost(LossFunctions lossFunction, const AZ::VectorN& expected, const AZ::VectorN& actual)
+    {
+        AZ::VectorN costs;
+        ComputeLoss(lossFunction, expected, actual, costs);
+        AZ::Vector4 accumulator = AZ::Vector4::CreateZero();
+        for (const AZ::Vector4& element : costs.GetVectorValues())
+        {
+            accumulator += element;
+        }
+        return accumulator.Dot(AZ::Vector4::CreateOne());
+    }
+
+    inline void ComputeLoss(LossFunctions costFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    {
+        AZ_Assert(expected.GetDimensionality() == actual.GetDimensionality(), "The dimensionality of expected and actual must match");
+        output.Resize(actual.GetDimensionality());
+        switch (costFunction)
+        {
+        case LossFunctions::MeanSquaredError:
+            MeanSquaredError(expected, actual, output);
+            break;
+        case LossFunctions::CrossEntropyLoss:
+            CrossEntropyLoss(expected, actual, output);
+            break;
+        }
+    }
+
+    inline void MeanSquaredError(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    {
+        output = 0.5f * (actual - expected).GetSquare();
+    }
+
+    inline void CrossEntropyLoss(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    {
+        output = -(expected / actual) + (1.0f - expected) / (1.0f - actual);
+    }
+
+    inline void ComputeLoss_Derivative(LossFunctions costFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    {
+        AZ_Assert(expected.GetDimensionality() == actual.GetDimensionality(), "The dimensionality of expected and actual must match");
+        output.Resize(actual.GetDimensionality());
+        switch (costFunction)
+        {
+        case LossFunctions::MeanSquaredError:
+            MeanSquaredError_Derivative(expected, actual, output);
+            break;
+        case LossFunctions::CrossEntropyLoss:
+            CrossEntropyLoss_Derivative(expected, actual, output);
+            break;
+        }
+    }
+
+    inline void MeanSquaredError_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    {
+        output = (expected - actual);
+    }
+
+    inline void CrossEntropyLoss_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    {
+        output = -(expected / actual) + (1.0f - expected) / (1.0f - actual);
+    }
+}

+ 53 - 0
Gems/MachineLearning/Code/Source/MachineLearningSystemComponent.cpp

@@ -9,6 +9,7 @@
 #include "MachineLearningSystemComponent.h"
 #include <MachineLearning/MachineLearningTypeIds.h>
 #include <AzCore/Serialization/SerializeContext.h>
+#include <AzCore/Serialization/EditContext.h>
 #include <AzCore/RTTI/BehaviorContext.h>
 #include <Models/Layer.h>
 #include <Models/MultilayerPerceptron.h>
@@ -16,10 +17,49 @@
 
 static ScriptCanvas::MachineLearningPrivateObjectNodeableRegistry s_MachineLearningPrivateObjectNodeableRegistry;
 
+namespace AZ
+{
+    AZ_TYPE_INFO_SPECIALIZE(MachineLearning::ActivationFunctions, "{2ABF758E-CA69-41AC-BC95-B47AD7DEA31B}");
+    AZ_TYPE_INFO_SPECIALIZE(MachineLearning::LossFunctions, "{18098C74-9AD0-4F1D-8093-545344620AD1}");
+}
+
 namespace MachineLearning
 {
     AZ_COMPONENT_IMPL(MachineLearningSystemComponent, "MachineLearningSystemComponent", MachineLearningSystemComponentTypeId);
 
+    void LayerParams::Reflect(AZ::ReflectContext* context)
+    {
+        if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
+        {
+            serializeContext->Class<LayerParams>()
+                ->Version(1)
+                ->Field("Size", &LayerParams::m_layerSize)
+                ->Field("ActivationFunction", &LayerParams::m_activationFunction)
+                ;
+
+            if (AZ::EditContext* editContext = serializeContext->GetEditContext())
+            {
+                editContext->Class<LayerParams>("Parameters defining a single layer of a neural network", "")
+                    ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
+                    ->DataElement(AZ::Edit::UIHandlers::Default, &LayerParams::m_layerSize, "Layer Size", "The number of neurons this layer should have")
+                    ->DataElement(AZ::Edit::UIHandlers::ComboBox, &LayerParams::m_activationFunction, "Activation Function", "The activation function applied to this layer")
+                    ;
+            }
+        }
+
+        auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
+        if (behaviorContext)
+        {
+            behaviorContext->Class<LayerParams>()->
+                Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
+                Attribute(AZ::Script::Attributes::Module, "machineLearning")->
+                Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
+                Constructor<AZStd::size_t, ActivationFunctions>()->
+                Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)
+                ;
+        }
+    }
+
     void MachineLearningSystemComponent::Reflect(AZ::ReflectContext* context)
     {
         if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
@@ -27,13 +67,26 @@ namespace MachineLearning
             serializeContext->Class<MachineLearningSystemComponent, AZ::Component>()->Version(0);
             serializeContext->Class<Layer>()->Version(0);
             serializeContext->Class<MultilayerPerceptron>()->Version(0);
+            serializeContext->Class<INeuralNetwork>()->Version(0);
+            serializeContext->Class<LayerParams>();
+            serializeContext->RegisterGenericType<INeuralNetworkPtr>();
+            serializeContext->RegisterGenericType<HiddenLayerParams>();
         }
 
         if (auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context))
         {
             behaviorContext->Class<MachineLearningSystemComponent>();
+            behaviorContext->Class<LayerParams>();
             behaviorContext->Class<Layer>();
             behaviorContext->Class<MultilayerPerceptron>();
+
+            behaviorContext
+                ->Enum<static_cast<int>(ActivationFunctions::Linear)>("Linear activation function")
+                ->Enum<static_cast<int>(ActivationFunctions::ReLU)>("ReLU activation function");
+
+            behaviorContext
+                ->Enum<static_cast<int>(LossFunctions::MeanSquaredError)>("Quadratic cost function")
+                ->Enum<static_cast<int>(LossFunctions::CrossEntropyLoss)>("Cross entropy loss function");
         }
     }
 

+ 56 - 5
Gems/MachineLearning/Code/Source/Models/Layer.cpp

@@ -7,6 +7,8 @@
  */
 
 #include <Models/MultilayerPerceptron.h>
+#include <Algorithms/Activations.h>
+#include <Algorithms/LossFunctions.h>
 #include <AzCore/RTTI/RTTI.h>
 #include <AzCore/RTTI/BehaviorContext.h>
 #include <AzCore/Serialization/EditContext.h>
@@ -24,6 +26,7 @@ namespace MachineLearning
                 ->Field("OutputSize", &Layer::m_outputSize)
                 ->Field("Weights", &Layer::m_weights)
                 ->Field("Biases", &Layer::m_biases)
+                ->Field("ActivationFunction", &Layer::m_activationFunction)
                 ;
 
             if (AZ::EditContext* editContext = serializeContext->GetEditContext())
@@ -34,6 +37,7 @@ namespace MachineLearning
                     ->Attribute(AZ::Edit::Attributes::ChangeNotify, &Layer::OnSizesChanged)
                     ->DataElement(AZ::Edit::UIHandlers::Default, &Layer::m_outputSize, "Output Size", "This value must match the input size of the next layer, if one exists")
                     ->Attribute(AZ::Edit::Attributes::ChangeNotify, &Layer::OnSizesChanged)
+                    ->DataElement(AZ::Edit::UIHandlers::Default, &Layer::m_activationFunction, "Activation Function", "The activation function applied to this layer")
                     ;
             }
         }
@@ -45,26 +49,73 @@ namespace MachineLearning
                 Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
                 Attribute(AZ::Script::Attributes::Module, "machineLearning")->
                 Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
-                Constructor<AZStd::size_t, AZStd::size_t>()->
+                Constructor<ActivationFunctions, AZStd::size_t, AZStd::size_t>()->
                 Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)
                 ;
         }
     }
 
-    Layer::Layer(AZStd::size_t activationDimensionality, AZStd::size_t layerDimensionality)
-        : m_inputSize(activationDimensionality)
+    Layer::Layer(ActivationFunctions activationFunction, AZStd::size_t activationDimensionality, AZStd::size_t layerDimensionality)
+        : m_activationFunction(activationFunction)
+        , m_inputSize(activationDimensionality)
         , m_outputSize(layerDimensionality)
     {
         OnSizesChanged();
     }
 
-    const AZ::VectorN& Layer::ActivateLayer(const AZ::VectorN& activations)
+    const AZ::VectorN& Layer::Forward(const AZ::VectorN& activations)
     {
+        m_lastInput = activations;
         m_output = m_biases;
-        AZ::VectorMatrixMultiply(m_weights, activations, m_output);
+        AZ::VectorMatrixMultiply(m_weights, m_lastInput, m_output);
+        Activate(m_activationFunction, m_output, m_output);
         return m_output;
     }
 
+    void Layer::AccumulateGradients(const AZ::VectorN& previousLayerGradients)
+    {
+        // Ensure our bias gradient vector is appropriately sized
+        if (m_biasGradients.GetDimensionality() != m_outputSize)
+        {
+            m_biasGradients = AZ::VectorN::CreateZero(m_outputSize);
+        }
+
+        // Ensure our weight gradient matrix is appropriately sized
+        if ((m_weightGradients.GetRowCount() != m_outputSize) || (m_weightGradients.GetColumnCount() != m_inputSize))
+        {
+            m_weightGradients = AZ::MatrixMxN::CreateZero(m_outputSize, m_inputSize);
+        }
+
+        // Ensure our backpropagation gradient vector is appropriately sized
+        if (m_backpropagationGradients.GetDimensionality() != m_inputSize)
+        {
+            m_backpropagationGradients = AZ::VectorN::CreateZero(m_inputSize);
+        }
+
+        // Compute the partial derivatives of the output with respect to the activation function
+        Activate_Derivative(m_activationFunction, m_output, m_activationGradients);
+        m_activationGradients *= previousLayerGradients;
+
+        // Accumulate the partial derivatives of the weight matrix with respect to the loss function
+        AZ::OuterProduct(m_activationGradients, m_lastInput, m_weightGradients);
+
+        // Accumulate the partial derivatives of the bias vector with respect to the loss function
+        m_biasGradients += m_activationGradients;
+
+        // Accumulate the gradients to pass to the preceding layer for back-propagation
+        AZ::VectorMatrixMultiplyLeft(m_activationGradients, m_weights, m_backpropagationGradients);
+    }
+
+    void Layer::ApplyGradients(float learningRate)
+    {
+        m_weights -= m_weightGradients * learningRate;
+        m_biases -= m_biasGradients * learningRate;
+
+        m_biasGradients.SetZero();
+        m_weightGradients.SetZero();
+        m_backpropagationGradients.SetZero();
+    }
+
     void Layer::OnSizesChanged()
     {
         m_weights = AZ::MatrixMxN::CreateRandom(m_outputSize, m_inputSize);

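For reference, AccumulateGradients above implements the standard dense-layer backpropagation relations (with \odot the elementwise product, o the cached m_output, x the cached m_lastInput, and g the gradients arriving from the following layer):

    \delta = f'(o) \odot g, \qquad \frac{\partial C}{\partial W} = \delta\, x^{\top}, \qquad \frac{\partial C}{\partial b} = \delta, \qquad g_{\mathrm{prev}} = W^{\top} \delta

ApplyGradients then takes the descent step W \leftarrow W - \eta\, \partial C/\partial W and b \leftarrow b - \eta\, \partial C/\partial b before zeroing the accumulators.
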
+ 20 - 2
Gems/MachineLearning/Code/Source/Models/Layer.h

@@ -25,16 +25,34 @@ namespace MachineLearning
         static void Reflect(AZ::ReflectContext* context);
 
         Layer() = default;
-        Layer(AZStd::size_t activationDimensionality, AZStd::size_t layerDimensionality);
+        Layer(ActivationFunctions activationFunction, AZStd::size_t activationDimensionality, AZStd::size_t layerDimensionality);
 
-        const AZ::VectorN& ActivateLayer(const AZ::VectorN& activations);
+        //! Performs a basic forward pass on this layer, outputs are stored in m_output.
+        const AZ::VectorN& Forward(const AZ::VectorN& activations);
 
+        //! Accumulates gradients for this layer's weights and biases given the gradients propagated back from the following layer.
+        //! This method presumes a forward pass was completed immediately prior, so that all the relevant vectors are populated.
+        void AccumulateGradients(const AZ::VectorN& previousLayerGradients);
+
+        //! Applies the current gradient values to the layers weights and biases and resets the gradient values for a new accumulation pass.
+        void ApplyGradients(float learningRate);
+
+        //! Updates the layer's internal state to match its requested dimensionalities.
         void OnSizesChanged();
 
+        // These are intentionally left public so that unit testing can exhaustively examine all layer state
         AZStd::size_t m_inputSize = 0;
         AZStd::size_t m_outputSize = 0;
         AZ::MatrixMxN m_weights;
         AZ::VectorN m_biases;
         AZ::VectorN m_output;
+        ActivationFunctions m_activationFunction = ActivationFunctions::ReLU;
+
+        // These values will only be populated if backward propagation is performed
+        AZ::VectorN m_lastInput;
+        AZ::VectorN m_activationGradients;
+        AZ::VectorN m_biasGradients;
+        AZ::MatrixMxN m_weightGradients;
+        AZ::VectorN m_backpropagationGradients;
     };
 }

+ 20 - 18
Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.cpp

@@ -48,8 +48,8 @@ namespace MachineLearning
                 Method("AddLayer", &MultilayerPerceptron::AddLayer)->
                 Method("GetLayerCount", &MultilayerPerceptron::GetLayerCount)->
                 Method("GetLayer", &MultilayerPerceptron::GetLayer)->
-                Method("FeedForward", &MultilayerPerceptron::FeedForward)->
-                Method("ComputeCost", &MultilayerPerceptron::ComputeCost)
+                Method("Forward", &MultilayerPerceptron::Forward)->
+                Method("Reverse", &MultilayerPerceptron::Reverse)
                 ;
         }
     }
@@ -59,14 +59,14 @@ namespace MachineLearning
     {
     }
 
-    void MultilayerPerceptron::AddLayer(AZStd::size_t layerDimensionality)
+    void MultilayerPerceptron::AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction)
     {
         AZStd::size_t lastLayerDimensionality = m_activationCount;
         if (!m_layers.empty())
         {
             lastLayerDimensionality = m_layers.back().m_biases.GetDimensionality();
         }
-        m_layers.push_back(AZStd::move(Layer(lastLayerDimensionality, layerDimensionality)));
+        m_layers.push_back(AZStd::move(Layer(activationFunction, lastLayerDimensionality, layerDimensionality)));
     }
 
     AZStd::size_t MultilayerPerceptron::GetLayerCount() const
@@ -89,37 +89,39 @@ namespace MachineLearning
         return parameterCount;
     }
 
-    const AZ::VectorN& MultilayerPerceptron::FeedForward(const AZ::VectorN& activations)
+    const AZ::VectorN& MultilayerPerceptron::Forward(const AZ::VectorN& activations)
     {
         const AZ::VectorN* lastLayerOutput = &activations;
         for (Layer& layer : m_layers)
         {
-            layer.ActivateLayer(*lastLayerOutput);
+            layer.Forward(*lastLayerOutput);
             lastLayerOutput = &layer.m_output;
         }
         return *lastLayerOutput;
     }
 
-    float MultilayerPerceptron::ComputeCost(const AZ::VectorN& activations, const AZ::VectorN& expectedOutput, CostFunctions costFunction)
+    void MultilayerPerceptron::Reverse(LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected)
     {
-        switch (costFunction)
+        // First feed-forward the activations to get our current model predictions
+        const AZ::VectorN& output = Forward(activations);
+
+        // Compute the partial derivatives of the loss function with respect to the final layer output
+        AZ::VectorN costGradients;
+        ComputeLoss_Derivative(lossFunction, output, expected, costGradients);
+
+        for (auto iter = m_layers.rbegin(); iter != m_layers.rend(); ++iter)
         {
-        case CostFunctions::Quadratic:
-            return ComputeCost_Quadratic(activations, expectedOutput);
+            iter->AccumulateGradients(costGradients);
+            costGradients = iter->m_backpropagationGradients;
         }
-        return 0.0f;
     }
 
-    float MultilayerPerceptron::ComputeCost_Quadratic(const AZ::VectorN& activations, const AZ::VectorN& expectedOutput)
+    void MultilayerPerceptron::GradientDescent(float learningRate)
     {
-        const AZ::VectorN& output = FeedForward(activations);
-        const AZ::VectorN squareDifference = (output - expectedOutput).GetSquare();
-        float summedCost = 0;
-        for (AZStd::size_t iter = 0; iter < squareDifference.GetDimensionality(); ++iter)
+        for (auto iter = m_layers.rbegin(); iter != m_layers.rend(); ++iter)
         {
-            summedCost += squareDifference.GetElement(iter);
+            iter->ApplyGradients(learningRate);
         }
-        return summedCost;
     }
 
     void MultilayerPerceptron::OnActivationCountChanged()

+ 10 - 8
Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.h

@@ -20,7 +20,7 @@ namespace MachineLearning
     {
     public:
 
-        AZ_TYPE_INFO(MultilayerPerceptron, "{E12EF761-41A5-48C3-BF55-7179B280D45F}");
+        AZ_RTTI(MultilayerPerceptron, "{E12EF761-41A5-48C3-BF55-7179B280D45F}", INeuralNetwork);
 
         //! AzCore Reflection.
         //! @param context reflection context
@@ -30,18 +30,20 @@ namespace MachineLearning
         MultilayerPerceptron(AZStd::size_t activationCount);
         virtual ~MultilayerPerceptron() = default;
 
-        void AddLayer(AZStd::size_t layerDimensionality);
-        AZStd::size_t GetLayerCount() const;
-        Layer& GetLayer(AZStd::size_t layerIndex);
-
+        //! INeuralNetwork interface
+        //! @{
+        void AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction = ActivationFunctions::Linear) override;
+        AZStd::size_t GetLayerCount() const override;
+        Layer& GetLayer(AZStd::size_t layerIndex) override;
         AZStd::size_t GetParameterCount() const override;
-        const AZ::VectorN& FeedForward(const AZ::VectorN& activations) override;
-        float ComputeCost(const AZ::VectorN& activations, const AZ::VectorN& expectedOutput, CostFunctions costFunction) override;
+        const AZ::VectorN& Forward(const AZ::VectorN& activations) override;
+        void Reverse(LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected) override;
+        void GradientDescent(float learningRate) override;
+        //! @}
 
     private:
 
         void OnActivationCountChanged();
-        float ComputeCost_Quadratic(const AZ::VectorN& activations, const AZ::VectorN& expectedOutput);
 
         //! The number of neurons in the activation layer.
         AZStd::size_t m_activationCount = 0;

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/ComputeCost.ScriptCanvasNodeable.xml

@@ -11,7 +11,7 @@
             <Parameter Name="Model" Type="MachineLearning::INeuralNetworkPtr" Description="The model to compute the cost gradient for."/>
             <Parameter Name="Activations" Type="AZ::VectorN" Description="The set of activation values to apply to the model (must match the models input count)."/>
             <Parameter Name="ExpectedOutput" Type="AZ::VectorN" Description="The expected outputs given the provided inputs (must match the models output count)."/>
-            <Return Name="Cost" Type="float" Shared="true"/>
+            <Return Name="Cost" Type="AZ::VectorN" Shared="true"/>
         </Input>
     </Class>
 </ScriptCanvas>

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/ComputeCost.cpp

@@ -11,7 +11,7 @@
 
 namespace MachineLearning
 {
-    float ComputeCost::In(MachineLearning::INeuralNetworkPtr Model, AZ::VectorN Activations, AZ::VectorN ExpectedOutput)
+    AZ::VectorN ComputeCost::In(MachineLearning::INeuralNetworkPtr Model, AZ::VectorN Activations, AZ::VectorN ExpectedOutput)
     {
         return Model->ComputeCost(Activations, ExpectedOutput, CostFunctions::Quadratic);
     }

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/CreateModel.ScriptCanvasNodeable.xml

@@ -9,7 +9,7 @@
 
         <Input Name="In" DisplayGroup="In" Description="Parameters controlling the new model">
             <Parameter Name="Input neurons" Type="AZStd::size_t" Description="The number of input neurons for the model to have."/>
-            <Parameter Name="Output neurons" Type="AZStd::size_t" Description="The number of output neurons for the model to have."/>
+            <Parameter Name="Output params" Type="AZStd::size_t" Description="The output layer parameters."/>
             <Parameter Name="Hidden layers" Type="MachineLearning::HiddenLayerParams" Description="The array of hidden layers to generate."/>
             <Return Name="Model" Type="MachineLearning::INeuralNetworkPtr" Shared="true"/>
         </Input>

+ 4 - 4
Gems/MachineLearning/Code/Source/Nodes/CreateModel.cpp

@@ -11,15 +11,15 @@
 
 namespace MachineLearning
 {
-    INeuralNetworkPtr CreateModel::In(AZStd::size_t Inputneurons, AZStd::size_t Outputneurons, MachineLearning::HiddenLayerParams Hiddenlayers)
+    INeuralNetworkPtr CreateModel::In(AZStd::size_t Inputneurons, AZStd::size_t Outputparams, HiddenLayerParams Hiddenlayers)
     {
         INeuralNetworkPtr result = AZStd::make_unique<MultilayerPerceptron>(Inputneurons);
         MultilayerPerceptron* modelPtr = static_cast<MultilayerPerceptron*>(result.get());
-        for (AZStd::size_t layerSize : Hiddenlayers)
+        for (auto layerParams : Hiddenlayers)
         {
-            modelPtr->AddLayer(layerSize);
+            modelPtr->AddLayer(layerParams); //.m_layerSize, layerParams.m_activationFunction);
         }
-        modelPtr->AddLayer(Outputneurons);
+        modelPtr->AddLayer(Outputparams); //.m_layerSize, Outputparams.m_activationFunction);
         return result;
     }
 }

+ 1 - 0
Gems/MachineLearning/Code/Source/Nodes/CreateModel.h

@@ -12,6 +12,7 @@
 #include <ScriptCanvas/Core/Nodeable.h>
 #include <ScriptCanvas/Core/NodeableNode.h>
 #include <MachineLearning/INeuralNetwork.h>
+#include <Models/Layer.h>
 #include <Source/Nodes/CreateModel.generated.h>
 
 namespace MachineLearning

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/FeedForward.cpp

@@ -13,7 +13,7 @@ namespace MachineLearning
 {
     AZ::VectorN FeedForward::In(MachineLearning::INeuralNetworkPtr Model, AZ::VectorN Activations)
     {
-        AZ::VectorN results = Model->FeedForward(Activations);
+        AZ::VectorN results = Model->Forward(Activations);
         return results;
     }
 }

+ 33 - 0
Gems/MachineLearning/Code/Tests/Algorithms/ActivationTests.cpp

@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <AzTest/AzTest.h>
+#include <AzCore/UnitTest/TestTypes.h>
+#include <Algorithms/Activations.h>
+
+namespace UnitTest
+{
+    class MachineLearning_Activations
+        : public UnitTest::LeakDetectionFixture
+    {
+    };
+
+    TEST_F(MachineLearning_Activations, TestRelu)
+    {
+        AZ::VectorN output = AZ::VectorN::CreateZero(1024);
+        AZ::VectorN sourceVector = AZ::VectorN::CreateRandom(1024);
+        sourceVector *= 100.0f;
+        sourceVector -= 50.0f;
+        MachineLearning::ReLU(sourceVector, output);
+    
+        for (AZStd::size_t iter = 0; iter < output.GetDimensionality(); ++iter)
+        {
+            ASSERT_GE(output.GetElement(iter), 0.0f);
+        }
+    }
+}

+ 20 - 0
Gems/MachineLearning/Code/Tests/Algorithms/LossFunctionTests.cpp

@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <AzTest/AzTest.h>
+#include <AzCore/UnitTest/TestTypes.h>
+#include <Algorithms/LossFunctions.h>
+
+namespace UnitTest
+{
+    class MachineLearning_LossFunctions
+        : public UnitTest::LeakDetectionFixture
+    {
+    };
+
+}

+ 24 - 1
Gems/MachineLearning/Code/Tests/Models/LayerTests.cpp

@@ -20,7 +20,7 @@ namespace UnitTest
     TEST_F(MachineLearning_Layers, TestConstructor)
     {
         // Construct a layer that takes 8 inputs and generates 4 outputs
-        MachineLearning::Layer testLayer(8, 4);
+        MachineLearning::Layer testLayer(MachineLearning::ActivationFunctions::Linear, 8, 4);
         EXPECT_EQ(testLayer.m_inputSize, 8);
         EXPECT_EQ(testLayer.m_outputSize, 4);
         EXPECT_EQ(testLayer.m_weights.GetColumnCount(), 8);
@@ -28,4 +28,27 @@ namespace UnitTest
         EXPECT_EQ(testLayer.m_biases.GetDimensionality(), 4);
         EXPECT_EQ(testLayer.m_output.GetDimensionality(), 4);
     }
+
+    TEST_F(MachineLearning_Layers, TestForward)
+    {
+        // Construct a layer that takes 8 inputs and generates 4 outputs
+        MachineLearning::Layer testLayer(MachineLearning::ActivationFunctions::Linear, 8, 4);
+        testLayer.m_biases = AZ::VectorN::CreateOne(testLayer.m_biases.GetDimensionality());
+        testLayer.m_weights = AZ::MatrixMxN::CreateZero(testLayer.m_weights.GetRowCount(), testLayer.m_weights.GetColumnCount());
+        testLayer.m_weights += 1.0f;
+
+        const AZ::VectorN ones = AZ::VectorN::CreateOne(8); // Input of all ones
+        testLayer.Forward(ones);
+        for (AZStd::size_t iter = 0; iter < testLayer.m_output.GetDimensionality(); ++iter)
+        {
+            ASSERT_FLOAT_EQ(testLayer.m_output.GetElement(iter), 9.0f); // 8 edges of 1's + 1 for the bias
+        }
+
+        const AZ::VectorN zeros = AZ::VectorN::CreateZero(8); // Input of all zeros
+        testLayer.Forward(zeros);
+        for (AZStd::size_t iter = 0; iter < testLayer.m_output.GetDimensionality(); ++iter)
+        {
+            ASSERT_FLOAT_EQ(testLayer.m_output.GetElement(iter), 1.0f); // Weights are all zero, leaving only the layer biases which are all set to 1
+        }
+    }
 }

+ 97 - 0
Gems/MachineLearning/Code/Tests/Models/MultilayerPerceptronTests.cpp

@@ -7,8 +7,105 @@
  */
 
 #include <AzTest/AzTest.h>
+#include <AzCore/UnitTest/TestTypes.h>
 #include <Models/MultilayerPerceptron.h>
+#include <Algorithms/LossFunctions.h>
 
 namespace UnitTest
 {
+    class MachineLearning_MLP
+        : public UnitTest::LeakDetectionFixture
+    {
+    };
+
+    TEST_F(MachineLearning_MLP, TestGradientCalculations)
+    {
+        // As the computations performed during gradient descent are non-trivial, this unit test carefully replicates the backward propagation example as laid out in this article
+        // https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
+        // The example is extremely simple, so in the case of a unit test failure, this allows the maintainer to carefully trace unit test execution and compare its output with the article
+        const float layer0Weights[] =
+        {
+            0.15f, 0.20f, 0.25f, 0.30f
+        };
+        const float layer0Biases[] = { 0.35f, 0.35f };
+
+        const float layer1Weights[] =
+        {
+            0.40f, 0.45f, 0.50f, 0.55f
+        };
+        const float layer1Biases[] = { 0.60f, 0.60f };
+
+        MachineLearning::MultilayerPerceptron mlp(2);
+        mlp.AddLayer(2, MachineLearning::ActivationFunctions::Sigmoid);
+        mlp.AddLayer(2, MachineLearning::ActivationFunctions::Sigmoid);
+
+        MachineLearning::Layer& layer0 = mlp.GetLayer(0);
+        layer0.m_weights = AZ::MatrixMxN::CreateFromPackedFloats(2, 2, layer0Weights);
+        layer0.m_biases = AZ::VectorN::CreateFromFloats(2, layer0Biases);
+
+        MachineLearning::Layer& layer1 = mlp.GetLayer(1);
+        layer1.m_weights = AZ::MatrixMxN::CreateFromPackedFloats(2, 2, layer1Weights);
+        layer1.m_biases = AZ::VectorN::CreateFromFloats(2, layer1Biases);
+
+        const float activations[] = { 0.05f, 0.10f };
+        const float labels[] = { 0.01f, 0.99f };
+
+        const AZ::VectorN trainingInput = AZ::VectorN::CreateFromFloats(2, activations);
+        const AZ::VectorN trainingOutput = AZ::VectorN::CreateFromFloats(2, labels);
+
+        const AZ::VectorN& actualOutput = mlp.Forward(trainingInput);
+
+        // Validate intermediate layer output given the initial weights and biases
+        EXPECT_TRUE(AZ::IsCloseMag(layer0.m_output.GetElement(0), 0.5933f, 0.01f));
+        EXPECT_TRUE(AZ::IsCloseMag(layer0.m_output.GetElement(1), 0.5969f, 0.01f));
+
+        // Validate final model output given the initial weights and biases
+        EXPECT_TRUE(AZ::IsCloseMag(actualOutput.GetElement(0), 0.75f, 0.01f));
+        EXPECT_TRUE(AZ::IsCloseMag(actualOutput.GetElement(1), 0.77f, 0.01f));
+
+        float cost = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, trainingOutput, actualOutput);
+        EXPECT_TRUE(AZ::IsCloseMag(cost, 0.30f, 0.01f));
+
+        mlp.Reverse(MachineLearning::LossFunctions::MeanSquaredError, trainingInput, trainingOutput);
+
+        // Check the activation gradients
+        EXPECT_NEAR(layer1.m_activationGradients.GetElement(0),  0.1385f, 0.01f);
+        EXPECT_NEAR(layer1.m_activationGradients.GetElement(1), -0.0381f, 0.01f);
+
+        EXPECT_NEAR(layer1.m_weightGradients.GetElement(0, 0),  0.0822f, 0.01f);
+        EXPECT_NEAR(layer1.m_weightGradients.GetElement(0, 1),  0.0826f, 0.01f);
+        EXPECT_NEAR(layer1.m_weightGradients.GetElement(1, 0), -0.0226f, 0.01f);
+        EXPECT_NEAR(layer1.m_weightGradients.GetElement(1, 1), -0.0227f, 0.01f);
+
+        EXPECT_NEAR(layer1.m_backpropagationGradients.GetElement(0), 0.0364f, 0.01f);
+        EXPECT_NEAR(layer1.m_backpropagationGradients.GetElement(1), 0.0414f, 0.01f);
+
+        EXPECT_NEAR(layer0.m_weightGradients.GetElement(0, 0),  0.0004f, 0.01f);
+        EXPECT_NEAR(layer0.m_weightGradients.GetElement(0, 1),  0.0008f, 0.01f);
+
+        mlp.GradientDescent(0.5f);
+
+        EXPECT_NEAR(layer1.m_weights.GetElement(0, 0), 0.3590f, 0.01f);
+        EXPECT_NEAR(layer1.m_weights.GetElement(0, 1), 0.4087f, 0.01f);
+        EXPECT_NEAR(layer1.m_weights.GetElement(1, 0), 0.5113f, 0.01f);
+        EXPECT_NEAR(layer1.m_weights.GetElement(1, 1), 0.5614f, 0.01f);
+
+        EXPECT_NEAR(layer0.m_weights.GetElement(0, 0), 0.1498f, 0.01f);
+        EXPECT_NEAR(layer0.m_weights.GetElement(0, 1), 0.1996f, 0.01f);
+        EXPECT_NEAR(layer0.m_weights.GetElement(1, 0), 0.2495f, 0.01f);
+        EXPECT_NEAR(layer0.m_weights.GetElement(1, 1), 0.2995f, 0.01f);
+
+        // Now let's evaluate a whole training cycle
+        const AZStd::size_t numTrainingLoops = 10000;
+        for (AZStd::size_t iter = 0; iter < numTrainingLoops; ++iter)
+        {
+            mlp.Reverse(MachineLearning::LossFunctions::MeanSquaredError, trainingInput, trainingOutput);
+            mlp.GradientDescent(0.5f);
+        }
+
+        // We expect the total cost of the network on the training sample to be much lower after training
+        const AZ::VectorN& trainedOutput = mlp.Forward(trainingInput);
+        float trainedCost = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, trainingOutput, trainedOutput);
+        EXPECT_LT(trainedCost, 3.5e-6f);
+    }
 }

+ 4 - 3
Gems/MachineLearning/Code/machinelearning_private_files.cmake

@@ -11,13 +11,14 @@ set(FILES
     Source/MachineLearningModuleInterface.h
     Source/MachineLearningSystemComponent.cpp
     Source/MachineLearningSystemComponent.h
+	Source/Algorithms/Activations.h
+	Source/Algorithms/Activations.inl
+	Source/Algorithms/LossFunctions.h
+	Source/Algorithms/LossFunctions.inl
 	Source/Models/Layer.cpp
 	Source/Models/Layer.h
 	Source/Models/MultilayerPerceptron.cpp
 	Source/Models/MultilayerPerceptron.h
-    Source/Nodes/ComputeCost.ScriptCanvasNodeable.xml
-    Source/Nodes/ComputeCost.cpp
-    Source/Nodes/ComputeCost.h
     Source/Nodes/CreateModel.ScriptCanvasNodeable.xml
     Source/Nodes/CreateModel.cpp
     Source/Nodes/CreateModel.h

+ 2 - 0
Gems/MachineLearning/Code/machinelearning_tests_files.cmake

@@ -7,6 +7,8 @@
 #
 
 set(FILES
+    Tests/Algorithms/ActivationTests.cpp
+    Tests/Algorithms/LossFunctionTests.cpp
     Tests/Models/LayerTests.cpp
     Tests/Models/MultilayerPerceptronTests.cpp
     Tests/MachineLearningTests.cpp