Browse Source

Updates to the machine learning gem

Signed-off-by: kberg-amzn <[email protected]>
kberg-amzn 2 years ago
parent
commit
b9abb6f090
50 changed files with 1372 additions and 406 deletions
  1. 1 0
      Gems/MachineLearning/Assets/Editor/Icons/Components/NeuralNetwork.svg
  2. 8 0
      Gems/MachineLearning/Assets/Editor/Icons/Components/Viewport/NeuralNetwork.svg
  3. 60 0
      Gems/MachineLearning/Assets/TranslationAssets/Nodes/Accumulatetraininggradient_4E0AAD3E-A05D-3BCB-2F80-B80A602913F9.names
  4. 11 5
      Gems/MachineLearning/Assets/TranslationAssets/Nodes/Computecostgradient_0FA8FC88-6C4B-9260-6CC3-F2154691B279.names
  5. 36 0
      Gems/MachineLearning/Assets/TranslationAssets/Nodes/Loadtrainingdata_73A0AA0F-C71C-2FA9-0D04-B0927EB81CEC.names
  6. 48 0
      Gems/MachineLearning/Assets/TranslationAssets/Nodes/Onehotencoder_F5A5E296-F8C1-8316-87D7-80FB5DED6BF7.names
  7. 9 15
      Gems/MachineLearning/Assets/TranslationAssets/Nodes/Performsagradientdescentstep_716A4682-81F5-11EF-B2F0-938CED78825E.names
  8. 84 0
      Gems/MachineLearning/Assets/TranslationAssets/Nodes/Supervisedlearning_B7433D60-ECE2-3970-773F-D71FD979B9E3.names
  9. 36 0
      Gems/MachineLearning/Code/Include/MachineLearning/ILabeledTrainingData.h
  10. 24 9
      Gems/MachineLearning/Code/Include/MachineLearning/INeuralNetwork.h
  11. 0 28
      Gems/MachineLearning/Code/Include/MachineLearning/LabeledTrainingData.h
  12. 6 30
      Gems/MachineLearning/Code/Include/MachineLearning/Types.h
  13. 203 0
      Gems/MachineLearning/Code/Source/Algorithms/Activations.cpp
  14. 17 7
      Gems/MachineLearning/Code/Source/Algorithms/Activations.h
  15. 0 121
      Gems/MachineLearning/Code/Source/Algorithms/Activations.inl
  16. 6 6
      Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.cpp
  17. 0 2
      Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.h
  18. 81 0
      Gems/MachineLearning/Code/Source/Algorithms/Training.cpp
  19. 37 0
      Gems/MachineLearning/Code/Source/Algorithms/Training.h
  20. 199 0
      Gems/MachineLearning/Code/Source/Assets/MnistDataLoader.cpp
  21. 69 0
      Gems/MachineLearning/Code/Source/Assets/MnistDataLoader.h
  22. 82 0
      Gems/MachineLearning/Code/Source/Components/MultilayerPerceptronComponent.cpp
  23. 55 0
      Gems/MachineLearning/Code/Source/Components/MultilayerPerceptronComponent.h
  24. 7 0
      Gems/MachineLearning/Code/Source/MachineLearningModule.cpp
  25. 4 5
      Gems/MachineLearning/Code/Source/MachineLearningModuleInterface.cpp
  26. 23 88
      Gems/MachineLearning/Code/Source/MachineLearningSystemComponent.cpp
  27. 11 8
      Gems/MachineLearning/Code/Source/Models/Layer.cpp
  28. 6 0
      Gems/MachineLearning/Code/Source/Models/Layer.h
  29. 20 10
      Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.cpp
  30. 12 7
      Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.h
  31. 1 1
      Gems/MachineLearning/Code/Source/Nodes/AccumulateTrainingGradients.cpp
  32. 3 3
      Gems/MachineLearning/Code/Source/Nodes/ComputeCost.cpp
  33. 0 17
      Gems/MachineLearning/Code/Source/Nodes/CreateModel.ScriptCanvasNodeable.xml
  34. 0 25
      Gems/MachineLearning/Code/Source/Nodes/CreateModel.cpp
  35. 2 2
      Gems/MachineLearning/Code/Source/Nodes/FeedForward.cpp
  36. 1 1
      Gems/MachineLearning/Code/Source/Nodes/GradientDescent.cpp
  37. 3 1
      Gems/MachineLearning/Code/Source/Nodes/LoadTrainingData.ScriptCanvasNodeable.xml
  38. 5 2
      Gems/MachineLearning/Code/Source/Nodes/LoadTrainingData.cpp
  39. 1 1
      Gems/MachineLearning/Code/Source/Nodes/LoadTrainingData.h
  40. 16 0
      Gems/MachineLearning/Code/Source/Nodes/OneHot.ScriptCanvasNodeable.xml
  41. 20 0
      Gems/MachineLearning/Code/Source/Nodes/OneHot.cpp
  42. 3 4
      Gems/MachineLearning/Code/Source/Nodes/OneHot.h
  43. 25 0
      Gems/MachineLearning/Code/Source/Nodes/SupervisedLearning.ScriptCanvasNodeable.xml
  44. 33 0
      Gems/MachineLearning/Code/Source/Nodes/SupervisedLearning.cpp
  45. 25 0
      Gems/MachineLearning/Code/Source/Nodes/SupervisedLearning.h
  46. 54 0
      Gems/MachineLearning/Code/Tests/Algorithms/ActivationTests.cpp
  47. 8 0
      Gems/MachineLearning/Code/Tests/Algorithms/LossFunctionTests.cpp
  48. 2 2
      Gems/MachineLearning/Code/Tests/Models/MultilayerPerceptronTests.cpp
  49. 1 1
      Gems/MachineLearning/Code/machinelearning_api_files.cmake
  50. 14 5
      Gems/MachineLearning/Code/machinelearning_private_files.cmake

File diff suppressed because it is too large
+ 1 - 0
Gems/MachineLearning/Assets/Editor/Icons/Components/NeuralNetwork.svg


File diff suppressed because it is too large
+ 8 - 0
Gems/MachineLearning/Assets/Editor/Icons/Components/Viewport/NeuralNetwork.svg


+ 60 - 0
Gems/MachineLearning/Assets/TranslationAssets/Nodes/Accumulatetraininggradient_4E0AAD3E-A05D-3BCB-2F80-B80A602913F9.names

@@ -0,0 +1,60 @@
+{
+    "entries": [
+        {
+            "base": "{4E0AAD3E-A05D-3BCB-2F80-B80A602913F9}",
+            "context": "ScriptCanvas::Node",
+            "variant": "",
+            "details": {
+                "name": "Accumulate training gradients",
+                "category": "MachineLearning",
+                "tooltip": "Accumulates the loss gradients for a machine learning model against a set of activations and a set of expected outputs."
+            },
+            "slots": [
+                {
+                    "base": "Input_In_0",
+                    "details": {
+                        "name": "In",
+                        "tooltip": "Parameters controlling loss gradient calculation"
+                    }
+                },
+                {
+                    "base": "DataInput_Model_0",
+                    "details": {
+                        "name": "Model"
+                    }
+                },
+                {
+                    "base": "DataInput_CostFunction_1",
+                    "details": {
+                        "name": "CostFunction"
+                    }
+                },
+                {
+                    "base": "DataInput_Activations_2",
+                    "details": {
+                        "name": "Activations"
+                    }
+                },
+                {
+                    "base": "DataInput_ExpectedOutput_3",
+                    "details": {
+                        "name": "ExpectedOutput"
+                    }
+                },
+                {
+                    "base": "Output_On In_0",
+                    "details": {
+                        "name": "On In",
+                        "tooltip": "Parameters controlling loss gradient calculation"
+                    }
+                },
+                {
+                    "base": "DataOutput_Model_0",
+                    "details": {
+                        "name": "Model"
+                    }
+                }
+            ]
+        }
+    ]
+}

+ 11 - 5
Gems/MachineLearning/Assets/TranslationAssets/Nodes/Computecostgradient_0FA8FC88-6C4B-9260-6CC3-F2154691B279.names

@@ -7,14 +7,14 @@
             "details": {
                 "name": "Compute cost",
                 "category": "MachineLearning",
-                "tooltip": "Calculates the total cost for a machine learning model against a set of activations and a set of expected outputs."
+                "tooltip": "Calculates the total 'cost' of a machine learning model against a set of activations and a set of expected outputs."
             },
             "slots": [
                 {
                     "base": "Input_In_0",
                     "details": {
                         "name": "In",
-                        "tooltip": "Parameters controlling cost calculation"
+                        "tooltip": "Parameters controlling the cost calculation"
                     }
                 },
                 {
@@ -24,13 +24,19 @@
                     }
                 },
                 {
-                    "base": "DataInput_Activations_1",
+                    "base": "DataInput_CostFunction_1",
+                    "details": {
+                        "name": "CostFunction"
+                    }
+                },
+                {
+                    "base": "DataInput_Activations_2",
                     "details": {
                         "name": "Activations"
                     }
                 },
                 {
-                    "base": "DataInput_ExpectedOutput_2",
+                    "base": "DataInput_ExpectedOutput_3",
                     "details": {
                         "name": "ExpectedOutput"
                     }
@@ -39,7 +45,7 @@
                     "base": "Output_On In_0",
                     "details": {
                         "name": "On In",
-                        "tooltip": "Parameters controlling cost calculation"
+                        "tooltip": "Parameters controlling the cost calculation"
                     }
                 },
                 {

+ 36 - 0
Gems/MachineLearning/Assets/TranslationAssets/Nodes/Loadtrainingdata_73A0AA0F-C71C-2FA9-0D04-B0927EB81CEC.names

@@ -0,0 +1,36 @@
+{
+    "entries": [
+        {
+            "base": "{73A0AA0F-C71C-2FA9-0D04-B0927EB81CEC}",
+            "context": "ScriptCanvas::Node",
+            "variant": "",
+            "details": {
+                "name": "Load training data",
+                "category": "MachineLearning",
+                "tooltip": "Loads a set of labeled training data."
+            },
+            "slots": [
+                {
+                    "base": "Input_In_0",
+                    "details": {
+                        "name": "In",
+                        "tooltip": "Parameters controlling what data to load"
+                    }
+                },
+                {
+                    "base": "Output_On In_0",
+                    "details": {
+                        "name": "On In",
+                        "tooltip": "Parameters controlling what data to load"
+                    }
+                },
+                {
+                    "base": "DataOutput_Data_0",
+                    "details": {
+                        "name": "Data"
+                    }
+                }
+            ]
+        }
+    ]
+}

+ 48 - 0
Gems/MachineLearning/Assets/TranslationAssets/Nodes/Onehotencoder_F5A5E296-F8C1-8316-87D7-80FB5DED6BF7.names

@@ -0,0 +1,48 @@
+{
+    "entries": [
+        {
+            "base": "{F5A5E296-F8C1-8316-87D7-80FB5DED6BF7}",
+            "context": "ScriptCanvas::Node",
+            "variant": "",
+            "details": {
+                "name": "One hot encoder",
+                "category": "MachineLearning",
+                "tooltip": "One-hot encodes a value."
+            },
+            "slots": [
+                {
+                    "base": "Input_In_0",
+                    "details": {
+                        "name": "In",
+                        "tooltip": "Parameters controlling the one-hot encoding"
+                    }
+                },
+                {
+                    "base": "DataInput_value_0",
+                    "details": {
+                        "name": "value"
+                    }
+                },
+                {
+                    "base": "DataInput_maxValue_1",
+                    "details": {
+                        "name": "maxValue"
+                    }
+                },
+                {
+                    "base": "Output_On In_0",
+                    "details": {
+                        "name": "On In",
+                        "tooltip": "Parameters controlling the one-hot encoding"
+                    }
+                },
+                {
+                    "base": "DataOutput_oneHot_0",
+                    "details": {
+                        "name": "oneHot"
+                    }
+                }
+            ]
+        }
+    ]
+}

+ 9 - 15
Gems/MachineLearning/Assets/TranslationAssets/Nodes/Createsanewmachinelearningmodel_8B5BA564-BEAF-B290-6224-F82B7618735C.names → Gems/MachineLearning/Assets/TranslationAssets/Nodes/Performsagradientdescentstep_716A4682-81F5-11EF-B2F0-938CED78825E.names

@@ -1,45 +1,39 @@
 {
     "entries": [
         {
-            "base": "{8B5BA564-BEAF-B290-6224-F82B7618735C}",
+            "base": "{716A4682-81F5-11EF-B2F0-938CED78825E}",
             "context": "ScriptCanvas::Node",
             "variant": "",
             "details": {
-                "name": "Create new model",
+                "name": "Gradient descent",
                 "category": "MachineLearning",
-                "tooltip": "Creates a new untrained machine learning model. All parameters will be initialized to random values."
+                "tooltip": "Performs a gradient descent step on a model, and zeroes all gradient vectors."
             },
             "slots": [
                 {
                     "base": "Input_In_0",
                     "details": {
                         "name": "In",
-                        "tooltip": "Parameters controlling the new model"
+                        "tooltip": "Parameters controlling gradient descent"
                     }
                 },
                 {
-                    "base": "DataInput_Input neurons_0",
+                    "base": "DataInput_Model_0",
                     "details": {
-                        "name": "Input neurons"
-                    }
-                },
-                {
-                    "base": "DataInput_Output neurons_1",
-                    "details": {
-                        "name": "Output neurons"
+                        "name": "Model"
                     }
                 },
                 {
-                    "base": "DataInput_Hidden layers_2",
+                    "base": "DataInput_LearningRate_1",
                     "details": {
-                        "name": "Hidden layers"
+                        "name": "LearningRate"
                     }
                 },
                 {
                     "base": "Output_On In_0",
                     "details": {
                         "name": "On In",
-                        "tooltip": "Parameters controlling the new model"
+                        "tooltip": "Parameters controlling gradient descent"
                     }
                 },
                 {

+ 84 - 0
Gems/MachineLearning/Assets/TranslationAssets/Nodes/Supervisedlearning_B7433D60-ECE2-3970-773F-D71FD979B9E3.names

@@ -0,0 +1,84 @@
+{
+    "entries": [
+        {
+            "base": "{B7433D60-ECE2-3970-773F-D71FD979B9E3}",
+            "context": "ScriptCanvas::Node",
+            "variant": "",
+            "details": {
+                "name": "Supervised learning",
+                "category": "MachineLearning",
+                "tooltip": "Performs a fully supervised training session of a neural network using stochastic gradient descent."
+            },
+            "slots": [
+                {
+                    "base": "Input_In_0",
+                    "details": {
+                        "name": "In",
+                        "tooltip": "Parameters controlling model training"
+                    }
+                },
+                {
+                    "base": "DataInput_Model_0",
+                    "details": {
+                        "name": "Model"
+                    }
+                },
+                {
+                    "base": "DataInput_TrainingData_1",
+                    "details": {
+                        "name": "TrainingData"
+                    }
+                },
+                {
+                    "base": "DataInput_TestData_2",
+                    "details": {
+                        "name": "TestData"
+                    }
+                },
+                {
+                    "base": "DataInput_CostFunction_3",
+                    "details": {
+                        "name": "CostFunction"
+                    }
+                },
+                {
+                    "base": "DataInput_TotalIterations_4",
+                    "details": {
+                        "name": "TotalIterations"
+                    }
+                },
+                {
+                    "base": "DataInput_BatchSize_5",
+                    "details": {
+                        "name": "BatchSize"
+                    }
+                },
+                {
+                    "base": "DataInput_LearningRate_6",
+                    "details": {
+                        "name": "LearningRate"
+                    }
+                },
+                {
+                    "base": "DataInput_EarlyStopCost_7",
+                    "details": {
+                        "name": "EarlyStopCost"
+                    }
+                },
+                {
+                    "base": "Output_On In_0",
+                    "details": {
+                        "name": "On In",
+                        "tooltip": "Parameters controlling model training"
+                    }
+                },
+                {
+                    "base": "DataOutput_Model_0",
+                    "details": {
+                        "name": "Model"
+                    }
+                }
+            ]
+        }
+    ]
+}

+ 36 - 0
Gems/MachineLearning/Code/Include/MachineLearning/ILabeledTrainingData.h

@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <MachineLearning/Types.h>
+#include <AzCore/std/string/string.h>
+
+namespace MachineLearning
+{
+    //! Abstract interface over a set of labeled training samples: each sample pairs a
+    //! data (activation) vector with a label vector, indexed 0..GetSampleCount()-1.
+    struct ILabeledTrainingData
+    {
+        AZ_TYPE_INFO(ILabeledTrainingData, "{50DF457E-3EAC-4114-8444-023E64973AD9}");
+
+        virtual ~ILabeledTrainingData() = default;
+
+        //! Loads the indicated label and data files.
+        //! @param imageFilename path to the archive holding the sample data
+        //! @param labelFilename path to the archive holding the matching labels
+        //! @return presumably true on success - contract not visible here, confirm against implementations
+        virtual bool LoadArchive(const AZStd::string& imageFilename, const AZStd::string& labelFilename) = 0;
+
+        //! Returns the total number of samples contained in the training data set.
+        virtual AZStd::size_t GetSampleCount() const = 0;
+
+        //! Returns the index-th label in the training data set.
+        //! NOTE(review): non-const, presumably to allow implementations to decode lazily - confirm.
+        virtual const AZ::VectorN& GetLabelByIndex(AZStd::size_t index) = 0;
+
+        //! Returns the index-th set of activations in the training data set.
+        //! NOTE(review): non-const, presumably to allow implementations to decode lazily - confirm.
+        virtual const AZ::VectorN& GetDataByIndex(AZStd::size_t index) = 0;
+    };
+
+    using ILabeledTrainingDataPtr = AZStd::shared_ptr<ILabeledTrainingData>;

+ 24 - 9
Gems/MachineLearning/Code/Include/MachineLearning/INeuralNetwork.h

@@ -10,38 +10,53 @@
 
 #include <AzCore/Math/VectorN.h>
 #include <MachineLearning/Types.h>
+#include <AzCore/EBus/EBus.h>
 
 namespace MachineLearning
 {
+    class Layer;
+
     class INeuralNetwork
     {
     public:
 
-        AZ_TYPE_INFO(INeuralNetwork, "{64E5B5B1-4A7D-489D-9A29-D9510BB7E17A}");
+        AZ_RTTI(INeuralNetwork, "{64E5B5B1-4A7D-489D-9A29-D9510BB7E17A}");
 
+        INeuralNetwork() = default;
+        INeuralNetwork(INeuralNetwork&&) = default;
+        INeuralNetwork(const INeuralNetwork&) = default;
         virtual ~INeuralNetwork() = default;
 
+        INeuralNetwork& operator=(INeuralNetwork&&) = default;
+        INeuralNetwork& operator=(const INeuralNetwork&) = default;
+
         //! Adds a new layer to the network.
-        virtual void AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction = ActivationFunctions::Linear) = 0;
+        virtual void AddLayer([[maybe_unused]] AZStd::size_t layerDimensionality, [[maybe_unused]] ActivationFunctions activationFunction = ActivationFunctions::ReLU) {}
 
         //! Returns the total number of layers in the network.
-        virtual AZStd::size_t GetLayerCount() const = 0;
+        virtual AZStd::size_t GetLayerCount() const { return 0; }
 
         //! Retrieves a specific layer from the network indexed by the layerIndex.
-        virtual Layer& GetLayer(AZStd::size_t layerIndex) = 0;
+        virtual Layer* GetLayer([[maybe_unused]] AZStd::size_t layerIndex) { return nullptr; }
 
         //! Returns the total number of parameters in the neural network.
-        virtual AZStd::size_t GetParameterCount() const = 0;
+        virtual AZStd::size_t GetParameterCount() const { return 0; }
 
         //! Performs a basic feed-forward operation to compute the output from a set of activation values.
-        virtual const AZ::VectorN& Forward(const AZ::VectorN& activations) = 0;
+        virtual const AZ::VectorN* Forward([[maybe_unused]] const AZ::VectorN& activations) { return nullptr; }
 
         //! Accumulates the loss gradients given a loss function, an activation vector and a corresponding label vector.
-        virtual void Reverse(LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected) = 0;
+        virtual void Reverse([[maybe_unused]] LossFunctions lossFunction, [[maybe_unused]] const AZ::VectorN& activations, [[maybe_unused]] const AZ::VectorN& expected) {}
 
         //! Performs a gradient descent step and resets all gradient accumulators to zero.
-        virtual void GradientDescent(float learningRate) = 0;
+        virtual void GradientDescent([[maybe_unused]] float learningRate) {}
+
+        //! For intrusive_ptr support
+        //! @{
+        void add_ref() {}
+        void release() {}
+        //! @}
     };
 
-    using INeuralNetworkPtr = AZStd::shared_ptr<INeuralNetwork>;
+    using INeuralNetworkPtr = AZStd::intrusive_ptr<INeuralNetwork>;
 }

+ 0 - 28
Gems/MachineLearning/Code/Include/MachineLearning/LabeledTrainingData.h

@@ -1,28 +0,0 @@
-/*
- * Copyright (c) Contributors to the Open 3D Engine Project.
- * For complete copyright and license terms please see the LICENSE at the root of this distribution.
- *
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- */
-
-#pragma once
-
-#include <MachineLearning/Types.h>
-
-namespace MachineLearning
-{
-    struct LabeledTrainingData
-    {
-        AZ_TYPE_INFO(LabeledTrainingData, "{50DF457E-3EAC-4114-8444-023E64973AD9}");
-
-        //! AzCore Reflection.
-        //! @param context reflection context
-        static void Reflect(class AZ::ReflectContext* context);
-
-        AZ::VectorN m_activations; // Must be of the same dimensionality as the input layer of the model
-        AZ::VectorN m_label;       // Must be of the same dimensionality as the output layer of the model
-    };
-
-    using LabeledTrainingDataSet = AZStd::vector<LabeledTrainingData>;
-}

+ 6 - 30
Gems/MachineLearning/Code/Include/MachineLearning/Types.h

@@ -9,42 +9,18 @@
 #pragma once
 
 #include <AzCore/Math/VectorN.h>
+#include <AzCore/std/smart_ptr/shared_ptr.h>
 
 namespace MachineLearning
 {
-    enum class LossFunctions
-    {
+    AZ_ENUM_CLASS(LossFunctions,
         MeanSquaredError
-    };
+    );
 
-    enum class ActivationFunctions
-    {
+    AZ_ENUM_CLASS(ActivationFunctions,
         ReLU,
         Sigmoid,
+        Softmax,
         Linear
-    };
-
-    class Layer;
-
-    struct LayerParams
-    {
-        AZ_TYPE_INFO(LayerParams, "{DD9A7E7C-8D11-4805-83CF-6A5262B4580C}");
-
-        //! AzCore Reflection.
-        //! @param context reflection context
-        static void Reflect(class AZ::ReflectContext* context);
-
-        LayerParams() = default;
-        inline LayerParams(AZStd::size_t size, ActivationFunctions activationFunction)
-            : m_layerSize(size)
-            , m_activationFunction(activationFunction)
-        {
-        }
-
-        AZStd::size_t m_layerSize = 0;
-        ActivationFunctions m_activationFunction = ActivationFunctions::ReLU;
-    };
-
-    //using HiddenLayerParams = AZStd::vector<LayerParams>;
-    using HiddenLayerParams = AZStd::vector<AZStd::size_t>;
+    );
 }

+ 203 - 0
Gems/MachineLearning/Code/Source/Algorithms/Activations.cpp

@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <Algorithms/Activations.h>
+#include <AzCore/Math/SimdMath.h>
+#include <AzCore/Math/MatrixMxN.h>
+
+namespace MachineLearning
+{
+    AZStd::vector<AZ::Edit::EnumConstant<ActivationFunctions>> GetActivationEnumValues()
+    {
+        // Reflection helper: pairs each activation enum value with its editor display label.
+        // Order matches the ActivationFunctions enum declaration and determines UI order.
+        AZStd::vector<AZ::Edit::EnumConstant<ActivationFunctions>> result;
+        result.reserve(4);
+        result.push_back(AZ::Edit::EnumConstant<ActivationFunctions>(ActivationFunctions::ReLU, "ReLU"));
+        result.push_back(AZ::Edit::EnumConstant<ActivationFunctions>(ActivationFunctions::Sigmoid, "Sigmoid"));
+        result.push_back(AZ::Edit::EnumConstant<ActivationFunctions>(ActivationFunctions::Softmax, "Softmax"));
+        result.push_back(AZ::Edit::EnumConstant<ActivationFunctions>(ActivationFunctions::Linear, "Linear"));
+        return result;
+    }
+
+    void OneHotEncode(AZStd::size_t value, AZStd::size_t maxValue, AZ::VectorN& output)
+    {
+        // One-hot encodes 'value' into an output vector of dimensionality 'maxValue'.
+        // Valid classes are 0..maxValue-1 because the vector has exactly maxValue
+        // elements; the previous check (value <= maxValue) allowed value == maxValue,
+        // which would drive SetElement one past the end of the resized vector.
+        AZ_Assert(value < maxValue, "Requested one-hot encode of an out of range value");
+        output.Resize(maxValue);
+        output.SetZero();
+        output.SetElement(value, 1.0f);
+    }
+
+    void Activate(ActivationFunctions activationFunction, const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        // Dispatches to the requested activation function. Output is pre-sized to the
+        // source dimensionality; each helper is free to resize again (idempotent).
+        output.Resize(sourceVector.GetDimensionality());
+        if (activationFunction == ActivationFunctions::ReLU)
+        {
+            ReLU(sourceVector, output);
+        }
+        else if (activationFunction == ActivationFunctions::Sigmoid)
+        {
+            Sigmoid(sourceVector, output);
+        }
+        else if (activationFunction == ActivationFunctions::Softmax)
+        {
+            Softmax(sourceVector, output);
+        }
+        else if (activationFunction == ActivationFunctions::Linear)
+        {
+            Linear(sourceVector, output);
+        }
+    }
+
+    void ReLU(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        // Element-wise max(0, x): build a lane mask of non-negative inputs and AND it
+        // against the source so every negative lane is zeroed.
+        output.Resize(sourceVector.GetDimensionality());
+        const AZ::Simd::Vec4::FloatType zero = AZ::Simd::Vec4::ZeroFloat();
+        const AZStd::size_t count = sourceVector.GetVectorValues().size();
+        for (AZStd::size_t index = 0; index < count; ++index)
+        {
+            const AZ::Simd::Vec4::FloatType source = sourceVector.GetVectorValues()[index].GetSimdValue();
+            const AZ::Simd::Vec4::FloatType nonNegativeMask = AZ::Simd::Vec4::CmpGtEq(source, zero); // all-ones lanes where >= 0
+            output.GetVectorValues()[index].SetSimdValue(AZ::Simd::Vec4::And(source, nonNegativeMask));
+        }
+        output.FixLastVectorElement();
+    }
+
+    void Sigmoid(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        // Element-wise logistic function 1 / (1 + e^-x). The result is clamped to
+        // [0, 1] to guard against small excursions from the exponential estimate.
+        output.Resize(sourceVector.GetDimensionality());
+        const AZ::Vector4 zero = AZ::Vector4::CreateZero();
+        const AZ::Vector4 one = AZ::Vector4::CreateOne();
+        const AZStd::size_t count = sourceVector.GetVectorValues().size();
+        for (AZStd::size_t index = 0; index < count; ++index)
+        {
+            const AZ::Vector4& source = sourceVector.GetVectorValues()[index];
+            const AZ::Vector4 sigmoid = one / (one + (-source).GetExpEstimate());
+            output.GetVectorValues()[index] = sigmoid.GetClamp(zero, one);
+        }
+        output.FixLastVectorElement();
+    }
+
+    void Softmax(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        const AZ::Vector4 vecZero = AZ::Vector4::CreateZero();
+        const AZ::Vector4 vecOne = AZ::Vector4::CreateOne();
+
+        // Naive softmax is simply softmax(source) = exp(source) / sum(exp(source))
+        // Here we apply the exp-normalization trick to avoid exp overflow
+        // x = max(source)
+        // y = exp(source - x)
+        // softmax(source) = y / sum(y)
+        const AZStd::size_t numElements = sourceVector.GetVectorValues().size();
+        output.Resize(sourceVector.GetDimensionality());
+
+        // Vector4::GetMax is const and returns the per-lane maximum; the result must be
+        // assigned back (the original discarded it, leaving 'max' as just the first block).
+        AZ::Vector4 max = sourceVector.GetVectorValues()[0];
+        for (AZStd::size_t iter = 1; iter < numElements; ++iter)
+        {
+            max = max.GetMax(sourceVector.GetVectorValues()[iter]);
+        }
+        // Splat the horizontal maximum across all lanes: subtracting per-lane maxima
+        // would rescale each lane by a different constant and corrupt the distribution.
+        float maxElement = max.GetX();
+        maxElement = (max.GetY() > maxElement) ? max.GetY() : maxElement;
+        maxElement = (max.GetZ() > maxElement) ? max.GetZ() : maxElement;
+        maxElement = (max.GetW() > maxElement) ? max.GetW() : maxElement;
+        max = AZ::Vector4(maxElement);
+
+        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
+        {
+            const AZ::Vector4& sourceElement = sourceVector.GetVectorValues()[iter];
+            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
+            outputElement = (sourceElement - max).GetExpEstimate();
+            outputElement = outputElement.GetClamp(vecZero, vecOne);
+        }
+        // Zero the padding lanes of the final Vector4 before summing, otherwise each
+        // padding lane contributes an exp(pad - max) term to the divisor whenever the
+        // dimensionality is not a multiple of 4 (assumes FixLastVectorElement zeroes
+        // the trailing lanes - TODO confirm against AZ::VectorN).
+        output.FixLastVectorElement();
+
+        AZ::Vector4 partialSum = vecZero;
+        for (const AZ::Vector4& element : output.GetVectorValues())
+        {
+            partialSum += element;
+        }
+
+        const float divisor = 1.0f / partialSum.Dot(vecOne); // Dot(1,1,1,1) = horizontal sum
+        for (AZ::Vector4& element : output.GetVectorValues())
+        {
+            element = element * divisor;
+        }
+        output.FixLastVectorElement();
+    }
+
+    void Linear(const AZ::VectorN& sourceVector, AZ::VectorN& output)
+    {
+        // Identity activation: copy the input through, skipping the self-copy case.
+        if (&sourceVector == &output)
+        {
+            return;
+        }
+        output = sourceVector;
+    }
+
+    void Activate_Derivative(ActivationFunctions activationFunction, const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output)
+    {
+        // Dispatches to the derivative of the requested activation. activationOutput is
+        // the forward activation's result; backGradients is the gradient flowing back
+        // from the next layer, multiplied in via the chain rule by each helper.
+        output.Resize(activationOutput.GetDimensionality());
+        if (activationFunction == ActivationFunctions::ReLU)
+        {
+            ReLU_Derivative(activationOutput, backGradients, output);
+        }
+        else if (activationFunction == ActivationFunctions::Sigmoid)
+        {
+            Sigmoid_Derivative(activationOutput, backGradients, output);
+        }
+        else if (activationFunction == ActivationFunctions::Softmax)
+        {
+            Softmax_Derivative(activationOutput, backGradients, output);
+        }
+        else if (activationFunction == ActivationFunctions::Linear)
+        {
+            Linear_Derivative(activationOutput, backGradients, output);
+        }
+    }
+
+    void ReLU_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output)
+    {
+        // d/dx ReLU is 1 where the activation was positive and 0 elsewhere, so the
+        // back-propagated gradient passes through only where the activation output is
+        // strictly positive. Strict comparison is correct here because the forward pass
+        // already clamped every negative input to exactly zero.
+        output.Resize(activationOutput.GetDimensionality());
+        const AZ::Simd::Vec4::FloatType zero = AZ::Simd::Vec4::ZeroFloat();
+        const AZStd::size_t count = activationOutput.GetVectorValues().size();
+        for (AZStd::size_t index = 0; index < count; ++index)
+        {
+            const AZ::Simd::Vec4::FloatType activation = activationOutput.GetVectorValues()[index].GetSimdValue();
+            const AZ::Simd::Vec4::FloatType gradient = backGradients.GetVectorValues()[index].GetSimdValue();
+            const AZ::Simd::Vec4::FloatType positiveMask = AZ::Simd::Vec4::CmpGt(activation, zero); // all-ones lanes where > 0
+            output.GetVectorValues()[index].SetSimdValue(AZ::Simd::Vec4::And(gradient, positiveMask));
+        }
+        output.FixLastVectorElement();
+    }
+
+    void Sigmoid_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output)
+    {
+        // d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)); the chain rule multiplies in
+        // the back-propagated gradient. activationOutput already holds sigmoid(x).
+        output.Resize(activationOutput.GetDimensionality());
+        const AZ::Vector4 one = AZ::Vector4::CreateOne();
+        const AZStd::size_t count = activationOutput.GetVectorValues().size();
+        for (AZStd::size_t index = 0; index < count; ++index)
+        {
+            const AZ::Vector4& sigmoidValue = activationOutput.GetVectorValues()[index];
+            const AZ::Vector4& gradient = backGradients.GetVectorValues()[index];
+            output.GetVectorValues()[index] = gradient * sigmoidValue * (one - sigmoidValue);
+        }
+        output.FixLastVectorElement();
+    }
+
+    void Softmax_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output)
+    {
+        // Full softmax Jacobian contraction: with y = softmax output and g = incoming
+        // gradients, dL/dz_i = sum_j g_j * y_j * (delta_ij - y_i) = y_i * (g_i - sum_j g_j * y_j).
+        // Note that this is completely unvectorized.
+        const AZStd::size_t dimensionality = activationOutput.GetDimensionality();
+        output.Resize(dimensionality);
+        for (AZStd::size_t i = 0; i < dimensionality; ++i)
+        {
+            // y_i is invariant across the inner loop; hoisted out of the O(n^2) body.
+            const float ithElement = activationOutput.GetElement(i);
+            float gradient = 0.0f;
+            for (AZStd::size_t j = 0; j < dimensionality; ++j)
+            {
+                const float jthElement = activationOutput.GetElement(j) * backGradients.GetElement(j);
+                gradient += (i == j) ? (1.0f - ithElement) * jthElement : -ithElement * jthElement;
+            }
+            output.SetElement(i, gradient);
+        }
+    }
+
+    void Linear_Derivative([[maybe_unused]] const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output)
+    {
+        // The linear activation has unit derivative, so the back-propagated gradient
+        // passes through unchanged; skip the copy when source and destination alias.
+        if (&output != &backGradients)
+        {
+            output = backGradients;
+        }
+    }
+}

+ 17 - 7
Gems/MachineLearning/Code/Source/Algorithms/Activations.h

@@ -13,30 +13,40 @@
 
 namespace MachineLearning
 {
+    //! Reflection helper function
+    AZStd::vector<AZ::Edit::EnumConstant<ActivationFunctions>> GetActivationEnumValues();
+
+    //! One-hot encodes the provided value into the resulting vector output, which will have dimensionality maxValue.
+    void OneHotEncode(AZStd::size_t value, AZStd::size_t maxValue, AZ::VectorN& output);
+
     //! Computes the requested activation function applied to all elements of the source vector.
     void Activate(ActivationFunctions activationFunction, const AZ::VectorN& sourceVector, AZ::VectorN& output);
 
     //! Computes the rectified linear unit function (ReLU) applied to all elements of the source vector.
     void ReLU(const AZ::VectorN& sourceVector, AZ::VectorN& output);
 
-    //! Computes the sigmoid applied to all elements of the source vector, but scaled and offset to return values in the range 0..1.
+    //! Computes the sigmoid applied to all elements of the source vector.
     void Sigmoid(const AZ::VectorN& sourceVector, AZ::VectorN& output);
 
+    //! Computes the softmax applied to all elements of the source vector.
+    void Softmax(const AZ::VectorN& sourceVector, AZ::VectorN& output);
+
     //! Computes the linear activation function applied to all elements of the source vector.
     void Linear(const AZ::VectorN& sourceVector, AZ::VectorN& output);
 
     //! Computes the derivative of the requested activation function applied to all elements provided vector.
     //! The activationOutput input here is simply the output of calling Activate on the original source vector.
-    void Activate_Derivative(ActivationFunctions activationFunction, const AZ::VectorN& activationOutput, AZ::VectorN& output);
+    void Activate_Derivative(ActivationFunctions activationFunction, const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output);
 
     //! Computes the derivative of the rectified linear unit function (ReLU) applied to all elements of the original source vector.
-    void ReLU_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output);
+    void ReLU_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output);
 
     //! Computes the derivative of the sigmoid activation function applied to all elements of the original source vector.
-    void Sigmoid_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output);
+    void Sigmoid_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output);
+
+    //! Computes the derivative of the softmax activation function applied to all elements of the original source vector.
+    void Softmax_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output);
 
     //! Computes the derivative linear activation function applied to all elements of the original source vector.
-    void Linear_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output);
+    void Linear_Derivative(const AZ::VectorN& activationOutput, const AZ::VectorN& backGradients, AZ::VectorN& output);
 }
-
-#include <Algorithms/Activations.inl>

+ 0 - 121
Gems/MachineLearning/Code/Source/Algorithms/Activations.inl

@@ -1,121 +0,0 @@
-/*
- * Copyright (c) Contributors to the Open 3D Engine Project.
- * For complete copyright and license terms please see the LICENSE at the root of this distribution.
- *
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- */
-
-#include <Algorithms/Activations.h>
-#include <AzCore/Math/SimdMath.h>
-
-namespace MachineLearning
-{
-    inline void Activate(ActivationFunctions activationFunction, const AZ::VectorN& sourceVector, AZ::VectorN& output)
-    {
-        output.Resize(sourceVector.GetDimensionality());
-        switch (activationFunction)
-        {
-        case ActivationFunctions::ReLU:
-            ReLU(sourceVector, output);
-            break;
-        case ActivationFunctions::Sigmoid:
-            Sigmoid(sourceVector, output);
-            break;
-        case ActivationFunctions::Linear:
-            Linear(sourceVector, output);
-            break;
-        }
-    }
-
-    inline void ReLU(const AZ::VectorN& sourceVector, AZ::VectorN& output)
-    {
-        const AZStd::size_t numElements = sourceVector.GetVectorValues().size();
-        const AZ::Simd::Vec4::FloatType zero = AZ::Simd::Vec4::ZeroFloat();
-        output.Resize(sourceVector.GetDimensionality());
-        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
-        {
-            const AZ::Vector4& sourceElement = sourceVector.GetVectorValues()[iter];
-            const AZ::Simd::Vec4::FloatType mask = AZ::Simd::Vec4::CmpGtEq(sourceElement.GetSimdValue(), zero); // 1's if >= 0, 0's otherwise
-            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
-            outputElement.SetSimdValue(AZ::Simd::Vec4::And(sourceElement.GetSimdValue(), mask)); // Zeros out negative elements
-        }
-        output.FixLastVectorElement();
-    }
-
-    inline void Sigmoid(const AZ::VectorN& sourceVector, AZ::VectorN& output)
-    {
-        const AZStd::size_t numElements = sourceVector.GetVectorValues().size();
-        const AZ::Vector4 vecOne = AZ::Vector4::CreateOne();
-        output.Resize(sourceVector.GetDimensionality());
-        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
-        {
-            const AZ::Vector4& sourceElement = sourceVector.GetVectorValues()[iter];
-            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
-            outputElement = vecOne / (vecOne + (-sourceElement).GetExpEstimate());
-        }
-        output.FixLastVectorElement();
-    }
-
-    inline void Linear(const AZ::VectorN& sourceVector, AZ::VectorN& output)
-    {
-        if (&output != &sourceVector)
-        {
-            output = sourceVector;
-        }
-    }
-
-    inline void Activate_Derivative(ActivationFunctions activationFunction, const AZ::VectorN& activationOutput, AZ::VectorN& output)
-    {
-        output.Resize(activationOutput.GetDimensionality());
-        switch (activationFunction)
-        {
-        case ActivationFunctions::ReLU:
-            ReLU_Derivative(activationOutput, output);
-            break;
-        case ActivationFunctions::Sigmoid:
-            Sigmoid_Derivative(activationOutput, output);
-            break;
-        case ActivationFunctions::Linear:
-            Linear_Derivative(activationOutput, output);
-            break;
-        }
-    }
-
-    inline void ReLU_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output)
-    {
-        const AZStd::size_t numElements = activationOutput.GetVectorValues().size();
-        const AZ::Simd::Vec4::FloatType zero = AZ::Simd::Vec4::ZeroFloat();
-        const AZ::Simd::Vec4::FloatType one = AZ::Simd::Vec4::Splat(1.0f);
-        output.Resize(activationOutput.GetDimensionality());
-        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
-        {
-            const AZ::Vector4& sourceElement = activationOutput.GetVectorValues()[iter];
-            // 1's if > 0, 0's otherwise
-            // Strictly greater than is required as any negative inputs in the original source vector will have been clamped to zero by activation
-            const AZ::Simd::Vec4::FloatType mask = AZ::Simd::Vec4::CmpGt(sourceElement.GetSimdValue(), zero);
-            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
-            outputElement.SetSimdValue(AZ::Simd::Vec4::And(one, mask)); // Returns one if mask is non-zero, returns zero otherwise
-        }
-        output.FixLastVectorElement();
-    }
-
-    inline void Sigmoid_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output)
-    {
-        const AZStd::size_t numElements = activationOutput.GetVectorValues().size();
-        const AZ::Vector4 vecOne = AZ::Vector4::CreateOne();
-        output.Resize(activationOutput.GetDimensionality());
-        for (AZStd::size_t iter = 0; iter < numElements; ++iter)
-        {
-            const AZ::Vector4& activationElement = activationOutput.GetVectorValues()[iter];
-            AZ::Vector4& outputElement = output.GetVectorValues()[iter];
-            outputElement = activationElement * (vecOne - activationElement);
-        }
-        output.FixLastVectorElement();
-    }
-
-    inline void Linear_Derivative(const AZ::VectorN& activationOutput, AZ::VectorN& output)
-    {
-        output = AZ::VectorN(activationOutput.GetDimensionality(), 1.0f);
-    }
-}

+ 6 - 6
Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.inl → Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.cpp

@@ -11,7 +11,7 @@
 
 namespace MachineLearning
 {
-    inline float ComputeTotalCost(LossFunctions lossFunction, const AZ::VectorN& expected, const AZ::VectorN& actual)
+    float ComputeTotalCost(LossFunctions lossFunction, const AZ::VectorN& expected, const AZ::VectorN& actual)
     {
         AZ::VectorN costs;
         ComputeLoss(lossFunction, expected, actual, costs);
@@ -23,7 +23,7 @@ namespace MachineLearning
         return accumulator.Dot(AZ::Vector4::CreateOne());
     }
 
-    inline void ComputeLoss(LossFunctions costFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    void ComputeLoss(LossFunctions costFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
     {
         AZ_Assert(expected.GetDimensionality() == actual.GetDimensionality(), "The dimensionality of expected and actual must match");
         output.Resize(actual.GetDimensionality());
@@ -35,12 +35,12 @@ namespace MachineLearning
         }
     }
 
-    inline void MeanSquaredError(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    void MeanSquaredError(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
     {
-        output = 0.5f * (actual - expected).GetSquare();
+        output = (actual - expected).GetSquare();
     }
 
-    inline void ComputeLoss_Derivative(LossFunctions costFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    void ComputeLoss_Derivative(LossFunctions costFunction, const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
     {
         AZ_Assert(expected.GetDimensionality() == actual.GetDimensionality(), "The dimensionality of expected and actual must match");
         output.Resize(actual.GetDimensionality());
@@ -52,7 +52,7 @@ namespace MachineLearning
         }
     }
 
-    inline void MeanSquaredError_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
+    void MeanSquaredError_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output)
     {
         output = (expected - actual);
     }

+ 0 - 2
Gems/MachineLearning/Code/Source/Algorithms/LossFunctions.h

@@ -28,5 +28,3 @@ namespace MachineLearning
     //! Computes the derivative of the rectified linear unit function (ReLU) applied to all elements of the source vector.
     void MeanSquaredError_Derivative(const AZ::VectorN& expected, const AZ::VectorN& actual, AZ::VectorN& output);
 }
-
-#include <Algorithms/LossFunctions.inl>

+ 81 - 0
Gems/MachineLearning/Code/Source/Algorithms/Training.cpp

@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <Algorithms/Training.h>
+#include <Algorithms/LossFunctions.h>
+#include <AzCore/Math/SimdMath.h>
+#include <AzCore/Console/ILogger.h>
+#include <numeric>
+#include <random>
+
+namespace MachineLearning
+{
+    float ComputeCurrentCost(INeuralNetworkPtr Model, ILabeledTrainingDataPtr TestData, LossFunctions CostFunction)
+    {
+        const AZStd::size_t totalTestSize = TestData->GetSampleCount();
+
+        double result = 0.0; // accumulate in double to limit rounding error over large test sets
+        for (AZStd::size_t iter = 0; iter < totalTestSize; ++iter) // was uint32_t: sign/width mismatch against AZStd::size_t
+        {
+            const AZ::VectorN& activations = TestData->GetDataByIndex(iter);
+            const AZ::VectorN& label = TestData->GetLabelByIndex(iter);
+            const AZ::VectorN* output = Model->Forward(activations);
+            result += static_cast<double>(ComputeTotalCost(CostFunction, label, *output));
+        }
+        result /= static_cast<double>(totalTestSize); // NOTE(review): assumes a non-empty test set — confirm callers guarantee this
+        return static_cast<float>(result);
+    }
+
+    void SupervisedLearningCycle
+    (
+        INeuralNetworkPtr model,
+        ILabeledTrainingDataPtr trainingData,
+        ILabeledTrainingDataPtr testData,
+        LossFunctions costFunction, 
+        AZStd::size_t totalIterations,
+        AZStd::size_t batchSize,
+        float learningRate,
+        float learningRateDecay,
+        float earlyStopCost
+    )
+    {
+        const AZStd::size_t totalTrainingSize = trainingData->GetSampleCount();
+        const float initialCost = ComputeCurrentCost(model, testData, costFunction);
+        AZLOG_INFO("Initial model cost prior to training: %f", initialCost);
+
+        // Generate a set of training indices that we can later shuffle
+        AZStd::vector<AZStd::size_t> indices;
+        indices.resize(totalTrainingSize);
+        std::iota(indices.begin(), indices.end(), 0);
+
+        for (uint32_t epoch = 0; epoch < totalIterations; ++epoch)
+        {
+            // We reshuffle the training data indices each epoch to avoid patterns in the training data
+            std::shuffle(indices.begin(), indices.end(), std::mt19937(std::random_device{}()));
+            AZStd::size_t sampleCount = 0;
+            for (uint32_t batch = 0; (batch < batchSize) && (sampleCount < totalTrainingSize); ++batch, ++sampleCount) // NOTE(review): only the first batchSize shuffled samples are consumed per epoch — confirm single-minibatch-per-epoch is intended
+            {
+                const AZ::VectorN& activations = trainingData->GetDataByIndex(indices[sampleCount]);
+                const AZ::VectorN& label = trainingData->GetLabelByIndex(indices[sampleCount]);
+                model->Reverse(costFunction, activations, label);
+            }
+            model->GradientDescent(learningRate); // applies one descent step using the gradients accumulated by Reverse above
+
+            const float currentTestCost = ComputeCurrentCost(model, testData, costFunction);
+            const float currentTrainCost = ComputeCurrentCost(model, trainingData, costFunction);
+            AZLOG_INFO("Epoch %u, Test cost: %f, Train cost: %f, Learning rate: %f", epoch, currentTestCost, currentTrainCost, learningRate);
+            if (currentTestCost < earlyStopCost)
+            {
+                AZLOG_INFO("Early stop threshold reached, exiting training loop: %f, %f", currentTestCost, earlyStopCost);
+                break;
+            }
+
+            learningRate *= learningRateDecay; // geometric learning-rate decay applied once per epoch
+        }
+    }
+}

+ 37 - 0
Gems/MachineLearning/Code/Source/Algorithms/Training.h

@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <AzCore/Math/VectorN.h>
+#include <MachineLearning/INeuralNetwork.h>
+#include <MachineLearning/ILabeledTrainingData.h>
+
+namespace MachineLearning
+{
+    //! Calculates the average cost of the provided model on the set of labeled test data using the requested loss function.
+    float ComputeCurrentCost(INeuralNetworkPtr Model, ILabeledTrainingDataPtr TestData, LossFunctions CostFunction);
+
+    //! Performs a supervised learning training cycle.
+    //! Supervised learning is a form of machine learning where a model is provided a set of training data with expected output
+    //! Training then takes place in an iterative loop where the total error (cost, loss) of the model is minimized
+    //! This differs from unsupervised learning, where the training data lacks any form of labeling (expected correct output), and
+    //! the model is expected to learn the underlying structures of data on its own.
+    void SupervisedLearningCycle
+    (
+        INeuralNetworkPtr model,
+        ILabeledTrainingDataPtr trainingData,
+        ILabeledTrainingDataPtr testData,
+        LossFunctions costFunction,
+        AZStd::size_t totalIterations,
+        AZStd::size_t batchSize,
+        float learningRate,
+        float learningRateDecay,
+        float earlyStopCost
+    );
+}

+ 199 - 0
Gems/MachineLearning/Code/Source/Assets/MnistDataLoader.cpp

@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+// (review) removed stray '#pragma once' — it is a header-only directive and does not belong in a .cpp translation unit
+
+#include <Assets/MnistDataLoader.h>
+#include <Algorithms/Activations.h>
+#include <AzCore/IO/FileReader.h>
+#include <AzCore/IO/Path/Path.h>
+#include <AzCore/Console/ILogger.h>
+#include <AzNetworking/Utilities/Endian.h>
+#include <AzCore/RTTI/RTTI.h>
+#include <AzCore/RTTI/BehaviorContext.h>
+#include <AzCore/Serialization/EditContext.h>
+#include <AzCore/Serialization/SerializeContext.h>
+// (review) removed leftover MSVC debug pragma '#pragma optimize("", off)' so release builds keep optimizations enabled
+namespace MachineLearning
+{
+    void MnistDataLoader::Reflect(AZ::ReflectContext* context)
+    {
+        if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
+        {
+            serializeContext->Class<MnistDataLoader>()
+                ->Version(1)
+                ;
+
+            if (AZ::EditContext* editContext = serializeContext->GetEditContext())
+            {
+                editContext->Class<MnistDataLoader>("Parameters defining a single training data instance", "")
+                    ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
+                    ;
+            }
+        }
+
+        auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
+        if (behaviorContext)
+        {
+            behaviorContext->Class<MnistDataLoader>()->
+                Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
+                Attribute(AZ::Script::Attributes::Module, "machineLearning")->
+                Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
+                Constructor<>()->
+                Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)
+                ;
+        }
+    }
+
+    bool MnistDataLoader::LoadArchive(const AZStd::string& imageFilename, const AZStd::string& labelFilename)
+    {
+        return LoadImageFile(imageFilename) && LoadLabelFile(labelFilename);
+    }
+
+    AZStd::size_t MnistDataLoader::GetSampleCount() const
+    {
+        return m_dataHeader.m_imageCount;
+    }
+
+    const AZ::VectorN& MnistDataLoader::GetLabelByIndex(AZStd::size_t index)
+    {
+        OneHotEncode(m_labelBuffer[index], 10, m_labelVector);
+        return m_labelVector;
+    }
+
+    const AZ::VectorN& MnistDataLoader::GetDataByIndex(AZStd::size_t index)
+    {
+        const AZStd::size_t imageDataStride = m_dataHeader.m_height * m_dataHeader.m_width;
+        m_imageVector.Resize(imageDataStride);
+        for (AZStd::size_t iter = 0; iter < imageDataStride; ++iter)
+        {
+            m_imageVector.SetElement(iter, static_cast<float>(m_imageBuffer[index * imageDataStride + iter]) / 255.0f);
+        }
+        return m_imageVector;
+    }
+
+    bool MnistDataLoader::LoadImageFile(const AZStd::string& imageFilename)
+    {
+        AZ::IO::FixedMaxPath filePathFixed = imageFilename.c_str();
+        if (AZ::IO::FileIOBase* fileIOBase = AZ::IO::FileIOBase::GetInstance())
+        {
+            fileIOBase->ResolvePath(filePathFixed, imageFilename.c_str());
+        }
+
+        if (!m_imageFile.Open(filePathFixed.c_str(), AZ::IO::SystemFile::SF_OPEN_READ_ONLY))
+        {
+            AZLOG_ERROR("Failed to load '%s'. File could not be opened.", filePathFixed.c_str());
+            return false;
+        }
+
+        const AZ::IO::SizeType length = m_imageFile.Length();
+        if (length == 0)
+        {
+            AZLOG_ERROR("Failed to load '%s'. File is empty.", filePathFixed.c_str());
+            return false;
+        }
+
+        m_imageFile.Seek(0, AZ::IO::SystemFile::SF_SEEK_BEGIN);
+
+        AZ::IO::SizeType bytesRead = m_imageFile.Read(sizeof(MnistDataHeader), &m_dataHeader);
+
+        if (bytesRead != sizeof(MnistDataHeader))
+        {
+            // Failed to read the whole header
+            AZLOG_ERROR("Failed to load '%s', failed to read archive header.", filePathFixed.c_str());
+            m_imageFile.Close();
+            return false;
+        }
+
+        m_dataHeader.m_imageHeader = ntohl(m_dataHeader.m_imageHeader);
+        m_dataHeader.m_imageCount = ntohl(m_dataHeader.m_imageCount);
+        m_dataHeader.m_height = ntohl(m_dataHeader.m_height);
+        m_dataHeader.m_width = ntohl(m_dataHeader.m_width);
+
+        constexpr uint32_t MnistImageHeaderValue = 2051;
+        if (m_dataHeader.m_imageHeader != MnistImageHeaderValue)
+        {
+            // Invalid format
+            AZLOG_ERROR("Failed to load '%s', file is not an MNIST archive (expected %u, encountered %u).", filePathFixed.c_str(), MnistImageHeaderValue, m_dataHeader.m_imageHeader);
+            m_imageFile.Close();
+            return false;
+        }
+
+        const AZStd::size_t imageDataStride = m_dataHeader.m_height * m_dataHeader.m_width;
+        m_imageBuffer.resize(m_dataHeader.m_imageCount * imageDataStride);
+        m_imageFile.Read(m_dataHeader.m_imageCount * imageDataStride, m_imageBuffer.data());
+        return true;
+    }
+
+    bool MnistDataLoader::LoadLabelFile(const AZStd::string& labelFilename)
+    {
+        AZ::IO::FixedMaxPath filePathFixed = labelFilename.c_str();
+        if (AZ::IO::FileIOBase* fileIOBase = AZ::IO::FileIOBase::GetInstance())
+        {
+            fileIOBase->ResolvePath(filePathFixed, labelFilename.c_str());
+        }
+
+        if (!m_labelFile.Open(filePathFixed.c_str(), AZ::IO::SystemFile::SF_OPEN_READ_ONLY))
+        {
+            AZLOG_ERROR("Failed to load '%s'. File could not be opened.", filePathFixed.c_str());
+            return false;
+        }
+
+        const AZ::IO::SizeType length = m_labelFile.Length();
+        if (length == 0)
+        {
+            AZLOG_ERROR("Failed to load '%s'. File is empty.", filePathFixed.c_str());
+            return false;
+        }
+
+        m_labelFile.Seek(0, AZ::IO::SystemFile::SF_SEEK_BEGIN);
+
+        struct MnistLabelHeader
+        {
+            uint32_t m_labelHeader = 0;
+            uint32_t m_labelCount = 0;
+        };
+
+        MnistLabelHeader labelHeader;
+        AZ::IO::SizeType bytesRead = m_labelFile.Read(sizeof(MnistLabelHeader), &labelHeader);
+
+        if (bytesRead != sizeof(MnistLabelHeader))
+        {
+            // Failed to read the whole header
+            AZLOG_ERROR("Failed to load '%s', failed to read label header.", filePathFixed.c_str());
+            m_labelFile.Close();
+            return false;
+        }
+
+        labelHeader.m_labelHeader = ntohl(labelHeader.m_labelHeader);
+        labelHeader.m_labelCount = ntohl(labelHeader.m_labelCount);
+
+        constexpr uint32_t MnistLabelHeaderValue = 2049;
+
+        if (labelHeader.m_labelHeader != MnistLabelHeaderValue)
+        {
+            // Invalid format
+            AZLOG_ERROR("Failed to load '%s', file is not an MNIST archive (expected %u, encountered %u).", filePathFixed.c_str(), MnistLabelHeaderValue, labelHeader.m_labelHeader);
+            m_labelFile.Close();
+            return false;
+        }
+
+        if (m_dataHeader.m_imageCount != labelHeader.m_labelCount)
+        {
+            AZLOG_ERROR("Failed to load '%s', mismatch between image count (%u) and label count (%u).", filePathFixed.c_str(), m_dataHeader.m_imageCount, labelHeader.m_labelCount);
+            m_labelFile.Close();
+            return false;
+        }
+
+        m_labelBuffer.resize(labelHeader.m_labelCount);
+        m_labelFile.Read(labelHeader.m_labelCount, m_labelBuffer.data()); // BUGFIX: was m_imageFile — label bytes must be read from the label archive
+        AZLOG_INFO("Loaded MNIST archive %s containing %u samples", filePathFixed.c_str(), m_dataHeader.m_imageCount);
+        return true;
+    }
+}
+// (review) removed leftover MSVC debug pragma '#pragma optimize("", on)' paired with the '"", off' pragma above

+ 69 - 0
Gems/MachineLearning/Code/Source/Assets/MnistDataLoader.h

@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <MachineLearning/INeuralNetwork.h>
+#include <MachineLearning/ILabeledTrainingData.h>
+#include <AzCore/std/string/string.h>
+#include <AzCore/IO/FileIO.h>
+
+namespace MachineLearning
+{
+    //! A class that can load the MNIST training data set.
+    //! https://en.wikipedia.org/wiki/MNIST_database
+    class MnistDataLoader
+        : public ILabeledTrainingData
+    {
+    public:
+
+        AZ_TYPE_INFO(MnistDataLoader, "{3F4C0F29-4E7E-4CAF-A331-EAC3D2D9409E}", ILabeledTrainingData);
+
+        //! AzCore Reflection.
+        //! @param context reflection context
+        static void Reflect(AZ::ReflectContext* context);
+
+        MnistDataLoader() = default;
+
+        //! ILabeledTrainingData interface
+        //! @{
+        bool LoadArchive(const AZStd::string& imageFilename, const AZStd::string& labelFilename) override;
+        AZStd::size_t GetSampleCount() const override;
+        const AZ::VectorN& GetLabelByIndex(AZStd::size_t index) override;
+        const AZ::VectorN& GetDataByIndex(AZStd::size_t index) override;
+        //! @}
+
+    private:
+
+        bool LoadImageFile(const AZStd::string& imageFilename);
+        bool LoadLabelFile(const AZStd::string& labelFilename);
+
+        struct MnistDataHeader
+        {
+            uint32_t m_imageHeader = 0;
+            uint32_t m_imageCount = 0;
+            uint32_t m_height = 0;
+            uint32_t m_width = 0;
+        };
+
+        MnistDataHeader m_dataHeader;
+
+        AZ::IO::SystemFile m_imageFile;
+        AZ::IO::SystemFile m_labelFile;
+
+        AZStd::size_t m_imageDataStart = 0;
+        AZStd::size_t m_labelDataStart = 0;
+
+        AZStd::size_t m_currentIndex = 0xFFFFFFFF;
+        AZStd::vector<uint8_t> m_imageBuffer;
+        AZStd::vector<uint8_t> m_labelBuffer;
+
+        AZ::VectorN m_imageVector;
+        AZ::VectorN m_labelVector;
+    };
+}

+ 82 - 0
Gems/MachineLearning/Code/Source/Components/MultilayerPerceptronComponent.cpp

@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+// (review) removed stray '#pragma once' — it is a header-only directive and does not belong in a .cpp translation unit
+
+#include <Components/MultilayerPerceptronComponent.h>
+#include <AzCore/RTTI/RTTI.h>
+#include <AzCore/RTTI/BehaviorContext.h>
+#include <AzCore/Serialization/EditContext.h>
+#include <AzCore/Serialization/SerializeContext.h>
+
+namespace MachineLearning
+{
+    void MultilayerPerceptronComponent::Reflect(AZ::ReflectContext* context)
+    {
+        if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
+        {
+            serializeContext->Class<MultilayerPerceptronComponent>()
+                ->Version(0)
+                ->Field("Model", &MultilayerPerceptronComponent::m_model)
+                ;
+
+            if (AZ::EditContext* editContext = serializeContext->GetEditContext())
+            {
+                editContext->Class<MultilayerPerceptronComponent>("Multilayer Perceptron", "")
+                    ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
+                    ->Attribute(AZ::Edit::Attributes::Category, "MachineLearning")
+                    ->Attribute(AZ::Edit::Attributes::Icon, "Editor/Icons/Components/NeuralNetwork.svg")
+                    ->Attribute(AZ::Edit::Attributes::ViewportIcon, "Editor/Icons/Components/Viewport/NeuralNetwork.svg")
+                    ->Attribute(AZ::Edit::Attributes::AppearsInAddComponentMenu, AZ_CRC_CE("Game"))
+                    ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptronComponent::m_model, "Model", "This is the machine-learning model provided by this component")
+                    ;
+            }
+        }
+
+        auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
+        if (behaviorContext)
+        {
+            behaviorContext->Class<MultilayerPerceptronComponent>("MultilayerPerceptron Component")->
+                Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
+                Attribute(AZ::Script::Attributes::Module, "machineLearning")->
+                Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
+                Constructor<>()->
+                Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)->
+                Property("Model", BehaviorValueProperty(&MultilayerPerceptronComponent::m_model))
+                ;
+
+            behaviorContext->EBus<MultilayerPerceptronComponentRequestBus>("Multilayer perceptron requests")
+                ->Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)
+                ->Attribute(AZ::Script::Attributes::Module, "machinelearning")
+                ->Attribute(AZ::Script::Attributes::Category, "MachineLearning")
+                ->Event("Get model", &MachineLearning::MultilayerPerceptronComponentRequestBus::Events::GetModel)
+                ;
+        }
+    }
+
+    void MultilayerPerceptronComponent::GetProvidedServices(AZ::ComponentDescriptor::DependencyArrayType& provided)
+    {
+        provided.push_back(AZ_CRC("MultilayerPerceptronService"));
+    }
+
+    void MultilayerPerceptronComponent::Activate()
+    {
+        m_handle.reset(&m_model); // NOTE(review): stores the address of a non-heap member in a smart pointer — confirm INeuralNetworkPtr is non-owning or uses a no-op deleter, otherwise this frees stack/component memory on release
+        MultilayerPerceptronComponentRequestBus::Handler::BusConnect(GetEntityId());
+    }
+
+    void MultilayerPerceptronComponent::Deactivate()
+    {
+        MultilayerPerceptronComponentRequestBus::Handler::BusDisconnect();
+    }
+
+    INeuralNetworkPtr MultilayerPerceptronComponent::GetModel()
+    {
+        return m_handle;
+    }
+}

+ 55 - 0
Gems/MachineLearning/Code/Source/Components/MultilayerPerceptronComponent.h

@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <AzCore/Component/Component.h>
+#include <Models/MultilayerPerceptron.h>
+
+namespace MachineLearning
+{
+    //! Scriptbind
+    class MultilayerPerceptronComponentRequests
+        : public AZ::ComponentBus
+    {
+    public:
+        //! Returns the underlying machine learning model.
+        virtual INeuralNetworkPtr GetModel() = 0;
+    };
+    using MultilayerPerceptronComponentRequestBus = AZ::EBus<MultilayerPerceptronComponentRequests>;
+
+    class MultilayerPerceptronComponent
+        : public AZ::Component
+        , public MultilayerPerceptronComponentRequestBus::Handler
+    {
+    public:
+
+        AZ_COMPONENT(MultilayerPerceptronComponent, "{022E7841-1DB9-4AE5-9984-79B00A92DE58}");
+
+        //! AzCore Reflection.
+        //! @param context reflection context
+        static void Reflect(AZ::ReflectContext* context);
+        static void GetProvidedServices(AZ::ComponentDescriptor::DependencyArrayType& provided);
+
+        //! AZ::Component overrides
+        //! @{
+        void Activate() override;
+        void Deactivate() override;
+        //! @}
+
+        //! MultilayerPerceptronComponentBus
+        //! @{
+        INeuralNetworkPtr GetModel() override;
+        //! @}
+
+    private:
+
+        MultilayerPerceptron m_model;
+        INeuralNetworkPtr m_handle;
+    };
+}

+ 7 - 0
Gems/MachineLearning/Code/Source/MachineLearningModule.cpp

@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
 
 #include <MachineLearning/MachineLearningTypeIds.h>
 #include <MachineLearningModuleInterface.h>

+ 4 - 5
Gems/MachineLearning/Code/Source/MachineLearningModuleInterface.cpp

@@ -8,15 +8,13 @@
 
 #include "MachineLearningModuleInterface.h"
 #include <AzCore/Memory/Memory.h>
-
 #include <MachineLearning/MachineLearningTypeIds.h>
-
 #include <MachineLearningSystemComponent.h>
+#include <Components/MultilayerPerceptronComponent.h>
 
 namespace MachineLearning
 {
-    AZ_TYPE_INFO_WITH_NAME_IMPL(MachineLearningModuleInterface,
-        "MachineLearningModuleInterface", MachineLearningModuleInterfaceTypeId);
+    AZ_TYPE_INFO_WITH_NAME_IMPL(MachineLearningModuleInterface, "MachineLearningModuleInterface", MachineLearningModuleInterfaceTypeId);
     AZ_RTTI_NO_TYPE_INFO_IMPL(MachineLearningModuleInterface, AZ::Module);
     AZ_CLASS_ALLOCATOR_IMPL(MachineLearningModuleInterface, AZ::SystemAllocator);
 
@@ -28,7 +26,8 @@ namespace MachineLearning
         // This happens through the [MyComponent]::Reflect() function.
         m_descriptors.insert(m_descriptors.end(), {
             MachineLearningSystemComponent::CreateDescriptor(),
-            });
+            MultilayerPerceptronComponent::CreateDescriptor()
+        });
     }
 
     AZ::ComponentTypeList MachineLearningModuleInterface::GetRequiredSystemComponents() const

+ 23 - 88
Gems/MachineLearning/Code/Source/MachineLearningSystemComponent.cpp

@@ -9,10 +9,12 @@
 #include "MachineLearningSystemComponent.h"
 #include <MachineLearning/MachineLearningTypeIds.h>
 #include <MachineLearning/Types.h>
-#include <MachineLearning/LabeledTrainingData.h>
 #include <AzCore/Serialization/SerializeContext.h>
 #include <AzCore/Serialization/EditContext.h>
 #include <AzCore/RTTI/BehaviorContext.h>
+#include <AzCore/Preprocessor/EnumReflectUtils.h>
+#include <Algorithms/Activations.h>
+#include <Assets/MnistDataLoader.h>
 #include <Models/Layer.h>
 #include <Models/MultilayerPerceptron.h>
 #include <AutoGenNodeableRegistry.generated.h>
@@ -27,109 +29,42 @@ namespace AZ
 
 namespace MachineLearning
 {
-    AZ_COMPONENT_IMPL(MachineLearningSystemComponent, "MachineLearningSystemComponent", MachineLearningSystemComponentTypeId);
-
-    void LayerParams::Reflect(AZ::ReflectContext* context)
-    {
-        if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
-        {
-            serializeContext->Class<LayerParams>()
-                ->Version(1)
-                ->Field("Size", &LayerParams::m_layerSize)
-                ->Field("ActivationFunction", &LayerParams::m_activationFunction)
-                ;
-
-            if (AZ::EditContext* editContext = serializeContext->GetEditContext())
-            {
-                editContext->Class<LayerParams>("Parameters defining a single layer of a neural network", "")
-                    ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
-                    ->DataElement(AZ::Edit::UIHandlers::Default, &LayerParams::m_layerSize, "Layer Size", "The number of neurons this layer should have")
-                    ->DataElement(AZ::Edit::UIHandlers::ComboBox, &LayerParams::m_activationFunction, "Activation Function", "The activation function applied to this layer")
-                    ;
-            }
-        }
+    AZ_ENUM_DEFINE_REFLECT_UTILITIES(ActivationFunctions);
+    AZ_ENUM_DEFINE_REFLECT_UTILITIES(LossFunctions);
 
-        auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
-        if (behaviorContext)
-        {
-            behaviorContext->Class<LayerParams>()->
-                Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
-                Attribute(AZ::Script::Attributes::Module, "machineLearning")->
-                Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
-                Constructor<AZStd::size_t, ActivationFunctions>()->
-                Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)->
-                Property("Size", BehaviorValueProperty(&LayerParams::m_layerSize))->
-                Property("ActivationFunction", BehaviorValueProperty(&LayerParams::m_activationFunction))
-                ;
-        }
-    }
+    AZ_COMPONENT_IMPL(MachineLearningSystemComponent, "MachineLearningSystemComponent", MachineLearningSystemComponentTypeId);
 
-    void LabeledTrainingData::Reflect(AZ::ReflectContext* context)
+    void MachineLearningSystemComponent::Reflect(AZ::ReflectContext* context)
     {
         if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
         {
-            serializeContext->Class<LabeledTrainingData>()
-                ->Version(1)
-                ->Field("Activations", &LabeledTrainingData::m_activations)
-                ->Field("Label", &LabeledTrainingData::m_label)
-                ;
-
-            if (AZ::EditContext* editContext = serializeContext->GetEditContext())
-            {
-                editContext->Class<LabeledTrainingData>("Parameters defining a single training data instance", "")
-                    ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
-                    ->DataElement(AZ::Edit::UIHandlers::Default, &LabeledTrainingData::m_activations, "Activations", "The inputs to be fed into the model to generate a prediction")
-                    ->DataElement(AZ::Edit::UIHandlers::Default, &LabeledTrainingData::m_label, "Label", "The expected output that should be generated given the input")
-                    ;
-            }
+            serializeContext->Class<MachineLearningSystemComponent, AZ::Component>()->Version(0);
+            serializeContext->Class<ILabeledTrainingData>()->Version(0);
+            serializeContext->Class<INeuralNetwork>()->Version(0);
+            serializeContext->RegisterGenericType<INeuralNetworkPtr>();
+            serializeContext->RegisterGenericType<ILabeledTrainingDataPtr>();
         }
 
-        auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
-        if (behaviorContext)
+        if (auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context))
         {
-            behaviorContext->Class<LabeledTrainingData>()->
+            behaviorContext->Class<MachineLearningSystemComponent>();
+            behaviorContext->Class<INeuralNetwork>("INeuralNetwork")->
                 Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
                 Attribute(AZ::Script::Attributes::Module, "machineLearning")->
                 Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
                 Constructor<>()->
                 Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)->
-                Property("Activations", BehaviorValueProperty(&LabeledTrainingData::m_activations))->
-                Property("Label", BehaviorValueProperty(&LabeledTrainingData::m_label))
+                Method("AddLayer", &INeuralNetwork::AddLayer)->
+                Method("GetLayerCount", &INeuralNetwork::GetLayerCount)->
+                Method("GetLayer", &INeuralNetwork::GetLayer)->
+                Method("Forward", &INeuralNetwork::Forward)->
+                Method("Reverse", &INeuralNetwork::Reverse)
                 ;
         }
-    }
 
-    void MachineLearningSystemComponent::Reflect(AZ::ReflectContext* context)
-    {
-        if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
-        {
-            serializeContext->Class<MachineLearningSystemComponent, AZ::Component>()->Version(0);
-            serializeContext->Class<Layer>()->Version(0);
-            serializeContext->Class<MultilayerPerceptron>()->Version(0);
-            serializeContext->Class<INeuralNetwork>()->Version(0);
-            serializeContext->Class<LabeledTrainingData>()->Version(0);
-            serializeContext->Class<LayerParams>();
-            serializeContext->RegisterGenericType<INeuralNetworkPtr>();
-            serializeContext->RegisterGenericType<HiddenLayerParams>();
-            serializeContext->RegisterGenericType<LabeledTrainingDataSet>();
-        }
-
-        if (auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context))
-        {
-            behaviorContext->Class<MachineLearningSystemComponent>();
-            behaviorContext->Class<LabeledTrainingData>();
-            behaviorContext->Class<LayerParams>();
-            behaviorContext->Class<Layer>();
-            behaviorContext->Class<MultilayerPerceptron>();
-
-            behaviorContext
-                ->Enum<static_cast<int>(ActivationFunctions::ReLU)>("ReLU activation function")
-                ->Enum<static_cast<int>(ActivationFunctions::Sigmoid)>("Sigmoid activation function")
-                ->Enum<static_cast<int>(ActivationFunctions::Linear)>("Linear activation function");
-            
-            behaviorContext
-                ->Enum<static_cast<int>(LossFunctions::MeanSquaredError)>("Mean Squared Error");
-        }
+        Layer::Reflect(context);
+        MnistDataLoader::Reflect(context);
+        MultilayerPerceptron::Reflect(context);
     }
 
     void MachineLearningSystemComponent::GetProvidedServices(AZ::ComponentDescriptor::DependencyArrayType& provided)

+ 11 - 8
Gems/MachineLearning/Code/Source/Models/Layer.cpp

@@ -33,11 +33,10 @@ namespace MachineLearning
             {
                 editContext->Class<Layer>("A single layer of a neural network", "")
                     ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
-                    ->DataElement(AZ::Edit::UIHandlers::Default, &Layer::m_inputSize, "Input Size", "This value must match the output size of the previous layer, or the number of neurons in the activation layer if this is the first layer")
+                    ->DataElement(AZ::Edit::UIHandlers::Default, &Layer::m_outputSize, "Layer Size", "The number of neurons the layer should have")
                     ->Attribute(AZ::Edit::Attributes::ChangeNotify, &Layer::OnSizesChanged)
-                    ->DataElement(AZ::Edit::UIHandlers::Default, &Layer::m_outputSize, "Output Size", "This value must match the input size of the next layer, if one exists")
-                    ->Attribute(AZ::Edit::Attributes::ChangeNotify, &Layer::OnSizesChanged)
-                    ->DataElement(AZ::Edit::UIHandlers::Default, &Layer::m_activationFunction, "Activation Function", "The activation function applied to this layer")
+                    ->DataElement(AZ::Edit::UIHandlers::ComboBox, &Layer::m_activationFunction, "Activation Function", "The activation function applied to this layer")
+                        ->Attribute(AZ::Edit::Attributes::EnumValues, &GetActivationEnumValues)
                     ;
             }
         }
@@ -50,7 +49,10 @@ namespace MachineLearning
                 Attribute(AZ::Script::Attributes::Module, "machineLearning")->
                 Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
                 Constructor<ActivationFunctions, AZStd::size_t, AZStd::size_t>()->
-                Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)
+                Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)->
+                Property("InputSize", BehaviorValueProperty(&Layer::m_inputSize))->
+                Property("OutputSize", BehaviorValueProperty(&Layer::m_outputSize))->
+                Property("ActivationFunction", BehaviorValueProperty(&Layer::m_activationFunction))
                 ;
         }
     }
@@ -93,8 +95,7 @@ namespace MachineLearning
         }
 
         // Compute the partial derivatives of the output with respect to the activation function
-        Activate_Derivative(m_activationFunction, m_output, m_activationGradients);
-        m_activationGradients *= previousLayerGradients;
+        Activate_Derivative(m_activationFunction, m_output, previousLayerGradients, m_activationGradients);
 
         // Accumulate the partial derivatives of the weight matrix with respect to the loss function
         AZ::OuterProduct(m_activationGradients, m_lastInput, m_weightGradients);
@@ -119,7 +120,9 @@ namespace MachineLearning
     void Layer::OnSizesChanged()
     {
         m_weights = AZ::MatrixMxN::CreateRandom(m_outputSize, m_inputSize);
-        m_biases = AZ::VectorN::CreateRandom(m_outputSize);
+        m_weights -= 0.5f; // It's preferable for efficient training to keep initial weights centered around zero
+
+        m_biases = AZ::VectorN(m_outputSize, 0.01f);
         m_output = AZ::VectorN::CreateZero(m_outputSize);
     }
 }

+ 6 - 0
Gems/MachineLearning/Code/Source/Models/Layer.h

@@ -25,7 +25,13 @@ namespace MachineLearning
         static void Reflect(AZ::ReflectContext* context);
 
         Layer() = default;
+        Layer(Layer&&) = default;
+        Layer(const Layer&) = default;
         Layer(ActivationFunctions activationFunction, AZStd::size_t activationDimensionality, AZStd::size_t layerDimensionality);
+        ~Layer() = default;
+
+        Layer& operator=(Layer&&) = default;
+        Layer& operator=(const Layer&) = default;
 
         //! Performs a basic forward pass on this layer, outputs are stored in m_output.
         const AZ::VectorN& Forward(const AZ::VectorN& activations);

+ 20 - 10
Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.cpp

@@ -7,6 +7,7 @@
  */
 
 #include <Models/MultilayerPerceptron.h>
+#include <Algorithms/LossFunctions.h>
 #include <AzCore/RTTI/RTTI.h>
 #include <AzCore/RTTI/BehaviorContext.h>
 #include <AzCore/Serialization/EditContext.h>
@@ -39,7 +40,7 @@ namespace MachineLearning
         auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
         if (behaviorContext)
         {
-            behaviorContext->Class<MultilayerPerceptron>()->
+            behaviorContext->Class<MultilayerPerceptron>("Multilayer perceptron")->
                 Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
                 Attribute(AZ::Script::Attributes::Module, "machineLearning")->
                 Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
@@ -49,7 +50,9 @@ namespace MachineLearning
                 Method("GetLayerCount", &MultilayerPerceptron::GetLayerCount)->
                 Method("GetLayer", &MultilayerPerceptron::GetLayer)->
                 Method("Forward", &MultilayerPerceptron::Forward)->
-                Method("Reverse", &MultilayerPerceptron::Reverse)
+                Method("Reverse", &MultilayerPerceptron::Reverse)->
+                Property("ActivationCount", BehaviorValueProperty(&MultilayerPerceptron::m_activationCount))->
+                Property("Layers", BehaviorValueProperty(&MultilayerPerceptron::m_layers))
                 ;
         }
     }
@@ -74,9 +77,9 @@ namespace MachineLearning
         return m_layers.size();
     }
 
-    Layer& MultilayerPerceptron::GetLayer(AZStd::size_t layerIndex)
+    Layer* MultilayerPerceptron::GetLayer(AZStd::size_t layerIndex)
     {
-        return m_layers[layerIndex];
+        return &m_layers[layerIndex];
     }
 
     AZStd::size_t MultilayerPerceptron::GetParameterCount() const
@@ -89,7 +92,7 @@ namespace MachineLearning
         return parameterCount;
     }
 
-    const AZ::VectorN& MultilayerPerceptron::Forward(const AZ::VectorN& activations)
+    const AZ::VectorN* MultilayerPerceptron::Forward(const AZ::VectorN& activations)
     {
         const AZ::VectorN* lastLayerOutput = &activations;
         for (Layer& layer : m_layers)
@@ -97,17 +100,19 @@ namespace MachineLearning
             layer.Forward(*lastLayerOutput);
             lastLayerOutput = &layer.m_output;
         }
-        return *lastLayerOutput;
+        return lastLayerOutput;
     }
 
     void MultilayerPerceptron::Reverse(LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected)
     {
+        ++m_trainingSampleSize;
+
         // First feed-forward the activations to get our current model predictions
-        const AZ::VectorN& output = Forward(activations);
+        const AZ::VectorN* output = Forward(activations);
 
         // Compute the partial derivatives of the loss function with respect to the final layer output
         AZ::VectorN costGradients;
-        ComputeLoss_Derivative(lossFunction, output, expected, costGradients);
+        ComputeLoss_Derivative(lossFunction, *output, expected, costGradients);
 
         for (auto iter = m_layers.rbegin(); iter != m_layers.rend(); ++iter)
         {
@@ -118,10 +123,15 @@ namespace MachineLearning
 
     void MultilayerPerceptron::GradientDescent(float learningRate)
     {
-        for (auto iter = m_layers.rbegin(); iter != m_layers.rend(); ++iter)
+        if (m_trainingSampleSize > 0)
         {
-            iter->ApplyGradients(learningRate);
+            const float adjustedLearningRate = learningRate / static_cast<float>(m_trainingSampleSize);
+            for (auto iter = m_layers.rbegin(); iter != m_layers.rend(); ++iter)
+            {
+                iter->ApplyGradients(adjustedLearningRate);
+            }
         }
+        m_trainingSampleSize = 0;
     }
 
     void MultilayerPerceptron::OnActivationCountChanged()

+ 12 - 7
Gems/MachineLearning/Code/Source/Models/MultilayerPerceptron.h

@@ -11,35 +11,37 @@
 #include <AzCore/Math/MatrixMxN.h>
 #include <MachineLearning/INeuralNetwork.h>
 #include <Models/Layer.h>
-#include <AzCore/Asset/AssetCommon.h>
 
 namespace MachineLearning
 {
     //! This is a basic multilayer perceptron neural network capable of basic training and feed forward operations.
     class MultilayerPerceptron
         : public INeuralNetwork
-        , public AZ::Data::AssetData
     {
     public:
 
-        AZ_CLASS_ALLOCATOR(MultilayerPerceptron, AZ::SystemAllocator);
-        AZ_RTTI(MultilayerPerceptron, "{E12EF761-41A5-48C3-BF55-7179B280D45F}", AZ::Data::AssetData, INeuralNetwork);
+        AZ_RTTI(MultilayerPerceptron, "{E12EF761-41A5-48C3-BF55-7179B280D45F}", INeuralNetwork);
 
         //! AzCore Reflection.
         //! @param context reflection context
         static void Reflect(AZ::ReflectContext* context);
 
         MultilayerPerceptron() = default;
+        MultilayerPerceptron(MultilayerPerceptron&&) = default;
+        MultilayerPerceptron(const MultilayerPerceptron&) = default;
         MultilayerPerceptron(AZStd::size_t activationCount);
         virtual ~MultilayerPerceptron() = default;
 
+        MultilayerPerceptron& operator=(MultilayerPerceptron&&) = default;
+        MultilayerPerceptron& operator=(const MultilayerPerceptron&) = default;
+
         //! INeuralNetwork interface
         //! @{
-        void AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction = ActivationFunctions::Linear) override;
+        void AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction = ActivationFunctions::ReLU) override;
         AZStd::size_t GetLayerCount() const override;
-        Layer& GetLayer(AZStd::size_t layerIndex) override;
+        Layer* GetLayer(AZStd::size_t layerIndex) override;
         AZStd::size_t GetParameterCount() const override;
-        const AZ::VectorN& Forward(const AZ::VectorN& activations) override;
+        const AZ::VectorN* Forward(const AZ::VectorN& activations) override;
         void Reverse(LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected) override;
         void GradientDescent(float learningRate) override;
         //! @}
@@ -51,6 +53,9 @@ namespace MachineLearning
         //! The number of neurons in the activation layer.
         AZStd::size_t m_activationCount = 0;
 
+        //! The number of accumulated training samples.
+        AZStd::size_t m_trainingSampleSize = 0;
+
         //! The set of layers in the network.
         AZStd::vector<Layer> m_layers;
     };

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/AccumulateTrainingGradients.cpp

@@ -11,7 +11,7 @@
 
 namespace MachineLearning
 {
-    MachineLearning::INeuralNetworkPtr AccumulateTrainingGradients::In(MachineLearning::INeuralNetworkPtr Model, MachineLearning::LossFunctions LossFunction, AZ::VectorN Activations, AZ::VectorN ExpectedOutput)
+    INeuralNetworkPtr AccumulateTrainingGradients::In(INeuralNetworkPtr Model, LossFunctions LossFunction, AZ::VectorN Activations, AZ::VectorN ExpectedOutput)
     {
         Model->Reverse(LossFunction, Activations, ExpectedOutput);
         return Model;

+ 3 - 3
Gems/MachineLearning/Code/Source/Nodes/ComputeCost.cpp

@@ -12,9 +12,9 @@
 
 namespace MachineLearning
 {
-    float ComputeCost::In(MachineLearning::INeuralNetworkPtr Model, MachineLearning::LossFunctions LossFunction, AZ::VectorN Activations, AZ::VectorN ExpectedOutput)
+    float ComputeCost::In(INeuralNetworkPtr Model, LossFunctions LossFunction, AZ::VectorN Activations, AZ::VectorN ExpectedOutput)
     {
-        const AZ::VectorN& modelOutput = Model->Forward(Activations);
-        return ComputeTotalCost(LossFunction, ExpectedOutput, modelOutput);
+        const AZ::VectorN* modelOutput = Model->Forward(Activations);
+        return ComputeTotalCost(LossFunction, ExpectedOutput, *modelOutput);
     }
 }

+ 0 - 17
Gems/MachineLearning/Code/Source/Nodes/CreateModel.ScriptCanvasNodeable.xml

@@ -1,17 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-
-<ScriptCanvas Include="Nodes/CreateModel.h" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
-    <Class Name="CreateModel"
-           QualifiedName="MachineLearning::CreateModel"
-           PreferredClassName="Creates a new machine learning model"
-           Category="MachineLearning"
-           Description="Creates a new untrained machine learning model. All parameters will be initialized to random values.">
-
-        <Input Name="In" DisplayGroup="In" Description="Parameters controlling the new model">
-            <Parameter Name="Input neurons" Type="AZStd::size_t" Description="The number of input neurons for the model to have."/>
-            <Parameter Name="Output params" Type="AZStd::size_t" Description="The output layer parameters."/>
-            <Parameter Name="Hidden layers" Type="MachineLearning::HiddenLayerParams" Description="The array of hidden layers to generate."/>
-            <Return Name="Model" Type="MachineLearning::INeuralNetworkPtr" Shared="true"/>
-        </Input>
-    </Class>
-</ScriptCanvas>

+ 0 - 25
Gems/MachineLearning/Code/Source/Nodes/CreateModel.cpp

@@ -1,25 +0,0 @@
-/*
- * Copyright (c) Contributors to the Open 3D Engine Project.
- * For complete copyright and license terms please see the LICENSE at the root of this distribution.
- *
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- */
-
-#include <Nodes/CreateModel.h>
-#include <Models/MultilayerPerceptron.h>
-
-namespace MachineLearning
-{
-    INeuralNetworkPtr CreateModel::In(AZStd::size_t Inputneurons, AZStd::size_t Outputparams, HiddenLayerParams Hiddenlayers)
-    {
-        INeuralNetworkPtr result = AZStd::make_unique<MultilayerPerceptron>(Inputneurons);
-        MultilayerPerceptron* modelPtr = static_cast<MultilayerPerceptron*>(result.get());
-        for (auto layerParams : Hiddenlayers)
-        {
-            modelPtr->AddLayer(layerParams); //.m_layerSize, layerParams.m_activationFunction);
-        }
-        modelPtr->AddLayer(Outputparams); //.m_layerSize, Outputparams.m_activationFunction);
-        return result;
-    }
-}

+ 2 - 2
Gems/MachineLearning/Code/Source/Nodes/FeedForward.cpp

@@ -11,9 +11,9 @@
 
 namespace MachineLearning
 {
-    AZ::VectorN FeedForward::In(MachineLearning::INeuralNetworkPtr Model, AZ::VectorN Activations)
+    AZ::VectorN FeedForward::In(INeuralNetworkPtr Model, AZ::VectorN Activations)
     {
-        AZ::VectorN results = Model->Forward(Activations);
+        AZ::VectorN results = *Model->Forward(Activations);
         return results;
     }
 }

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/GradientDescent.cpp

@@ -11,7 +11,7 @@
 
 namespace MachineLearning
 {
-    MachineLearning::INeuralNetworkPtr GradientDescent::In(MachineLearning::INeuralNetworkPtr Model, float LearningRate)
+    INeuralNetworkPtr GradientDescent::In(INeuralNetworkPtr Model, float LearningRate)
     {
         Model->GradientDescent(LearningRate);
         return Model;

+ 3 - 1
Gems/MachineLearning/Code/Source/Nodes/LoadTrainingData.ScriptCanvasNodeable.xml

@@ -8,7 +8,9 @@
            Description="Loads a set of labeled training data.">
 
         <Input Name="In" DisplayGroup="In" Description="Parameters controlling what data to load">
-            <Return Name="Data" Type="MachineLearning::LabeledTrainingDataSet" Shared="true"/>
+            <Parameter Name="ImageFile" Type="AZStd::string" Description="The archive file to load images from"/>
+            <Parameter Name="LabelFile" Type="AZStd::string" Description="The archive file to load labels from"/>
+            <Return Name="Data" Type="MachineLearning::ILabeledTrainingDataPtr" Shared="true"/>
         </Input>
     </Class>
 </ScriptCanvas>

+ 5 - 2
Gems/MachineLearning/Code/Source/Nodes/LoadTrainingData.cpp

@@ -7,11 +7,14 @@
  */
 
 #include <Nodes/LoadTrainingData.h>
+#include <Assets/MnistDataLoader.h>
 
 namespace MachineLearning
 {
-    LabeledTrainingDataSet LoadTrainingData::In()
+    ILabeledTrainingDataPtr LoadTrainingData::In(AZStd::string ImageFile, AZStd::string LabelFile)
     {
-        return LabeledTrainingDataSet();
+        ILabeledTrainingDataPtr result = AZStd::make_shared<MnistDataLoader>();
+        result->LoadArchive(ImageFile, LabelFile);
+        return result;
     }
 }

+ 1 - 1
Gems/MachineLearning/Code/Source/Nodes/LoadTrainingData.h

@@ -12,7 +12,7 @@
 #include <ScriptCanvas/Core/Nodeable.h>
 #include <ScriptCanvas/Core/NodeableNode.h>
 #include <MachineLearning/INeuralNetwork.h>
-#include <MachineLearning/LabeledTrainingData.h>
+#include <MachineLearning/ILabeledTrainingData.h>
 #include <Source/Nodes/LoadTrainingData.generated.h>
 
 namespace MachineLearning

+ 16 - 0
Gems/MachineLearning/Code/Source/Nodes/OneHot.ScriptCanvasNodeable.xml

@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="utf-8"?>
+
+<ScriptCanvas Include="Nodes/OneHot.h" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <Class Name="OneHot"
+           QualifiedName="MachineLearning::OneHot"
+           PreferredClassName="One hot encoder"
+           Category="MachineLearning"
+           Description="One-hot encodes a value.">
+
+        <Input Name="In" DisplayGroup="In" Description="Parameters controlling the one-hot encoding">
+            <Parameter Name="value" Type="AZStd::size_t" Description="The value to encode."/>
+            <Parameter Name="maxValue" Type="AZStd::size_t" Description="The maximum value possible."/>
+            <Return Name="oneHot" Type="AZ::VectorN" Shared="true"/>
+        </Input>
+    </Class>
+</ScriptCanvas>

+ 20 - 0
Gems/MachineLearning/Code/Source/Nodes/OneHot.cpp

@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <Nodes/OneHot.h>
+#include <Algorithms/Activations.h>
+
+namespace MachineLearning
+{
+    AZ::VectorN OneHot::In(AZStd::size_t Value, AZStd::size_t MaxValue)
+    {
+        AZ::VectorN result;
+        OneHotEncode(Value, MaxValue, result);
+        return result;
+    }
+}

+ 3 - 4
Gems/MachineLearning/Code/Source/Nodes/CreateModel.h → Gems/MachineLearning/Code/Source/Nodes/OneHot.h

@@ -12,14 +12,13 @@
 #include <ScriptCanvas/Core/Nodeable.h>
 #include <ScriptCanvas/Core/NodeableNode.h>
 #include <MachineLearning/INeuralNetwork.h>
-#include <Models/Layer.h>
-#include <Source/Nodes/CreateModel.generated.h>
+#include <Source/Nodes/OneHot.generated.h>
 
 namespace MachineLearning
 {
-    class CreateModel
+    class OneHot
         : public ScriptCanvas::Nodeable
     {
-        SCRIPTCANVAS_NODE_CreateModel;
+        SCRIPTCANVAS_NODE_OneHot;
     };
 }

+ 25 - 0
Gems/MachineLearning/Code/Source/Nodes/SupervisedLearning.ScriptCanvasNodeable.xml

@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="utf-8"?>
+
+<ScriptCanvas Include="Nodes/SupervisedLearning.h" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <Class Name="SupervisedLearning"
+           QualifiedName="MachineLearning::SupervisedLearning"
+           PreferredClassName="Supervised learning"
+           Category="MachineLearning"
+           Description="Performs a fully supervised training session of a neural network using stochastic gradient descent.">
+
+        <Input Name="In" DisplayGroup="In" Description="Parameters controlling model training">
+            <Parameter Name="Model" Type="MachineLearning::INeuralNetworkPtr" Description="The model to perform a gradient descent step on."/>
+            <Parameter Name="TrainingData" Type="MachineLearning::ILabeledTrainingDataPtr" Description="The set of labeled training data to use."/>
+            <Parameter Name="TestData" Type="MachineLearning::ILabeledTrainingDataPtr" Description="The set of labeled test data to evaluate against."/>
+            <!-- Can't enable this until script canvas supports enumeration-type pins -->
+            <!-- Parameter Name="CostFunction" Type="MachineLearning::LossFunctions" Description="The loss function to use to compute the cost."/ -->
+            <Parameter Name="CostFunction" Type="AZStd::size_t" Description="The loss function to use to compute the cost."/>
+            <Parameter Name="TotalIterations" Type="AZStd::size_t" Description="The total number of times to iterate (epochs) when training."/>
+            <Parameter Name="BatchSize" Type="AZStd::size_t" Description="The batch size to use."/>
+            <Parameter Name="LearningRate" Type="float" Description="The learning rate to use."/>
+            <Parameter Name="LearningRateDecay" Type="float" Description="The decay factor to use after each iteration (epoch) of training."/>
+            <Parameter Name="EarlyStopCost" Type="float" Description="If the total cost of the model drops below this value, training will halt. 0 will always complete the whole training cycle."/>
+            <Return Name="Model" Type="MachineLearning::INeuralNetworkPtr" Shared="true"/>
+        </Input>
+    </Class>
+</ScriptCanvas>

+ 33 - 0
Gems/MachineLearning/Code/Source/Nodes/SupervisedLearning.cpp

@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <Nodes/SupervisedLearning.h>
+#include <Models/MultilayerPerceptron.h>
+#include <MachineLearning/ILabeledTrainingData.h>
+#include <Algorithms/Training.h>
+
+namespace MachineLearning
+{
+    INeuralNetworkPtr SupervisedLearning::In
+    (
+        INeuralNetworkPtr Model,
+        ILabeledTrainingDataPtr TrainingData, 
+        ILabeledTrainingDataPtr TestData,
+        //LossFunctions CostFunction, 
+        AZStd::size_t CostFunction,
+        AZStd::size_t TotalIterations,
+        AZStd::size_t BatchSize,
+        float LearningRate,
+        float LearningRateDecay,
+        float EarlyStopCost
+    )
+    {
+        SupervisedLearningCycle(Model, TrainingData, TestData, static_cast<LossFunctions>(CostFunction), TotalIterations, BatchSize, LearningRate, LearningRateDecay, EarlyStopCost);
+        return Model;
+    }
+}

+ 25 - 0
Gems/MachineLearning/Code/Source/Nodes/SupervisedLearning.h

@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+#include <ScriptCanvas/CodeGen/NodeableCodegen.h>
+#include <ScriptCanvas/Core/Nodeable.h>
+#include <ScriptCanvas/Core/NodeableNode.h>
+#include <MachineLearning/INeuralNetwork.h>
+#include <MachineLearning/ILabeledTrainingData.h>
+#include <Source/Nodes/SupervisedLearning.generated.h>
+
+namespace MachineLearning
+{
+    class SupervisedLearning
+        : public ScriptCanvas::Nodeable
+    {
+        SCRIPTCANVAS_NODE_SupervisedLearning;
+    };
+}

+ 54 - 0
Gems/MachineLearning/Code/Tests/Algorithms/ActivationTests.cpp

@@ -30,4 +30,58 @@ namespace UnitTest
             ASSERT_GE(output.GetElement(iter), 0.0f);
         }
     }
+
+    TEST_F(MachineLearning_Activations, TestSigmoid)
+    {
+        AZ::VectorN output = AZ::VectorN::CreateZero(1024);
+        AZ::VectorN sourceVector = AZ::VectorN::CreateRandom(1024);
+        sourceVector *= 100.0f;
+        sourceVector -= 50.0f;
+        MachineLearning::Sigmoid(sourceVector, output);
+
+        // Sigmoid guarantees all outputs get squished between 0 and 1
+        for (AZStd::size_t iter = 0; iter < output.GetDimensionality(); ++iter)
+        {
+            ASSERT_GE(output.GetElement(iter), 0.0f);
+            ASSERT_LE(output.GetElement(iter), 1.0f);
+        }
+    }
+
+    TEST_F(MachineLearning_Activations, TestSoftmax)
+    {
+        AZ::VectorN output = AZ::VectorN::CreateZero(1024);
+        AZ::VectorN sourceVector = AZ::VectorN::CreateRandom(1024);
+        sourceVector *= 100.0f;
+        sourceVector -= 50.0f;
+        MachineLearning::Softmax(sourceVector, output);
+
+        // Softmax guarantees all outputs get squished between 0 and 1
+        for (AZStd::size_t iter = 0; iter < output.GetDimensionality(); ++iter)
+        {
+            ASSERT_GE(output.GetElement(iter), 0.0f);
+            ASSERT_LE(output.GetElement(iter), 1.0f);
+        }
+
+        // Additionally, the elements should sum to 1, as softmax returns a probability distribution; here we verify the sum does not exceed 1
+        const float totalSum = output.L1Norm();
+        // Between floating point precision and the estimates we use for exp(x), the total sum probability can be slightly greater than one
+        // We add a small epsilon to account for this error
+        ASSERT_LE(totalSum, 1.0f + AZ::Constants::Tolerance);
+    }
+
+    TEST_F(MachineLearning_Activations, TestLinear)
+    {
+        AZ::VectorN output = AZ::VectorN::CreateZero(1024);
+        AZ::VectorN sourceVector = AZ::VectorN::CreateRandom(1024);
+        sourceVector *= 100.0f;
+        sourceVector -= 50.0f;
+        MachineLearning::Linear(sourceVector, output);
+
+        // Linear just returns the input provided
+        // This makes it unsuitable as a hidden-layer activation: stacked linear layers collapse into a single linear transform
+        for (AZStd::size_t iter = 0; iter < output.GetDimensionality(); ++iter)
+        {
+            ASSERT_EQ(output.GetElement(iter), sourceVector.GetElement(iter));
+        }
+    }
 }

+ 8 - 0
Gems/MachineLearning/Code/Tests/Algorithms/LossFunctionTests.cpp

@@ -17,4 +17,12 @@ namespace UnitTest
     {
     };
 
+    TEST_F(MachineLearning_LossFunctions, TestMeanSquaredError)
+    {
+        AZ::VectorN expected = AZ::VectorN::CreateZero(1024);
+        AZ::VectorN actual = AZ::VectorN::CreateOne(1024);
+
+        const float totalLoss1 = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, expected, actual);
+        EXPECT_EQ(totalLoss1, 1024.0f);
+    }
 }

+ 2 - 2
Gems/MachineLearning/Code/Tests/Models/MultilayerPerceptronTests.cpp

@@ -64,7 +64,7 @@ namespace UnitTest
         EXPECT_TRUE(AZ::IsCloseMag(actualOutput.GetElement(1), 0.77f, 0.01f));
 
         float cost = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, trainingOutput, actualOutput);
-        EXPECT_TRUE(AZ::IsCloseMag(cost, 0.30f, 0.01f));
+        EXPECT_TRUE(AZ::IsCloseMag(cost, 0.60f, 0.01f));
 
         mlp.Reverse(MachineLearning::LossFunctions::MeanSquaredError, trainingInput, trainingOutput);
 
@@ -106,6 +106,6 @@ namespace UnitTest
         // We expect the total cost of the network on the training sample to be much lower after training
         const AZ::VectorN& trainedOutput = mlp.Forward(trainingInput);
         float trainedCost = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, trainingOutput, trainedOutput);
-        EXPECT_LT(trainedCost, 3.5e-6f);
+        EXPECT_LT(trainedCost, 5.0e-6f);
     }
 }

+ 1 - 1
Gems/MachineLearning/Code/machinelearning_api_files.cmake

@@ -8,7 +8,7 @@
 
 set(FILES
     Include/MachineLearning/INeuralNetwork.h
-    Include/MachineLearning/LabeledTrainingData.h
+    Include/MachineLearning/ILabeledTrainingData.h
     Include/MachineLearning/MachineLearningBus.h
     Include/MachineLearning/MachineLearningTypeIds.h
     Include/MachineLearning/Types.h

+ 14 - 5
Gems/MachineLearning/Code/machinelearning_private_files.cmake

@@ -11,10 +11,16 @@ set(FILES
     Source/MachineLearningModuleInterface.h
     Source/MachineLearningSystemComponent.cpp
     Source/MachineLearningSystemComponent.h
+    Source/Algorithms/Activations.cpp
     Source/Algorithms/Activations.h
-    Source/Algorithms/Activations.inl
+    Source/Algorithms/LossFunctions.cpp
     Source/Algorithms/LossFunctions.h
-    Source/Algorithms/LossFunctions.inl
+    Source/Algorithms/Training.cpp
+    Source/Algorithms/Training.h
+    Source/Assets/MnistDataLoader.cpp
+    Source/Assets/MnistDataLoader.h
+    Source/Components/MultilayerPerceptronComponent.cpp
+    Source/Components/MultilayerPerceptronComponent.h
     Source/Models/Layer.cpp
     Source/Models/Layer.h
     Source/Models/MultilayerPerceptron.cpp
@@ -25,9 +31,6 @@ set(FILES
     Source/Nodes/ComputeCost.ScriptCanvasNodeable.xml
     Source/Nodes/ComputeCost.cpp
     Source/Nodes/ComputeCost.h
-    Source/Nodes/CreateModel.ScriptCanvasNodeable.xml
-    Source/Nodes/CreateModel.cpp
-    Source/Nodes/CreateModel.h
     Source/Nodes/FeedForward.ScriptCanvasNodeable.xml
     Source/Nodes/FeedForward.cpp
     Source/Nodes/FeedForward.h
@@ -37,4 +40,10 @@ set(FILES
     Source/Nodes/LoadTrainingData.ScriptCanvasNodeable.xml
     Source/Nodes/LoadTrainingData.cpp
     Source/Nodes/LoadTrainingData.h
+    Source/Nodes/OneHot.ScriptCanvasNodeable.xml
+    Source/Nodes/OneHot.cpp
+    Source/Nodes/OneHot.h
+    Source/Nodes/SupervisedLearning.ScriptCanvasNodeable.xml
+    Source/Nodes/SupervisedLearning.cpp
+    Source/Nodes/SupervisedLearning.h
 )

Some files were not shown because too many files changed in this diff