MultilayerPerceptronTests.cpp

/*
 * Copyright (c) Contributors to the Open 3D Engine Project.
 * For complete copyright and license terms please see the LICENSE at the root of this distribution.
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 */

#include <AzTest/AzTest.h>
#include <AzCore/UnitTest/TestTypes.h>
#include <Models/MultilayerPerceptron.h>
#include <Algorithms/LossFunctions.h>

namespace UnitTest
{
    class MachineLearning_MLP
        : public UnitTest::LeakDetectionFixture
    {
    };

    TEST_F(MachineLearning_MLP, TestGradientCalculations)
    {
        // Because the computations performed during gradient descent are non-trivial, this unit test
        // carefully replicates the backpropagation example laid out in this article:
        // https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
        // The example is extremely simple, so if the unit test fails, the maintainer can trace
        // execution step by step and compare its output against the article.
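        // The article's network has two inputs (i1 = 0.05, i2 = 0.10), one hidden layer of two
        // sigmoid neurons (h1, h2), and two sigmoid outputs (o1, o2) with targets 0.01 and 0.99.
        // The comments below reuse the article's w1..w8, b1, b2 naming for the weights and biases.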
        const float layer0Weights[] =
        {
            0.15f, 0.20f, 0.25f, 0.30f
        };
        const float layer0Biases[] = { 0.35f, 0.35f };
        const float layer1Weights[] =
        {
            0.40f, 0.45f, 0.50f, 0.55f
        };
        const float layer1Biases[] = { 0.60f, 0.60f };

        MachineLearning::MultilayerPerceptron mlp(2);
        MachineLearning::MlpInferenceContext inferenceData;
        MachineLearning::MlpTrainingContext trainingData;
        mlp.AddLayer(2, MachineLearning::ActivationFunctions::Sigmoid);
        mlp.AddLayer(2, MachineLearning::ActivationFunctions::Sigmoid);

        MachineLearning::Layer* layer0 = mlp.GetLayer(0);
        layer0->m_weights = AZ::MatrixMxN::CreateFromPackedFloats(2, 2, layer0Weights);
        layer0->m_biases = AZ::VectorN::CreateFromFloats(2, layer0Biases);

        MachineLearning::Layer* layer1 = mlp.GetLayer(1);
        layer1->m_weights = AZ::MatrixMxN::CreateFromPackedFloats(2, 2, layer1Weights);
        layer1->m_biases = AZ::VectorN::CreateFromFloats(2, layer1Biases);
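        // The packed weights appear to fill each 2x2 matrix row-major with one row per neuron,
        // judging by the per-element expectations later in this test: layer0 row 0 holds (w1, w2)
        // into h1 and row 1 holds (w3, w4) into h2; layer1 row 0 holds (w5, w6) into o1 and
        // row 1 holds (w7, w8) into o2.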
        const float activations[] = { 0.05f, 0.10f };
        const float labels[] = { 0.01f, 0.99f };
        const AZ::VectorN trainingInput = AZ::VectorN::CreateFromFloats(2, activations);
        const AZ::VectorN trainingOutput = AZ::VectorN::CreateFromFloats(2, labels);
        const AZ::VectorN* actualOutput = mlp.Forward(&inferenceData, trainingInput);

        // Validate intermediate layer output given the initial weights and biases
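        // Hand-computed per the article: net_h1 = w1*i1 + w2*i2 + b1 = 0.15*0.05 + 0.20*0.10 + 0.35 = 0.3775,
        // so out_h1 = sigmoid(0.3775) ~= 0.5933; net_h2 = 0.25*0.05 + 0.30*0.10 + 0.35 = 0.3925,
        // so out_h2 = sigmoid(0.3925) ~= 0.5969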
        EXPECT_TRUE(AZ::IsCloseMag(inferenceData.m_layerData[0].m_output.GetElement(0), 0.5933f, 0.01f));
        EXPECT_TRUE(AZ::IsCloseMag(inferenceData.m_layerData[0].m_output.GetElement(1), 0.5969f, 0.01f));

        // Validate final model output given the initial weights and biases
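        // net_o1 = w5*out_h1 + w6*out_h2 + b2 = 0.40*0.5933 + 0.45*0.5969 + 0.60 ~= 1.1059,
        // so out_o1 = sigmoid(1.1059) ~= 0.7514; net_o2 = 0.50*0.5933 + 0.55*0.5969 + 0.60 ~= 1.2249,
        // so out_o2 = sigmoid(1.2249) ~= 0.7729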
        EXPECT_TRUE(AZ::IsCloseMag(actualOutput->GetElement(0), 0.75f, 0.01f));
        EXPECT_TRUE(AZ::IsCloseMag(actualOutput->GetElement(1), 0.77f, 0.01f));

        float cost = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, trainingOutput, *actualOutput);
        EXPECT_TRUE(AZ::IsCloseMag(cost, 0.60f, 0.01f));
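        // Note: the article reports E_total ~= 0.2983 using the 1/2 * (target - output)^2 convention;
        // the 0.60 expected above is roughly twice that, which suggests this MeanSquaredError omits
        // the 1/2 factor (an inference from the expected value, not verified against the loss implementation)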

        mlp.Reverse(&trainingData, MachineLearning::LossFunctions::MeanSquaredError, trainingInput, trainingOutput);

        // Check the activation gradients
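        // Per the article: delta_o1 = (out_o1 - target_o1) * out_o1 * (1 - out_o1) = 0.7414 * 0.1868 ~= 0.1385
        //                  delta_o2 = (out_o2 - target_o2) * out_o2 * (1 - out_o2) = -0.2171 * 0.1755 ~= -0.0381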
        EXPECT_NEAR(trainingData.m_layerData[1].m_activationGradients.GetElement(0), 0.1385f, 0.01f);
        EXPECT_NEAR(trainingData.m_layerData[1].m_activationGradients.GetElement(1), -0.0381f, 0.01f);
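        // Check the weight gradients: dE/dw5 = delta_o1 * out_h1 = 0.1385 * 0.5933 ~= 0.0822, and
        // likewise dE/dw6 ~= 0.0826, dE/dw7 ~= -0.0226, dE/dw8 ~= -0.0227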
        EXPECT_NEAR(trainingData.m_layerData[1].m_weightGradients.GetElement(0, 0), 0.0822f, 0.01f);
        EXPECT_NEAR(trainingData.m_layerData[1].m_weightGradients.GetElement(0, 1), 0.0826f, 0.01f);
        EXPECT_NEAR(trainingData.m_layerData[1].m_weightGradients.GetElement(1, 0), -0.0226f, 0.01f);
        EXPECT_NEAR(trainingData.m_layerData[1].m_weightGradients.GetElement(1, 1), -0.0227f, 0.01f);
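        // Check the gradients propagated back to the hidden layer outputs:
        // dE/dout_h1 = delta_o1 * w5 + delta_o2 * w7 = 0.1385 * 0.40 - 0.0381 * 0.50 ~= 0.0364
        // dE/dout_h2 = delta_o1 * w6 + delta_o2 * w8 = 0.1385 * 0.45 - 0.0381 * 0.55 ~= 0.0414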
        EXPECT_NEAR(trainingData.m_layerData[1].m_backpropagationGradients.GetElement(0), 0.0364f, 0.01f);
        EXPECT_NEAR(trainingData.m_layerData[1].m_backpropagationGradients.GetElement(1), 0.0414f, 0.01f);
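        // The layer 0 weight gradients chain through the hidden sigmoid:
        // dE/dw1 = dE/dout_h1 * out_h1 * (1 - out_h1) * i1 = 0.0364 * 0.2413 * 0.05 ~= 0.0004
        // dE/dw2 = dE/dout_h1 * out_h1 * (1 - out_h1) * i2 = 0.0364 * 0.2413 * 0.10 ~= 0.0008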
        EXPECT_NEAR(trainingData.m_layerData[0].m_weightGradients.GetElement(0, 0), 0.0004f, 0.01f);
        EXPECT_NEAR(trainingData.m_layerData[0].m_weightGradients.GetElement(0, 1), 0.0008f, 0.01f);

        mlp.GradientDescent(&trainingData, 0.5f);
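        // Each weight should move against its gradient, scaled by the learning rate (eta = 0.5,
        // matching the article), e.g. w5' = w5 - eta * dE/dw5 = 0.40 - 0.5 * 0.0822 ~= 0.3590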
        EXPECT_NEAR(layer1->m_weights.GetElement(0, 0), 0.3590f, 0.01f);
        EXPECT_NEAR(layer1->m_weights.GetElement(0, 1), 0.4087f, 0.01f);
        EXPECT_NEAR(layer1->m_weights.GetElement(1, 0), 0.5113f, 0.01f);
        EXPECT_NEAR(layer1->m_weights.GetElement(1, 1), 0.5614f, 0.01f);
        EXPECT_NEAR(layer0->m_weights.GetElement(0, 0), 0.1498f, 0.01f);
        EXPECT_NEAR(layer0->m_weights.GetElement(0, 1), 0.1996f, 0.01f);
        EXPECT_NEAR(layer0->m_weights.GetElement(1, 0), 0.2495f, 0.01f);
        EXPECT_NEAR(layer0->m_weights.GetElement(1, 1), 0.2995f, 0.01f);

        // Now let's evaluate a whole training cycle
        const AZStd::size_t numTrainingLoops = 10000;
        for (AZStd::size_t iter = 0; iter < numTrainingLoops; ++iter)
        {
            mlp.Reverse(&trainingData, MachineLearning::LossFunctions::MeanSquaredError, trainingInput, trainingOutput);
            mlp.GradientDescent(&trainingData, 0.5f);
        }

        // We expect the total cost of the network on the training sample to be much lower after training
        const AZ::VectorN* trainedOutput = mlp.Forward(&inferenceData, trainingInput);
        float trainedCost = MachineLearning::ComputeTotalCost(MachineLearning::LossFunctions::MeanSquaredError, trainingOutput, *trainedOutput);
        EXPECT_LT(trainedCost, 5.0e-6f);
    }
}