MultilayerPerceptron.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <Models/MultilayerPerceptron.h>
  9. #include <Algorithms/LossFunctions.h>
  10. #include <AzCore/RTTI/RTTI.h>
  11. #include <AzCore/RTTI/BehaviorContext.h>
  12. #include <AzCore/Serialization/EditContext.h>
  13. #include <AzCore/Serialization/SerializeContext.h>
  14. #include <AzCore/IO/FileIO.h>
  15. #include <AzCore/IO/FileReader.h>
  16. #include <AzCore/IO/Path/Path.h>
  17. #include <AzCore/Console/ILogger.h>
  18. #include <AzNetworking/Serialization/NetworkInputSerializer.h>
  19. #include <AzNetworking/Serialization/NetworkOutputSerializer.h>
  20. namespace MachineLearning
  21. {
  22. void MultilayerPerceptron::Reflect(AZ::ReflectContext* context)
  23. {
  24. if (auto serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
  25. {
  26. serializeContext->Class<MultilayerPerceptron>()
  27. ->Version(1)
  28. ->Field("ModelAsset", &MultilayerPerceptron::m_asset)
  29. ->Field("Name", &MultilayerPerceptron::m_name)
  30. ->Field("ModelFile", &MultilayerPerceptron::m_modelFile)
  31. ->Field("TestDataFile", &MultilayerPerceptron::m_testDataFile)
  32. ->Field("TestLabelFile", &MultilayerPerceptron::m_testLabelFile)
  33. ->Field("TrainDataFile", &MultilayerPerceptron::m_trainDataFile)
  34. ->Field("TrainLabelFile", &MultilayerPerceptron::m_trainLabelFile)
  35. ->Field("ActivationCount", &MultilayerPerceptron::m_activationCount)
  36. ->Field("Layers", &MultilayerPerceptron::m_layers)
  37. ;
  38. if (AZ::EditContext* editContext = serializeContext->GetEditContext())
  39. {
  40. editContext->Class<MultilayerPerceptron>("A basic multilayer perceptron class", "")
  41. ->ClassElement(AZ::Edit::ClassElements::EditorData, "")
  42. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_asset, "ModelAsset", "The model asset")
  43. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_name, "Name", "The name for this model")
  44. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_modelFile, "ModelFile", "The file this model is saved to and loaded from")
  45. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_testDataFile, "TestDataFile", "The file test data should be loaded from")
  46. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_testLabelFile, "TestLabelFile", "The file test labels should be loaded from")
  47. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_trainDataFile, "TrainDataFile", "The file training data should be loaded from")
  48. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_trainLabelFile, "TrainLabelFile", "The file training labels should be loaded from")
  49. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_activationCount, "Activation Count", "The number of neurons in the activation layer")
  50. ->Attribute(AZ::Edit::Attributes::ChangeNotify, &MultilayerPerceptron::OnActivationCountChanged)
  51. ->DataElement(AZ::Edit::UIHandlers::Default, &MultilayerPerceptron::m_layers, "Layers", "The layers of the neural network")
  52. ->Attribute(AZ::Edit::Attributes::ChangeNotify, &MultilayerPerceptron::OnActivationCountChanged)
  53. ;
  54. }
  55. }
  56. auto behaviorContext = azrtti_cast<AZ::BehaviorContext*>(context);
  57. if (behaviorContext)
  58. {
  59. behaviorContext->Class<MultilayerPerceptron>("Multilayer perceptron")->
  60. Attribute(AZ::Script::Attributes::Scope, AZ::Script::Attributes::ScopeFlags::Common)->
  61. Attribute(AZ::Script::Attributes::Module, "machineLearning")->
  62. Attribute(AZ::Script::Attributes::ExcludeFrom, AZ::Script::Attributes::ExcludeFlags::ListOnly)->
  63. Constructor<AZStd::size_t>()->
  64. Attribute(AZ::Script::Attributes::Storage, AZ::Script::Attributes::StorageType::Value)->
  65. Method("GetName", &MultilayerPerceptron::GetName)->
  66. Method("GetLayerCount", &MultilayerPerceptron::GetLayerCount)->
  67. Property("ActivationCount", BehaviorValueProperty(&MultilayerPerceptron::m_activationCount))->
  68. Property("Layers", BehaviorValueProperty(&MultilayerPerceptron::m_layers))
  69. ;
  70. }
  71. }
  72. MultilayerPerceptron::MultilayerPerceptron()
  73. {
  74. }
  75. MultilayerPerceptron::MultilayerPerceptron(const MultilayerPerceptron& rhs)
  76. : m_name(rhs.m_name)
  77. , m_modelFile(rhs.m_modelFile)
  78. , m_testDataFile(rhs.m_testDataFile)
  79. , m_testLabelFile(rhs.m_testLabelFile)
  80. , m_trainDataFile(rhs.m_trainDataFile)
  81. , m_trainLabelFile(rhs.m_trainLabelFile)
  82. , m_activationCount(rhs.m_activationCount)
  83. , m_layers(rhs.m_layers)
  84. {
  85. }
  86. MultilayerPerceptron::MultilayerPerceptron(AZStd::size_t activationCount)
  87. : m_activationCount(activationCount)
  88. {
  89. }
  90. MultilayerPerceptron::~MultilayerPerceptron()
  91. {
  92. }
  93. MultilayerPerceptron& MultilayerPerceptron::operator=(const MultilayerPerceptron& rhs)
  94. {
  95. m_name = rhs.m_name;
  96. m_modelFile = rhs.m_modelFile;
  97. m_testDataFile = rhs.m_testDataFile;
  98. m_testLabelFile = rhs.m_testLabelFile;
  99. m_trainDataFile = rhs.m_trainDataFile;
  100. m_trainLabelFile = rhs.m_trainLabelFile;
  101. m_activationCount = rhs.m_activationCount;
  102. m_layers = rhs.m_layers;
  103. return *this;
  104. }
  105. AZStd::string MultilayerPerceptron::GetName() const
  106. {
  107. return m_name;
  108. }
  109. AZStd::string MultilayerPerceptron::GetAssetFile(AssetTypes assetType) const
  110. {
  111. switch (assetType)
  112. {
  113. case AssetTypes::Model:
  114. return m_modelFile;
  115. case AssetTypes::TestData:
  116. return m_testDataFile;
  117. case AssetTypes::TestLabels:
  118. return m_testLabelFile;
  119. case AssetTypes::TrainingData:
  120. return m_trainDataFile;
  121. case AssetTypes::TrainingLabels:
  122. return m_trainLabelFile;
  123. }
  124. return "";
  125. }
  126. AZStd::size_t MultilayerPerceptron::GetInputDimensionality() const
  127. {
  128. return m_activationCount;
  129. }
  130. AZStd::size_t MultilayerPerceptron::GetOutputDimensionality() const
  131. {
  132. if (!m_layers.empty())
  133. {
  134. return m_layers.back().m_biases.GetDimensionality();
  135. }
  136. return m_activationCount;
  137. }
  138. AZStd::size_t MultilayerPerceptron::GetLayerCount() const
  139. {
  140. return m_layers.size();
  141. }
  142. AZ::MatrixMxN MultilayerPerceptron::GetLayerWeights(AZStd::size_t layerIndex) const
  143. {
  144. return m_layers[layerIndex].m_weights;
  145. }
  146. AZ::VectorN MultilayerPerceptron::GetLayerBiases(AZStd::size_t layerIndex) const
  147. {
  148. return m_layers[layerIndex].m_biases;
  149. }
  150. AZStd::size_t MultilayerPerceptron::GetParameterCount() const
  151. {
  152. AZStd::size_t parameterCount = 0;
  153. for (const Layer& layer : m_layers)
  154. {
  155. parameterCount += layer.m_inputSize * layer.m_outputSize + layer.m_outputSize;
  156. }
  157. return parameterCount;
  158. }
  159. IInferenceContextPtr MultilayerPerceptron::CreateInferenceContext()
  160. {
  161. return new MlpInferenceContext();
  162. }
  163. ITrainingContextPtr MultilayerPerceptron::CreateTrainingContext()
  164. {
  165. return new MlpTrainingContext();
  166. }
  167. const AZ::VectorN* MultilayerPerceptron::Forward(IInferenceContextPtr context, const AZ::VectorN& activations)
  168. {
  169. MlpInferenceContext* forwardContext = static_cast<MlpInferenceContext*>(context);
  170. forwardContext->m_layerData.resize(m_layers.size());
  171. const AZ::VectorN* lastLayerOutput = &activations;
  172. for (AZStd::size_t iter = 0; iter < m_layers.size(); ++iter)
  173. {
  174. m_layers[iter].Forward(forwardContext->m_layerData[iter], *lastLayerOutput);
  175. lastLayerOutput = &forwardContext->m_layerData[iter].m_output;
  176. }
  177. return lastLayerOutput;
  178. }
  179. void MultilayerPerceptron::Reverse(ITrainingContextPtr context, LossFunctions lossFunction, const AZ::VectorN& activations, const AZ::VectorN& expected)
  180. {
  181. MlpTrainingContext* reverseContext = static_cast<MlpTrainingContext*>(context);
  182. MlpInferenceContext* forwardContext = &reverseContext->m_forward;
  183. reverseContext->m_layerData.resize(m_layers.size());
  184. forwardContext->m_layerData.resize(m_layers.size());
  185. ++reverseContext->m_trainingSampleSize;
  186. // First feed-forward the activations to get our current model predictions
  187. // We do additional book-keeping over a standard forward pass to make gradient calculations easier
  188. const AZ::VectorN* lastLayerOutput = &activations;
  189. for (AZStd::size_t iter = 0; iter < m_layers.size(); ++iter)
  190. {
  191. reverseContext->m_layerData[iter].m_lastInput = lastLayerOutput;
  192. m_layers[iter].Forward(forwardContext->m_layerData[iter], *lastLayerOutput);
  193. lastLayerOutput = &forwardContext->m_layerData[iter].m_output;
  194. }
  195. // Compute the partial derivatives of the loss function with respect to the final layer output
  196. AZ::VectorN costGradients;
  197. ComputeLoss_Derivative(lossFunction, *lastLayerOutput, expected, costGradients);
  198. AZ::VectorN* lossGradient = &costGradients;
  199. for (int64_t iter = static_cast<int64_t>(m_layers.size()) - 1; iter >= 0; --iter)
  200. {
  201. m_layers[iter].AccumulateGradients(reverseContext->m_trainingSampleSize, reverseContext->m_layerData[iter], forwardContext->m_layerData[iter], *lossGradient);
  202. lossGradient = &reverseContext->m_layerData[iter].m_backpropagationGradients;
  203. }
  204. }
  205. void MultilayerPerceptron::GradientDescent(ITrainingContextPtr context, float learningRate)
  206. {
  207. MlpTrainingContext* reverseContext = static_cast<MlpTrainingContext*>(context);
  208. if (reverseContext->m_trainingSampleSize > 0)
  209. {
  210. for (AZStd::size_t iter = 0; iter < m_layers.size(); ++iter)
  211. {
  212. m_layers[iter].ApplyGradients(reverseContext->m_layerData[iter], learningRate);
  213. }
  214. }
  215. reverseContext->m_trainingSampleSize = 0;
  216. }
  217. void MultilayerPerceptron::OnActivationCountChanged()
  218. {
  219. AZStd::size_t lastLayerDimensionality = m_activationCount;
  220. for (Layer& layer : m_layers)
  221. {
  222. layer.m_inputSize = lastLayerDimensionality;
  223. layer.OnSizesChanged();
  224. lastLayerDimensionality = layer.m_outputSize;
  225. }
  226. }
  227. bool MultilayerPerceptron::LoadModel()
  228. {
  229. AZ::IO::SystemFile modelFile;
  230. AZ::IO::FixedMaxPath filePathFixed = m_modelFile.c_str();
  231. if (AZ::IO::FileIOBase* fileIOBase = AZ::IO::FileIOBase::GetInstance())
  232. {
  233. fileIOBase->ResolvePath(filePathFixed, m_modelFile.c_str());
  234. }
  235. if (!modelFile.Open(filePathFixed.c_str(), AZ::IO::SystemFile::SF_OPEN_READ_ONLY))
  236. {
  237. AZLOG_ERROR("Failed to load '%s'. File could not be opened.", filePathFixed.c_str());
  238. return false;
  239. }
  240. const AZ::IO::SizeType length = modelFile.Length();
  241. if (length == 0)
  242. {
  243. AZLOG_ERROR("Failed to load '%s'. File is empty.", filePathFixed.c_str());
  244. return false;
  245. }
  246. AZStd::vector<uint8_t> serializeBuffer;
  247. serializeBuffer.resize(length);
  248. modelFile.Seek(0, AZ::IO::SystemFile::SF_SEEK_BEGIN);
  249. modelFile.Read(serializeBuffer.size(), serializeBuffer.data());
  250. AzNetworking::NetworkOutputSerializer serializer(serializeBuffer.data(), static_cast<uint32_t>(serializeBuffer.size()));
  251. return Serialize(serializer);
  252. }
  253. bool MultilayerPerceptron::SaveModel()
  254. {
  255. AZ::IO::SystemFile modelFile;
  256. AZ::IO::FixedMaxPath filePathFixed = m_modelFile.c_str();
  257. if (AZ::IO::FileIOBase* fileIOBase = AZ::IO::FileIOBase::GetInstance())
  258. {
  259. fileIOBase->ResolvePath(filePathFixed, m_modelFile.c_str());
  260. }
  261. if (!modelFile.Open(filePathFixed.c_str(), AZ::IO::SystemFile::SF_OPEN_READ_WRITE | AZ::IO::SystemFile::SF_OPEN_CREATE))
  262. {
  263. AZLOG_ERROR("Failed to save to '%s'. File could not be opened for writing.", filePathFixed.c_str());
  264. return false;
  265. }
  266. modelFile.Seek(0, AZ::IO::SystemFile::SF_SEEK_BEGIN);
  267. AZStd::vector<uint8_t> serializeBuffer;
  268. serializeBuffer.resize(EstimateSerializeSize());
  269. AzNetworking::NetworkInputSerializer serializer(serializeBuffer.data(), static_cast<uint32_t>(serializeBuffer.size()));
  270. if (Serialize(serializer))
  271. {
  272. modelFile.Write(serializeBuffer.data(), serializeBuffer.size());
  273. return true;
  274. }
  275. return false;
  276. }
  277. void MultilayerPerceptron::AddLayer(AZStd::size_t layerDimensionality, ActivationFunctions activationFunction)
  278. {
  279. // This is not thread safe, this should only be used during model configuration
  280. const AZStd::size_t lastLayerDimensionality = GetOutputDimensionality();
  281. m_layers.push_back(AZStd::move(Layer(activationFunction, lastLayerDimensionality, layerDimensionality)));
  282. }
  283. Layer* MultilayerPerceptron::GetLayer(AZStd::size_t layerIndex)
  284. {
  285. // This is not thread safe, this method should only be used by unit testing to inspect layer weights and biases for correctness
  286. return &m_layers[layerIndex];
  287. }
  288. bool MultilayerPerceptron::Serialize(AzNetworking::ISerializer& serializer)
  289. {
  290. return serializer.Serialize(m_name, "Name")
  291. && serializer.Serialize(m_activationCount, "activationCount")
  292. && serializer.Serialize(m_layers, "layers");
  293. }
  294. AZStd::size_t MultilayerPerceptron::EstimateSerializeSize() const
  295. {
  296. const AZStd::size_t padding = 64; // 64 bytes of extra padding just in case
  297. AZStd::size_t estimatedSize = padding
  298. + sizeof(AZStd::size_t)
  299. + m_name.size()
  300. + sizeof(m_activationCount)
  301. + sizeof(AZStd::size_t);
  302. for (const Layer& layer : m_layers)
  303. {
  304. estimatedSize += layer.EstimateSerializeSize();
  305. }
  306. return estimatedSize;
  307. }
  308. }