fastlsolve.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. /*************************************************************************
  2. * *
  3. * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
  4. * All rights reserved. Email: [email protected] Web: www.q12.org *
  5. * *
  6. * This library is free software; you can redistribute it and/or *
  7. * modify it under the terms of EITHER: *
  8. * (1) The GNU Lesser General Public License as published by the Free *
  9. * Software Foundation; either version 2.1 of the License, or (at *
  10. * your option) any later version. The text of the GNU Lesser *
  11. * General Public License is included with this library in the *
  12. * file LICENSE.TXT. *
  13. * (2) The BSD-style license that is included with this library in *
  14. * the file LICENSE-BSD.TXT. *
  15. * *
  16. * This library is distributed in the hope that it will be useful, *
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
  19. * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
  20. * *
  21. *************************************************************************/
  22. /*
  23. * L1Straight Equation Solving Routines
  24. * Copyright (c) 2017-2020 Oleh Derevenko, [email protected] (change all "a" to "e")
  25. */
  26. #include <ode/common.h>
  27. #include <ode/matrix.h>
  28. #include <ode/matrix_coop.h>
  29. #include "config.h"
  30. #include "threaded_solver_ldlt.h"
  31. #include "threading_base.h"
  32. #include "resource_control.h"
  33. #include "error.h"
  34. #include "fastlsolve_impl.h"
  35. /*static */
  36. void ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1StraightResourceRequirements(
  37. dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
  38. unsigned allowedThreadCount, unsigned rowCount)
  39. {
  40. dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
  41. unsigned limitedThreadCount = restrictSolvingL1StraightAllowedThreadCount(threading, allowedThreadCount, rowCount);
  42. if (limitedThreadCount > 1)
  43. {
  44. doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
  45. }
  46. }
  47. /*static */
  48. void ThreadedEquationSolverLDLT::cooperativelySolveL1Straight(
  49. dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
  50. const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
  51. {
  52. dAASSERT(rowCount != 0);
  53. dxThreadingBase *threading = resourceContainer->getThreadingInstance();
  54. unsigned limitedThreadCount = restrictSolvingL1StraightAllowedThreadCount(threading, allowedThreadCount, rowCount);
  55. if (limitedThreadCount <= 1)
  56. {
  57. solveL1Straight<SL1S_B_STRIDE>(L, b, rowCount, rowSkip);
  58. }
  59. else
  60. {
  61. doCooperativelySolveL1StraightValidated(resourceContainer, limitedThreadCount, L, b, rowCount, rowSkip);
  62. }
  63. }
  64. /*static */
  65. unsigned ThreadedEquationSolverLDLT::restrictSolvingL1StraightAllowedThreadCount(
  66. dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount)
  67. {
  68. unsigned limitedThreadCount = 1;
  69. #if dCOOPERATIVE_ENABLED
  70. const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
  71. unsigned solvingBlockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
  72. dIASSERT(deriveSolvingL1StraightThreadCount(SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
  73. if (solvingBlockCount >= SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM)
  74. {
  75. limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
  76. }
  77. #endif // #if dCOOPERATIVE_ENABLED
  78. return limitedThreadCount;
  79. }
  80. /*static */
  81. void ThreadedEquationSolverLDLT::doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(
  82. dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
  83. unsigned allowedThreadCount, unsigned rowCount)
  84. {
  85. const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
  86. unsigned blockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
  87. dIASSERT(blockCount >= 1);
  88. unsigned threadCountToUse = deriveSolvingL1StraightThreadCount(blockCount, allowedThreadCount);
  89. dIASSERT(threadCountToUse > 1);
  90. unsigned simultaneousCallCount = 1 + (threadCountToUse - 1);
  91. SolvingL1StraightMemoryEstimates solvingMemoryEstimates;
  92. sizeint solvingMemoryRequired = estimateCooperativelySolvingL1StraightMemoryRequirement<blockStep>(rowCount, solvingMemoryEstimates);
  93. const unsigned solvingAlignmentRequired = ALLOCATION_DEFAULT_ALIGNMENT;
  94. unsigned featureRequirement = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
  95. summaryRequirementsDescriptor->mergeAnotherDescriptorIn(solvingMemoryRequired, solvingAlignmentRequired, simultaneousCallCount, featureRequirement);
  96. }
  97. /*static */
  98. void ThreadedEquationSolverLDLT::doCooperativelySolveL1StraightValidated(
  99. dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
  100. const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
  101. {
  102. dIASSERT(allowedThreadCount > 1);
  103. const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
  104. unsigned blockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
  105. dIASSERT(blockCount >= 1);
  106. unsigned threadCountToUse = deriveSolvingL1StraightThreadCount(blockCount, allowedThreadCount);
  107. dIASSERT(threadCountToUse > 1);
  108. dCallWaitID completionWait = resourceContainer->getStockCallWait();
  109. dAASSERT(completionWait != NULL);
  110. atomicord32 blockCompletionProgress;
  111. cellindexint *blockProgressDescriptors;
  112. SolveL1StraightCellContext *cellContexts;
  113. SolvingL1StraightMemoryEstimates solvingMemoryEstimates;
  114. sizeint solvingMemoryRequired = estimateCooperativelySolvingL1StraightMemoryRequirement<blockStep>(rowCount, solvingMemoryEstimates);
  115. dIASSERT(solvingMemoryRequired <= resourceContainer->getMemoryBufferSize());
  116. void *bufferAllocated = resourceContainer->getMemoryBufferPointer();
  117. dIASSERT(bufferAllocated != NULL);
  118. dIASSERT(dALIGN_PTR(bufferAllocated, ALLOCATION_DEFAULT_ALIGNMENT) == bufferAllocated);
  119. void *bufferCurrentLocation = bufferAllocated;
  120. bufferCurrentLocation = markCooperativelySolvingL1StraightMemoryStructuresOut(bufferCurrentLocation, solvingMemoryEstimates, blockProgressDescriptors, cellContexts);
  121. dIVERIFY(bufferCurrentLocation <= (uint8 *)bufferAllocated + solvingMemoryRequired);
  122. initializeCooperativelySolveL1StraightMemoryStructures<blockStep>(rowCount, blockCompletionProgress, blockProgressDescriptors, cellContexts);
  123. dCallReleaseeID calculationFinishReleasee;
  124. SolveL1StraightWorkerContext workerContext; // The variable must exist in the outer scope
  125. workerContext.init(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts);
  126. dxThreadingBase *threading = resourceContainer->getThreadingInstance();
  127. threading->PostThreadedCall(NULL, &calculationFinishReleasee, threadCountToUse - 1, NULL, completionWait, &solveL1Straight_completion_callback, NULL, 0, "SolveL1Straight Completion");
  128. threading->PostThreadedCallsGroup(NULL, threadCountToUse - 1, calculationFinishReleasee, &solveL1Straight_worker_callback, &workerContext, "SolveL1Straight Work");
  129. participateSolvingL1Straight<blockStep, SL1S_B_STRIDE>(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts, threadCountToUse - 1);
  130. threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "SolveL1Straight End Wait");
  131. }
  132. /*static */
  133. int ThreadedEquationSolverLDLT::solveL1Straight_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
  134. {
  135. SolveL1StraightWorkerContext *ptrContext = (SolveL1StraightWorkerContext *)callContext;
  136. solveL1Straight_worker(*ptrContext, dCAST_TO_SMALLER(unsigned, callInstanceIndex));
  137. return 1;
  138. }
  139. /*static */
  140. void ThreadedEquationSolverLDLT::solveL1Straight_worker(SolveL1StraightWorkerContext &ref_context, unsigned ownThreadIndex)
  141. {
  142. const unsigned blockStep = SL1S_BLOCK_SIZE;
  143. participateSolvingL1Straight<blockStep, SL1S_B_STRIDE>(ref_context.m_L, ref_context.m_b, ref_context.m_rowCount, ref_context.m_rowSkip,
  144. *ref_context.m_ptrBlockCompletionProgress, ref_context.m_blockProgressDescriptors, ref_context.m_cellContexts, ownThreadIndex);
  145. }
  146. /*static */
  147. int ThreadedEquationSolverLDLT::solveL1Straight_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
  148. {
  149. return 1;
  150. }
  151. //////////////////////////////////////////////////////////////////////////
  152. // Public interface functions
  153. /*extern ODE_API */
  154. void dSolveL1(const dReal *L, dReal *B, int n, int lskip1)
  155. {
  156. dAASSERT(n != 0);
  157. if (n != 0)
  158. {
  159. dAASSERT(L != NULL);
  160. dAASSERT(B != NULL);
  161. solveL1Straight<1>(L, B, n, lskip1);
  162. }
  163. }
  164. /*extern ODE_API */
  165. void dEstimateCooperativelySolveL1StraightResourceRequirements(dResourceRequirementsID requirements,
  166. unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
  167. {
  168. dAASSERT(requirements != NULL);
  169. dxResourceRequirementDescriptor *requirementsDescriptor = (dxResourceRequirementDescriptor *)requirements;
  170. ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1StraightResourceRequirements(requirementsDescriptor, maximalAllowedThreadCount, maximalRowCount);
  171. }
  172. /*extern ODE_API */
  173. void dCooperativelySolveL1Straight(dResourceContainerID resources, unsigned allowedThreadCount,
  174. const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
  175. {
  176. dAASSERT(resources != NULL);
  177. dxRequiredResourceContainer *resourceContainer = (dxRequiredResourceContainer *)resources;
  178. ThreadedEquationSolverLDLT::cooperativelySolveL1Straight(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
  179. }