loop_fusion.h 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef SOURCE_OPT_LOOP_FUSION_H_
  15. #define SOURCE_OPT_LOOP_FUSION_H_
  16. #include <map>
  17. #include <set>
  18. #include <utility>
  19. #include <vector>
  20. #include "source/opt/ir_context.h"
  21. #include "source/opt/loop_descriptor.h"
  22. #include "source/opt/loop_utils.h"
  23. #include "source/opt/scalar_analysis.h"
  24. namespace spvtools {
  25. namespace opt {
  26. class LoopFusion {
  27. public:
  28. LoopFusion(IRContext* context, Loop* loop_0, Loop* loop_1)
  29. : context_(context),
  30. loop_0_(loop_0),
  31. loop_1_(loop_1),
  32. containing_function_(loop_0->GetHeaderBlock()->GetParent()) {}
  33. // Checks if the |loop_0| and |loop_1| are compatible for fusion.
  34. // That means:
  35. // * they both have one induction variable
  36. // * they have the same upper and lower bounds
  37. // - same initial value
  38. // - same condition
  39. // * they have the same update step
  40. // * they are adjacent, with |loop_0| appearing before |loop_1|
  41. // * there are no break/continue in either of them
  42. // * they both have pre-header blocks (required for ScalarEvolutionAnalysis
  43. // and dependence checking).
  44. bool AreCompatible();
  45. // Checks if compatible |loop_0| and |loop_1| are legal to fuse.
  46. // * fused loops do not have any dependencies with dependence distance greater
  47. // than 0 that did not exist in the original loops.
  48. // * there are no function calls in the loops (could have side-effects)
  49. bool IsLegal();
  50. // Perform the actual fusion of |loop_0_| and |loop_1_|. The loops have to be
  51. // compatible and the fusion has to be legal.
  52. void Fuse();
  53. private:
  54. // Check that the initial values are the same.
  55. bool CheckInit();
  56. // Check that the conditions are the same.
  57. bool CheckCondition();
  58. // Check that the steps are the same.
  59. bool CheckStep();
  60. // Returns |true| if |instruction| is used in the continue or condition block
  61. // of |loop|.
  62. bool UsedInContinueOrConditionBlock(Instruction* instruction, Loop* loop);
  63. // Remove entries in |instructions| that are not used in the continue or
  64. // condition block of |loop|.
  65. void RemoveIfNotUsedContinueOrConditionBlock(
  66. std::vector<Instruction*>* instructions, Loop* loop);
  67. // Returns |true| if |instruction| is used in |loop|.
  68. bool IsUsedInLoop(Instruction* instruction, Loop* loop);
  69. // Returns |true| if |loop| has at least one barrier or function call.
  70. bool ContainsBarriersOrFunctionCalls(Loop* loop);
  71. // Get all instructions in the |loop| (except in the latch block) that have
  72. // the opcode |opcode|.
  73. std::pair<std::vector<Instruction*>, std::vector<Instruction*>>
  74. GetLoadsAndStoresInLoop(Loop* loop);
  75. // Given a vector of memory operations (OpLoad/OpStore), constructs a map from
  76. // variables to the loads/stores that those variables.
  77. std::map<Instruction*, std::vector<Instruction*>> LocationToMemOps(
  78. const std::vector<Instruction*>& mem_ops);
  79. IRContext* context_;
  80. // The original loops to be fused.
  81. Loop* loop_0_;
  82. Loop* loop_1_;
  83. // The function that contains |loop_0_| and |loop_1_|.
  84. Function* containing_function_ = nullptr;
  85. // The induction variables for |loop_0_| and |loop_1_|.
  86. Instruction* induction_0_ = nullptr;
  87. Instruction* induction_1_ = nullptr;
  88. };
  89. } // namespace opt
  90. } // namespace spvtools
  91. #endif // SOURCE_OPT_LOOP_FUSION_H_