loop_peeling.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. // Copyright (c) 2018 Google LLC.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef SOURCE_OPT_LOOP_PEELING_H_
  15. #define SOURCE_OPT_LOOP_PEELING_H_
  16. #include <algorithm>
  17. #include <limits>
  18. #include <memory>
  19. #include <tuple>
  20. #include <unordered_map>
  21. #include <unordered_set>
  22. #include <utility>
  23. #include <vector>
  24. #include "source/opt/ir_context.h"
  25. #include "source/opt/loop_descriptor.h"
  26. #include "source/opt/loop_utils.h"
  27. #include "source/opt/pass.h"
  28. #include "source/opt/scalar_analysis.h"
  29. namespace spvtools {
  30. namespace opt {
  31. // Utility class to perform the peeling of a given loop.
  32. // The loop peeling transformation make a certain amount of a loop iterations to
  33. // be executed either before (peel before) or after (peel after) the transformed
  34. // loop.
  35. //
  36. // For peeling cases the transformation does the following steps:
  37. // - It clones the loop and inserts the cloned loop before the original loop;
  38. // - It connects all iterating values of the cloned loop with the
  39. // corresponding original loop values so that the second loop starts with
  40. // the appropriate values.
  41. // - It inserts a new induction variable "i" is inserted into the cloned that
  42. // starts with the value 0 and increment by step of one.
  43. //
  44. // The last step is specific to each case:
  45. // - Peel before: the transformation is to peel the "N" first iterations.
  46. // The exit condition of the cloned loop is changed so that the loop
  47. // exits when "i < N" becomes false. The original loop is then protected to
  48. // only execute if there is any iteration left to do.
  49. // - Peel after: the transformation is to peel the "N" last iterations,
  50. // then the exit condition of the cloned loop is changed so that the loop
  51. // exits when "i + N < max_iteration" becomes false, where "max_iteration"
  52. // is the upper bound of the loop. The cloned loop is then protected to
  53. // only execute if there is any iteration left to do no covered by the
  54. // second.
  55. //
  56. // To be peelable:
  57. // - The loop must be in LCSSA form;
  58. // - The loop must not contain any breaks;
  59. // - The loop must not have any ambiguous iterators updates (see
  60. // "CanPeelLoop").
  61. // The method "CanPeelLoop" checks that those constrained are met.
  62. class LoopPeeling {
  63. public:
  64. // LoopPeeling constructor.
  65. // |loop| is the loop to peel.
  66. // |loop_iteration_count| is the instruction holding the |loop| iteration
  67. // count, must be invariant for |loop| and must be of an int 32 type (signed
  68. // or unsigned).
  69. // |canonical_induction_variable| is an induction variable that can be used to
  70. // count the number of iterations, must be of the same type as
  71. // |loop_iteration_count| and start at 0 and increase by step of one at each
  72. // iteration. The value nullptr is interpreted as no suitable variable exists
  73. // and one will be created.
  74. LoopPeeling(Loop* loop, Instruction* loop_iteration_count,
  75. Instruction* canonical_induction_variable = nullptr)
  76. : context_(loop->GetContext()),
  77. loop_utils_(loop->GetContext(), loop),
  78. loop_(loop),
  79. loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
  80. ? loop_iteration_count
  81. : nullptr),
  82. int_type_(nullptr),
  83. original_loop_canonical_induction_variable_(
  84. canonical_induction_variable),
  85. canonical_induction_variable_(nullptr) {
  86. if (loop_iteration_count_) {
  87. int_type_ = context_->get_type_mgr()
  88. ->GetType(loop_iteration_count_->type_id())
  89. ->AsInteger();
  90. if (canonical_induction_variable_) {
  91. assert(canonical_induction_variable_->type_id() ==
  92. loop_iteration_count_->type_id() &&
  93. "loop_iteration_count and canonical_induction_variable do not "
  94. "have the same type");
  95. }
  96. }
  97. GetIteratingExitValues();
  98. }
  99. // Returns true if the loop can be peeled.
  100. // To be peelable, all operation involved in the update of the loop iterators
  101. // must not dominates the exit condition. This restriction is a work around to
  102. // not miss compile code like:
  103. //
  104. // for (int i = 0; i + 1 < N; i++) {}
  105. // for (int i = 0; ++i < N; i++) {}
  106. //
  107. // The increment will happen before the test on the exit condition leading to
  108. // very look-a-like code.
  109. //
  110. // This restriction will not apply if a loop rotate is applied before (i.e.
  111. // becomes a do-while loop).
  112. bool CanPeelLoop() const {
  113. CFG& cfg = *context_->cfg();
  114. if (!loop_iteration_count_) {
  115. return false;
  116. }
  117. if (!int_type_) {
  118. return false;
  119. }
  120. if (int_type_->width() != 32) {
  121. return false;
  122. }
  123. if (!loop_->IsLCSSA()) {
  124. return false;
  125. }
  126. if (!loop_->GetMergeBlock()) {
  127. return false;
  128. }
  129. if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
  130. return false;
  131. }
  132. if (!IsConditionCheckSideEffectFree()) {
  133. return false;
  134. }
  135. return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
  136. [](std::pair<uint32_t, Instruction*> it) {
  137. return it.second == nullptr;
  138. });
  139. }
  140. // Moves the execution of the |factor| first iterations of the loop into a
  141. // dedicated loop.
  142. void PeelBefore(uint32_t factor);
  143. // Moves the execution of the |factor| last iterations of the loop into a
  144. // dedicated loop.
  145. void PeelAfter(uint32_t factor);
  146. // Returns the cloned loop.
  147. Loop* GetClonedLoop() { return cloned_loop_; }
  148. // Returns the original loop.
  149. Loop* GetOriginalLoop() { return loop_; }
  150. private:
  151. IRContext* context_;
  152. LoopUtils loop_utils_;
  153. // The original loop.
  154. Loop* loop_;
  155. // The initial |loop_| upper bound.
  156. Instruction* loop_iteration_count_;
  157. // The int type to use for the canonical_induction_variable_.
  158. analysis::Integer* int_type_;
  159. // The cloned loop.
  160. Loop* cloned_loop_;
  161. // This is set to true when the exit and back-edge branch instruction is the
  162. // same.
  163. bool do_while_form_;
  164. // The canonical induction variable from the original loop if it exists.
  165. Instruction* original_loop_canonical_induction_variable_;
  166. // The canonical induction variable of the cloned loop. The induction variable
  167. // is initialized to 0 and incremented by step of 1.
  168. Instruction* canonical_induction_variable_;
  169. // Map between loop iterators and exit values. Loop iterators
  170. std::unordered_map<uint32_t, Instruction*> exit_value_;
  171. // Duplicate |loop_| and place the new loop before the cloned loop. Iterating
  172. // values from the cloned loop are then connected to the original loop as
  173. // initializer.
  174. void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
  175. // Insert the canonical induction variable into the first loop as a simplified
  176. // counter.
  177. void InsertCanonicalInductionVariable(
  178. LoopUtils::LoopCloningResult* clone_results);
  179. // Fixes the exit condition of the before loop. The function calls
  180. // |condition_builder| to get the condition to use in the conditional branch
  181. // of the loop exit. The loop will be exited if the condition evaluate to
  182. // true. |condition_builder| takes an Instruction* that represent the
  183. // insertion point.
  184. void FixExitCondition(
  185. const std::function<uint32_t(Instruction*)>& condition_builder);
  186. // Gathers all operations involved in the update of |iterator| into
  187. // |operations|.
  188. void GetIteratorUpdateOperations(
  189. const Loop* loop, Instruction* iterator,
  190. std::unordered_set<Instruction*>* operations);
  191. // Gathers exiting iterator values. The function builds a map between each
  192. // iterating value in the loop (a phi instruction in the loop header) and its
  193. // SSA value when it exit the loop. If no exit value can be accurately found,
  194. // it is map to nullptr (see comment on CanPeelLoop).
  195. void GetIteratingExitValues();
  196. // Returns true if a for-loop has no instruction with effects before the
  197. // condition check.
  198. bool IsConditionCheckSideEffectFree() const;
  199. // Creates a new basic block and insert it between |bb| and the predecessor of
  200. // |bb|.
  201. BasicBlock* CreateBlockBefore(BasicBlock* bb);
  202. // Inserts code to only execute |loop| only if the given |condition| is true.
  203. // |if_merge| is a suitable basic block to be used by the if condition as
  204. // merge block.
  205. // The function returns the if block protecting the loop.
  206. BasicBlock* ProtectLoop(Loop* loop, Instruction* condition,
  207. BasicBlock* if_merge);
  208. };
  209. // Implements a loop peeling optimization.
  210. // For each loop, the pass will try to peel it if there is conditions that
  211. // are true for the "N" first or last iterations of the loop.
  212. // To avoid code size explosion, too large loops will not be peeled.
  213. class LoopPeelingPass : public Pass {
  214. public:
  215. // Describes the peeling direction.
  216. enum class PeelDirection {
  217. kNone, // Cannot peel
  218. kBefore, // Can peel before
  219. kAfter // Can peel last
  220. };
  221. // Holds some statistics about peeled function.
  222. struct LoopPeelingStats {
  223. std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_;
  224. };
  225. LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {}
  226. // Sets the loop peeling growth threshold. If the code size increase is above
  227. // |code_grow_threshold|, the loop will not be peeled. The code size is
  228. // measured in terms of SPIR-V instructions.
  229. static void SetLoopPeelingThreshold(size_t code_grow_threshold) {
  230. code_grow_threshold_ = code_grow_threshold;
  231. }
  232. // Returns the loop peeling code growth threshold.
  233. static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; }
  234. const char* name() const override { return "loop-peeling"; }
  235. // Processes the given |module|. Returns Status::Failure if errors occur when
  236. // processing. Returns the corresponding Status::Success if processing is
  237. // successful to indicate whether changes have been made to the module.
  238. Pass::Status Process() override;
  239. private:
  240. // Describes the peeling direction.
  241. enum class CmpOperator {
  242. kLT, // less than
  243. kGT, // greater than
  244. kLE, // less than or equal
  245. kGE, // greater than or equal
  246. };
  247. class LoopPeelingInfo {
  248. public:
  249. using Direction = std::pair<PeelDirection, uint32_t>;
  250. LoopPeelingInfo(Loop* loop, size_t loop_max_iterations,
  251. ScalarEvolutionAnalysis* scev_analysis)
  252. : context_(loop->GetContext()),
  253. loop_(loop),
  254. scev_analysis_(scev_analysis),
  255. loop_max_iterations_(loop_max_iterations) {}
  256. // Returns by how much and to which direction a loop should be peeled to
  257. // make the conditional branch of the basic block |bb| an unconditional
  258. // branch. If |bb|'s terminator is not a conditional branch or the condition
  259. // is not workable then it returns PeelDirection::kNone and a 0 factor.
  260. Direction GetPeelingInfo(BasicBlock* bb) const;
  261. private:
  262. // Returns the id of the loop invariant operand of the conditional
  263. // expression |condition|. It returns if no operand is invariant.
  264. uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const;
  265. // Returns the id of the non loop invariant operand of the conditional
  266. // expression |condition|. It returns if all operands are invariant.
  267. uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const;
  268. // Returns the value of |rec| at the first loop iteration.
  269. SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const;
  270. // Returns the value of |rec| at the given |iteration|.
  271. SExpression GetValueAtIteration(SERecurrentNode* rec,
  272. int64_t iteration) const;
  273. // Returns the value of |rec| at the last loop iteration.
  274. SExpression GetValueAtLastIteration(SERecurrentNode* rec) const;
  275. bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs,
  276. bool* result) const;
  277. Direction HandleEquality(SExpression lhs, SExpression rhs) const;
  278. Direction HandleInequality(CmpOperator cmp_op, SExpression lhs,
  279. SERecurrentNode* rhs) const;
  280. static Direction GetNoneDirection() {
  281. return Direction{LoopPeelingPass::PeelDirection::kNone, 0};
  282. }
  283. IRContext* context_;
  284. Loop* loop_;
  285. ScalarEvolutionAnalysis* scev_analysis_;
  286. size_t loop_max_iterations_;
  287. };
  288. // Peel profitable loops in |f|.
  289. bool ProcessFunction(Function* f);
  290. // Peel |loop| if profitable.
  291. std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size);
  292. static size_t code_grow_threshold_;
  293. LoopPeelingStats* stats_;
  294. };
  295. } // namespace opt
  296. } // namespace spvtools
  297. #endif // SOURCE_OPT_LOOP_PEELING_H_