
//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Place garbage collection safepoints at appropriate locations in the IR. This
// does not make relocation semantics or variable liveness explicit. That's
// done by RewriteStatepointsForGC.
//
// Terminology:
// - A call is said to be "parseable" if there is a stack map generated for the
// return PC of the call. A runtime can determine where values listed in the
// deopt arguments and (after RewriteStatepointsForGC) gc arguments are located
// on the stack when the code is suspended inside such a call. Every parse
// point is represented by a call wrapped in a gc.statepoint intrinsic.
// - A "poll" is an explicit check in the generated code to determine if the
// runtime needs the generated code to cooperate by calling a helper routine
// and thus suspending its execution at a known state. The call to the helper
// routine will be parseable. The (gc & runtime specific) logic of a poll is
// assumed to be provided in a function of the name "gc.safepoint_poll".
//
// We aim to insert polls such that running code can quickly be brought to a
// well-defined state for inspection by the collector. In the current
// implementation, this is done via the insertion of poll sites at method entry
// and the backedge of most loops. We try to avoid inserting more polls than
// are necessary to ensure a finite period between poll sites. This is not
// because the poll itself is expensive in the generated code; it's not. Polls
// do tend to impact the optimizer itself in negative ways; we'd like to avoid
// perturbing the optimization of the method as much as we can.
//
// We also need to make most call sites parseable. The callee might execute a
// poll (or otherwise be inspected by the GC). If so, the entire stack
// (including the suspended frame of the current method) must be parseable.
//
// This pass will insert:
// - Call parse points ("call safepoints") for any call which may need to
// reach a safepoint during the execution of the callee function.
// - Backedge safepoint polls and entry safepoint polls to ensure that
// executing code reaches a safepoint poll in a finite amount of time.
//
// We do not currently support return statepoints, but adding them would not
// be hard. They are not required for correctness - entry safepoints are an
// alternative - but some GCs may prefer them. Patches welcome.
//
//===----------------------------------------------------------------------===//
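
// For illustration only: a minimal gc.safepoint_poll might look like the
// hypothetical sketch below. The body is entirely runtime specific; the pass
// only requires that a non-empty function of this name and of type void()
// exist in the module. The global and the slow-path callee here are made-up
// names, not part of any real runtime:
//
//   define void @gc.safepoint_poll() {
//   entry:
//     %flag = load i8, i8* @__poll_flag          ; runtime-defined flag
//     %need.poll = icmp ne i8 %flag, 0
//     br i1 %need.poll, label %slow, label %done
//   slow:
//     call void @__enter_safepoint()             ; parseable slow-path call
//     br label %done
//   done:
//     ret void
//   }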
#include "llvm/Pass.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "safepoint-placement"

STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
STATISTIC(NumCallSafepoints, "Number of call safepoints inserted");
STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");

STATISTIC(CallInLoop, "Number of loops w/o safepoints due to calls in loop");
STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");

using namespace llvm;

// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation.
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
                                  cl::init(false));

/// If true, do not place backedge safepoints in counted loops.
static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));

// If true, split the backedge of a loop when placing the safepoint, otherwise
// split the latch block itself. Both are useful to support for
// experimentation, but in practice, it looks like splitting the backedge
// optimizes better.
static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
                                   cl::init(false));

// Print tracing output.
static cl::opt<bool> TraceLSP("spp-trace", cl::Hidden, cl::init(false));
namespace {

/// An analysis pass whose purpose is to identify each of the backedges in
/// the function which require a safepoint poll to be inserted.
struct PlaceBackedgeSafepointsImpl : public FunctionPass {
  static char ID;

  /// The output of the pass - gives a list of each backedge (described by
  /// pointing at the branch) which needs a poll inserted.
  std::vector<TerminatorInst *> PollLocations;

  /// True unless we're running spp-no-calls, in which case we need to disable
  /// the call-dependent placement opts.
  bool CallSafepointsEnabled;

  ScalarEvolution *SE = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI = nullptr;

  PlaceBackedgeSafepointsImpl(bool CallSafepoints = false)
      : FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) {
    initializePlaceBackedgeSafepointsImplPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *);

  void runOnLoopAndSubLoops(Loop *L) {
    // Visit all the subloops.
    for (auto I = L->begin(), E = L->end(); I != E; I++)
      runOnLoopAndSubLoops(*I);
    runOnLoop(L);
  }

  bool runOnFunction(Function &F) override {
    SE = &getAnalysis<ScalarEvolution>();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
      runOnLoopAndSubLoops(*I);
    }
    return false;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolution>();
    AU.addRequired<LoopInfoWrapperPass>();
    // We no longer modify the IR at all in this pass. Thus all
    // analyses are preserved.
    AU.setPreservesAll();
  }
};
}
static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
static cl::opt<bool> NoBackedge("spp-no-backedge", cl::Hidden, cl::init(false));

namespace {
struct PlaceSafepoints : public FunctionPass {
  static char ID; // Pass identification, replacement for typeid

  PlaceSafepoints() : FunctionPass(ID) {
    initializePlaceSafepointsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We modify the graph wholesale (inlining, block insertion, etc). We
    // preserve nothing at the moment. We could potentially preserve the dom
    // tree if that was worth doing.
  }
};
}
// Insert a safepoint poll immediately before the given instruction. Does
// not handle the parsability of state at the runtime call; that's the
// caller's job.
static void
InsertSafepointPoll(Instruction *InsertBefore,
                    std::vector<CallSite> &ParsePointsNeeded /*rval*/);

static bool isGCLeafFunction(const CallSite &CS);

static bool needsStatepoint(const CallSite &CS) {
  if (isGCLeafFunction(CS))
    return false;
  if (CS.isCall()) {
    CallInst *call = cast<CallInst>(CS.getInstruction());
    if (call->isInlineAsm())
      return false;
  }
  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS)) {
    return false;
  }
  return true;
}
static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);

/// Returns true if this loop is known to contain a call safepoint which
/// must unconditionally execute on any iteration of the loop which returns
/// to the loop header via an edge from Pred. Returns a conservatively
/// correct answer; i.e. false is always valid.
static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
                                               BasicBlock *Pred,
                                               DominatorTree &DT) {
  // In general, we're looking for any cut of the graph which ensures
  // there's a call safepoint along every edge between Header and Pred.
  // For the moment, we look only for the 'cuts' that consist of a single call
  // instruction in a block which is dominated by the Header and dominates the
  // loop latch (Pred) block. Somewhat surprisingly, walking the entire chain
  // of such dominating blocks gets substantially more occurrences than just
  // checking the Pred and Header blocks themselves. This may be due to the
  // density of loop exit conditions caused by range and null checks.
  // TODO: structure this as an analysis pass, cache the result for subloops,
  // avoid dom tree recalculations
  assert(DT.dominates(Header, Pred) && "loop latch not dominated by header?");

  BasicBlock *Current = Pred;
  while (true) {
    for (Instruction &I : *Current) {
      if (auto CS = CallSite(&I))
        // Note: Technically, needing a safepoint isn't quite the right
        // condition here. We should instead be checking if the target method
        // has an unconditional poll. In practice, this is only a theoretical
        // concern since we don't have any methods with conditional-only
        // safepoint polls.
        if (needsStatepoint(CS))
          return true;
    }

    if (Current == Header)
      break;
    Current = DT.getNode(Current)->getIDom()->getBlock();
  }

  return false;
}
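
// For intuition, consider the hypothetical loop sketched below (@foo is a
// made-up, non-GC-leaf callee). The call sits in the latch block itself,
// which is trivially dominated by the header and dominates the latch, so
// every trip around the backedge already passes through a parseable call
// and the walk above returns true - no extra backedge poll is needed:
//
//   loop.header:
//     ...
//     br label %loop.latch
//   loop.latch:
//     call void @foo()                               ; unconditional call
//     br i1 %cond, label %loop.header, label %exit   ; backedge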
/// Returns true if this loop is known to terminate in a finite number of
/// iterations. Note that this function may return false for a loop which
/// does actually terminate in a finite constant number of iterations due to
/// conservatism in the analysis.
static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
                                    BasicBlock *Pred) {
  // Only used when SkipCounted is off.
  const unsigned upperTripBound = 8192;

  // A conservative bound on the loop as a whole.
  const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
  if (MaxTrips != SE->getCouldNotCompute()) {
    if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
      return true;
    if (SkipCounted &&
        SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
      return true;
  }

  // If this is a conditional branch to the header with the alternate path
  // being outside the loop, we can ask questions about the execution frequency
  // of the exit block.
  if (L->isLoopExiting(Pred)) {
    // This returns an exact expression only. TODO: We really only need an
    // upper bound here, but SE doesn't expose that.
    const SCEV *MaxExec = SE->getExitCount(L, Pred);
    if (MaxExec != SE->getCouldNotCompute()) {
      if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
        return true;
      if (SkipCounted &&
          SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
        return true;
    }
  }

  return /* not finite */ false;
}
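
// As a concrete (hypothetical) example: for a source loop like
//   for (int i = 0; i < n; i++) { ... }
// with a 32-bit induction variable, ScalarEvolution can usually bound the
// backedge-taken count within 32 bits, so with -spp-counted (the default)
// the loop is treated as finite and no backedge poll is placed in it.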
static void scanOneBB(Instruction *start, Instruction *end,
                      std::vector<CallInst *> &calls,
                      std::set<BasicBlock *> &seen,
                      std::vector<BasicBlock *> &worklist) {
  for (BasicBlock::iterator itr(start);
       itr != start->getParent()->end() && itr != BasicBlock::iterator(end);
       itr++) {
    if (CallInst *CI = dyn_cast<CallInst>(&*itr)) {
      calls.push_back(CI);
    }
    // FIXME: This code does not handle invokes.
    assert(!dyn_cast<InvokeInst>(&*itr) &&
           "support for invokes in poll code needed");
    // Only add the successor blocks if we reach the terminator instruction
    // without encountering end first.
    if (itr->isTerminator()) {
      BasicBlock *BB = itr->getParent();
      for (BasicBlock *Succ : successors(BB)) {
        if (seen.count(Succ) == 0) {
          worklist.push_back(Succ);
          seen.insert(Succ);
        }
      }
    }
  }
}

static void scanInlinedCode(Instruction *start, Instruction *end,
                            std::vector<CallInst *> &calls,
                            std::set<BasicBlock *> &seen) {
  calls.clear();
  std::vector<BasicBlock *> worklist;
  seen.insert(start->getParent());
  scanOneBB(start, end, calls, seen, worklist);
  while (!worklist.empty()) {
    BasicBlock *BB = worklist.back();
    worklist.pop_back();
    scanOneBB(&*BB->begin(), end, calls, seen, worklist);
  }
}
bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
  // Loop through all loop latches (branches controlling backedges). We need
  // to place a safepoint on every backedge (potentially).
  // Note: In common usage, there will be only one edge due to LoopSimplify
  // having run sometime earlier in the pipeline, but this code must be correct
  // w.r.t. loops with multiple backedges.
  BasicBlock *header = L->getHeader();
  SmallVector<BasicBlock *, 16> LoopLatches;
  L->getLoopLatches(LoopLatches);
  for (BasicBlock *pred : LoopLatches) {
    assert(L->contains(pred));

    // Make a policy decision about whether this loop needs a safepoint or
    // not. Note that this is about unburdening the optimizer in loops, not
    // avoiding the runtime cost of the actual safepoint.
    if (!AllBackedges) {
      if (mustBeFiniteCountedLoop(L, SE, pred)) {
        if (TraceLSP)
          errs() << "skipping safepoint placement in finite loop\n";
        FiniteExecution++;
        continue;
      }
      if (CallSafepointsEnabled &&
          containsUnconditionalCallSafepoint(L, header, pred, *DT)) {
        // Note: This is only semantically legal since we won't do any further
        // IPO or inlining before the actual call insertion. If we did, we
        // might later lose this call safepoint.
        if (TraceLSP)
          errs() << "skipping safepoint placement due to unconditional call\n";
        CallInLoop++;
        continue;
      }
    }

    // TODO: We can create an inner loop which runs a finite number of
    // iterations with an outer loop which contains a safepoint. This would
    // not help runtime performance that much, but it might help our ability to
    // optimize the inner loop.

    // Safepoint insertion would involve creating a new basic block (as the
    // target of the current backedge) which does the safepoint (of all live
    // variables) and branches to the true header.
    TerminatorInst *term = pred->getTerminator();

    if (TraceLSP) {
      errs() << "[LSP] terminator instruction: ";
      term->dump();
    }

    PollLocations.push_back(term);
  }

  return false;
}
/// Returns true if an entry safepoint is not required before this callsite in
/// the caller function.
static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
  Instruction *Inst = CS.getInstruction();
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_gc_statepoint:
    case Intrinsic::experimental_patchpoint_void:
    case Intrinsic::experimental_patchpoint_i64:
      // These can wrap an actual call which may grow the stack by an unbounded
      // amount or run forever.
      return false;
    default:
      // Most LLVM intrinsics are things which do not expand to actual calls, or
      // at least if they do, are leaf functions that cause only finite stack
      // growth. In particular, the optimizer likes to form things like memsets
      // out of stores in the original IR. Another important example is
      // llvm.localescape which must occur in the entry block. Inserting a
      // safepoint before it is not legal since it could push the localescape
      // out of the entry block.
      return true;
    }
  }
  return false;
}
static Instruction *findLocationForEntrySafepoint(Function &F,
                                                  DominatorTree &DT) {

  // Conceptually, this poll needs to be on method entry, but in
  // practice, we place it as late in the entry block as possible. We
  // can place it as late as we want as long as it dominates all calls
  // that can grow the stack. This, combined with backedge polls,
  // gives us all the progress guarantees we need.

  // hasNextInstruction and nextInstruction are used to iterate
  // through a "straight line" execution sequence.
  auto hasNextInstruction = [](Instruction *I) {
    if (!I->isTerminator()) {
      return true;
    }
    BasicBlock *nextBB = I->getParent()->getUniqueSuccessor();
    return nextBB && (nextBB->getUniquePredecessor() != nullptr);
  };

  auto nextInstruction = [&hasNextInstruction](Instruction *I) {
    assert(hasNextInstruction(I) &&
           "first check if there is a next instruction!");
    (void)hasNextInstruction; // HLSL Change - unused var
    if (I->isTerminator()) {
      return I->getParent()->getUniqueSuccessor()->begin();
    } else {
      return std::next(BasicBlock::iterator(I));
    }
  };

  Instruction *cursor = nullptr;
  for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
       cursor = nextInstruction(cursor)) {

    // We need to ensure a safepoint poll occurs before any 'real' call. The
    // easiest way to ensure finite execution between safepoints in the face of
    // recursive and mutually recursive functions is to enforce that each take
    // a safepoint. Additionally, we need to ensure a poll before any call
    // which can grow the stack by an unbounded amount. This isn't required
    // for GC semantics per se, but is a common requirement for languages
    // which detect stack overflow via guard pages and then throw exceptions.
    if (auto CS = CallSite(cursor)) {
      if (doesNotRequireEntrySafepointBefore(CS))
        continue;
      break;
    }
  }

  assert((hasNextInstruction(cursor) || cursor->isTerminator()) &&
         "either we stopped because of a call, or because of terminator");

  return cursor;
}
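
// To make the "as late as possible" placement concrete, consider the
// hypothetical entry block sketched below (@bar is a made-up callee). The
// cursor walks past the alloca and the add (neither can grow the stack) and
// stops at the first real call, so the poll ends up immediately before it:
//
//   entry:
//     %slot = alloca i32
//     %sum = add i32 %a, %b
//     ;; <- entry safepoint poll is inserted here
//     call void @bar(i32 %sum)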
/// Identify the list of call sites which need to have parseable state.
static void findCallSafepoints(Function &F,
                               std::vector<CallSite> &Found /*rval*/) {
  assert(Found.empty() && "must be empty!");
  for (Instruction &I : inst_range(F)) {
    Instruction *inst = &I;
    if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
      CallSite CS(inst);

      // No safepoint needed or wanted.
      if (!needsStatepoint(CS)) {
        continue;
      }

      Found.push_back(CS);
    }
  }
}
/// Implement a unique function which doesn't require we sort the input
/// vector. Doing so has the effect of changing the output of a couple of
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(std::vector<T> &vec) {
  std::set<T> seen;
  std::vector<T> tmp;
  vec.reserve(vec.size());
  std::swap(tmp, vec);
  for (auto V : tmp) {
    if (seen.insert(V).second) {
      vec.push_back(V);
    }
  }
}
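
// For example (illustrative): given the input {3, 1, 3, 2, 1},
// unique_unsorted leaves {3, 1, 2} - the first occurrence of each value is
// kept and the original relative order is preserved, which is exactly what a
// sort + std::unique would destroy.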
static const char *const GCSafepointPollName = "gc.safepoint_poll";

static bool isGCSafepointPoll(Function &F) {
  return F.getName().equals(GCSafepointPollName);
}
/// Returns true if this function should be rewritten to include safepoint
/// polls and parseable call sites. The main point of this function is to be
/// an extension point for custom logic.
static bool shouldRewriteFunction(Function &F) {
  // TODO: This should check the GCStrategy.
  if (F.hasGC()) {
    const char *FunctionGCName = F.getGC();
    const StringRef StatepointExampleName("statepoint-example");
    const StringRef CoreCLRName("coreclr");
    return (StatepointExampleName == FunctionGCName) ||
           (CoreCLRName == FunctionGCName);
  } else
    return false;
}

// TODO: These should become properties of the GCStrategy, possibly with
// command line overrides.
static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
static bool enableCallSafepoints(Function &F) { return !NoCall; }

// Normalize the basic block to make it ready to be the target of an invoke
// statepoint. Ensure that 'BB' does not have phi nodes. This may require
// splitting it.
static BasicBlock *normalizeForInvokeSafepoint(BasicBlock *BB,
                                               BasicBlock *InvokeParent) {
  BasicBlock *ret = BB;
  if (!BB->getUniquePredecessor()) {
    ret = SplitBlockPredecessors(BB, InvokeParent, "");
  }

  // Now that 'ret' has a unique predecessor we can safely remove all phi
  // nodes from it.
  FoldSingleEntryPHINodes(ret);
  assert(!isa<PHINode>(ret->begin()));

  return ret;
}
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do. Must exit early to avoid a crash
    // in dom tree calculation.
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense. Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs. When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return non-sensical results. Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations. We do not need to
  // actually insert parse points yet. That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic. We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately. Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    PlaceBackedgeSafepointsImpl *PBS =
        new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert.
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of the list to be stable so that naming ends up stable
    // when we split edges. This makes test cases much easier to write.
    std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations. This happens if
    // a single loop is visited more than once. The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified.
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified.
      modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block. This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it. It's possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers. We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once. Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        SetVector<BasicBlock *> SplitBackedges;
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    Instruction *Location = findLocationForEntrySafepoint(F, DT);
    if (!Location) {
      // policy choice not to insert?
    } else {
      PollsNeeded.push_back(Location);
      modified = true;
      NumEntrySafepoints++;
    }
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // the safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }
  PollsNeeded.clear(); // make sure we don't accidentally use
  // The dominator tree has been invalidated by the inlining performed in the
  // above loop. TODO: Teach the inliner how to update the dom tree?
  DT.recalculate(F);

  if (enableCallSafepoints(F)) {
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge. The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here.

  // We're about to start modifying the function.
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses. We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];

    // For invoke statepoints we need to remove all phi nodes at the normal
    // destination block. The reason for this is that we can place the
    // gc_result only after the last phi node in the basic block. We would get
    // malformed code after RAUW for the gc_result if one of these phi nodes
    // uses the result from the invoke.
    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
      normalizeForInvokeSafepoint(Invoke->getNormalDest(),
                                  Invoke->getParent());
    }

    Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead.
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // Cannot RAUW for the invoke gc result if phi nodes are present.
      assert(CS.isCall() ||
             !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));

      // Replace all uses with the new call.
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself.
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }

  return modified;
}
char PlaceBackedgeSafepointsImpl::ID = 0;
char PlaceSafepoints::ID = 0;

FunctionPass *llvm::createPlaceSafepointsPass() {
  return new PlaceSafepoints();
}

INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
                      "place-backedge-safepoints-impl",
                      "Place Backedge Safepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
                    "place-backedge-safepoints-impl",
                    "Place Backedge Safepoints", false, false)

INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
                      false, false)
INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
                    false, false)
static bool isGCLeafFunction(const CallSite &CS) {
  Instruction *inst = CS.getInstruction();
  if (isa<IntrinsicInst>(inst)) {
    // Most LLVM intrinsics are things which can never take a safepoint.
    // As a result, we don't need to have the stack parsable at the
    // callsite. This is a highly useful optimization since intrinsic
    // calls are fairly prevalent, particularly in debug builds.
    return true;
  }

  // If this function is marked explicitly as a leaf call, we don't need to
  // place a safepoint for it. In fact, for correctness we *can't* in many
  // cases. Note: Indirect calls return null for the called function;
  // these obviously aren't runtime functions with attributes.
  // TODO: Support attributes on the call site as well.
  const Function *F = CS.getCalledFunction();
  bool isLeaf =
      F &&
      F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
  if (isLeaf) {
    return true;
  }
  return false;
}
static void
InsertSafepointPoll(Instruction *InsertBefore,
                    std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
  BasicBlock *OrigBB = InsertBefore->getParent();
  Module *M = InsertBefore->getModule();
  assert(M && "must be part of a module");

  // Inline the safepoint poll implementation - this will get all the branch,
  // control flow, etc.. Most importantly, it will introduce the actual slow
  // path call - where we need to insert a safepoint (parsepoint).
  auto *F = M->getFunction(GCSafepointPollName);
  assert(F->getType()->getElementType() ==
             FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
         "gc.safepoint_poll declared with wrong type");
  assert(!F->empty() && "gc.safepoint_poll must be a non-empty function");
  CallInst *PollCall = CallInst::Create(F, "", InsertBefore);

  // Record some information about the call site we're replacing.
  BasicBlock::iterator before(PollCall), after(PollCall);
  bool isBegin(false);
  if (before == OrigBB->begin()) {
    isBegin = true;
  } else {
    before--;
  }

  after++;
  assert(after != OrigBB->end() && "must have successor");

  // Do the actual inlining.
  InlineFunctionInfo IFI;
  bool InlineStatus = InlineFunction(PollCall, IFI);
  assert(InlineStatus && "inline must succeed");
  (void)InlineStatus; // suppress warning in release-asserts

  // Check post-conditions.
  assert(IFI.StaticAllocas.empty() && "can't have allocs");

  std::vector<CallInst *> calls; // new calls
  std::set<BasicBlock *> BBs;    // new BBs + insertee

  // Include only the newly inserted instructions. Note: begin may not be
  // valid if we inserted at the beginning of the basic block.
  BasicBlock::iterator start;
  if (isBegin) {
    start = OrigBB->begin();
  } else {
    start = before;
    start++;
  }

  // If your poll function includes an unreachable at the end, that's not
  // valid. Bugpoint likes to create this, so check for it.
  assert(isPotentiallyReachable(&*start, &*after, nullptr, nullptr) &&
         "malformed poll function");

  scanInlinedCode(&*(start), &*(after), calls, BBs);
  assert(!calls.empty() && "slow path not found for safepoint poll");

  // Record the fact we need a parsable state at the runtime call contained in
  // the poll function. This is required so that the runtime knows how to
  // parse the last frame when we actually take the safepoint (i.e. execute
  // the slow path).
  assert(ParsePointsNeeded.empty());
  for (size_t i = 0; i < calls.size(); i++) {

    // No safepoint needed or wanted.
    if (!needsStatepoint(calls[i])) {
      continue;
    }

    // These are likely runtime calls. Should we assert that via calling
    // convention or something?
    ParsePointsNeeded.push_back(CallSite(calls[i]));
  }
  assert(ParsePointsNeeded.size() <= calls.size());
}
/// Replaces the given call site (Call or Invoke) with a gc.statepoint
/// intrinsic with an empty deoptimization arguments list. This does
/// NOT do explicit relocation for GC support.
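///
/// Roughly speaking (a sketch with hypothetical names and a simplified,
/// abbreviated intrinsic signature - see the statepoint documentation for
/// the exact form), a call such as:
///
///   %ret = call i32 @foo(i32 %arg)
///
/// becomes a statepoint wrapping the same target, plus a gc.result that
/// recovers the return value for the original users:
///
///   %token = call @llvm.experimental.gc.statepoint(
///                <ID>, <num patch bytes>, i32 (i32)* @foo,
///                i32 1 /* # call args */, i32 0 /* flags */, i32 %arg,
///                i32 0 /* # transition args */, i32 0 /* # deopt args */)
///   %ret = call i32 @llvm.experimental.gc.result(%token)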
static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
                                    Pass *P) {
  assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
         "must be set");

  // TODO: technically, a pass is not allowed to get functions from within a
  // function pass since it might trigger a new function addition. Refactor
  // this logic out to the initialization of the pass. Doesn't appear to
  // matter in practice.

  // Then go ahead and use the builder to actually do the inserts. We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here. We can't insert afterwards since we may
  // be replacing a terminator.
  IRBuilder<> Builder(CS.getInstruction());

  // Note: The gc args are not filled in at this time, that's handled by
  // RewriteStatepointsForGC (which is currently under review).

  // Create the statepoint given all the arguments.
  Instruction *Token = nullptr;

  uint64_t ID;
  uint32_t NumPatchBytes;

  AttributeSet OriginalAttrs = CS.getAttributes();
  Attribute AttrID =
      OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
  Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
      AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");

  AttrBuilder AttrsToRemove;
  bool HasID = AttrID.isStringAttribute() &&
               !AttrID.getValueAsString().getAsInteger(10, ID);

  if (HasID)
    AttrsToRemove.addAttribute("statepoint-id");
  else
    ID = 0xABCDEF00;

  bool HasNumPatchBytes =
      AttrNumPatchBytes.isStringAttribute() &&
      !AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);

  if (HasNumPatchBytes)
    AttrsToRemove.addAttribute("statepoint-num-patch-bytes");
  else
    NumPatchBytes = 0;

  OriginalAttrs = OriginalAttrs.removeAttributes(
      CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
      AttrsToRemove);

  Value *StatepointTarget = NumPatchBytes == 0
                                ? CS.getCalledValue()
                                : ConstantPointerNull::get(cast<PointerType>(
                                      CS.getCalledValue()->getType()));

  if (CS.isCall()) {
    CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
    CallInst *Call = Builder.CreateGCStatepointCall(
        ID, NumPatchBytes, StatepointTarget,
        makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
        "safepoint_token");
    Call->setTailCall(ToReplace->isTailCall());
    Call->setCallingConv(ToReplace->getCallingConv());

    // If we can handle this set of attributes, set the function attributes
    // directly on the statepoint and the return attributes later on the
    // gc_result intrinsic.
    Call->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Call;

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    assert(ToReplace->getNextNode() && "not a terminator, must have next");
    Builder.SetInsertPoint(ToReplace->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
  } else if (CS.isInvoke()) {
    InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block. We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    Builder.SetInsertPoint(ToReplace->getParent());
    InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
        ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
        None, None, "safepoint_token");

    Invoke->setCallingConv(ToReplace->getCallingConv());

    // If we can handle this set of attributes, set the function attributes
    // directly on the statepoint and the return attributes later on the
    // gc_result intrinsic.
    Invoke->setAttributes(OriginalAttrs.getFnAttributes());

    Token = Invoke;

    // We'll insert the gc.result into the normal block.
    BasicBlock *NormalDest = ToReplace->getNormalDest();
    // Cannot insert a gc.result if phi nodes are present. These cases should
    // have been removed prior to running this function.
    assert(!isa<PHINode>(NormalDest->begin()));
    Instruction *IP = &*(NormalDest->getFirstInsertionPt());
    Builder.SetInsertPoint(IP);
  } else {
    llvm_unreachable("unexpected type of CallSite");
  }
  assert(Token);

  // Handle the return value of the original call - update all uses to use a
  // gc_result hanging off the statepoint node we just inserted.

  // Only add the gc_result iff there is actually a used result.
  if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
    std::string TakenName =
        CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
    CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), TakenName);
    GCResult->setAttributes(OriginalAttrs.getRetAttributes());
    return GCResult;
  } else {
    // No return value for the call.
    return nullptr;
  }
}