  1. //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This implements the ScheduleDAGInstrs class, which implements re-scheduling
  11. // of MachineInstrs.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  15. #include "llvm/ADT/MapVector.h"
  16. #include "llvm/ADT/SmallPtrSet.h"
  17. #include "llvm/ADT/SmallSet.h"
  18. #include "llvm/Analysis/AliasAnalysis.h"
  19. #include "llvm/Analysis/ValueTracking.h"
  20. #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  21. #include "llvm/CodeGen/MachineFunctionPass.h"
  22. #include "llvm/CodeGen/MachineFrameInfo.h"
  23. #include "llvm/CodeGen/MachineInstrBuilder.h"
  24. #include "llvm/CodeGen/MachineMemOperand.h"
  25. #include "llvm/CodeGen/MachineRegisterInfo.h"
  26. #include "llvm/CodeGen/PseudoSourceValue.h"
  27. #include "llvm/CodeGen/RegisterPressure.h"
  28. #include "llvm/CodeGen/ScheduleDFS.h"
  29. #include "llvm/IR/Operator.h"
  30. #include "llvm/Support/CommandLine.h"
  31. #include "llvm/Support/Debug.h"
  32. #include "llvm/Support/Format.h"
  33. #include "llvm/Support/raw_ostream.h"
  34. #include "llvm/Target/TargetInstrInfo.h"
  35. #include "llvm/Target/TargetMachine.h"
  36. #include "llvm/Target/TargetRegisterInfo.h"
  37. #include "llvm/Target/TargetSubtargetInfo.h"
  38. #include <queue>
  39. using namespace llvm;
  40. #define DEBUG_TYPE "misched"
  41. static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
  42. cl::ZeroOrMore, cl::init(false),
  43. cl::desc("Enable use of AA during MI DAG construction"));
  44. static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
  45. cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
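// Usage note (illustrative, not from this file): because these are cl::opt
// flags, they can be toggled on the command line of any tool that builds the
// MI scheduling DAG, e.g. something like `llc -enable-aa-sched-mi` to force
// AA-based DAG construction, or `-use-tbaa-in-sched-mi=false` to disable
// TBAA here.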
  46. ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
  47. const MachineLoopInfo *mli,
  48. bool IsPostRAFlag, bool RemoveKillFlags,
  49. LiveIntervals *lis)
  50. : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
  51. IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
  52. CanHandleTerminators(false), FirstDbgValue(nullptr) {
  53. assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  54. DbgValues.clear();
  55. assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
  56. "Virtual registers must be removed prior to PostRA scheduling");
  57. const TargetSubtargetInfo &ST = mf.getSubtarget();
  58. SchedModel.init(ST.getSchedModel(), &ST, TII);
  59. }
  60. /// getUnderlyingObjectFromInt - This is the function that does the work of
  61. /// looking through basic ptrtoint+arithmetic+inttoptr sequences.
  62. static const Value *getUnderlyingObjectFromInt(const Value *V) {
  63. do {
  64. if (const Operator *U = dyn_cast<Operator>(V)) {
  65. // If we find a ptrtoint, we can transfer control back to the
  66. // regular getUnderlyingObjectFromInt.
  67. if (U->getOpcode() == Instruction::PtrToInt)
  68. return U->getOperand(0);
  69. // If we find an add of a constant, a multiplied value, or a phi, it's
  70. // likely that the other operand will lead us to the base
  71. // object. We don't have to worry about the case where the
  72. // object address is somehow being computed by the multiply,
  73. // because our callers only care when the result is an
  74. // identifiable object.
  75. if (U->getOpcode() != Instruction::Add ||
  76. (!isa<ConstantInt>(U->getOperand(1)) &&
  77. Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
  78. !isa<PHINode>(U->getOperand(1))))
  79. return V;
  80. V = U->getOperand(0);
  81. } else {
  82. return V;
  83. }
  84. assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  85. } while (1);
  86. }
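// Illustrative IR shape that getUnderlyingObjectFromInt (together with the
// getUnderlyingObjects wrapper below) can look through -- a sketch, not taken
// from a real test case:
//   %i = ptrtoint i8* %base to i64
//   %j = add i64 %i, 16
//   %p = inttoptr i64 %j to i8*
// Starting from %p, the walk strips the inttoptr, steps over the add of a
// constant (the same applies to a multiplied value or a phi, per the check
// above), follows the ptrtoint, and reports %base as the underlying object.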
  87. /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
  88. /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
  89. static void getUnderlyingObjects(const Value *V,
  90. SmallVectorImpl<Value *> &Objects,
  91. const DataLayout &DL) {
  92. SmallPtrSet<const Value *, 16> Visited;
  93. SmallVector<const Value *, 4> Working(1, V);
  94. do {
  95. V = Working.pop_back_val();
  96. SmallVector<Value *, 4> Objs;
  97. GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
  98. for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
  99. I != IE; ++I) {
  100. V = *I;
  101. if (!Visited.insert(V).second)
  102. continue;
  103. if (Operator::getOpcode(V) == Instruction::IntToPtr) {
  104. const Value *O =
  105. getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
  106. if (O->getType()->isPointerTy()) {
  107. Working.push_back(O);
  108. continue;
  109. }
  110. }
  111. Objects.push_back(const_cast<Value *>(V));
  112. }
  113. } while (!Working.empty());
  114. }
  115. typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
  116. typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
  117. UnderlyingObjectsVector;
  118. /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
  119. /// information and it can be tracked to a normal reference to a known
  120. /// object, return the Value for that object.
  121. static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
  122. const MachineFrameInfo *MFI,
  123. UnderlyingObjectsVector &Objects,
  124. const DataLayout &DL) {
  125. if (!MI->hasOneMemOperand() ||
  126. (!(*MI->memoperands_begin())->getValue() &&
  127. !(*MI->memoperands_begin())->getPseudoValue()) ||
  128. (*MI->memoperands_begin())->isVolatile())
  129. return;
  130. if (const PseudoSourceValue *PSV =
  131. (*MI->memoperands_begin())->getPseudoValue()) {
132. // Functions that contain tail calls don't have unique PseudoSourceValue
  133. // objects. Two PseudoSourceValues might refer to the same or overlapping
  134. // locations. The client code calling this function assumes this is not the
  135. // case. So return a conservative answer of no known object.
  136. if (MFI->hasTailCall())
  137. return;
  138. // For now, ignore PseudoSourceValues which may alias LLVM IR values
  139. // because the code that uses this function has no way to cope with
  140. // such aliases.
  141. if (!PSV->isAliased(MFI)) {
  142. bool MayAlias = PSV->mayAlias(MFI);
  143. Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
  144. }
  145. return;
  146. }
  147. const Value *V = (*MI->memoperands_begin())->getValue();
  148. if (!V)
  149. return;
  150. SmallVector<Value *, 4> Objs;
  151. getUnderlyingObjects(V, Objs, DL);
  152. for (Value *V : Objs) {
  153. if (!isIdentifiedObject(V)) {
  154. Objects.clear();
  155. return;
  156. }
  157. Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
  158. }
  159. }
  160. void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  161. BB = bb;
  162. }
  163. void ScheduleDAGInstrs::finishBlock() {
  164. // Subclasses should no longer refer to the old block.
  165. BB = nullptr;
  166. }
  167. /// Initialize the DAG and common scheduler state for the current scheduling
  168. /// region. This does not actually create the DAG, only clears it. The
  169. /// scheduling driver may call BuildSchedGraph multiple times per scheduling
  170. /// region.
  171. void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
  172. MachineBasicBlock::iterator begin,
  173. MachineBasicBlock::iterator end,
  174. unsigned regioninstrs) {
  175. assert(bb == BB && "startBlock should set BB");
  176. RegionBegin = begin;
  177. RegionEnd = end;
  178. NumRegionInstrs = regioninstrs;
  179. }
  180. /// Close the current scheduling region. Don't clear any state in case the
  181. /// driver wants to refer to the previous scheduling region.
  182. void ScheduleDAGInstrs::exitRegion() {
  183. // Nothing to do.
  184. }
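// Driver protocol sketch (assumed from this class's interface, not code from
// this file): a scheduling pass typically brackets its work as
//   DAG->startBlock(MBB);
//   DAG->enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
//   DAG->buildSchedGraph(AA);   // may be called more than once per region
//   ... schedule and emit the region ...
//   DAG->exitRegion();
//   DAG->finishBlock();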
185. /// addSchedBarrierDeps - Add dependencies from instructions in the current
186. /// list of instructions being scheduled to the scheduling barrier by adding
187. /// the exit SU to the register defs and use list. This is because we want to
188. /// make sure instructions which define registers that are either used by
189. /// the terminator or are live-out are properly scheduled. This is
190. /// especially important when the definition latency of the return value(s)
191. /// is too high to be hidden by the branch or when the liveout registers
192. /// are used by instructions in the fallthrough block.
  193. void ScheduleDAGInstrs::addSchedBarrierDeps() {
  194. MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  195. ExitSU.setInstr(ExitMI);
  196. bool AllDepKnown = ExitMI &&
  197. (ExitMI->isCall() || ExitMI->isBarrier());
  198. if (ExitMI && AllDepKnown) {
199. // If it's a call or a barrier, add dependencies on the defs and uses of
200. // the instruction.
  201. for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
  202. const MachineOperand &MO = ExitMI->getOperand(i);
  203. if (!MO.isReg() || MO.isDef()) continue;
  204. unsigned Reg = MO.getReg();
  205. if (Reg == 0) continue;
  206. if (TRI->isPhysicalRegister(Reg))
  207. Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
  208. else {
  209. assert(!IsPostRA && "Virtual register encountered after regalloc.");
  210. if (MO.readsReg()) // ignore undef operands
  211. addVRegUseDeps(&ExitSU, i);
  212. }
  213. }
  214. } else {
  215. // For others, e.g. fallthrough, conditional branch, assume the exit
  216. // uses all the registers that are livein to the successor blocks.
  217. assert(Uses.empty() && "Uses in set before adding deps?");
  218. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
  219. SE = BB->succ_end(); SI != SE; ++SI)
  220. for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
  221. E = (*SI)->livein_end(); I != E; ++I) {
  222. unsigned Reg = *I;
  223. if (!Uses.contains(Reg))
  224. Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
  225. }
  226. }
  227. }
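// Note: ExitSU is a nominal exit node rather than a real instruction; the
// Uses entries added above (with OpIdx -1) simply ensure that in-region defs
// of these registers later receive a dependence edge to the end of the
// region (see the UseOp < 0 case in addPhysRegDataDeps below).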
  228. /// MO is an operand of SU's instruction that defines a physical register. Add
  229. /// data dependencies from SU to any uses of the physical register.
  230. void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  231. const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  232. assert(MO.isDef() && "expect physreg def");
  233. // Ask the target if address-backscheduling is desirable, and if so how much.
  234. const TargetSubtargetInfo &ST = MF.getSubtarget();
  235. for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
  236. Alias.isValid(); ++Alias) {
  237. if (!Uses.contains(*Alias))
  238. continue;
  239. for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
  240. SUnit *UseSU = I->SU;
  241. if (UseSU == SU)
  242. continue;
  243. // Adjust the dependence latency using operand def/use information,
  244. // then allow the target to perform its own adjustments.
  245. int UseOp = I->OpIdx;
  246. MachineInstr *RegUse = nullptr;
  247. SDep Dep;
  248. if (UseOp < 0)
  249. Dep = SDep(SU, SDep::Artificial);
  250. else {
  251. // Set the hasPhysRegDefs only for physreg defs that have a use within
  252. // the scheduling region.
  253. SU->hasPhysRegDefs = true;
  254. Dep = SDep(SU, SDep::Data, *Alias);
  255. RegUse = UseSU->getInstr();
  256. }
  257. Dep.setLatency(
  258. SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
  259. UseOp));
  260. ST.adjustSchedDependency(SU, UseSU, Dep);
  261. UseSU->addPred(Dep);
  262. }
  263. }
  264. }
  265. /// addPhysRegDeps - Add register dependencies (data, anti, and output) from
  266. /// this SUnit to following instructions in the same scheduling region that
267. /// depend on the physical register referenced at OperIdx.
  268. void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  269. MachineInstr *MI = SU->getInstr();
  270. MachineOperand &MO = MI->getOperand(OperIdx);
  271. // Optionally add output and anti dependencies. For anti
  272. // dependencies we use a latency of 0 because for a multi-issue
  273. // target we want to allow the defining instruction to issue
  274. // in the same cycle as the using instruction.
  275. // TODO: Using a latency of 1 here for output dependencies assumes
  276. // there's no cost for reusing registers.
  277. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  278. for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
  279. Alias.isValid(); ++Alias) {
  280. if (!Defs.contains(*Alias))
  281. continue;
  282. for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
  283. SUnit *DefSU = I->SU;
  284. if (DefSU == &ExitSU)
  285. continue;
  286. if (DefSU != SU &&
  287. (Kind != SDep::Output || !MO.isDead() ||
  288. !DefSU->getInstr()->registerDefIsDead(*Alias))) {
  289. if (Kind == SDep::Anti)
  290. DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
  291. else {
  292. SDep Dep(SU, Kind, /*Reg=*/*Alias);
  293. Dep.setLatency(
  294. SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
  295. DefSU->addPred(Dep);
  296. }
  297. }
  298. }
  299. }
  300. if (!MO.isDef()) {
  301. SU->hasPhysRegUses = true;
  302. // Either insert a new Reg2SUnits entry with an empty SUnits list, or
  303. // retrieve the existing SUnits list for this register's uses.
  304. // Push this SUnit on the use list.
  305. Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
  306. if (RemoveKillFlags)
  307. MO.setIsKill(false);
  308. }
  309. else {
  310. addPhysRegDataDeps(SU, OperIdx);
  311. unsigned Reg = MO.getReg();
  312. // clear this register's use list
  313. if (Uses.contains(Reg))
  314. Uses.eraseAll(Reg);
  315. if (!MO.isDead()) {
  316. Defs.eraseAll(Reg);
  317. } else if (SU->isCall) {
  318. // Calls will not be reordered because of chain dependencies (see
  319. // below). Since call operands are dead, calls may continue to be added
  320. // to the DefList making dependence checking quadratic in the size of
  321. // the block. Instead, we leave only one call at the back of the
  322. // DefList.
  323. Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
  324. Reg2SUnitsMap::iterator B = P.first;
  325. Reg2SUnitsMap::iterator I = P.second;
  326. for (bool isBegin = I == B; !isBegin; /* empty */) {
  327. isBegin = (--I) == B;
  328. if (!I->SU->isCall)
  329. break;
  330. I = Defs.erase(I);
  331. }
  332. }
  333. // Defs are pushed in the order they are visited and never reordered.
  334. Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  335. }
  336. }
  337. /// addVRegDefDeps - Add register output and data dependencies from this SUnit
  338. /// to instructions that occur later in the same scheduling region if they read
  339. /// from or write to the virtual register defined at OperIdx.
  340. ///
  341. /// TODO: Hoist loop induction variable increments. This has to be
  342. /// reevaluated. Generally, IV scheduling should be done before coalescing.
  343. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  344. const MachineInstr *MI = SU->getInstr();
  345. unsigned Reg = MI->getOperand(OperIdx).getReg();
  346. // Singly defined vregs do not have output/anti dependencies.
  347. // The current operand is a def, so we have at least one.
  348. // Check here if there are any others...
  349. if (MRI.hasOneDef(Reg))
  350. return;
  351. // Add output dependence to the next nearest def of this vreg.
  352. //
  353. // Unless this definition is dead, the output dependence should be
  354. // transitively redundant with antidependencies from this definition's
  355. // uses. We're conservative for now until we have a way to guarantee the uses
  356. // are not eliminated sometime during scheduling. The output dependence edge
  357. // is also useful if output latency exceeds def-use latency.
  358. VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  359. if (DefI == VRegDefs.end())
  360. VRegDefs.insert(VReg2SUnit(Reg, SU));
  361. else {
  362. SUnit *DefSU = DefI->SU;
  363. if (DefSU != SU && DefSU != &ExitSU) {
  364. SDep Dep(SU, SDep::Output, Reg);
  365. Dep.setLatency(
  366. SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
  367. DefSU->addPred(Dep);
  368. }
  369. DefI->SU = SU;
  370. }
  371. }
  372. /// addVRegUseDeps - Add a register data dependency if the instruction that
  373. /// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
  374. /// register antidependency from this SUnit to instructions that occur later in
  375. /// the same scheduling region if they write the virtual register.
  376. ///
  377. /// TODO: Handle ExitSU "uses" properly.
  378. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  379. MachineInstr *MI = SU->getInstr();
  380. unsigned Reg = MI->getOperand(OperIdx).getReg();
  381. // Record this local VReg use.
  382. VReg2UseMap::iterator UI = VRegUses.find(Reg);
  383. for (; UI != VRegUses.end(); ++UI) {
  384. if (UI->SU == SU)
  385. break;
  386. }
  387. if (UI == VRegUses.end())
  388. VRegUses.insert(VReg2SUnit(Reg, SU));
  389. // Lookup this operand's reaching definition.
  390. assert(LIS && "vreg dependencies requires LiveIntervals");
  391. LiveQueryResult LRQ
  392. = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
  393. VNInfo *VNI = LRQ.valueIn();
  394. // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  395. assert(VNI && "No value to read by operand");
  396. MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  397. // Phis and other noninstructions (after coalescing) have a NULL Def.
  398. if (Def) {
  399. SUnit *DefSU = getSUnit(Def);
  400. if (DefSU) {
  401. // The reaching Def lives within this scheduling region.
  402. // Create a data dependence.
  403. SDep dep(DefSU, SDep::Data, Reg);
  404. // Adjust the dependence latency using operand def/use information, then
  405. // allow the target to perform its own adjustments.
  406. int DefOp = Def->findRegisterDefOperandIdx(Reg);
  407. dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
  408. const TargetSubtargetInfo &ST = MF.getSubtarget();
  409. ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
  410. SU->addPred(dep);
  411. }
  412. }
  413. // Add antidependence to the following def of the vreg it uses.
  414. VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  415. if (DefI != VRegDefs.end() && DefI->SU != SU)
  416. DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
  417. }
  418. /// Return true if MI is an instruction we are unable to reason about
  419. /// (like a call or something with unmodeled side effects).
  420. static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  421. if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
  422. (MI->hasOrderedMemoryRef() &&
  423. (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
  424. return true;
  425. return false;
  426. }
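// Instructions for which this returns true become BarrierChain pivots in
// buildSchedGraph below: other memory accesses are ordered relative to them
// rather than being analyzed individually.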
427. // This MI might have either incomplete info, or be known to be unsafe
428. // to deal with (i.e. a volatile object).
  429. static inline bool isUnsafeMemoryObject(MachineInstr *MI,
  430. const MachineFrameInfo *MFI,
  431. const DataLayout &DL) {
  432. if (!MI || MI->memoperands_empty())
  433. return true;
434. // We purposefully do not check for hasOneMemOperand() here
435. // in the hope of triggering an assert downstream in order to
436. // finish the implementation.
  437. if ((*MI->memoperands_begin())->isVolatile() ||
  438. MI->hasUnmodeledSideEffects())
  439. return true;
  440. if ((*MI->memoperands_begin())->getPseudoValue()) {
  441. // Similarly to getUnderlyingObjectForInstr:
  442. // For now, ignore PseudoSourceValues which may alias LLVM IR values
  443. // because the code that uses this function has no way to cope with
  444. // such aliases.
  445. return true;
  446. }
  447. const Value *V = (*MI->memoperands_begin())->getValue();
  448. if (!V)
  449. return true;
  450. SmallVector<Value *, 4> Objs;
  451. getUnderlyingObjects(V, Objs, DL);
  452. for (Value *V : Objs) {
  453. // Does this pointer refer to a distinct and identifiable object?
  454. if (!isIdentifiedObject(V))
  455. return true;
  456. }
  457. return false;
  458. }
459. /// This returns true if the two MIs need a chain edge between them.
  460. /// If these are not even memory operations, we still may need
  461. /// chain deps between them. The question really is - could
  462. /// these two MIs be reordered during scheduling from memory dependency
  463. /// point of view.
  464. static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
  465. const DataLayout &DL, MachineInstr *MIa,
  466. MachineInstr *MIb) {
  467. const MachineFunction *MF = MIa->getParent()->getParent();
  468. const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
469. // Cover a trivial case - no edge is needed to itself.
  470. if (MIa == MIb)
  471. return false;
  472. // Let the target decide if memory accesses cannot possibly overlap.
  473. if ((MIa->mayLoad() || MIa->mayStore()) &&
  474. (MIb->mayLoad() || MIb->mayStore()))
  475. if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
  476. return false;
  477. // FIXME: Need to handle multiple memory operands to support all targets.
  478. if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
  479. return true;
  480. if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
  481. return true;
  482. // If we are dealing with two "normal" loads, we do not need an edge
  483. // between them - they could be reordered.
  484. if (!MIa->mayStore() && !MIb->mayStore())
  485. return false;
  486. // To this point analysis is generic. From here on we do need AA.
  487. if (!AA)
  488. return true;
  489. MachineMemOperand *MMOa = *MIa->memoperands_begin();
  490. MachineMemOperand *MMOb = *MIb->memoperands_begin();
  491. if (!MMOa->getValue() || !MMOb->getValue())
  492. return true;
  493. // The following interface to AA is fashioned after DAGCombiner::isAlias
  494. // and operates with MachineMemOperand offset with some important
  495. // assumptions:
  496. // - LLVM fundamentally assumes flat address spaces.
  497. // - MachineOperand offset can *only* result from legalization and
  498. // cannot affect queries other than the trivial case of overlap
  499. // checking.
  500. // - These offsets never wrap and never step outside
  501. // of allocated objects.
  502. // - There should never be any negative offsets here.
  503. //
  504. // FIXME: Modify API to hide this math from "user"
  505. // FIXME: Even before we go to AA we can reason locally about some
  506. // memory objects. It can save compile time, and possibly catch some
  507. // corner cases not currently covered.
  508. assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
  509. assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
  510. int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
  511. int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
  512. int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
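// Worked example (illustrative): with MMOa = {offset 8, size 4} and
// MMOb = {offset 0, size 16}, MinOffset is 0, so Overlapa = 12 and
// Overlapb = 16; the query below then compares the ranges [0, 12) and
// [0, 16) measured from each operand's underlying value.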
  513. AliasResult AAResult =
  514. AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
  515. UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
  516. MemoryLocation(MMOb->getValue(), Overlapb,
  517. UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
  518. return (AAResult != NoAlias);
  519. }
  520. /// This recursive function iterates over chain deps of SUb looking for
  521. /// "latest" node that needs a chain edge to SUa.
  522. static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
  523. const DataLayout &DL, SUnit *SUa, SUnit *SUb,
  524. SUnit *ExitSU, unsigned *Depth,
  525. SmallPtrSetImpl<const SUnit *> &Visited) {
  526. if (!SUa || !SUb || SUb == ExitSU)
  527. return *Depth;
  528. // Remember visited nodes.
  529. if (!Visited.insert(SUb).second)
  530. return *Depth;
  531. // If there is _some_ dependency already in place, do not
  532. // descend any further.
  533. // TODO: Need to make sure that if that dependency got eliminated or ignored
  534. // for any reason in the future, we would not violate DAG topology.
  535. // Currently it does not happen, but makes an implicit assumption about
  536. // future implementation.
  537. //
538. // Independently, if we encounter a node that is some sort of global
  539. // object (like a call) we already have full set of dependencies to it
  540. // and we can stop descending.
  541. if (SUa->isSucc(SUb) ||
  542. isGlobalMemoryObject(AA, SUb->getInstr()))
  543. return *Depth;
  544. // If we do need an edge, or we have exceeded depth budget,
  545. // add that edge to the predecessors chain of SUb,
  546. // and stop descending.
  547. if (*Depth > 200 ||
  548. MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
  549. SUb->addPred(SDep(SUa, SDep::MayAliasMem));
  550. return *Depth;
  551. }
  552. // Track current depth.
  553. (*Depth)++;
  554. // Iterate over memory dependencies only.
  555. for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
  556. I != E; ++I)
  557. if (I->isNormalMemoryOrBarrier())
  558. iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
  559. return *Depth;
  560. }
561. /// This function assumes that "downward" from SU there exists a
562. /// tail/leaf of the already constructed DAG. It iterates downward and
  563. /// checks whether SU can be aliasing any node dominated
  564. /// by it.
  565. static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
  566. const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
  567. std::set<SUnit *> &CheckList,
  568. unsigned LatencyToLoad) {
  569. if (!SU)
  570. return;
  571. SmallPtrSet<const SUnit*, 16> Visited;
  572. unsigned Depth = 0;
  573. for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
  574. I != IE; ++I) {
  575. if (SU == *I)
  576. continue;
  577. if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
  578. SDep Dep(SU, SDep::MayAliasMem);
  579. Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
  580. (*I)->addPred(Dep);
  581. }
  582. // Iterate recursively over all previously added memory chain
  583. // successors. Keep track of visited nodes.
  584. for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
  585. JE = (*I)->Succs.end(); J != JE; ++J)
  586. if (J->isNormalMemoryOrBarrier())
  587. iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
  588. Visited);
  589. }
  590. }
591. /// Check whether two objects need a chain edge; if so, add it,
592. /// otherwise remember the rejected SU.
  593. static inline void addChainDependency(AliasAnalysis *AA,
  594. const MachineFrameInfo *MFI,
  595. const DataLayout &DL, SUnit *SUa,
  596. SUnit *SUb, std::set<SUnit *> &RejectList,
  597. unsigned TrueMemOrderLatency = 0,
  598. bool isNormalMemory = false) {
  599. // If this is a false dependency,
600. // do not add the edge, but remember the rejected node.
  601. if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
  602. SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
  603. Dep.setLatency(TrueMemOrderLatency);
  604. SUb->addPred(Dep);
  605. }
  606. else {
  607. // Duplicate entries should be ignored.
  608. RejectList.insert(SUb);
  609. DEBUG(dbgs() << "\tReject chain dep between SU("
  610. << SUa->NodeNum << ") and SU("
  611. << SUb->NodeNum << ")\n");
  612. }
  613. }
614. /// Create an SUnit for each real instruction, numbered in top-down topological
615. /// order. The instruction order A < B implies that no edge exists from B to A.
  616. ///
  617. /// Map each real instruction to its SUnit.
  618. ///
  619. /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
  620. /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
  621. /// instead of pointers.
  622. ///
  623. /// MachineScheduler relies on initSUnits numbering the nodes by their order in
  624. /// the original instruction list.
  625. void ScheduleDAGInstrs::initSUnits() {
  626. // We'll be allocating one SUnit for each real instruction in the region,
  627. // which is contained within a basic block.
  628. SUnits.reserve(NumRegionInstrs);
  629. for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
  630. MachineInstr *MI = I;
  631. if (MI->isDebugValue())
  632. continue;
  633. SUnit *SU = newSUnit(MI);
  634. MISUnitMap[MI] = SU;
  635. SU->isCall = MI->isCall();
  636. SU->isCommutable = MI->isCommutable();
  637. // Assign the Latency field of SU using target-provided information.
  638. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
  639. // If this SUnit uses a reserved or unbuffered resource, mark it as such.
  640. //
  641. // Reserved resources block an instruction from issuing and stall the
  642. // entire pipeline. These are identified by BufferSize=0.
  643. //
  644. // Unbuffered resources prevent execution of subsequent instructions that
  645. // require the same resources. This is used for in-order execution pipelines
  646. // within an out-of-order core. These are identified by BufferSize=1.
  647. if (SchedModel.hasInstrSchedModel()) {
  648. const MCSchedClassDesc *SC = getSchedClass(SU);
  649. for (TargetSchedModel::ProcResIter
  650. PI = SchedModel.getWriteProcResBegin(SC),
  651. PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
  652. switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
  653. case 0:
  654. SU->hasReservedResource = true;
  655. break;
  656. case 1:
  657. SU->isUnbuffered = true;
  658. break;
  659. default:
  660. break;
  661. }
  662. }
  663. }
  664. }
  665. }
  666. /// If RegPressure is non-null, compute register pressure as a side effect. The
  667. /// DAG builder is an efficient place to do it because it already visits
  668. /// operands.
  669. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
  670. RegPressureTracker *RPTracker,
  671. PressureDiffs *PDiffs) {
  672. const TargetSubtargetInfo &ST = MF.getSubtarget();
  673. bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
  674. : ST.useAA();
  675. AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
  676. MISUnitMap.clear();
  677. ScheduleDAG::clearDAG();
  678. // Create an SUnit for each real instruction.
  679. initSUnits();
  680. if (PDiffs)
  681. PDiffs->init(SUnits.size());
  682. // We build scheduling units by walking a block's instruction list from bottom
  683. // to top.
684. // Remember where a generic side-effecting instruction is as we proceed.
  685. SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
  686. // Memory references to specific known memory locations are tracked
  687. // so that they can be given more precise dependencies. We track
  688. // separately the known memory locations that may alias and those
689. // that are known not to alias.
  690. MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
  691. MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
  692. std::set<SUnit*> RejectMemNodes;
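// RejectMemNodes collects SUnits for which addChainDependency decided no
// edge was needed; adjustChainDeps re-checks them against each subsequently
// visited barrier, store, or aliasing load during the bottom-up walk.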
  693. // Remove any stale debug info; sometimes BuildSchedGraph is called again
  694. // without emitting the info from the previous call.
  695. DbgValues.clear();
  696. FirstDbgValue = nullptr;
  697. assert(Defs.empty() && Uses.empty() &&
  698. "Only BuildGraph should update Defs/Uses");
  699. Defs.setUniverse(TRI->getNumRegs());
  700. Uses.setUniverse(TRI->getNumRegs());
  701. assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  702. VRegUses.clear();
  703. VRegDefs.setUniverse(MRI.getNumVirtRegs());
  704. VRegUses.setUniverse(MRI.getNumVirtRegs());
  705. // Model data dependencies between instructions being scheduled and the
  706. // ExitSU.
  707. addSchedBarrierDeps();
  708. // Walk the list of instructions, from bottom moving up.
  709. MachineInstr *DbgMI = nullptr;
  710. for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
  711. MII != MIE; --MII) {
  712. MachineInstr *MI = std::prev(MII);
  713. if (MI && DbgMI) {
  714. DbgValues.push_back(std::make_pair(DbgMI, MI));
  715. DbgMI = nullptr;
  716. }
  717. if (MI->isDebugValue()) {
  718. DbgMI = MI;
  719. continue;
  720. }
  721. SUnit *SU = MISUnitMap[MI];
  722. assert(SU && "No SUnit mapped to this MI");
  723. if (RPTracker) {
  724. PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
  725. RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
  726. assert(RPTracker->getPos() == std::prev(MII) &&
  727. "RPTracker can't find MI");
  728. }
  729. assert(
  730. (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
  731. "Cannot schedule terminators or labels!");
  732. // Add register-based dependencies (data, anti, and output).
  733. bool HasVRegDef = false;
  734. for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
  735. const MachineOperand &MO = MI->getOperand(j);
  736. if (!MO.isReg()) continue;
  737. unsigned Reg = MO.getReg();
  738. if (Reg == 0) continue;
  739. if (TRI->isPhysicalRegister(Reg))
  740. addPhysRegDeps(SU, j);
  741. else {
  742. assert(!IsPostRA && "Virtual register encountered!");
  743. if (MO.isDef()) {
  744. HasVRegDef = true;
  745. addVRegDefDeps(SU, j);
  746. }
  747. else if (MO.readsReg()) // ignore undef operands
  748. addVRegUseDeps(SU, j);
  749. }
  750. }
  751. // If we haven't seen any uses in this scheduling region, create a
  752. // dependence edge to ExitSU to model the live-out latency. This is required
  753. // for vreg defs with no in-region use, and prefetches with no vreg def.
  754. //
  755. // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
  756. // check currently relies on being called before adding chain deps.
  757. if (SU->NumSuccs == 0 && SU->Latency > 1
  758. && (HasVRegDef || MI->mayLoad())) {
  759. SDep Dep(SU, SDep::Artificial);
  760. Dep.setLatency(SU->Latency - 1);
  761. ExitSU.addPred(Dep);
  762. }
  763. // Add chain dependencies.
764. // Chain dependencies used to enforce memory order should have
765. // latency of 0 (except for the true dependency of a Store followed by
766. // an aliased Load... we estimate that with a single cycle of latency,
767. // assuming the hardware will bypass).
768. // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
  769. // after stack slots are lowered to actual addresses.
  770. // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
  771. // produce more precise dependence information.
  772. unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
  773. if (isGlobalMemoryObject(AA, MI)) {
  774. // Be conservative with these and add dependencies on all memory
  775. // references, even those that are known to not alias.
  776. for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  777. NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
  778. for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
  779. I->second[i]->addPred(SDep(SU, SDep::Barrier));
  780. }
  781. }
  782. for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  783. NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
  784. for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
  785. SDep Dep(SU, SDep::Barrier);
  786. Dep.setLatency(TrueMemOrderLatency);
  787. I->second[i]->addPred(Dep);
  788. }
  789. }
  790. // Add SU to the barrier chain.
  791. if (BarrierChain)
  792. BarrierChain->addPred(SDep(SU, SDep::Barrier));
  793. BarrierChain = SU;
794. // This is a barrier event that acts as a pivotal node in the DAG,
795. // so it is safe to clear the list of exposed nodes.
  796. adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
  797. TrueMemOrderLatency);
  798. RejectMemNodes.clear();
  799. NonAliasMemDefs.clear();
  800. NonAliasMemUses.clear();
  801. // fall-through
  802. new_alias_chain:
  803. // Chain all possibly aliasing memory references through SU.
  804. if (AliasChain) {
  805. unsigned ChainLatency = 0;
  806. if (AliasChain->getInstr()->mayLoad())
  807. ChainLatency = TrueMemOrderLatency;
  808. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
  809. RejectMemNodes, ChainLatency);
  810. }
  811. AliasChain = SU;
  812. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
  813. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  814. PendingLoads[k], RejectMemNodes,
  815. TrueMemOrderLatency);
  816. for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  817. AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
  818. for (unsigned i = 0, e = I->second.size(); i != e; ++i)
  819. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  820. I->second[i], RejectMemNodes);
  821. }
  822. for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  823. AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
  824. for (unsigned i = 0, e = I->second.size(); i != e; ++i)
  825. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  826. I->second[i], RejectMemNodes, TrueMemOrderLatency);
  827. }
  828. adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
  829. TrueMemOrderLatency);
  830. PendingLoads.clear();
  831. AliasMemDefs.clear();
  832. AliasMemUses.clear();
  833. } else if (MI->mayStore()) {
  834. // Add dependence on barrier chain, if needed.
835. // There is no point in checking aliasing on a barrier event. Even if
836. // SU and the barrier _could_ be reordered, they should not be. In addition,
837. // we have lost all RejectMemNodes below the barrier.
  838. if (BarrierChain)
  839. BarrierChain->addPred(SDep(SU, SDep::Barrier));
  840. UnderlyingObjectsVector Objs;
  841. getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
  842. if (Objs.empty()) {
  843. // Treat all other stores conservatively.
  844. goto new_alias_chain;
  845. }
  846. bool MayAlias = false;
  847. for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
  848. K != KE; ++K) {
  849. ValueType V = K->getPointer();
  850. bool ThisMayAlias = K->getInt();
  851. if (ThisMayAlias)
  852. MayAlias = true;
  853. // A store to a specific PseudoSourceValue. Add precise dependencies.
  854. // Record the def in MemDefs, first adding a dep if there is
  855. // an existing def.
  856. MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  857. ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
  858. MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
  859. ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
  860. if (I != IE) {
  861. for (unsigned i = 0, e = I->second.size(); i != e; ++i)
  862. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  863. I->second[i], RejectMemNodes, 0, true);
  864. // If we're not using AA, then we only need one store per object.
  865. if (!AAForDep)
  866. I->second.clear();
  867. I->second.push_back(SU);
  868. } else {
  869. if (ThisMayAlias) {
  870. if (!AAForDep)
  871. AliasMemDefs[V].clear();
  872. AliasMemDefs[V].push_back(SU);
  873. } else {
  874. if (!AAForDep)
  875. NonAliasMemDefs[V].clear();
  876. NonAliasMemDefs[V].push_back(SU);
  877. }
  878. }
  879. // Handle the uses in MemUses, if there are any.
  880. MapVector<ValueType, std::vector<SUnit *> >::iterator J =
  881. ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
  882. MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
  883. ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
  884. if (J != JE) {
  885. for (unsigned i = 0, e = J->second.size(); i != e; ++i)
  886. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  887. J->second[i], RejectMemNodes,
  888. TrueMemOrderLatency, true);
  889. J->second.clear();
  890. }
  891. }
  892. if (MayAlias) {
  893. // Add dependencies from all the PendingLoads, i.e. loads
  894. // with no underlying object.
  895. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
  896. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  897. PendingLoads[k], RejectMemNodes,
  898. TrueMemOrderLatency);
  899. // Add dependence on alias chain, if needed.
  900. if (AliasChain)
  901. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
  902. RejectMemNodes);
  903. }
  904. adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
  905. TrueMemOrderLatency);
  906. } else if (MI->mayLoad()) {
  907. bool MayAlias = true;
  908. if (MI->isInvariantLoad(AA)) {
  909. // Invariant load, no chain dependencies needed!
  910. } else {
  911. UnderlyingObjectsVector Objs;
  912. getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
  913. if (Objs.empty()) {
  914. // A load with no underlying object. Depend on all
  915. // potentially aliasing stores.
  916. for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  917. AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
  918. for (unsigned i = 0, e = I->second.size(); i != e; ++i)
  919. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  920. I->second[i], RejectMemNodes);
  921. PendingLoads.push_back(SU);
  922. MayAlias = true;
  923. } else {
  924. MayAlias = false;
  925. }
  926. for (UnderlyingObjectsVector::iterator
  927. J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
  928. ValueType V = J->getPointer();
  929. bool ThisMayAlias = J->getInt();
  930. if (ThisMayAlias)
  931. MayAlias = true;
  932. // A load from a specific PseudoSourceValue. Add precise dependencies.
  933. MapVector<ValueType, std::vector<SUnit *> >::iterator I =
  934. ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
  935. MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
  936. ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
  937. if (I != IE)
  938. for (unsigned i = 0, e = I->second.size(); i != e; ++i)
  939. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
  940. I->second[i], RejectMemNodes, 0, true);
  941. if (ThisMayAlias)
  942. AliasMemUses[V].push_back(SU);
  943. else
  944. NonAliasMemUses[V].push_back(SU);
  945. }
  946. if (MayAlias)
  947. adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
  948. RejectMemNodes, /*Latency=*/0);
  949. // Add dependencies on alias and barrier chains, if needed.
  950. if (MayAlias && AliasChain)
  951. addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
  952. RejectMemNodes);
  953. if (BarrierChain)
  954. BarrierChain->addPred(SDep(SU, SDep::Barrier));
  955. }
  956. }
  957. }
  958. if (DbgMI)
  959. FirstDbgValue = DbgMI;
  960. Defs.clear();
  961. Uses.clear();
  962. VRegDefs.clear();
  963. PendingLoads.clear();
  964. }
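// Summary of the walk above: register dependencies are handled per-operand
// via the addPhysReg*/addVReg* helpers, while memory ordering is approximated
// with the AliasMemDefs/AliasMemUses/NonAlias* maps, the PendingLoads list,
// and the BarrierChain/AliasChain pivots.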
  965. /// \brief Initialize register live-range state for updating kills.
  966. void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
  967. // Start with no live registers.
  968. LiveRegs.reset();
  969. // Examine the live-in regs of all successors.
  970. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
  971. SE = BB->succ_end(); SI != SE; ++SI) {
  972. for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
  973. E = (*SI)->livein_end(); I != E; ++I) {
  974. unsigned Reg = *I;
  975. // Repeat, for reg and all subregs.
  976. for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
  977. SubRegs.isValid(); ++SubRegs)
  978. LiveRegs.set(*SubRegs);
  979. }
  980. }
  981. }
  982. /// \brief If we change a kill flag on the bundle instruction implicit register
  983. /// operands, then we also need to propagate that to any instructions inside
  984. /// the bundle which had the same kill state.
  985. static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
  986. bool NewKillState) {
  987. if (MI->getOpcode() != TargetOpcode::BUNDLE)
  988. return;
  989. // Walk backwards from the last instruction in the bundle to the first.
  990. // Once we set a kill flag on an instruction, we bail out, as otherwise we
  991. // might set it on too many operands. We will clear as many flags as we
  992. // can though.
  993. MachineBasicBlock::instr_iterator Begin = MI;
  994. MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
  995. while (Begin != End) {
  996. for (MachineOperand &MO : (--End)->operands()) {
  997. if (!MO.isReg() || MO.isDef() || Reg != MO.getReg())
  998. continue;
  999. // DEBUG_VALUE nodes do not contribute to code generation and should
  1000. // always be ignored. Failure to do so may result in trying to modify
  1001. // KILL flags on DEBUG_VALUE nodes, which is distressing.
  1002. if (MO.isDebug())
  1003. continue;
  1004. // If the register has the internal flag then it could be killing an
  1005. // internal def of the register. In this case, just skip. We only want
  1006. // to toggle the flag on operands visible outside the bundle.
  1007. if (MO.isInternalRead())
  1008. continue;
  1009. if (MO.isKill() == NewKillState)
  1010. continue;
  1011. MO.setIsKill(NewKillState);
  1012. if (NewKillState)
  1013. return;
  1014. }
  1015. }
  1016. }
  1017. bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
  1018. // Setting kill flag...
  1019. if (!MO.isKill()) {
  1020. MO.setIsKill(true);
  1021. toggleBundleKillFlag(MI, MO.getReg(), true);
  1022. return false;
  1023. }
  1024. // If MO itself is live, clear the kill flag...
  1025. if (LiveRegs.test(MO.getReg())) {
  1026. MO.setIsKill(false);
  1027. toggleBundleKillFlag(MI, MO.getReg(), false);
  1028. return false;
  1029. }
  1030. // If any subreg of MO is live, then create an imp-def for that
  1031. // subreg and keep MO marked as killed.
  1032. MO.setIsKill(false);
  1033. toggleBundleKillFlag(MI, MO.getReg(), false);
  1034. bool AllDead = true;
  1035. const unsigned SuperReg = MO.getReg();
  1036. MachineInstrBuilder MIB(MF, MI);
  1037. for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
  1038. if (LiveRegs.test(*SubRegs)) {
  1039. MIB.addReg(*SubRegs, RegState::ImplicitDefine);
  1040. AllDead = false;
  1041. }
  1042. }
  1043. if(AllDead) {
  1044. MO.setIsKill(true);
  1045. toggleBundleKillFlag(MI, MO.getReg(), true);
  1046. }
  1047. return false;
  1048. }
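// Note: the MIB.addReg calls above may grow MI's operand list, which can
// reallocate it; this is why fixupKills below treats MO as potentially
// invalidated after calling toggleKillFlag.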
  1049. // FIXME: Reuse the LivePhysRegs utility for this.
  1050. void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
  1051. DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
  1052. LiveRegs.resize(TRI->getNumRegs());
  1053. BitVector killedRegs(TRI->getNumRegs());
  1054. startBlockForKills(MBB);
  1055. // Examine block from end to start...
  1056. unsigned Count = MBB->size();
  1057. for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
  1058. I != E; --Count) {
  1059. MachineInstr *MI = --I;
  1060. if (MI->isDebugValue())
  1061. continue;
  1062. // Update liveness. Registers that are defed but not used in this
  1063. // instruction are now dead. Mark register and all subregs as they
  1064. // are completely defined.
  1065. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  1066. MachineOperand &MO = MI->getOperand(i);
  1067. if (MO.isRegMask())
  1068. LiveRegs.clearBitsNotInMask(MO.getRegMask());
  1069. if (!MO.isReg()) continue;
  1070. unsigned Reg = MO.getReg();
  1071. if (Reg == 0) continue;
  1072. if (!MO.isDef()) continue;
  1073. // Ignore two-addr defs.
  1074. if (MI->isRegTiedToUseOperand(i)) continue;
  1075. // Repeat for reg and all subregs.
  1076. for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
  1077. SubRegs.isValid(); ++SubRegs)
  1078. LiveRegs.reset(*SubRegs);
  1079. }
  1080. // Examine all used registers and set/clear kill flag. When a
  1081. // register is used multiple times we only set the kill flag on
  1082. // the first use. Don't set kill flags on undef operands.
  1083. killedRegs.reset();
  1084. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  1085. MachineOperand &MO = MI->getOperand(i);
  1086. if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
  1087. unsigned Reg = MO.getReg();
  1088. if ((Reg == 0) || MRI.isReserved(Reg)) continue;
  1089. bool kill = false;
  1090. if (!killedRegs.test(Reg)) {
  1091. kill = true;
  1092. // A register is not killed if any subregs are live...
  1093. for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
  1094. if (LiveRegs.test(*SubRegs)) {
  1095. kill = false;
  1096. break;
  1097. }
  1098. }
  1099. // If subreg is not live, then register is killed if it became
  1100. // live in this instruction
  1101. if (kill)
  1102. kill = !LiveRegs.test(Reg);
  1103. }
  1104. if (MO.isKill() != kill) {
  1105. DEBUG(dbgs() << "Fixing " << MO << " in ");
  1106. // Warning: toggleKillFlag may invalidate MO.
  1107. toggleKillFlag(MI, MO);
  1108. DEBUG(MI->dump());
  1109. DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
  1110. MachineBasicBlock::instr_iterator Begin = MI;
  1111. MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
  1112. while (++Begin != End)
  1113. DEBUG(Begin->dump());
  1114. });
  1115. }
  1116. killedRegs.set(Reg);
  1117. }
  1118. // Mark any used register (that is not using undef) and subregs as
  1119. // now live...
  1120. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  1121. MachineOperand &MO = MI->getOperand(i);
  1122. if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
  1123. unsigned Reg = MO.getReg();
  1124. if ((Reg == 0) || MRI.isReserved(Reg)) continue;
  1125. for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
  1126. SubRegs.isValid(); ++SubRegs)
  1127. LiveRegs.set(*SubRegs);
  1128. }
  1129. }
  1130. }
  1131. void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  1132. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1133. SU->getInstr()->dump();
  1134. #endif
  1135. }
  1136. std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  1137. std::string s;
  1138. raw_string_ostream oss(s);
  1139. if (SU == &EntrySU)
  1140. oss << "<entry>";
  1141. else if (SU == &ExitSU)
  1142. oss << "<exit>";
  1143. else
  1144. SU->getInstr()->print(oss, /*SkipOpers=*/true);
  1145. return oss.str();
  1146. }
1147. /// Return the basic block label. It is not necessarily unique because a block
  1148. /// contains multiple scheduling regions. But it is fine for visualization.
  1149. std::string ScheduleDAGInstrs::getDAGName() const {
  1150. return "dag." + BB->getFullName();
  1151. }
  1152. //===----------------------------------------------------------------------===//
  1153. // SchedDFSResult Implementation
  1154. //===----------------------------------------------------------------------===//
  1155. namespace llvm {
  1156. /// \brief Internal state used to compute SchedDFSResult.
  1157. class SchedDFSImpl {
  1158. SchedDFSResult &R;
  1159. /// Join DAG nodes into equivalence classes by their subtree.
  1160. IntEqClasses SubtreeClasses;
  1161. /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  1162. std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
  1163. struct RootData {
  1164. unsigned NodeID;
  1165. unsigned ParentNodeID; // Parent node (member of the parent subtree).
  1166. unsigned SubInstrCount; // Instr count in this tree only, not children.
  1167. RootData(unsigned id): NodeID(id),
  1168. ParentNodeID(SchedDFSResult::InvalidSubtreeID),
  1169. SubInstrCount(0) {}
  1170. unsigned getSparseSetIndex() const { return NodeID; }
  1171. };
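  /// One RootData record per current subtree root, keyed by NodeID through
  /// getSparseSetIndex(), so RootSet[PredNum] below looks up the predecessor's
  /// root record directly.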
  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Return true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initialize this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
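    // Illustrative numbers (hypothetical): with SubtreeLimit = 16, a parent
    // whose InstrCount is 20 and a predecessor subtree whose InstrCount is 10
    // differ by only 10 < 16, so the predecessor is joined below with
    // CheckLimit = false.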
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->getKind() != SDep::Data)
        continue;
      unsigned PredNum = PI->getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(*PI, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// Called once for each tree edge after calling visitPostorderNode on the
  /// predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Add a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Set each node's subtree ID to the representative ID and record connections
  /// between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (SparseSet<RootData>::const_iterator
           RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
      unsigned TreeID = SubtreeClasses[RI->NodeID];
      if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      DEBUG(dbgs() << " SU(" << Idx << ") in tree "
            << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
           I = ConnectionPairs.begin(), E = ConnectionPairs.end();
         I != E; ++I) {
      unsigned PredTree = SubtreeClasses[I->first->NodeNum];
      unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = I->first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Join the predecessor subtree with the successor that is its DFS
  /// parent. Apply some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
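    // A predecessor feeding four or more data successors acts as such a pinch
    // point; joining across it would fuse otherwise independent subtrees, so
    // the loop below refuses the join in that case.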
    unsigned NumDataSucs = 0;
    for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
           SE = PredSU->Succs.end(); SI != SE; ++SI) {
      if (SI->getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;

    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
             I = Connections.begin(), E = Connections.end(); I != E; ++I) {
        if (I->TreeID == ToTree) {
          I->Level = std::max(I->Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // namespace llvm

namespace {
/// \brief Manage the stack used by a reverse depth-first search over the DAG.
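///
/// Each stack entry pairs the node being visited with an iterator to the next
/// predecessor edge to follow, so the walk can resume where it left off after
/// backtracking.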
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};
} // anonymous

static bool hasDataSucc(const SUnit *SU) {
  for (SUnit::const_succ_iterator
         SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
    if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
      return true;
  }
  return false;
}
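
// The bottom-up DFS in compute() below starts only from nodes that have no
// (non-boundary) data successors; hasDataSucc() is what filters out everything
// else when picking roots.
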
/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");
  SchedDFSImpl Impl(*this);
  for (ArrayRef<SUnit>::const_iterator
         SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
    const SUnit *SU = &*SI;
    if (Impl.isVisited(SU) || hasDataSucc(SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(SU);
    DFS.follow(SU);
    for (;;) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}
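
// Illustrative usage sketch (not taken from this file; it assumes the
// SchedDFSResult interface declared in ScheduleDFS.h, in particular the
// (IsBottomUp, SubtreeLimit) constructor, resize(), and getILP()):
//
//   SchedDFSResult DFS(/*IsBottomUp=*/true, /*SubtreeLimit=*/8);
//   DFS.resize(SUnits.size());
//   DFS.compute(SUnits);
//   for (unsigned i = 0, e = SUnits.size(); i != e; ++i)
//     dbgs() << "SU(" << i << ") ILP: " << DFS.getILP(&SUnits[i]) << '\n';
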
/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (SmallVectorImpl<Connection>::const_iterator
         I = SubtreeConnections[SubtreeID].begin(),
         E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
    SubtreeConnectLevels[I->TreeID] =
      std::max(SubtreeConnectLevels[I->TreeID], I->Level);
    DEBUG(dbgs() << " Tree: " << I->TreeID
          << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
  }
}

LLVM_DUMP_METHOD
void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}
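
// For example (illustrative values): InstrCount = 8 with Length = 4 prints
// "8 / 4 = 2", while Length = 0 prints "8 / 0 = BADILP".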
LLVM_DUMP_METHOD
void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // namespace llvm