ScheduleDAGSDNodes.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This implements the ScheduleDAG class, which is a base class used by
  11. // scheduling implementation classes.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "ScheduleDAGSDNodes.h"
  15. #include "InstrEmitter.h"
  16. #include "SDNodeDbgValue.h"
  17. #include "llvm/ADT/DenseMap.h"
  18. #include "llvm/ADT/SmallPtrSet.h"
  19. #include "llvm/ADT/SmallSet.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/ADT/Statistic.h"
  22. #include "llvm/CodeGen/MachineInstrBuilder.h"
  23. #include "llvm/CodeGen/MachineRegisterInfo.h"
  24. #include "llvm/CodeGen/SelectionDAG.h"
  25. #include "llvm/MC/MCInstrItineraries.h"
  26. #include "llvm/Support/CommandLine.h"
  27. #include "llvm/Support/Debug.h"
  28. #include "llvm/Support/raw_ostream.h"
  29. #include "llvm/Target/TargetInstrInfo.h"
  30. #include "llvm/Target/TargetLowering.h"
  31. #include "llvm/Target/TargetRegisterInfo.h"
  32. #include "llvm/Target/TargetSubtargetInfo.h"
  33. using namespace llvm;
  34. #define DEBUG_TYPE "pre-RA-sched"
  35. STATISTIC(LoadsClustered, "Number of loads clustered together");
  36. // This allows the latency-based scheduler to notice high latency instructions
  37. // without a target itinerary. The choice of number here has more to do with
  38. // balancing scheduler heuristics than with the actual machine latency.
  39. static cl::opt<int> HighLatencyCycles(
  40. "sched-high-latency-cycles", cl::Hidden, cl::init(10),
  41. cl::desc("Roughly estimate the number of cycles that 'long latency'"
  42. "instructions take for targets with no itinerary"));
  43. ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
  44. : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
  45. InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
  46. /// Run - perform scheduling.
  47. ///
  48. void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
  49. BB = bb;
  50. DAG = dag;
  51. // Clear the scheduler's SUnit DAG.
  52. ScheduleDAG::clearDAG();
  53. Sequence.clear();
  54. // Invoke the target's selection of scheduler.
  55. Schedule();
  56. }
  57. /// NewSUnit - Creates a new SUnit and return a ptr to it.
  58. ///
  59. SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
  60. #ifndef NDEBUG
  61. const SUnit *Addr = nullptr;
  62. if (!SUnits.empty())
  63. Addr = &SUnits[0];
  64. #endif
  65. SUnits.emplace_back(N, (unsigned)SUnits.size());
  66. assert((Addr == nullptr || Addr == &SUnits[0]) &&
  67. "SUnits std::vector reallocated on the fly!");
  68. SUnits.back().OrigNode = &SUnits.back();
  69. SUnit *SU = &SUnits.back();
  70. const TargetLowering &TLI = DAG->getTargetLoweringInfo();
  71. if (!N ||
  72. (N->isMachineOpcode() &&
  73. N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
  74. SU->SchedulingPref = Sched::None;
  75. else
  76. SU->SchedulingPref = TLI.getSchedulingPreference(N);
  77. return SU;
  78. }
  79. SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
  80. SUnit *SU = newSUnit(Old->getNode());
  81. SU->OrigNode = Old->OrigNode;
  82. SU->Latency = Old->Latency;
  83. SU->isVRegCycle = Old->isVRegCycle;
  84. SU->isCall = Old->isCall;
  85. SU->isCallOp = Old->isCallOp;
  86. SU->isTwoAddress = Old->isTwoAddress;
  87. SU->isCommutable = Old->isCommutable;
  88. SU->hasPhysRegDefs = Old->hasPhysRegDefs;
  89. SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
  90. SU->isScheduleHigh = Old->isScheduleHigh;
  91. SU->isScheduleLow = Old->isScheduleLow;
  92. SU->SchedulingPref = Old->SchedulingPref;
  93. Old->isCloned = true;
  94. return SU;
  95. }
  96. /// CheckForPhysRegDependency - Check if the dependency between def and use of
  97. /// a specified operand is a physical register dependency. If so, returns the
  98. /// register and the cost of copying the register.
  99. static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
  100. const TargetRegisterInfo *TRI,
  101. const TargetInstrInfo *TII,
  102. unsigned &PhysReg, int &Cost) {
  103. if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
  104. return;
  105. unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
  106. if (TargetRegisterInfo::isVirtualRegister(Reg))
  107. return;
  108. unsigned ResNo = User->getOperand(2).getResNo();
  109. if (Def->getOpcode() == ISD::CopyFromReg &&
  110. cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
  111. PhysReg = Reg;
  112. } else if (Def->isMachineOpcode()) {
  113. const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
  114. if (ResNo >= II.getNumDefs() &&
  115. II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
  116. PhysReg = Reg;
  117. }
  118. if (PhysReg != 0) {
  119. const TargetRegisterClass *RC =
  120. TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
  121. Cost = RC->getCopyCost();
  122. }
  123. }
  124. // Helper for AddGlue to clone node operands.
  125. static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
  126. SDValue ExtraOper = SDValue()) {
  127. SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
  128. if (ExtraOper.getNode())
  129. Ops.push_back(ExtraOper);
  130. SDVTList VTList = DAG->getVTList(VTs);
  131. MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr;
  132. MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
  133. // Store memory references.
  134. if (MN) {
  135. Begin = MN->memoperands_begin();
  136. End = MN->memoperands_end();
  137. }
  138. DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
  139. // Reset the memory references
  140. if (MN)
  141. MN->setMemRefs(Begin, End);
  142. }
  143. static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
  144. SDNode *GlueDestNode = Glue.getNode();
  145. // Don't add glue from a node to itself.
  146. if (GlueDestNode == N) return false;
  147. // Don't add a glue operand to something that already uses glue.
  148. if (GlueDestNode &&
  149. N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
  150. return false;
  151. }
  152. // Don't add glue to something that already has a glue value.
  153. if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
  154. SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
  155. if (AddGlue)
  156. VTs.push_back(MVT::Glue);
  157. CloneNodeWithValues(N, DAG, VTs, Glue);
  158. return true;
  159. }
  160. // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
  161. // node even though simply shrinking the value list is sufficient.
  162. static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
  163. assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
  164. !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
  165. "expected an unused glue value");
  166. CloneNodeWithValues(N, DAG,
  167. makeArrayRef(N->value_begin(), N->getNumValues() - 1));
  168. }
  169. /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
  170. /// This function finds loads of the same base and different offsets. If the
  171. /// offsets are not far apart (target specific), it add MVT::Glue inputs and
  172. /// outputs to ensure they are scheduled together and in order. This
  173. /// optimization may benefit some targets by improving cache locality.
  174. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
  175. SDNode *Chain = nullptr;
  176. unsigned NumOps = Node->getNumOperands();
  177. if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
  178. Chain = Node->getOperand(NumOps-1).getNode();
  179. if (!Chain)
  180. return;
  181. // Look for other loads of the same chain. Find loads that are loading from
  182. // the same base pointer and different offsets.
  183. SmallPtrSet<SDNode*, 16> Visited;
  184. SmallVector<int64_t, 4> Offsets;
  185. DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
  186. bool Cluster = false;
  187. SDNode *Base = Node;
  188. // This algorithm requires a reasonably low use count before finding a match
  189. // to avoid uselessly blowing up compile time in large blocks.
  190. unsigned UseCount = 0;
  191. for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
  192. I != E && UseCount < 100; ++I, ++UseCount) {
  193. SDNode *User = *I;
  194. if (User == Node || !Visited.insert(User).second)
  195. continue;
  196. int64_t Offset1, Offset2;
  197. if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
  198. Offset1 == Offset2)
  199. // FIXME: Should be ok if they addresses are identical. But earlier
  200. // optimizations really should have eliminated one of the loads.
  201. continue;
  202. if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
  203. Offsets.push_back(Offset1);
  204. O2SMap.insert(std::make_pair(Offset2, User));
  205. Offsets.push_back(Offset2);
  206. if (Offset2 < Offset1)
  207. Base = User;
  208. Cluster = true;
  209. // Reset UseCount to allow more matches.
  210. UseCount = 0;
  211. }
  212. if (!Cluster)
  213. return;
  214. // Sort them in increasing order.
  215. std::sort(Offsets.begin(), Offsets.end());
  216. // Check if the loads are close enough.
  217. SmallVector<SDNode*, 4> Loads;
  218. unsigned NumLoads = 0;
  219. int64_t BaseOff = Offsets[0];
  220. SDNode *BaseLoad = O2SMap[BaseOff];
  221. Loads.push_back(BaseLoad);
  222. for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
  223. int64_t Offset = Offsets[i];
  224. SDNode *Load = O2SMap[Offset];
  225. if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
  226. break; // Stop right here. Ignore loads that are further away.
  227. Loads.push_back(Load);
  228. ++NumLoads;
  229. }
  230. if (NumLoads == 0)
  231. return;
  232. // Cluster loads by adding MVT::Glue outputs and inputs. This also
  233. // ensure they are scheduled in order of increasing addresses.
  234. SDNode *Lead = Loads[0];
  235. SDValue InGlue = SDValue(nullptr, 0);
  236. if (AddGlue(Lead, InGlue, true, DAG))
  237. InGlue = SDValue(Lead, Lead->getNumValues() - 1);
  238. for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
  239. bool OutGlue = I < E - 1;
  240. SDNode *Load = Loads[I];
  241. // If AddGlue fails, we could leave an unsused glue value. This should not
  242. // cause any
  243. if (AddGlue(Load, InGlue, OutGlue, DAG)) {
  244. if (OutGlue)
  245. InGlue = SDValue(Load, Load->getNumValues() - 1);
  246. ++LoadsClustered;
  247. }
  248. else if (!OutGlue && InGlue.getNode())
  249. RemoveUnusedGlue(InGlue.getNode(), DAG);
  250. }
  251. }
  252. /// ClusterNodes - Cluster certain nodes which should be scheduled together.
  253. ///
  254. void ScheduleDAGSDNodes::ClusterNodes() {
  255. for (SDNode &NI : DAG->allnodes()) {
  256. SDNode *Node = &NI;
  257. if (!Node || !Node->isMachineOpcode())
  258. continue;
  259. unsigned Opc = Node->getMachineOpcode();
  260. const MCInstrDesc &MCID = TII->get(Opc);
  261. if (MCID.mayLoad())
  262. // Cluster loads from "near" addresses into combined SUnits.
  263. ClusterNeighboringLoads(Node);
  264. }
  265. }
  266. void ScheduleDAGSDNodes::BuildSchedUnits() {
  267. // During scheduling, the NodeId field of SDNode is used to map SDNodes
  268. // to their associated SUnits by holding SUnits table indices. A value
  269. // of -1 means the SDNode does not yet have an associated SUnit.
  270. unsigned NumNodes = 0;
  271. for (SDNode &NI : DAG->allnodes()) {
  272. NI.setNodeId(-1);
  273. ++NumNodes;
  274. }
  275. // Reserve entries in the vector for each of the SUnits we are creating. This
  276. // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
  277. // invalidated.
  278. // FIXME: Multiply by 2 because we may clone nodes during scheduling.
  279. // This is a temporary workaround.
  280. SUnits.reserve(NumNodes * 2);
  281. // Add all nodes in depth first order.
  282. SmallVector<SDNode*, 64> Worklist;
  283. SmallPtrSet<SDNode*, 64> Visited;
  284. Worklist.push_back(DAG->getRoot().getNode());
  285. Visited.insert(DAG->getRoot().getNode());
  286. SmallVector<SUnit*, 8> CallSUnits;
  287. while (!Worklist.empty()) {
  288. SDNode *NI = Worklist.pop_back_val();
  289. // Add all operands to the worklist unless they've already been added.
  290. for (const SDValue &Op : NI->op_values())
  291. if (Visited.insert(Op.getNode()).second)
  292. Worklist.push_back(Op.getNode());
  293. if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
  294. continue;
  295. // If this node has already been processed, stop now.
  296. if (NI->getNodeId() != -1) continue;
  297. SUnit *NodeSUnit = newSUnit(NI);
  298. // See if anything is glued to this node, if so, add them to glued
  299. // nodes. Nodes can have at most one glue input and one glue output. Glue
  300. // is required to be the last operand and result of a node.
  301. // Scan up to find glued preds.
  302. SDNode *N = NI;
  303. while (N->getNumOperands() &&
  304. N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
  305. N = N->getOperand(N->getNumOperands()-1).getNode();
  306. assert(N->getNodeId() == -1 && "Node already inserted!");
  307. N->setNodeId(NodeSUnit->NodeNum);
  308. if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
  309. NodeSUnit->isCall = true;
  310. }
  311. // Scan down to find any glued succs.
  312. N = NI;
  313. while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
  314. SDValue GlueVal(N, N->getNumValues()-1);
  315. // There are either zero or one users of the Glue result.
  316. bool HasGlueUse = false;
  317. for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
  318. UI != E; ++UI)
  319. if (GlueVal.isOperandOf(*UI)) {
  320. HasGlueUse = true;
  321. assert(N->getNodeId() == -1 && "Node already inserted!");
  322. N->setNodeId(NodeSUnit->NodeNum);
  323. N = *UI;
  324. if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
  325. NodeSUnit->isCall = true;
  326. break;
  327. }
  328. if (!HasGlueUse) break;
  329. }
  330. if (NodeSUnit->isCall)
  331. CallSUnits.push_back(NodeSUnit);
  332. // Schedule zero-latency TokenFactor below any nodes that may increase the
  333. // schedule height. Otherwise, ancestors of the TokenFactor may appear to
  334. // have false stalls.
  335. if (NI->getOpcode() == ISD::TokenFactor)
  336. NodeSUnit->isScheduleLow = true;
  337. // If there are glue operands involved, N is now the bottom-most node
  338. // of the sequence of nodes that are glued together.
  339. // Update the SUnit.
  340. NodeSUnit->setNode(N);
  341. assert(N->getNodeId() == -1 && "Node already inserted!");
  342. N->setNodeId(NodeSUnit->NodeNum);
  343. // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
  344. InitNumRegDefsLeft(NodeSUnit);
  345. // Assign the Latency field of NodeSUnit using target-provided information.
  346. computeLatency(NodeSUnit);
  347. }
  348. // Find all call operands.
  349. while (!CallSUnits.empty()) {
  350. SUnit *SU = CallSUnits.pop_back_val();
  351. for (const SDNode *SUNode = SU->getNode(); SUNode;
  352. SUNode = SUNode->getGluedNode()) {
  353. if (SUNode->getOpcode() != ISD::CopyToReg)
  354. continue;
  355. SDNode *SrcN = SUNode->getOperand(2).getNode();
  356. if (isPassiveNode(SrcN)) continue; // Not scheduled.
  357. SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
  358. SrcSU->isCallOp = true;
  359. }
  360. }
  361. }
  362. void ScheduleDAGSDNodes::AddSchedEdges() {
  363. const TargetSubtargetInfo &ST = MF.getSubtarget();
  364. // Check to see if the scheduler cares about latencies.
  365. bool UnitLatencies = forceUnitLatencies();
  366. // Pass 2: add the preds, succs, etc.
  367. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
  368. SUnit *SU = &SUnits[su];
  369. SDNode *MainNode = SU->getNode();
  370. if (MainNode->isMachineOpcode()) {
  371. unsigned Opc = MainNode->getMachineOpcode();
  372. const MCInstrDesc &MCID = TII->get(Opc);
  373. for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
  374. if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
  375. SU->isTwoAddress = true;
  376. break;
  377. }
  378. }
  379. if (MCID.isCommutable())
  380. SU->isCommutable = true;
  381. }
  382. // Find all predecessors and successors of the group.
  383. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
  384. if (N->isMachineOpcode() &&
  385. TII->get(N->getMachineOpcode()).getImplicitDefs()) {
  386. SU->hasPhysRegClobbers = true;
  387. unsigned NumUsed = InstrEmitter::CountResults(N);
  388. while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
  389. --NumUsed; // Skip over unused values at the end.
  390. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
  391. SU->hasPhysRegDefs = true;
  392. }
  393. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
  394. SDNode *OpN = N->getOperand(i).getNode();
  395. if (isPassiveNode(OpN)) continue; // Not scheduled.
  396. SUnit *OpSU = &SUnits[OpN->getNodeId()];
  397. assert(OpSU && "Node has no SUnit!");
  398. if (OpSU == SU) continue; // In the same group.
  399. EVT OpVT = N->getOperand(i).getValueType();
  400. assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
  401. bool isChain = OpVT == MVT::Other;
  402. unsigned PhysReg = 0;
  403. int Cost = 1;
  404. // Determine if this is a physical register dependency.
  405. CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
  406. assert((PhysReg == 0 || !isChain) &&
  407. "Chain dependence via physreg data?");
  408. // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
  409. // emits a copy from the physical register to a virtual register unless
  410. // it requires a cross class copy (cost < 0). That means we are only
  411. // treating "expensive to copy" register dependency as physical register
  412. // dependency. This may change in the future though.
  413. if (Cost >= 0 && !StressSched)
  414. PhysReg = 0;
  415. // If this is a ctrl dep, latency is 1.
  416. unsigned OpLatency = isChain ? 1 : OpSU->Latency;
  417. // Special-case TokenFactor chains as zero-latency.
  418. if(isChain && OpN->getOpcode() == ISD::TokenFactor)
  419. OpLatency = 0;
  420. SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
  421. : SDep(OpSU, SDep::Data, PhysReg);
  422. Dep.setLatency(OpLatency);
  423. if (!isChain && !UnitLatencies) {
  424. computeOperandLatency(OpN, N, i, Dep);
  425. ST.adjustSchedDependency(OpSU, SU, Dep);
  426. }
  427. if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
  428. // Multiple register uses are combined in the same SUnit. For example,
  429. // we could have a set of glued nodes with all their defs consumed by
  430. // another set of glued nodes. Register pressure tracking sees this as
  431. // a single use, so to keep pressure balanced we reduce the defs.
  432. //
  433. // We can't tell (without more book-keeping) if this results from
  434. // glued nodes or duplicate operands. As long as we don't reduce
  435. // NumRegDefsLeft to zero, we handle the common cases well.
  436. --OpSU->NumRegDefsLeft;
  437. }
  438. }
  439. }
  440. }
  441. }
  442. /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
  443. /// are input. This SUnit graph is similar to the SelectionDAG, but
  444. /// excludes nodes that aren't interesting to scheduling, and represents
  445. /// glued together nodes with a single SUnit.
  446. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
  447. // Cluster certain nodes which should be scheduled together.
  448. ClusterNodes();
  449. // Populate the SUnits array.
  450. BuildSchedUnits();
  451. // Compute all the scheduling dependencies between nodes.
  452. AddSchedEdges();
  453. }
  454. // Initialize NumNodeDefs for the current Node's opcode.
  455. void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
  456. // Check for phys reg copy.
  457. if (!Node)
  458. return;
  459. if (!Node->isMachineOpcode()) {
  460. if (Node->getOpcode() == ISD::CopyFromReg)
  461. NodeNumDefs = 1;
  462. else
  463. NodeNumDefs = 0;
  464. return;
  465. }
  466. unsigned POpc = Node->getMachineOpcode();
  467. if (POpc == TargetOpcode::IMPLICIT_DEF) {
  468. // No register need be allocated for this.
  469. NodeNumDefs = 0;
  470. return;
  471. }
  472. if (POpc == TargetOpcode::PATCHPOINT &&
  473. Node->getValueType(0) == MVT::Other) {
  474. // PATCHPOINT is defined to have one result, but it might really have none
  475. // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
  476. // real definition.
  477. NodeNumDefs = 0;
  478. return;
  479. }
  480. unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
  481. // Some instructions define regs that are not represented in the selection DAG
  482. // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
  483. NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
  484. DefIdx = 0;
  485. }
  486. // Construct a RegDefIter for this SUnit and find the first valid value.
  487. ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
  488. const ScheduleDAGSDNodes *SD)
  489. : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
  490. InitNodeNumDefs();
  491. Advance();
  492. }
  493. // Advance to the next valid value defined by the SUnit.
  494. void ScheduleDAGSDNodes::RegDefIter::Advance() {
  495. for (;Node;) { // Visit all glued nodes.
  496. for (;DefIdx < NodeNumDefs; ++DefIdx) {
  497. if (!Node->hasAnyUseOfValue(DefIdx))
  498. continue;
  499. ValueType = Node->getSimpleValueType(DefIdx);
  500. ++DefIdx;
  501. return; // Found a normal regdef.
  502. }
  503. Node = Node->getGluedNode();
  504. if (!Node) {
  505. return; // No values left to visit.
  506. }
  507. InitNodeNumDefs();
  508. }
  509. }
  510. void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
  511. assert(SU->NumRegDefsLeft == 0 && "expect a new node");
  512. for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
  513. assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
  514. ++SU->NumRegDefsLeft;
  515. }
  516. }
  517. void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
  518. SDNode *N = SU->getNode();
  519. // TokenFactor operands are considered zero latency, and some schedulers
  520. // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
  521. // whenever node latency is nonzero.
  522. if (N && N->getOpcode() == ISD::TokenFactor) {
  523. SU->Latency = 0;
  524. return;
  525. }
  526. // Check to see if the scheduler cares about latencies.
  527. if (forceUnitLatencies()) {
  528. SU->Latency = 1;
  529. return;
  530. }
  531. if (!InstrItins || InstrItins->isEmpty()) {
  532. if (N && N->isMachineOpcode() &&
  533. TII->isHighLatencyDef(N->getMachineOpcode()))
  534. SU->Latency = HighLatencyCycles;
  535. else
  536. SU->Latency = 1;
  537. return;
  538. }
  539. // Compute the latency for the node. We use the sum of the latencies for
  540. // all nodes glued together into this SUnit.
  541. SU->Latency = 0;
  542. for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
  543. if (N->isMachineOpcode())
  544. SU->Latency += TII->getInstrLatency(InstrItins, N);
  545. }
  546. void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
  547. unsigned OpIdx, SDep& dep) const{
  548. // Check to see if the scheduler cares about latencies.
  549. if (forceUnitLatencies())
  550. return;
  551. if (dep.getKind() != SDep::Data)
  552. return;
  553. unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
  554. if (Use->isMachineOpcode())
  555. // Adjust the use operand index by num of defs.
  556. OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
  557. int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
  558. if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
  559. !BB->succ_empty()) {
  560. unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
  561. if (TargetRegisterInfo::isVirtualRegister(Reg))
  562. // This copy is a liveout value. It is likely coalesced, so reduce the
  563. // latency so not to penalize the def.
  564. // FIXME: need target specific adjustment here?
  565. Latency = (Latency > 1) ? Latency - 1 : 1;
  566. }
  567. if (Latency >= 0)
  568. dep.setLatency(Latency);
  569. }
  570. void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
  571. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  572. if (!SU->getNode()) {
  573. dbgs() << "PHYS REG COPY\n";
  574. return;
  575. }
  576. SU->getNode()->dump(DAG);
  577. dbgs() << "\n";
  578. SmallVector<SDNode *, 4> GluedNodes;
  579. for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
  580. GluedNodes.push_back(N);
  581. while (!GluedNodes.empty()) {
  582. dbgs() << " ";
  583. GluedNodes.back()->dump(DAG);
  584. dbgs() << "\n";
  585. GluedNodes.pop_back();
  586. }
  587. #endif
  588. }
  589. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  590. void ScheduleDAGSDNodes::dumpSchedule() const {
  591. for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
  592. if (SUnit *SU = Sequence[i])
  593. SU->dump(this);
  594. else
  595. dbgs() << "**** NOOP ****\n";
  596. }
  597. }
  598. #endif
  599. #ifndef NDEBUG
  600. /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
  601. /// their state is consistent with the nodes listed in Sequence.
  602. ///
  603. void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
  604. unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
  605. unsigned Noops = 0;
  606. for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
  607. if (!Sequence[i])
  608. ++Noops;
  609. assert(Sequence.size() - Noops == ScheduledNodes &&
  610. "The number of nodes scheduled doesn't match the expected number!");
  611. }
  612. #endif // NDEBUG
  613. /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
  614. static void
  615. ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
  616. SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
  617. DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
  618. if (!N->getHasDebugValue())
  619. return;
  620. // Opportunistically insert immediate dbg_value uses, i.e. those with source
  621. // order number right after the N.
  622. MachineBasicBlock *BB = Emitter.getBlock();
  623. MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
  624. ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
  625. for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
  626. if (DVs[i]->isInvalidated())
  627. continue;
  628. unsigned DVOrder = DVs[i]->getOrder();
  629. if (!Order || DVOrder == ++Order) {
  630. MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
  631. if (DbgMI) {
  632. Orders.push_back(std::make_pair(DVOrder, DbgMI));
  633. BB->insert(InsertPos, DbgMI);
  634. }
  635. DVs[i]->setIsInvalidated();
  636. }
  637. }
  638. }
  639. // ProcessSourceNode - Process nodes with source order numbers. These are added
  640. // to a vector which EmitSchedule uses to determine how to insert dbg_value
  641. // instructions in the right order.
  642. static void
  643. ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
  644. DenseMap<SDValue, unsigned> &VRBaseMap,
  645. SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
  646. SmallSet<unsigned, 8> &Seen) {
  647. unsigned Order = N->getIROrder();
  648. if (!Order || !Seen.insert(Order).second) {
  649. // Process any valid SDDbgValues even if node does not have any order
  650. // assigned.
  651. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
  652. return;
  653. }
  654. MachineBasicBlock *BB = Emitter.getBlock();
  655. if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
  656. // Fast-isel may have inserted some instructions, in which case the
  657. // BB->back().isPHI() test will not fire when we want it to.
  658. std::prev(Emitter.getInsertPos())->isPHI()) {
  659. // Did not insert any instruction.
  660. Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
  661. return;
  662. }
  663. Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos())));
  664. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
  665. }
  666. void ScheduleDAGSDNodes::
  667. EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
  668. MachineBasicBlock::iterator InsertPos) {
  669. for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
  670. I != E; ++I) {
  671. if (I->isCtrl()) continue; // ignore chain preds
  672. if (I->getSUnit()->CopyDstRC) {
  673. // Copy to physical register.
  674. DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
  675. assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
  676. // Find the destination physical register.
  677. unsigned Reg = 0;
  678. for (SUnit::const_succ_iterator II = SU->Succs.begin(),
  679. EE = SU->Succs.end(); II != EE; ++II) {
  680. if (II->isCtrl()) continue; // ignore chain preds
  681. if (II->getReg()) {
  682. Reg = II->getReg();
  683. break;
  684. }
  685. }
  686. BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
  687. .addReg(VRI->second);
  688. } else {
  689. // Copy from physical register.
  690. assert(I->getReg() && "Unknown physical register!");
  691. unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
  692. bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
  693. (void)isNew; // Silence compiler warning.
  694. assert(isNew && "Node emitted out of order - early");
  695. BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
  696. .addReg(I->getReg());
  697. }
  698. break;
  699. }
  700. }
  701. /// EmitSchedule - Emit the machine code in scheduled order. Return the new
  702. /// InsertPos and MachineBasicBlock that contains this insertion
  703. /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
  704. /// not necessarily refer to returned BB. The emitter may split blocks.
  705. MachineBasicBlock *ScheduleDAGSDNodes::
  706. EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
  707. InstrEmitter Emitter(BB, InsertPos);
  708. DenseMap<SDValue, unsigned> VRBaseMap;
  709. DenseMap<SUnit*, unsigned> CopyVRBaseMap;
  710. SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
  711. SmallSet<unsigned, 8> Seen;
  712. bool HasDbg = DAG->hasDebugValues();
  713. // If this is the first BB, emit byval parameter dbg_value's.
  714. if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
  715. SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
  716. SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
  717. for (; PDI != PDE; ++PDI) {
  718. MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
  719. if (DbgMI)
  720. BB->insert(InsertPos, DbgMI);
  721. }
  722. }
  723. for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
  724. SUnit *SU = Sequence[i];
  725. if (!SU) {
  726. // Null SUnit* is a noop.
  727. TII->insertNoop(*Emitter.getBlock(), InsertPos);
  728. continue;
  729. }
  730. // For pre-regalloc scheduling, create instructions corresponding to the
  731. // SDNode and any glued SDNodes and append them to the block.
  732. if (!SU->getNode()) {
  733. // Emit a copy.
  734. EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
  735. continue;
  736. }
  737. SmallVector<SDNode *, 4> GluedNodes;
  738. for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
  739. GluedNodes.push_back(N);
  740. while (!GluedNodes.empty()) {
  741. SDNode *N = GluedNodes.back();
  742. Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
  743. VRBaseMap);
  744. // Remember the source order of the inserted instruction.
  745. if (HasDbg)
  746. ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
  747. GluedNodes.pop_back();
  748. }
  749. Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
  750. VRBaseMap);
  751. // Remember the source order of the inserted instruction.
  752. if (HasDbg)
  753. ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
  754. Seen);
  755. }
  756. // Insert all the dbg_values which have not already been inserted in source
  757. // order sequence.
  758. if (HasDbg) {
  759. MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
  760. // Sort the source order instructions and use the order to insert debug
  761. // values.
  762. std::sort(Orders.begin(), Orders.end(), less_first());
  763. SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
  764. SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
  765. // Now emit the rest according to source order.
  766. unsigned LastOrder = 0;
  767. for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
  768. unsigned Order = Orders[i].first;
  769. MachineInstr *MI = Orders[i].second;
  770. // Insert all SDDbgValue's whose order(s) are before "Order".
  771. if (!MI)
  772. continue;
  773. for (; DI != DE &&
  774. (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
  775. if ((*DI)->isInvalidated())
  776. continue;
  777. MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
  778. if (DbgMI) {
  779. if (!LastOrder)
  780. // Insert to start of the BB (after PHIs).
  781. BB->insert(BBBegin, DbgMI);
  782. else {
  783. // Insert at the instruction, which may be in a different
  784. // block, if the block was split by a custom inserter.
  785. MachineBasicBlock::iterator Pos = MI;
  786. MI->getParent()->insert(Pos, DbgMI);
  787. }
  788. }
  789. }
  790. LastOrder = Order;
  791. }
  792. // Add trailing DbgValue's before the terminator. FIXME: May want to add
  793. // some of them before one or more conditional branches?
  794. SmallVector<MachineInstr*, 8> DbgMIs;
  795. while (DI != DE) {
  796. if (!(*DI)->isInvalidated())
  797. if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
  798. DbgMIs.push_back(DbgMI);
  799. ++DI;
  800. }
  801. MachineBasicBlock *InsertBB = Emitter.getBlock();
  802. MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
  803. InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
  804. }
  805. InsertPos = Emitter.getInsertPos();
  806. return Emitter.getBlock();
  807. }
  808. /// Return the basic block label.
  809. std::string ScheduleDAGSDNodes::getDAGName() const {
  810. return "sunit-dag." + BB->getFullName();
  811. }