LLVM  8.0.1
MachineScheduler.cpp
Go to the documentation of this file.
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/CodeGen/Passes.h"
51 #include "llvm/Config/llvm-config.h"
52 #include "llvm/MC/LaneBitmask.h"
53 #include "llvm/Pass.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <iterator>
65 #include <limits>
66 #include <memory>
67 #include <string>
68 #include <tuple>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "machine-scheduler"
75 
76 namespace llvm {
77 
78 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
79  cl::desc("Force top-down list scheduling"));
80 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
81  cl::desc("Force bottom-up list scheduling"));
83 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
84  cl::desc("Print critical path length to stdout"));
85 
86 } // end namespace llvm
87 
88 #ifndef NDEBUG
89 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
90  cl::desc("Pop up a window to show MISched dags after they are processed"));
91 
92 /// In some situations a few uninteresting nodes depend on nearly all other
93 /// nodes in the graph, provide a cutoff to hide them.
94 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
95  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
96 
97 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
98  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
99 
100 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
101  cl::desc("Only schedule this function"));
102 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
103  cl::desc("Only schedule this MBB#"));
104 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
105  cl::desc("Print schedule DAGs"));
106 #else
107 static const bool ViewMISchedDAGs = false;
108 static const bool PrintDAGs = false;
109 #endif // NDEBUG
110 
111 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
112 /// size of the ready lists.
113 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
114  cl::desc("Limit ready list to N instructions"), cl::init(256));
115 
116 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
117  cl::desc("Enable register pressure scheduling."), cl::init(true));
118 
119 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
120  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
121 
122 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
123  cl::desc("Enable memop clustering."),
124  cl::init(true));
125 
126 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
127  cl::desc("Verify machine instrs before and after machine scheduling"));
128 
129 // DAG subtrees must have at least this many nodes.
130 static const unsigned MinSubtreeSize = 8;
131 
132 // Pin the vtables to this file.
133 void MachineSchedStrategy::anchor() {}
134 
135 void ScheduleDAGMutation::anchor() {}
136 
137 //===----------------------------------------------------------------------===//
138 // Machine Instruction Scheduling Pass and Registry
139 //===----------------------------------------------------------------------===//
140 
142  RegClassInfo = new RegisterClassInfo();
143 }
144 
146  delete RegClassInfo;
147 }
148 
149 namespace {
150 
151 /// Base class for a machine scheduler class that can run at any point.
152 class MachineSchedulerBase : public MachineSchedContext,
153  public MachineFunctionPass {
154 public:
155  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
156 
157  void print(raw_ostream &O, const Module* = nullptr) const override;
158 
159 protected:
160  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
161 };
162 
163 /// MachineScheduler runs after coalescing and before register allocation.
164 class MachineScheduler : public MachineSchedulerBase {
165 public:
166  MachineScheduler();
167 
168  void getAnalysisUsage(AnalysisUsage &AU) const override;
169 
170  bool runOnMachineFunction(MachineFunction&) override;
171 
172  static char ID; // Class identification, replacement for typeinfo
173 
174 protected:
175  ScheduleDAGInstrs *createMachineScheduler();
176 };
177 
178 /// PostMachineScheduler runs after shortly before code emission.
179 class PostMachineScheduler : public MachineSchedulerBase {
180 public:
181  PostMachineScheduler();
182 
183  void getAnalysisUsage(AnalysisUsage &AU) const override;
184 
185  bool runOnMachineFunction(MachineFunction&) override;
186 
187  static char ID; // Class identification, replacement for typeinfo
188 
189 protected:
190  ScheduleDAGInstrs *createPostMachineScheduler();
191 };
192 
193 } // end anonymous namespace
194 
195 char MachineScheduler::ID = 0;
196 
198 
199 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
200  "Machine Instruction Scheduler", false, false)
205 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
207 
208 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
210 }
211 
212 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
213  AU.setPreservesCFG();
218  AU.addRequired<SlotIndexes>();
223 }
224 
225 char PostMachineScheduler::ID = 0;
226 
228 
229 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
230  "PostRA Machine Instruction Scheduler", false, false)
231 
232 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
234 }
235 
236 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
237  AU.setPreservesCFG();
242 }
243 
246 
247 /// A dummy default scheduler factory indicates whether the scheduler
248 /// is overridden on the command line.
250  return nullptr;
251 }
252 
253 /// MachineSchedOpt allows command line selection of the scheduler.
256 MachineSchedOpt("misched",
258  cl::desc("Machine instruction scheduler to use"));
259 
261 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
263 
265  "enable-misched",
266  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
267  cl::Hidden);
268 
270  "enable-post-misched",
271  cl::desc("Enable the post-ra machine instruction scheduling pass."),
272  cl::init(true), cl::Hidden);
273 
274 /// Decrement this iterator until reaching the top or a non-debug instr.
278  assert(I != Beg && "reached the top of the region, cannot decrement");
279  while (--I != Beg) {
280  if (!I->isDebugInstr())
281  break;
282  }
283  return I;
284 }
285 
286 /// Non-const version.
292 }
293 
294 /// If this iterator is a debug value, increment until reaching the End or a
295 /// non-debug instruction.
299  for(; I != End; ++I) {
300  if (!I->isDebugInstr())
301  break;
302  }
303  return I;
304 }
305 
306 /// Non-const version.
312 }
313 
314 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
315 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
316  // Select the scheduler, or set the default.
317  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
318  if (Ctor != useDefaultMachineSched)
319  return Ctor(this);
320 
321  // Get the default scheduler set by the target for this function.
322  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
323  if (Scheduler)
324  return Scheduler;
325 
326  // Default to GenericScheduler.
327  return createGenericSchedLive(this);
328 }
329 
330 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
331 /// the caller. We don't have a command line option to override the postRA
332 /// scheduler. The Target must configure it.
333 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
334  // Get the postRA scheduler set by the target for this function.
335  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
336  if (Scheduler)
337  return Scheduler;
338 
339  // Default to GenericScheduler.
340  return createGenericSchedPostRA(this);
341 }
342 
343 /// Top-level MachineScheduler pass driver.
344 ///
345 /// Visit blocks in function order. Divide each block into scheduling regions
346 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
347 /// consistent with the DAG builder, which traverses the interior of the
348 /// scheduling regions bottom-up.
349 ///
350 /// This design avoids exposing scheduling boundaries to the DAG builder,
351 /// simplifying the DAG builder's support for "special" target instructions.
352 /// At the same time the design allows target schedulers to operate across
353 /// scheduling boundaries, for example to bundle the boundary instructions
354 /// without reordering them. This creates complexity, because the target
355 /// scheduler must update the RegionBegin and RegionEnd positions cached by
356 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
357 /// design would be to split blocks at scheduling boundaries, but LLVM has a
358 /// general bias against block splitting purely for implementation simplicity.
359 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
360  if (skipFunction(mf.getFunction()))
361  return false;
362 
363  if (EnableMachineSched.getNumOccurrences()) {
364  if (!EnableMachineSched)
365  return false;
366  } else if (!mf.getSubtarget().enableMachineScheduler())
367  return false;
368 
369  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
370 
371  // Initialize the context of the pass.
372  MF = &mf;
373  MLI = &getAnalysis<MachineLoopInfo>();
374  MDT = &getAnalysis<MachineDominatorTree>();
375  PassConfig = &getAnalysis<TargetPassConfig>();
376  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
377 
378  LIS = &getAnalysis<LiveIntervals>();
379 
380  if (VerifyScheduling) {
381  LLVM_DEBUG(LIS->dump());
382  MF->verify(this, "Before machine scheduling.");
383  }
384  RegClassInfo->runOnMachineFunction(*MF);
385 
386  // Instantiate the selected scheduler for this target, function, and
387  // optimization level.
388  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
389  scheduleRegions(*Scheduler, false);
390 
391  LLVM_DEBUG(LIS->dump());
392  if (VerifyScheduling)
393  MF->verify(this, "After machine scheduling.");
394  return true;
395 }
396 
397 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
398  if (skipFunction(mf.getFunction()))
399  return false;
400 
401  if (EnablePostRAMachineSched.getNumOccurrences()) {
402  if (!EnablePostRAMachineSched)
403  return false;
404  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
405  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
406  return false;
407  }
408  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
409 
410  // Initialize the context of the pass.
411  MF = &mf;
412  MLI = &getAnalysis<MachineLoopInfo>();
413  PassConfig = &getAnalysis<TargetPassConfig>();
414 
415  if (VerifyScheduling)
416  MF->verify(this, "Before post machine scheduling.");
417 
418  // Instantiate the selected scheduler for this target, function, and
419  // optimization level.
420  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
421  scheduleRegions(*Scheduler, true);
422 
423  if (VerifyScheduling)
424  MF->verify(this, "After post machine scheduling.");
425  return true;
426 }
427 
428 /// Return true of the given instruction should not be included in a scheduling
429 /// region.
430 ///
431 /// MachineScheduler does not currently support scheduling across calls. To
432 /// handle calls, the DAG builder needs to be modified to create register
433 /// anti/output dependencies on the registers clobbered by the call's regmask
434 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
435 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
436 /// the boundary, but there would be no benefit to postRA scheduling across
437 /// calls this late anyway.
439  MachineBasicBlock *MBB,
440  MachineFunction *MF,
441  const TargetInstrInfo *TII) {
442  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
443 }
444 
445 /// A region of an MBB for scheduling.
446 namespace {
447 struct SchedRegion {
448  /// RegionBegin is the first instruction in the scheduling region, and
449  /// RegionEnd is either MBB->end() or the scheduling boundary after the
450  /// last instruction in the scheduling region. These iterators cannot refer
451  /// to instructions outside of the identified scheduling region because
452  /// those may be reordered before scheduling this region.
453  MachineBasicBlock::iterator RegionBegin;
454  MachineBasicBlock::iterator RegionEnd;
455  unsigned NumRegionInstrs;
456 
458  unsigned N) :
459  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
460 };
461 } // end anonymous namespace
462 
464 
465 static void
467  MBBRegionsVector &Regions,
468  bool RegionsTopDown) {
469  MachineFunction *MF = MBB->getParent();
470  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
471 
472  MachineBasicBlock::iterator I = nullptr;
473  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
474  RegionEnd != MBB->begin(); RegionEnd = I) {
475 
476  // Avoid decrementing RegionEnd for blocks with no terminator.
477  if (RegionEnd != MBB->end() ||
478  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
479  --RegionEnd;
480  }
481 
482  // The next region starts above the previous region. Look backward in the
483  // instruction stream until we find the nearest boundary.
484  unsigned NumRegionInstrs = 0;
485  I = RegionEnd;
486  for (;I != MBB->begin(); --I) {
487  MachineInstr &MI = *std::prev(I);
488  if (isSchedBoundary(&MI, &*MBB, MF, TII))
489  break;
490  if (!MI.isDebugInstr())
491  // MBB::size() uses instr_iterator to count. Here we need a bundle to
492  // count as a single instruction.
493  ++NumRegionInstrs;
494  }
495 
496  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
497  }
498 
499  if (RegionsTopDown)
500  std::reverse(Regions.begin(), Regions.end());
501 }
502 
503 /// Main driver for both MachineScheduler and PostMachineScheduler.
504 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
505  bool FixKillFlags) {
506  // Visit all machine basic blocks.
507  //
508  // TODO: Visit blocks in global postorder or postorder within the bottom-up
509  // loop tree. Then we can optionally compute global RegPressure.
510  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
511  MBB != MBBEnd; ++MBB) {
512 
513  Scheduler.startBlock(&*MBB);
514 
515 #ifndef NDEBUG
516  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
517  continue;
518  if (SchedOnlyBlock.getNumOccurrences()
519  && (int)SchedOnlyBlock != MBB->getNumber())
520  continue;
521 #endif
522 
523  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
524  // points to the scheduling boundary at the bottom of the region. The DAG
525  // does not include RegionEnd, but the region does (i.e. the next
526  // RegionEnd is above the previous RegionBegin). If the current block has
527  // no terminator then RegionEnd == MBB->end() for the bottom region.
528  //
529  // All the regions of MBB are first found and stored in MBBRegions, which
530  // will be processed (MBB) top-down if initialized with true.
531  //
532  // The Scheduler may insert instructions during either schedule() or
533  // exitRegion(), even for empty regions. So the local iterators 'I' and
534  // 'RegionEnd' are invalid across these calls. Instructions must not be
535  // added to other regions than the current one without updating MBBRegions.
536 
537  MBBRegionsVector MBBRegions;
538  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
539  for (MBBRegionsVector::iterator R = MBBRegions.begin();
540  R != MBBRegions.end(); ++R) {
541  MachineBasicBlock::iterator I = R->RegionBegin;
542  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
543  unsigned NumRegionInstrs = R->NumRegionInstrs;
544 
545  // Notify the scheduler of the region, even if we may skip scheduling
546  // it. Perhaps it still needs to be bundled.
547  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
548 
549  // Skip empty scheduling regions (0 or 1 schedulable instructions).
550  if (I == RegionEnd || I == std::prev(RegionEnd)) {
551  // Close the current region. Bundle the terminator if needed.
552  // This invalidates 'RegionEnd' and 'I'.
553  Scheduler.exitRegion();
554  continue;
555  }
556  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
557  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
558  << " " << MBB->getName() << "\n From: " << *I
559  << " To: ";
560  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
561  else dbgs() << "End";
562  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
564  errs() << MF->getName();
565  errs() << ":%bb. " << MBB->getNumber();
566  errs() << " " << MBB->getName() << " \n";
567  }
568 
569  // Schedule a region: possibly reorder instructions.
570  // This invalidates the original region iterators.
571  Scheduler.schedule();
572 
573  // Close the current region.
574  Scheduler.exitRegion();
575  }
576  Scheduler.finishBlock();
577  // FIXME: Ideally, no further passes should rely on kill flags. However,
578  // thumb2 size reduction is currently an exception, so the PostMIScheduler
579  // needs to do this.
580  if (FixKillFlags)
581  Scheduler.fixupKills(*MBB);
582  }
583  Scheduler.finalizeSchedule();
584 }
585 
586 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
587  // unimplemented
588 }
589 
590 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
592  dbgs() << "Queue " << Name << ": ";
593  for (const SUnit *SU : Queue)
594  dbgs() << SU->NodeNum << " ";
595  dbgs() << "\n";
596 }
597 #endif
598 
599 //===----------------------------------------------------------------------===//
600 // ScheduleDAGMI - Basic machine instruction scheduling. This is
601 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
602 // virtual registers.
603 // ===----------------------------------------------------------------------===/
604 
605 // Provide a vtable anchor.
607 
608 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
609  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
610 }
611 
612 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
613  if (SuccSU != &ExitSU) {
614  // Do not use WillCreateCycle, it assumes SD scheduling.
615  // If Pred is reachable from Succ, then the edge creates a cycle.
616  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
617  return false;
618  Topo.AddPred(SuccSU, PredDep.getSUnit());
619  }
620  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
621  // Return true regardless of whether a new edge needed to be inserted.
622  return true;
623 }
624 
625 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
626 /// NumPredsLeft reaches zero, release the successor node.
627 ///
628 /// FIXME: Adjust SuccSU height based on MinLatency.
629 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
630  SUnit *SuccSU = SuccEdge->getSUnit();
631 
632  if (SuccEdge->isWeak()) {
633  --SuccSU->WeakPredsLeft;
634  if (SuccEdge->isCluster())
635  NextClusterSucc = SuccSU;
636  return;
637  }
638 #ifndef NDEBUG
639  if (SuccSU->NumPredsLeft == 0) {
640  dbgs() << "*** Scheduling failed! ***\n";
641  dumpNode(*SuccSU);
642  dbgs() << " has been released too many times!\n";
643  llvm_unreachable(nullptr);
644  }
645 #endif
646  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
647  // CurrCycle may have advanced since then.
648  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
649  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
650 
651  --SuccSU->NumPredsLeft;
652  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
653  SchedImpl->releaseTopNode(SuccSU);
654 }
655 
656 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
658  for (SDep &Succ : SU->Succs)
659  releaseSucc(SU, &Succ);
660 }
661 
662 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
663 /// NumSuccsLeft reaches zero, release the predecessor node.
664 ///
665 /// FIXME: Adjust PredSU height based on MinLatency.
666 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
667  SUnit *PredSU = PredEdge->getSUnit();
668 
669  if (PredEdge->isWeak()) {
670  --PredSU->WeakSuccsLeft;
671  if (PredEdge->isCluster())
672  NextClusterPred = PredSU;
673  return;
674  }
675 #ifndef NDEBUG
676  if (PredSU->NumSuccsLeft == 0) {
677  dbgs() << "*** Scheduling failed! ***\n";
678  dumpNode(*PredSU);
679  dbgs() << " has been released too many times!\n";
680  llvm_unreachable(nullptr);
681  }
682 #endif
683  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
684  // CurrCycle may have advanced since then.
685  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
686  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
687 
688  --PredSU->NumSuccsLeft;
689  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
690  SchedImpl->releaseBottomNode(PredSU);
691 }
692 
693 /// releasePredecessors - Call releasePred on each of SU's predecessors.
695  for (SDep &Pred : SU->Preds)
696  releasePred(SU, &Pred);
697 }
698 
701  SchedImpl->enterMBB(bb);
702 }
703 
705  SchedImpl->leaveMBB();
707 }
708 
709 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
710 /// crossing a scheduling boundary. [begin, end) includes all instructions in
711 /// the region, including the boundary itself and single-instruction regions
712 /// that don't get scheduled.
716  unsigned regioninstrs)
717 {
718  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
719 
720  SchedImpl->initPolicy(begin, end, regioninstrs);
721 }
722 
723 /// This is normally called from the main scheduler loop but may also be invoked
724 /// by the scheduling strategy to perform additional code motion.
727  // Advance RegionBegin if the first instruction moves down.
728  if (&*RegionBegin == MI)
729  ++RegionBegin;
730 
731  // Update the instruction stream.
732  BB->splice(InsertPos, BB, MI);
733 
734  // Update LiveIntervals
735  if (LIS)
736  LIS->handleMove(*MI, /*UpdateFlags=*/true);
737 
738  // Recede RegionBegin if an instruction moves above the first.
739  if (RegionBegin == InsertPos)
740  RegionBegin = MI;
741 }
742 
744 #ifndef NDEBUG
745  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
746  CurrentTop = CurrentBottom;
747  return false;
748  }
749  ++NumInstrsScheduled;
750 #endif
751  return true;
752 }
753 
754 /// Per-region scheduling driver, called back from
755 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
756 /// does not consider liveness or register pressure. It is useful for PostRA
757 /// scheduling and potentially other custom schedulers.
759  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
760  LLVM_DEBUG(SchedImpl->dumpPolicy());
761 
762  // Build the DAG.
763  buildSchedGraph(AA);
764 
765  Topo.InitDAGTopologicalSorting();
766 
767  postprocessDAG();
768 
769  SmallVector<SUnit*, 8> TopRoots, BotRoots;
770  findRootsAndBiasEdges(TopRoots, BotRoots);
771 
772  LLVM_DEBUG(dump());
773  if (PrintDAGs) dump();
774  if (ViewMISchedDAGs) viewGraph();
775 
776  // Initialize the strategy before modifying the DAG.
777  // This may initialize a DFSResult to be used for queue priority.
778  SchedImpl->initialize(this);
779 
780  // Initialize ready queues now that the DAG and priority data are finalized.
781  initQueues(TopRoots, BotRoots);
782 
783  bool IsTopNode = false;
784  while (true) {
785  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
786  SUnit *SU = SchedImpl->pickNode(IsTopNode);
787  if (!SU) break;
788 
789  assert(!SU->isScheduled && "Node already scheduled");
790  if (!checkSchedLimit())
791  break;
792 
793  MachineInstr *MI = SU->getInstr();
794  if (IsTopNode) {
795  assert(SU->isTopReady() && "node still has unscheduled dependencies");
796  if (&*CurrentTop == MI)
797  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
798  else
799  moveInstruction(MI, CurrentTop);
800  } else {
801  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
803  priorNonDebug(CurrentBottom, CurrentTop);
804  if (&*priorII == MI)
805  CurrentBottom = priorII;
806  else {
807  if (&*CurrentTop == MI)
808  CurrentTop = nextIfDebug(++CurrentTop, priorII);
809  moveInstruction(MI, CurrentBottom);
810  CurrentBottom = MI;
811  }
812  }
813  // Notify the scheduling strategy before updating the DAG.
814  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
815  // runs, it can then use the accurate ReadyCycle time to determine whether
816  // newly released nodes can move to the readyQ.
817  SchedImpl->schedNode(SU, IsTopNode);
818 
819  updateQueues(SU, IsTopNode);
820  }
821  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
822 
823  placeDebugValues();
824 
825  LLVM_DEBUG({
826  dbgs() << "*** Final schedule for "
827  << printMBBReference(*begin()->getParent()) << " ***\n";
828  dumpSchedule();
829  dbgs() << '\n';
830  });
831 }
832 
833 /// Apply each ScheduleDAGMutation step in order.
835  for (auto &m : Mutations)
836  m->apply(this);
837 }
838 
839 void ScheduleDAGMI::
841  SmallVectorImpl<SUnit*> &BotRoots) {
842  for (SUnit &SU : SUnits) {
843  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
844 
845  // Order predecessors so DFSResult follows the critical path.
846  SU.biasCriticalPath();
847 
848  // A SUnit is ready to top schedule if it has no predecessors.
849  if (!SU.NumPredsLeft)
850  TopRoots.push_back(&SU);
851  // A SUnit is ready to bottom schedule if it has no successors.
852  if (!SU.NumSuccsLeft)
853  BotRoots.push_back(&SU);
854  }
855  ExitSU.biasCriticalPath();
856 }
857 
858 /// Identify DAG roots and setup scheduler queues.
860  ArrayRef<SUnit*> BotRoots) {
861  NextClusterSucc = nullptr;
862  NextClusterPred = nullptr;
863 
864  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
865  //
866  // Nodes with unreleased weak edges can still be roots.
867  // Release top roots in forward order.
868  for (SUnit *SU : TopRoots)
869  SchedImpl->releaseTopNode(SU);
870 
871  // Release bottom roots in reverse order so the higher priority nodes appear
872  // first. This is more natural and slightly more efficient.
874  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
875  SchedImpl->releaseBottomNode(*I);
876  }
877 
878  releaseSuccessors(&EntrySU);
879  releasePredecessors(&ExitSU);
880 
881  SchedImpl->registerRoots();
882 
883  // Advance past initial DebugValues.
884  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
885  CurrentBottom = RegionEnd;
886 }
887 
888 /// Update scheduler queues after scheduling an instruction.
889 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
890  // Release dependent instructions for scheduling.
891  if (IsTopNode)
892  releaseSuccessors(SU);
893  else
894  releasePredecessors(SU);
895 
896  SU->isScheduled = true;
897 }
898 
899 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
901  // If first instruction was a DBG_VALUE then put it back.
902  if (FirstDbgValue) {
903  BB->splice(RegionBegin, BB, FirstDbgValue);
904  RegionBegin = FirstDbgValue;
905  }
906 
907  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
908  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
909  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
910  MachineInstr *DbgValue = P.first;
911  MachineBasicBlock::iterator OrigPrevMI = P.second;
912  if (&*RegionBegin == DbgValue)
913  ++RegionBegin;
914  BB->splice(++OrigPrevMI, BB, DbgValue);
915  if (OrigPrevMI == std::prev(RegionEnd))
916  RegionEnd = DbgValue;
917  }
918  DbgValues.clear();
919  FirstDbgValue = nullptr;
920 }
921 
922 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
924  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
925  if (SUnit *SU = getSUnit(&(*MI)))
926  dumpNode(*SU);
927  else
928  dbgs() << "Missing SUnit\n";
929  }
930 }
931 #endif
932 
933 //===----------------------------------------------------------------------===//
934 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
935 // preservation.
936 //===----------------------------------------------------------------------===//
937 
939  delete DFSResult;
940 }
941 
943  const MachineInstr &MI = *SU.getInstr();
944  for (const MachineOperand &MO : MI.operands()) {
945  if (!MO.isReg())
946  continue;
947  if (!MO.readsReg())
948  continue;
949  if (TrackLaneMasks && !MO.isUse())
950  continue;
951 
952  unsigned Reg = MO.getReg();
954  continue;
955 
956  // Ignore re-defs.
957  if (TrackLaneMasks) {
958  bool FoundDef = false;
959  for (const MachineOperand &MO2 : MI.operands()) {
960  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
961  FoundDef = true;
962  break;
963  }
964  }
965  if (FoundDef)
966  continue;
967  }
968 
969  // Record this local VReg use.
970  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
971  for (; UI != VRegUses.end(); ++UI) {
972  if (UI->SU == &SU)
973  break;
974  }
975  if (UI == VRegUses.end())
976  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
977  }
978 }
979 
980 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
981 /// crossing a scheduling boundary. [begin, end) includes all instructions in
982 /// the region, including the boundary itself and single-instruction regions
983 /// that don't get scheduled.
987  unsigned regioninstrs)
988 {
989  // ScheduleDAGMI initializes SchedImpl's per-region policy.
990  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
991 
992  // For convenience remember the end of the liveness region.
993  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
994 
995  SUPressureDiffs.clear();
996 
997  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
998  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
999 
1000  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1001  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1002 }
1003 
1004 // Setup the register pressure trackers for the top scheduled top and bottom
1005 // scheduled regions.
1007  VRegUses.clear();
1008  VRegUses.setUniverse(MRI.getNumVirtRegs());
1009  for (SUnit &SU : SUnits)
1010  collectVRegUses(SU);
1011 
1012  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1013  ShouldTrackLaneMasks, false);
1014  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1015  ShouldTrackLaneMasks, false);
1016 
1017  // Close the RPTracker to finalize live ins.
1018  RPTracker.closeRegion();
1019 
1020  LLVM_DEBUG(RPTracker.dump());
1021 
1022  // Initialize the live ins and live outs.
1023  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1024  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1025 
1026  // Close one end of the tracker so we can call
1027  // getMaxUpward/DownwardPressureDelta before advancing across any
1028  // instructions. This converts currently live regs into live ins/outs.
1029  TopRPTracker.closeTop();
1030  BotRPTracker.closeBottom();
1031 
1032  BotRPTracker.initLiveThru(RPTracker);
1033  if (!BotRPTracker.getLiveThru().empty()) {
1034  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1035  LLVM_DEBUG(dbgs() << "Live Thru: ";
1036  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1037  };
1038 
1039  // For each live out vreg reduce the pressure change associated with other
1040  // uses of the same vreg below the live-out reaching def.
1041  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1042 
1043  // Account for liveness generated by the region boundary.
1044  if (LiveRegionEnd != RegionEnd) {
1046  BotRPTracker.recede(&LiveUses);
1047  updatePressureDiffs(LiveUses);
1048  }
1049 
1050  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1051  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1052  dbgs() << "Bottom Pressure:\n";
1053  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1054 
1055  assert((BotRPTracker.getPos() == RegionEnd ||
1056  (RegionEnd->isDebugInstr() &&
1057  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1058  "Can't find the region bottom");
1059 
1060  // Cache the list of excess pressure sets in this region. This will also track
1061  // the max pressure in the scheduled code for these sets.
1062  RegionCriticalPSets.clear();
1063  const std::vector<unsigned> &RegionPressure =
1064  RPTracker.getPressure().MaxSetPressure;
1065  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1066  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1067  if (RegionPressure[i] > Limit) {
1068  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1069  << " Actual " << RegionPressure[i] << "\n");
1070  RegionCriticalPSets.push_back(PressureChange(i));
1071  }
1072  }
1073  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1074  for (const PressureChange &RCPS
1075  : RegionCriticalPSets) dbgs()
1076  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1077  dbgs() << "\n");
1078 }
1079 
1082  const std::vector<unsigned> &NewMaxPressure) {
1083  const PressureDiff &PDiff = getPressureDiff(SU);
1084  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1085  for (const PressureChange &PC : PDiff) {
1086  if (!PC.isValid())
1087  break;
1088  unsigned ID = PC.getPSet();
1089  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1090  ++CritIdx;
1091  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1092  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1093  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1094  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1095  }
1096  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1097  if (NewMaxPressure[ID] >= Limit - 2) {
1098  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1099  << NewMaxPressure[ID]
1100  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1101  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1102  << " livethru)\n");
1103  }
1104  }
1105 }
1106 
1107 /// Update the PressureDiff array for liveness after scheduling this
1108 /// instruction.
1110  ArrayRef<RegisterMaskPair> LiveUses) {
1111  for (const RegisterMaskPair &P : LiveUses) {
1112  unsigned Reg = P.RegUnit;
1113  /// FIXME: Currently assuming single-use physregs.
1114  if (!TRI->isVirtualRegister(Reg))
1115  continue;
1116 
1117  if (ShouldTrackLaneMasks) {
1118  // If the register has just become live then other uses won't change
1119  // this fact anymore => decrement pressure.
1120  // If the register has just become dead then other uses make it come
1121  // back to life => increment pressure.
1122  bool Decrement = P.LaneMask.any();
1123 
1124  for (const VReg2SUnit &V2SU
1125  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1126  SUnit &SU = *V2SU.SU;
1127  if (SU.isScheduled || &SU == &ExitSU)
1128  continue;
1129 
1130  PressureDiff &PDiff = getPressureDiff(&SU);
1131  PDiff.addPressureChange(Reg, Decrement, &MRI);
1132  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1133  << printReg(Reg, TRI) << ':'
1134  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1135  dbgs() << " to "; PDiff.dump(*TRI););
1136  }
1137  } else {
1138  assert(P.LaneMask.any());
1139  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1140  // This may be called before CurrentBottom has been initialized. However,
1141  // BotRPTracker must have a valid position. We want the value live into the
1142  // instruction or live out of the block, so ask for the previous
1143  // instruction's live-out.
1144  const LiveInterval &LI = LIS->getInterval(Reg);
1145  VNInfo *VNI;
1147  nextIfDebug(BotRPTracker.getPos(), BB->end());
1148  if (I == BB->end())
1149  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1150  else {
1151  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1152  VNI = LRQ.valueIn();
1153  }
1154  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1155  assert(VNI && "No live value at use.");
1156  for (const VReg2SUnit &V2SU
1157  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1158  SUnit *SU = V2SU.SU;
1159  // If this use comes before the reaching def, it cannot be a last use,
1160  // so decrease its pressure change.
1161  if (!SU->isScheduled && SU != &ExitSU) {
1162  LiveQueryResult LRQ =
1163  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1164  if (LRQ.valueIn() == VNI) {
1165  PressureDiff &PDiff = getPressureDiff(SU);
1166  PDiff.addPressureChange(Reg, true, &MRI);
1167  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1168  << *SU->getInstr();
1169  dbgs() << " to "; PDiff.dump(*TRI););
1170  }
1171  }
1172  }
1173  }
1174  }
1175 }
1176 
1178 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1179  if (EntrySU.getInstr() != nullptr)
1180  dumpNodeAll(EntrySU);
1181  for (const SUnit &SU : SUnits) {
1182  dumpNodeAll(SU);
1183  if (ShouldTrackPressure) {
1184  dbgs() << " Pressure Diff : ";
1185  getPressureDiff(&SU).dump(*TRI);
1186  }
1187  dbgs() << " Single Issue : ";
1188  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1189  SchedModel.mustEndGroup(SU.getInstr()))
1190  dbgs() << "true;";
1191  else
1192  dbgs() << "false;";
1193  dbgs() << '\n';
1194  }
1195  if (ExitSU.getInstr() != nullptr)
1196  dumpNodeAll(ExitSU);
1197 #endif
1198 }
1199 
1200 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1201 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1202 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1203 ///
1204 /// This is a skeletal driver, with all the functionality pushed into helpers,
1205 /// so that it can be easily extended by experimental schedulers. Generally,
1206 /// implementing MachineSchedStrategy should be sufficient to implement a new
1207 /// scheduling algorithm. However, if a scheduler further subclasses
1208 /// ScheduleDAGMILive then it will want to override this virtual method in order
1209 /// to update any specialized state.
1211  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1212  LLVM_DEBUG(SchedImpl->dumpPolicy());
1213  buildDAGWithRegPressure();
1214 
1215  Topo.InitDAGTopologicalSorting();
1216 
1217  postprocessDAG();
1218 
1219  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1220  findRootsAndBiasEdges(TopRoots, BotRoots);
1221 
1222  // Initialize the strategy before modifying the DAG.
1223  // This may initialize a DFSResult to be used for queue priority.
1224  SchedImpl->initialize(this);
1225 
1226  LLVM_DEBUG(dump());
1227  if (PrintDAGs) dump();
1228  if (ViewMISchedDAGs) viewGraph();
1229 
1230  // Initialize ready queues now that the DAG and priority data are finalized.
1231  initQueues(TopRoots, BotRoots);
1232 
1233  bool IsTopNode = false;
1234  while (true) {
1235  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1236  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1237  if (!SU) break;
1238 
1239  assert(!SU->isScheduled && "Node already scheduled");
1240  if (!checkSchedLimit())
1241  break;
1242 
1243  scheduleMI(SU, IsTopNode);
1244 
1245  if (DFSResult) {
1246  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1247  if (!ScheduledTrees.test(SubtreeID)) {
1248  ScheduledTrees.set(SubtreeID);
1249  DFSResult->scheduleTree(SubtreeID);
1250  SchedImpl->scheduleTree(SubtreeID);
1251  }
1252  }
1253 
1254  // Notify the scheduling strategy after updating the DAG.
1255  SchedImpl->schedNode(SU, IsTopNode);
1256 
1257  updateQueues(SU, IsTopNode);
1258  }
1259  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1260 
1261  placeDebugValues();
1262 
1263  LLVM_DEBUG({
1264  dbgs() << "*** Final schedule for "
1265  << printMBBReference(*begin()->getParent()) << " ***\n";
1266  dumpSchedule();
1267  dbgs() << '\n';
1268  });
1269 }
1270 
1271 /// Build the DAG and setup three register pressure trackers.
1273  if (!ShouldTrackPressure) {
1274  RPTracker.reset();
1275  RegionCriticalPSets.clear();
1276  buildSchedGraph(AA);
1277  return;
1278  }
1279 
1280  // Initialize the register pressure tracker used by buildSchedGraph.
1281  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1282  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1283 
1284  // Account for liveness generate by the region boundary.
1285  if (LiveRegionEnd != RegionEnd)
1286  RPTracker.recede();
1287 
1288  // Build the DAG, and compute current register pressure.
1289  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1290 
1291  // Initialize top/bottom trackers after computing region pressure.
1292  initRegPressure();
1293 }
1294 
1296  if (!DFSResult)
1297  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1298  DFSResult->clear();
1299  ScheduledTrees.clear();
1300  DFSResult->resize(SUnits.size());
1301  DFSResult->compute(SUnits);
1302  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1303 }
1304 
1305 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1306 /// only provides the critical path for single block loops. To handle loops that
1307 /// span blocks, we could use the vreg path latencies provided by
1308 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1309 /// available for use in the scheduler.
1310 ///
1311 /// The cyclic path estimation identifies a def-use pair that crosses the back
1312 /// edge and considers the depth and height of the nodes. For example, consider
1313 /// the following instruction sequence where each instruction has unit latency
1314 /// and defines an epomymous virtual register:
1315 ///
1316 /// a->b(a,c)->c(b)->d(c)->exit
1317 ///
1318 /// The cyclic critical path is a two cycles: b->c->b
1319 /// The acyclic critical path is four cycles: a->b->c->d->exit
1320 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1321 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1322 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1323 /// LiveInDepth = depth(b) = len(a->b) = 1
1324 ///
1325 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1326 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1327 /// CyclicCriticalPath = min(2, 2) = 2
1328 ///
1329 /// This could be relevant to PostRA scheduling, but is currently implemented
1330 /// assuming LiveIntervals.
1332  // This only applies to single block loop.
1333  if (!BB->isSuccessor(BB))
1334  return 0;
1335 
1336  unsigned MaxCyclicLatency = 0;
1337  // Visit each live out vreg def to find def/use pairs that cross iterations.
1338  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1339  unsigned Reg = P.RegUnit;
1340  if (!TRI->isVirtualRegister(Reg))
1341  continue;
1342  const LiveInterval &LI = LIS->getInterval(Reg);
1343  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1344  if (!DefVNI)
1345  continue;
1346 
1347  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1348  const SUnit *DefSU = getSUnit(DefMI);
1349  if (!DefSU)
1350  continue;
1351 
1352  unsigned LiveOutHeight = DefSU->getHeight();
1353  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1354  // Visit all local users of the vreg def.
1355  for (const VReg2SUnit &V2SU
1356  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1357  SUnit *SU = V2SU.SU;
1358  if (SU == &ExitSU)
1359  continue;
1360 
1361  // Only consider uses of the phi.
1362  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1363  if (!LRQ.valueIn()->isPHIDef())
1364  continue;
1365 
1366  // Assume that a path spanning two iterations is a cycle, which could
1367  // overestimate in strange cases. This allows cyclic latency to be
1368  // estimated as the minimum slack of the vreg's depth or height.
1369  unsigned CyclicLatency = 0;
1370  if (LiveOutDepth > SU->getDepth())
1371  CyclicLatency = LiveOutDepth - SU->getDepth();
1372 
1373  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1374  if (LiveInHeight > LiveOutHeight) {
1375  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1376  CyclicLatency = LiveInHeight - LiveOutHeight;
1377  } else
1378  CyclicLatency = 0;
1379 
1380  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1381  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1382  if (CyclicLatency > MaxCyclicLatency)
1383  MaxCyclicLatency = CyclicLatency;
1384  }
1385  }
1386  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1387  return MaxCyclicLatency;
1388 }
1389 
1390 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1391 /// the Top RP tracker in case the region beginning has changed.
1393  ArrayRef<SUnit*> BotRoots) {
1394  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1395  if (ShouldTrackPressure) {
1396  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1397  TopRPTracker.setPos(CurrentTop);
1398  }
1399 }
1400 
1401 /// Move an instruction and update register pressure.
1402 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1403  // Move the instruction to its new location in the instruction stream.
1404  MachineInstr *MI = SU->getInstr();
1405 
1406  if (IsTopNode) {
1407  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1408  if (&*CurrentTop == MI)
1409  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1410  else {
1411  moveInstruction(MI, CurrentTop);
1412  TopRPTracker.setPos(MI);
1413  }
1414 
1415  if (ShouldTrackPressure) {
1416  // Update top scheduled pressure.
1417  RegisterOperands RegOpers;
1418  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1419  if (ShouldTrackLaneMasks) {
1420  // Adjust liveness and add missing dead+read-undef flags.
1421  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1422  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1423  } else {
1424  // Adjust for missing dead-def flags.
1425  RegOpers.detectDeadDefs(*MI, *LIS);
1426  }
1427 
1428  TopRPTracker.advance(RegOpers);
1429  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1430  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1431  TopRPTracker.getRegSetPressureAtPos(), TRI););
1432 
1433  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1434  }
1435  } else {
1436  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1437  MachineBasicBlock::iterator priorII =
1438  priorNonDebug(CurrentBottom, CurrentTop);
1439  if (&*priorII == MI)
1440  CurrentBottom = priorII;
1441  else {
1442  if (&*CurrentTop == MI) {
1443  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1444  TopRPTracker.setPos(CurrentTop);
1445  }
1446  moveInstruction(MI, CurrentBottom);
1447  CurrentBottom = MI;
1448  BotRPTracker.setPos(CurrentBottom);
1449  }
1450  if (ShouldTrackPressure) {
1451  RegisterOperands RegOpers;
1452  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1453  if (ShouldTrackLaneMasks) {
1454  // Adjust liveness and add missing dead+read-undef flags.
1455  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1456  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1457  } else {
1458  // Adjust for missing dead-def flags.
1459  RegOpers.detectDeadDefs(*MI, *LIS);
1460  }
1461 
1462  if (BotRPTracker.getPos() != CurrentBottom)
1463  BotRPTracker.recedeSkipDebugValues();
1465  BotRPTracker.recede(RegOpers, &LiveUses);
1466  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1467  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1468  BotRPTracker.getRegSetPressureAtPos(), TRI););
1469 
1470  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1471  updatePressureDiffs(LiveUses);
1472  }
1473  }
1474 }
1475 
1476 //===----------------------------------------------------------------------===//
1477 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1478 //===----------------------------------------------------------------------===//
1479 
1480 namespace {
1481 
1482 /// Post-process the DAG to create cluster edges between neighboring
1483 /// loads or between neighboring stores.
1484 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1485  struct MemOpInfo {
1486  SUnit *SU;
1487  MachineOperand *BaseOp;
1488  int64_t Offset;
1489 
1490  MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
1491  : SU(su), BaseOp(Op), Offset(ofs) {}
1492 
1493  bool operator<(const MemOpInfo &RHS) const {
1494  if (BaseOp->getType() != RHS.BaseOp->getType())
1495  return BaseOp->getType() < RHS.BaseOp->getType();
1496 
1497  if (BaseOp->isReg())
1498  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1499  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1500  RHS.SU->NodeNum);
1501  if (BaseOp->isFI()) {
1502  const MachineFunction &MF =
1503  *BaseOp->getParent()->getParent()->getParent();
1504  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1505  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1507  // Can't use tuple comparison here since we might need to use a
1508  // different order when the stack grows down.
1509  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1510  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1511  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1512 
1513  if (Offset != RHS.Offset)
1514  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1515 
1516  return SU->NodeNum < RHS.SU->NodeNum;
1517  }
1518 
1519  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1520  "index bases.");
1521  }
1522  };
1523 
1524  const TargetInstrInfo *TII;
1525  const TargetRegisterInfo *TRI;
1526  bool IsLoad;
1527 
1528 public:
1529  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1530  const TargetRegisterInfo *tri, bool IsLoad)
1531  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1532 
1533  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1534 
1535 protected:
1536  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1537 };
1538 
1539 class StoreClusterMutation : public BaseMemOpClusterMutation {
1540 public:
1541  StoreClusterMutation(const TargetInstrInfo *tii,
1542  const TargetRegisterInfo *tri)
1543  : BaseMemOpClusterMutation(tii, tri, false) {}
1544 };
1545 
1546 class LoadClusterMutation : public BaseMemOpClusterMutation {
1547 public:
1548  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1549  : BaseMemOpClusterMutation(tii, tri, true) {}
1550 };
1551 
1552 } // end anonymous namespace
1553 
1554 namespace llvm {
1555 
1556 std::unique_ptr<ScheduleDAGMutation>
1558  const TargetRegisterInfo *TRI) {
1559  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1560  : nullptr;
1561 }
1562 
1563 std::unique_ptr<ScheduleDAGMutation>
1565  const TargetRegisterInfo *TRI) {
1566  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1567  : nullptr;
1568 }
1569 
1570 } // end namespace llvm
1571 
1572 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1573  ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1574  SmallVector<MemOpInfo, 32> MemOpRecords;
1575  for (SUnit *SU : MemOps) {
1576  MachineOperand *BaseOp;
1577  int64_t Offset;
1578  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1579  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1580  }
1581  if (MemOpRecords.size() < 2)
1582  return;
1583 
1584  llvm::sort(MemOpRecords);
1585  unsigned ClusterLength = 1;
1586  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1587  SUnit *SUa = MemOpRecords[Idx].SU;
1588  SUnit *SUb = MemOpRecords[Idx+1].SU;
1589  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1590  *MemOpRecords[Idx + 1].BaseOp,
1591  ClusterLength) &&
1592  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1593  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1594  << SUb->NodeNum << ")\n");
1595  // Copy successor edges from SUa to SUb. Interleaving computation
1596  // dependent on SUa can prevent load combining due to register reuse.
1597  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1598  // loads should have effectively the same inputs.
1599  for (const SDep &Succ : SUa->Succs) {
1600  if (Succ.getSUnit() == SUb)
1601  continue;
1602  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1603  << ")\n");
1604  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1605  }
1606  ++ClusterLength;
1607  } else
1608  ClusterLength = 1;
1609  }
1610 }
1611 
1612 /// Callback from DAG postProcessing to create cluster edges for loads.
1614  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1615 
1616  // Map DAG NodeNum to store chain ID.
1617  DenseMap<unsigned, unsigned> StoreChainIDs;
1618  // Map each store chain to a set of dependent MemOps.
1619  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1620  for (SUnit &SU : DAG->SUnits) {
1621  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1622  (!IsLoad && !SU.getInstr()->mayStore()))
1623  continue;
1624 
1625  unsigned ChainPredID = DAG->SUnits.size();
1626  for (const SDep &Pred : SU.Preds) {
1627  if (Pred.isCtrl()) {
1628  ChainPredID = Pred.getSUnit()->NodeNum;
1629  break;
1630  }
1631  }
1632  // Check if this chain-like pred has been seen
1633  // before. ChainPredID==MaxNodeID at the top of the schedule.
1634  unsigned NumChains = StoreChainDependents.size();
1635  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1636  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1637  if (Result.second)
1638  StoreChainDependents.resize(NumChains + 1);
1639  StoreChainDependents[Result.first->second].push_back(&SU);
1640  }
1641 
1642  // Iterate over the store chains.
1643  for (auto &SCD : StoreChainDependents)
1644  clusterNeighboringMemOps(SCD, DAG);
1645 }
1646 
1647 //===----------------------------------------------------------------------===//
1648 // CopyConstrain - DAG post-processing to encourage copy elimination.
1649 //===----------------------------------------------------------------------===//
1650 
1651 namespace {
1652 
1653 /// Post-process the DAG to create weak edges from all uses of a copy to
1654 /// the one use that defines the copy's source vreg, most likely an induction
1655 /// variable increment.
1656 class CopyConstrain : public ScheduleDAGMutation {
1657  // Transient state.
1658  SlotIndex RegionBeginIdx;
1659 
1660  // RegionEndIdx is the slot index of the last non-debug instruction in the
1661  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1662  SlotIndex RegionEndIdx;
1663 
1664 public:
1665  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1666 
1667  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1668 
1669 protected:
1670  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1671 };
1672 
1673 } // end anonymous namespace
1674 
1675 namespace llvm {
1676 
1677 std::unique_ptr<ScheduleDAGMutation>
1679  const TargetRegisterInfo *TRI) {
1680  return llvm::make_unique<CopyConstrain>(TII, TRI);
1681 }
1682 
1683 } // end namespace llvm
1684 
1685 /// constrainLocalCopy handles two possibilities:
1686 /// 1) Local src:
1687 /// I0: = dst
1688 /// I1: src = ...
1689 /// I2: = dst
1690 /// I3: dst = src (copy)
1691 /// (create pred->succ edges I0->I1, I2->I1)
1692 ///
1693 /// 2) Local copy:
1694 /// I0: dst = src (copy)
1695 /// I1: = dst
1696 /// I2: src = ...
1697 /// I3: = dst
1698 /// (create pred->succ edges I1->I2, I3->I2)
1699 ///
1700 /// Although the MachineScheduler is currently constrained to single blocks,
1701 /// this algorithm should handle extended blocks. An EBB is a set of
1702 /// contiguously numbered blocks such that the previous block in the EBB is
1703 /// always the single predecessor.
1704 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1705  LiveIntervals *LIS = DAG->getLIS();
1706  MachineInstr *Copy = CopySU->getInstr();
1707 
1708  // Check for pure vreg copies.
1709  const MachineOperand &SrcOp = Copy->getOperand(1);
1710  unsigned SrcReg = SrcOp.getReg();
1711  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1712  return;
1713 
1714  const MachineOperand &DstOp = Copy->getOperand(0);
1715  unsigned DstReg = DstOp.getReg();
1716  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1717  return;
1718 
1719  // Check if either the dest or source is local. If it's live across a back
1720  // edge, it's not local. Note that if both vregs are live across the back
1721  // edge, we cannot successfully contrain the copy without cyclic scheduling.
1722  // If both the copy's source and dest are local live intervals, then we
1723  // should treat the dest as the global for the purpose of adding
1724  // constraints. This adds edges from source's other uses to the copy.
1725  unsigned LocalReg = SrcReg;
1726  unsigned GlobalReg = DstReg;
1727  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1728  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1729  LocalReg = DstReg;
1730  GlobalReg = SrcReg;
1731  LocalLI = &LIS->getInterval(LocalReg);
1732  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1733  return;
1734  }
1735  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1736 
1737  // Find the global segment after the start of the local LI.
1738  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1739  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1740  // local live range. We could create edges from other global uses to the local
1741  // start, but the coalescer should have already eliminated these cases, so
1742  // don't bother dealing with it.
1743  if (GlobalSegment == GlobalLI->end())
1744  return;
1745 
1746  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1747  // returned the next global segment. But if GlobalSegment overlaps with
1748  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1749  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1750  if (GlobalSegment->contains(LocalLI->beginIndex()))
1751  ++GlobalSegment;
1752 
1753  if (GlobalSegment == GlobalLI->end())
1754  return;
1755 
1756  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1757  if (GlobalSegment != GlobalLI->begin()) {
1758  // Two address defs have no hole.
1759  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1760  GlobalSegment->start)) {
1761  return;
1762  }
1763  // If the prior global segment may be defined by the same two-address
1764  // instruction that also defines LocalLI, then can't make a hole here.
1765  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1766  LocalLI->beginIndex())) {
1767  return;
1768  }
1769  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1770  // it would be a disconnected component in the live range.
1771  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1772  "Disconnected LRG within the scheduling region.");
1773  }
1774  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1775  if (!GlobalDef)
1776  return;
1777 
1778  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1779  if (!GlobalSU)
1780  return;
1781 
1782  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1783  // constraining the uses of the last local def to precede GlobalDef.
1784  SmallVector<SUnit*,8> LocalUses;
1785  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1786  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1787  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1788  for (const SDep &Succ : LastLocalSU->Succs) {
1789  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1790  continue;
1791  if (Succ.getSUnit() == GlobalSU)
1792  continue;
1793  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1794  return;
1795  LocalUses.push_back(Succ.getSUnit());
1796  }
1797  // Open the top of the GlobalLI hole by constraining any earlier global uses
1798  // to precede the start of LocalLI.
1799  SmallVector<SUnit*,8> GlobalUses;
1800  MachineInstr *FirstLocalDef =
1801  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1802  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1803  for (const SDep &Pred : GlobalSU->Preds) {
1804  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1805  continue;
1806  if (Pred.getSUnit() == FirstLocalSU)
1807  continue;
1808  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1809  return;
1810  GlobalUses.push_back(Pred.getSUnit());
1811  }
1812  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1813  // Add the weak edges.
1815  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1816  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1817  << GlobalSU->NodeNum << ")\n");
1818  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1819  }
1821  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1822  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1823  << FirstLocalSU->NodeNum << ")\n");
1824  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1825  }
1826 }
1827 
1828 /// Callback from DAG postProcessing to create weak edges to encourage
1829 /// copy elimination.
1830 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1831  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1832  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1833 
1834  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1835  if (FirstPos == DAG->end())
1836  return;
1837  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1838  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1839  *priorNonDebug(DAG->end(), DAG->begin()));
1840 
1841  for (SUnit &SU : DAG->SUnits) {
1842  if (!SU.getInstr()->isCopy())
1843  continue;
1844 
1845  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1846  }
1847 }
1848 
1849 //===----------------------------------------------------------------------===//
1850 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1851 // and possibly other custom schedulers.
1852 //===----------------------------------------------------------------------===//
1853 
1854 static const unsigned InvalidCycle = ~0U;
1855 
1856 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1857 
1858 /// Given a Count of resource usage and a Latency value, return true if a
1859 /// SchedBoundary becomes resource limited.
1860 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1861  unsigned Latency) {
1862  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1863 }
1864 
1866  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1867  // Destroying and reconstructing it is very expensive though. So keep
1868  // invalid, placeholder HazardRecs.
1869  if (HazardRec && HazardRec->isEnabled()) {
1870  delete HazardRec;
1871  HazardRec = nullptr;
1872  }
1873  Available.clear();
1874  Pending.clear();
1875  CheckPending = false;
1876  CurrCycle = 0;
1877  CurrMOps = 0;
1878  MinReadyCycle = std::numeric_limits<unsigned>::max();
1879  ExpectedLatency = 0;
1880  DependentLatency = 0;
1881  RetiredMOps = 0;
1882  MaxExecutedResCount = 0;
1883  ZoneCritResIdx = 0;
1884  IsResourceLimited = false;
1885  ReservedCycles.clear();
1886 #ifndef NDEBUG
1887  // Track the maximum number of stall cycles that could arise either from the
1888  // latency of a DAG edge or the number of cycles that a processor resource is
1889  // reserved (SchedBoundary::ReservedCycles).
1890  MaxObservedStall = 0;
1891 #endif
1892  // Reserve a zero-count for invalid CritResIdx.
1893  ExecutedResCounts.resize(1);
1894  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1895 }
1896 
1897 void SchedRemainder::
1898 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1899  reset();
1900  if (!SchedModel->hasInstrSchedModel())
1901  return;
1902  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1903  for (SUnit &SU : DAG->SUnits) {
1904  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1905  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1906  * SchedModel->getMicroOpFactor();
1908  PI = SchedModel->getWriteProcResBegin(SC),
1909  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1910  unsigned PIdx = PI->ProcResourceIdx;
1911  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1912  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1913  }
1914  }
1915 }
1916 
1917 void SchedBoundary::
1919  reset();
1920  DAG = dag;
1921  SchedModel = smodel;
1922  Rem = rem;
1923  if (SchedModel->hasInstrSchedModel()) {
1924  ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1925  ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1926  }
1927 }
1928 
1929 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1930 /// these "soft stalls" differently than the hard stall cycles based on CPU
1931 /// resources and computed by checkHazard(). A fully in-order model
1932 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1933 /// available for scheduling until they are ready. However, a weaker in-order
1934 /// model may use this for heuristics. For example, if a processor has in-order
1935 /// behavior when reading certain resources, this may come into play.
1937  if (!SU->isUnbuffered)
1938  return 0;
1939 
1940  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1941  if (ReadyCycle > CurrCycle)
1942  return ReadyCycle - CurrCycle;
1943  return 0;
1944 }
1945 
1946 /// Compute the next cycle at which the given processor resource can be
1947 /// scheduled.
1948 unsigned SchedBoundary::
1949 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1950  unsigned NextUnreserved = ReservedCycles[PIdx];
1951  // If this resource has never been used, always return cycle zero.
1952  if (NextUnreserved == InvalidCycle)
1953  return 0;
1954  // For bottom-up scheduling add the cycles needed for the current operation.
1955  if (!isTop())
1956  NextUnreserved += Cycles;
1957  return NextUnreserved;
1958 }
1959 
1960 /// Does this SU have a hazard within the current instruction group.
1961 ///
1962 /// The scheduler supports two modes of hazard recognition. The first is the
1963 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1964 /// supports highly complicated in-order reservation tables
1965 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1966 ///
1967 /// The second is a streamlined mechanism that checks for hazards based on
1968 /// simple counters that the scheduler itself maintains. It explicitly checks
1969 /// for instruction dispatch limitations, including the number of micro-ops that
1970 /// can dispatch per cycle.
1971 ///
1972 /// TODO: Also check whether the SU must start a new group.
1974  if (HazardRec->isEnabled()
1975  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1976  return true;
1977  }
1978 
1979  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1980  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1981  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1982  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1983  return true;
1984  }
1985 
1986  if (CurrMOps > 0 &&
1987  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1988  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1989  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1990  << (isTop() ? "begin" : "end") << " group\n");
1991  return true;
1992  }
1993 
1994  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1995  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1996  for (const MCWriteProcResEntry &PE :
1997  make_range(SchedModel->getWriteProcResBegin(SC),
1998  SchedModel->getWriteProcResEnd(SC))) {
1999  unsigned ResIdx = PE.ProcResourceIdx;
2000  unsigned Cycles = PE.Cycles;
2001  unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
2002  if (NRCycle > CurrCycle) {
2003 #ifndef NDEBUG
2004  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2005 #endif
2006  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2007  << SchedModel->getResourceName(ResIdx) << "="
2008  << NRCycle << "c\n");
2009  return true;
2010  }
2011  }
2012  }
2013  return false;
2014 }
2015 
2016 // Find the unscheduled node in ReadySUs with the highest latency.
2017 unsigned SchedBoundary::
2019  SUnit *LateSU = nullptr;
2020  unsigned RemLatency = 0;
2021  for (SUnit *SU : ReadySUs) {
2022  unsigned L = getUnscheduledLatency(SU);
2023  if (L > RemLatency) {
2024  RemLatency = L;
2025  LateSU = SU;
2026  }
2027  }
2028  if (LateSU) {
2029  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2030  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2031  }
2032  return RemLatency;
2033 }
2034 
2035 // Count resources in this zone and the remaining unscheduled
2036 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2037 // resource index, or zero if the zone is issue limited.
2038 unsigned SchedBoundary::
2039 getOtherResourceCount(unsigned &OtherCritIdx) {
2040  OtherCritIdx = 0;
2041  if (!SchedModel->hasInstrSchedModel())
2042  return 0;
2043 
2044  unsigned OtherCritCount = Rem->RemIssueCount
2045  + (RetiredMOps * SchedModel->getMicroOpFactor());
2046  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2047  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2048  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2049  PIdx != PEnd; ++PIdx) {
2050  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2051  if (OtherCount > OtherCritCount) {
2052  OtherCritCount = OtherCount;
2053  OtherCritIdx = PIdx;
2054  }
2055  }
2056  if (OtherCritIdx) {
2057  LLVM_DEBUG(
2058  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2059  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2060  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2061  }
2062  return OtherCritCount;
2063 }
2064 
2065 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2066  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2067 
2068 #ifndef NDEBUG
2069  // ReadyCycle was been bumped up to the CurrCycle when this node was
2070  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2071  // scheduling, so may now be greater than ReadyCycle.
2072  if (ReadyCycle > CurrCycle)
2073  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2074 #endif
2075 
2076  if (ReadyCycle < MinReadyCycle)
2077  MinReadyCycle = ReadyCycle;
2078 
2079  // Check for interlocks first. For the purpose of other heuristics, an
2080  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2081  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2082  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2083  Available.size() >= ReadyListLimit)
2084  Pending.push(SU);
2085  else
2086  Available.push(SU);
2087 }
2088 
2089 /// Move the boundary of scheduled code by one cycle.
2090 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2091  if (SchedModel->getMicroOpBufferSize() == 0) {
2092  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2093  "MinReadyCycle uninitialized");
2094  if (MinReadyCycle > NextCycle)
2095  NextCycle = MinReadyCycle;
2096  }
2097  // Update the current micro-ops, which will issue in the next cycle.
2098  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2099  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2100 
2101  // Decrement DependentLatency based on the next cycle.
2102  if ((NextCycle - CurrCycle) > DependentLatency)
2103  DependentLatency = 0;
2104  else
2105  DependentLatency -= (NextCycle - CurrCycle);
2106 
2107  if (!HazardRec->isEnabled()) {
2108  // Bypass HazardRec virtual calls.
2109  CurrCycle = NextCycle;
2110  } else {
2111  // Bypass getHazardType calls in case of long latency.
2112  for (; CurrCycle != NextCycle; ++CurrCycle) {
2113  if (isTop())
2114  HazardRec->AdvanceCycle();
2115  else
2116  HazardRec->RecedeCycle();
2117  }
2118  }
2119  CheckPending = true;
2120  IsResourceLimited =
2121  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2122  getScheduledLatency());
2123 
2124  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2125  << '\n');
2126 }
2127 
2128 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2129  ExecutedResCounts[PIdx] += Count;
2130  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2131  MaxExecutedResCount = ExecutedResCounts[PIdx];
2132 }
2133 
2134 /// Add the given processor resource to this scheduled zone.
2135 ///
2136 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2137 /// during which this resource is consumed.
2138 ///
2139 /// \return the next cycle at which the instruction may execute without
2140 /// oversubscribing resources.
2141 unsigned SchedBoundary::
2142 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2143  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2144  unsigned Count = Factor * Cycles;
2145  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2146  << Cycles << "x" << Factor << "u\n");
2147 
2148  // Update Executed resources counts.
2149  incExecutedResources(PIdx, Count);
2150  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2151  Rem->RemainingCounts[PIdx] -= Count;
2152 
2153  // Check if this resource exceeds the current critical resource. If so, it
2154  // becomes the critical resource.
2155  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2156  ZoneCritResIdx = PIdx;
2157  LLVM_DEBUG(dbgs() << " *** Critical resource "
2158  << SchedModel->getResourceName(PIdx) << ": "
2159  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2160  << "c\n");
2161  }
2162  // For reserved resources, record the highest cycle using the resource.
2163  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2164  if (NextAvailable > CurrCycle) {
2165  LLVM_DEBUG(dbgs() << " Resource conflict: "
2166  << SchedModel->getProcResource(PIdx)->Name
2167  << " reserved until @" << NextAvailable << "\n");
2168  }
2169  return NextAvailable;
2170 }
2171 
2172 /// Move the boundary of scheduled code by one SUnit.
2174  // Update the reservation table.
2175  if (HazardRec->isEnabled()) {
2176  if (!isTop() && SU->isCall) {
2177  // Calls are scheduled with their preceding instructions. For bottom-up
2178  // scheduling, clear the pipeline state before emitting.
2179  HazardRec->Reset();
2180  }
2181  HazardRec->EmitInstruction(SU);
2182  }
2183  // checkHazard should prevent scheduling multiple instructions per cycle that
2184  // exceed the issue width.
2185  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2186  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2187  assert(
2188  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2189  "Cannot schedule this instruction's MicroOps in the current cycle.");
2190 
2191  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2192  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2193 
2194  unsigned NextCycle = CurrCycle;
2195  switch (SchedModel->getMicroOpBufferSize()) {
2196  case 0:
2197  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2198  break;
2199  case 1:
2200  if (ReadyCycle > NextCycle) {
2201  NextCycle = ReadyCycle;
2202  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2203  }
2204  break;
2205  default:
2206  // We don't currently model the OOO reorder buffer, so consider all
2207  // scheduled MOps to be "retired". We do loosely model in-order resource
2208  // latency. If this instruction uses an in-order resource, account for any
2209  // likely stall cycles.
2210  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2211  NextCycle = ReadyCycle;
2212  break;
2213  }
2214  RetiredMOps += IncMOps;
2215 
2216  // Update resource counts and critical resource.
2217  if (SchedModel->hasInstrSchedModel()) {
2218  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2219  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2220  Rem->RemIssueCount -= DecRemIssue;
2221  if (ZoneCritResIdx) {
2222  // Scale scheduled micro-ops for comparing with the critical resource.
2223  unsigned ScaledMOps =
2224  RetiredMOps * SchedModel->getMicroOpFactor();
2225 
2226  // If scaled micro-ops are now more than the previous critical resource by
2227  // a full cycle, then micro-ops issue becomes critical.
2228  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2229  >= (int)SchedModel->getLatencyFactor()) {
2230  ZoneCritResIdx = 0;
2231  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2232  << ScaledMOps / SchedModel->getLatencyFactor()
2233  << "c\n");
2234  }
2235  }
2237  PI = SchedModel->getWriteProcResBegin(SC),
2238  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2239  unsigned RCycle =
2240  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2241  if (RCycle > NextCycle)
2242  NextCycle = RCycle;
2243  }
2244  if (SU->hasReservedResource) {
2245  // For reserved resources, record the highest cycle using the resource.
2246  // For top-down scheduling, this is the cycle in which we schedule this
2247  // instruction plus the number of cycles the operations reserves the
2248  // resource. For bottom-up is it simply the instruction's cycle.
2250  PI = SchedModel->getWriteProcResBegin(SC),
2251  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2252  unsigned PIdx = PI->ProcResourceIdx;
2253  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2254  if (isTop()) {
2255  ReservedCycles[PIdx] =
2256  std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2257  }
2258  else
2259  ReservedCycles[PIdx] = NextCycle;
2260  }
2261  }
2262  }
2263  }
2264  // Update ExpectedLatency and DependentLatency.
2265  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2266  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2267  if (SU->getDepth() > TopLatency) {
2268  TopLatency = SU->getDepth();
2269  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2270  << SU->NodeNum << ") " << TopLatency << "c\n");
2271  }
2272  if (SU->getHeight() > BotLatency) {
2273  BotLatency = SU->getHeight();
2274  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2275  << SU->NodeNum << ") " << BotLatency << "c\n");
2276  }
2277  // If we stall for any reason, bump the cycle.
2278  if (NextCycle > CurrCycle)
2279  bumpCycle(NextCycle);
2280  else
2281  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2282  // resource limited. If a stall occurred, bumpCycle does this.
2283  IsResourceLimited =
2284  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2285  getScheduledLatency());
2286 
2287  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2288  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2289  // one cycle. Since we commonly reach the max MOps here, opportunistically
2290  // bump the cycle to avoid uselessly checking everything in the readyQ.
2291  CurrMOps += IncMOps;
2292 
2293  // Bump the cycle count for issue group constraints.
2294  // This must be done after NextCycle has been adjust for all other stalls.
2295  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2296  // currCycle to X.
2297  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2298  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2299  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2300  << " group\n");
2301  bumpCycle(++NextCycle);
2302  }
2303 
2304  while (CurrMOps >= SchedModel->getIssueWidth()) {
2305  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2306  << CurrCycle << '\n');
2307  bumpCycle(++NextCycle);
2308  }
2309  LLVM_DEBUG(dumpScheduledState());
2310 }
2311 
2312 /// Release pending ready nodes in to the available queue. This makes them
2313 /// visible to heuristics.
2315  // If the available queue is empty, it is safe to reset MinReadyCycle.
2316  if (Available.empty())
2317  MinReadyCycle = std::numeric_limits<unsigned>::max();
2318 
2319  // Check to see if any of the pending instructions are ready to issue. If
2320  // so, add them to the available queue.
2321  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2322  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2323  SUnit *SU = *(Pending.begin()+i);
2324  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2325 
2326  if (ReadyCycle < MinReadyCycle)
2327  MinReadyCycle = ReadyCycle;
2328 
2329  if (!IsBuffered && ReadyCycle > CurrCycle)
2330  continue;
2331 
2332  if (checkHazard(SU))
2333  continue;
2334 
2335  if (Available.size() >= ReadyListLimit)
2336  break;
2337 
2338  Available.push(SU);
2339  Pending.remove(Pending.begin()+i);
2340  --i; --e;
2341  }
2342  CheckPending = false;
2343 }
2344 
2345 /// Remove SU from the ready set for this boundary.
2347  if (Available.isInQueue(SU))
2348  Available.remove(Available.find(SU));
2349  else {
2350  assert(Pending.isInQueue(SU) && "bad ready count");
2351  Pending.remove(Pending.find(SU));
2352  }
2353 }
2354 
2355 /// If this queue only has one ready candidate, return it. As a side effect,
2356 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2357 /// one node is ready. If multiple instructions are ready, return NULL.
2359  if (CheckPending)
2360  releasePending();
2361 
2362  if (CurrMOps > 0) {
2363  // Defer any ready instrs that now have a hazard.
2364  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2365  if (checkHazard(*I)) {
2366  Pending.push(*I);
2367  I = Available.remove(I);
2368  continue;
2369  }
2370  ++I;
2371  }
2372  }
2373  for (unsigned i = 0; Available.empty(); ++i) {
2374 // FIXME: Re-enable assert once PR20057 is resolved.
2375 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2376 // "permanent hazard");
2377  (void)i;
2378  bumpCycle(CurrCycle + 1);
2379  releasePending();
2380  }
2381 
2382  LLVM_DEBUG(Pending.dump());
2383  LLVM_DEBUG(Available.dump());
2384 
2385  if (Available.size() == 1)
2386  return *Available.begin();
2387  return nullptr;
2388 }
2389 
2390 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2391 // This is useful information to dump after bumpNode.
2392 // Note that the Queue contents are more useful before pickNodeFromQueue.
2394  unsigned ResFactor;
2395  unsigned ResCount;
2396  if (ZoneCritResIdx) {
2397  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2398  ResCount = getResourceCount(ZoneCritResIdx);
2399  } else {
2400  ResFactor = SchedModel->getMicroOpFactor();
2401  ResCount = RetiredMOps * ResFactor;
2402  }
2403  unsigned LFactor = SchedModel->getLatencyFactor();
2404  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2405  << " Retired: " << RetiredMOps;
2406  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2407  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2408  << ResCount / ResFactor << " "
2409  << SchedModel->getResourceName(ZoneCritResIdx)
2410  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2411  << (IsResourceLimited ? " - Resource" : " - Latency")
2412  << " limited.\n";
2413 }
2414 #endif
2415 
2416 //===----------------------------------------------------------------------===//
2417 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2418 //===----------------------------------------------------------------------===//
2419 
2422  const TargetSchedModel *SchedModel) {
2423  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2424  return;
2425 
2426  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2428  PI = SchedModel->getWriteProcResBegin(SC),
2429  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2430  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2431  ResDelta.CritResources += PI->Cycles;
2432  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2433  ResDelta.DemandedResources += PI->Cycles;
2434  }
2435 }
2436 
2437 /// Compute remaining latency. We need this both to determine whether the
2438 /// overall schedule has become latency-limited and whether the instructions
2439 /// outside this zone are resource or latency limited.
2440 ///
2441 /// The "dependent" latency is updated incrementally during scheduling as the
2442 /// max height/depth of scheduled nodes minus the cycles since it was
2443 /// scheduled:
2444 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2445 ///
2446 /// The "independent" latency is the max ready queue depth:
2447 /// ILat = max N.depth for N in Available|Pending
2448 ///
2449 /// RemainingLatency is the greater of independent and dependent latency.
2450 ///
2451 /// These computations are expensive, especially in DAGs with many edges, so
2452 /// only do them if necessary.
2453 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2454  unsigned RemLatency = CurrZone.getDependentLatency();
2455  RemLatency = std::max(RemLatency,
2456  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2457  RemLatency = std::max(RemLatency,
2458  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2459  return RemLatency;
2460 }
2461 
2462 /// Returns true if the current cycle plus remaning latency is greater than
2463 /// the critical path in the scheduling region.
2464 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2465  SchedBoundary &CurrZone,
2466  bool ComputeRemLatency,
2467  unsigned &RemLatency) const {
2468  // The current cycle is already greater than the critical path, so we are
2469  // already latency limited and don't need to compute the remaining latency.
2470  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2471  return true;
2472 
2473  // If we haven't scheduled anything yet, then we aren't latency limited.
2474  if (CurrZone.getCurrCycle() == 0)
2475  return false;
2476 
2477  if (ComputeRemLatency)
2478  RemLatency = computeRemLatency(CurrZone);
2479 
2480  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2481 }
2482 
2483 /// Set the CandPolicy given a scheduling zone given the current resources and
2484 /// latencies inside and outside the zone.
2485 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2486  SchedBoundary &CurrZone,
2487  SchedBoundary *OtherZone) {
2488  // Apply preemptive heuristics based on the total latency and resources
2489  // inside and outside this zone. Potential stalls should be considered before
2490  // following this policy.
2491 
2492  // Compute the critical resource outside the zone.
2493  unsigned OtherCritIdx = 0;
2494  unsigned OtherCount =
2495  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2496 
2497  bool OtherResLimited = false;
2498  unsigned RemLatency = 0;
2499  bool RemLatencyComputed = false;
2500  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2501  RemLatency = computeRemLatency(CurrZone);
2502  RemLatencyComputed = true;
2503  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2504  OtherCount, RemLatency);
2505  }
2506 
2507  // Schedule aggressively for latency in PostRA mode. We don't check for
2508  // acyclic latency during PostRA, and highly out-of-order processors will
2509  // skip PostRA scheduling.
2510  if (!OtherResLimited &&
2511  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2512  RemLatency))) {
2513  Policy.ReduceLatency |= true;
2514  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2515  << " RemainingLatency " << RemLatency << " + "
2516  << CurrZone.getCurrCycle() << "c > CritPath "
2517  << Rem.CriticalPath << "\n");
2518  }
2519  // If the same resource is limiting inside and outside the zone, do nothing.
2520  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2521  return;
2522 
2523  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2524  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2525  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2526  } if (OtherResLimited) dbgs()
2527  << " RemainingLimit: "
2528  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2529  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2530  << " Latency limited both directions.\n");
2531 
2532  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2533  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2534 
2535  if (OtherResLimited)
2536  Policy.DemandResIdx = OtherCritIdx;
2537 }
2538 
2539 #ifndef NDEBUG
2542  switch (Reason) {
2543  case NoCand: return "NOCAND ";
2544  case Only1: return "ONLY1 ";
2545  case PhysReg: return "PHYS-REG ";
2546  case RegExcess: return "REG-EXCESS";
2547  case RegCritical: return "REG-CRIT ";
2548  case Stall: return "STALL ";
2549  case Cluster: return "CLUSTER ";
2550  case Weak: return "WEAK ";
2551  case RegMax: return "REG-MAX ";
2552  case ResourceReduce: return "RES-REDUCE";
2553  case ResourceDemand: return "RES-DEMAND";
2554  case TopDepthReduce: return "TOP-DEPTH ";
2555  case TopPathReduce: return "TOP-PATH ";
2556  case BotHeightReduce:return "BOT-HEIGHT";
2557  case BotPathReduce: return "BOT-PATH ";
2558  case NextDefUse: return "DEF-USE ";
2559  case NodeOrder: return "ORDER ";
2560  };
2561  llvm_unreachable("Unknown reason!");
2562 }
2563 
2565  PressureChange P;
2566  unsigned ResIdx = 0;
2567  unsigned Latency = 0;
2568  switch (Cand.Reason) {
2569  default:
2570  break;
2571  case RegExcess:
2572  P = Cand.RPDelta.Excess;
2573  break;
2574  case RegCritical:
2575  P = Cand.RPDelta.CriticalMax;
2576  break;
2577  case RegMax:
2578  P = Cand.RPDelta.CurrentMax;
2579  break;
2580  case ResourceReduce:
2581  ResIdx = Cand.Policy.ReduceResIdx;
2582  break;
2583  case ResourceDemand:
2584  ResIdx = Cand.Policy.DemandResIdx;
2585  break;
2586  case TopDepthReduce:
2587  Latency = Cand.SU->getDepth();
2588  break;
2589  case TopPathReduce:
2590  Latency = Cand.SU->getHeight();
2591  break;
2592  case BotHeightReduce:
2593  Latency = Cand.SU->getHeight();
2594  break;
2595  case BotPathReduce:
2596  Latency = Cand.SU->getDepth();
2597  break;
2598  }
2599  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2600  if (P.isValid())
2601  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2602  << ":" << P.getUnitInc() << " ";
2603  else
2604  dbgs() << " ";
2605  if (ResIdx)
2606  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2607  else
2608  dbgs() << " ";
2609  if (Latency)
2610  dbgs() << " " << Latency << " cycles ";
2611  else
2612  dbgs() << " ";
2613  dbgs() << '\n';
2614 }
2615 #endif
2616 
2617 namespace llvm {
2618 /// Return true if this heuristic determines order.
2619 bool tryLess(int TryVal, int CandVal,
2623  if (TryVal < CandVal) {
2624  TryCand.Reason = Reason;
2625  return true;
2626  }
2627  if (TryVal > CandVal) {
2628  if (Cand.Reason > Reason)
2629  Cand.Reason = Reason;
2630  return true;
2631  }
2632  return false;
2633 }
2634 
2635 bool tryGreater(int TryVal, int CandVal,
2639  if (TryVal > CandVal) {
2640  TryCand.Reason = Reason;
2641  return true;
2642  }
2643  if (TryVal < CandVal) {
2644  if (Cand.Reason > Reason)
2645  Cand.Reason = Reason;
2646  return true;
2647  }
2648  return false;
2649 }
2650 
2653  SchedBoundary &Zone) {
2654  if (Zone.isTop()) {
2655  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2656  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2657  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2658  return true;
2659  }
2660  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2661  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2662  return true;
2663  } else {
2664  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2665  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2666  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2667  return true;
2668  }
2669  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2670  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2671  return true;
2672  }
2673  return false;
2674 }
2675 } // end namespace llvm
2676 
2677 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2678  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2679  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2680 }
2681 
2683  tracePick(Cand.Reason, Cand.AtTop);
2684 }
2685 
2687  assert(dag->hasVRegLiveness() &&
2688  "(PreRA)GenericScheduler needs vreg liveness");
2689  DAG = static_cast<ScheduleDAGMILive*>(dag);
2690  SchedModel = DAG->getSchedModel();
2691  TRI = DAG->TRI;
2692 
2693  Rem.init(DAG, SchedModel);
2694  Top.init(DAG, SchedModel, &Rem);
2695  Bot.init(DAG, SchedModel, &Rem);
2696 
2697  // Initialize resource counts.
2698 
2699  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2700  // are disabled, then these HazardRecs will be disabled.
2701  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2702  if (!Top.HazardRec) {
2703  Top.HazardRec =
2704  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2705  Itin, DAG);
2706  }
2707  if (!Bot.HazardRec) {
2708  Bot.HazardRec =
2709  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2710  Itin, DAG);
2711  }
2712  TopCand.SU = nullptr;
2713  BotCand.SU = nullptr;
2714 }
2715 
2716 /// Initialize the per-region scheduling policy.
2719  unsigned NumRegionInstrs) {
2720  const MachineFunction &MF = *Begin->getMF();
2721  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2722 
2723  // Avoid setting up the register pressure tracker for small regions to save
2724  // compile time. As a rough heuristic, only track pressure when the number of
2725  // schedulable instructions exceeds half the integer register file.
2726  RegionPolicy.ShouldTrackPressure = true;
2727  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2728  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2729  if (TLI->isTypeLegal(LegalIntVT)) {
2730  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2731  TLI->getRegClassFor(LegalIntVT));
2732  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2733  }
2734  }
2735 
2736  // For generic targets, we default to bottom-up, because it's simpler and more
2737  // compile-time optimizations have been implemented in that direction.
2738  RegionPolicy.OnlyBottomUp = true;
2739 
2740  // Allow the subtarget to override default policy.
2741  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2742 
2743  // After subtarget overrides, apply command line options.
2744  if (!EnableRegPressure)
2745  RegionPolicy.ShouldTrackPressure = false;
2746 
2747  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2748  // e.g. -misched-bottomup=false allows scheduling in both directions.
2750  "-misched-topdown incompatible with -misched-bottomup");
2751  if (ForceBottomUp.getNumOccurrences() > 0) {
2752  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2753  if (RegionPolicy.OnlyBottomUp)
2754  RegionPolicy.OnlyTopDown = false;
2755  }
2756  if (ForceTopDown.getNumOccurrences() > 0) {
2757  RegionPolicy.OnlyTopDown = ForceTopDown;
2758  if (RegionPolicy.OnlyTopDown)
2759  RegionPolicy.OnlyBottomUp = false;
2760  }
2761 }
2762 
2764  // Cannot completely remove virtual function even in release mode.
2765 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2766  dbgs() << "GenericScheduler RegionPolicy: "
2767  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2768  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2769  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2770  << "\n";
2771 #endif
2772 }
2773 
2774 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2775 /// critical path by more cycles than it takes to drain the instruction buffer.
2776 /// We estimate an upper bounds on in-flight instructions as:
2777 ///
2778 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2779 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2780 /// InFlightResources = InFlightIterations * LoopResources
2781 ///
2782 /// TODO: Check execution resources in addition to IssueCount.
2784  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2785  return;
2786 
2787  // Scaled number of cycles per loop iteration.
2788  unsigned IterCount =
2789  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2790  Rem.RemIssueCount);
2791  // Scaled acyclic critical path.
2792  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2793  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2794  unsigned InFlightCount =
2795  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2796  unsigned BufferLimit =
2797  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2798 
2799  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2800 
2801  LLVM_DEBUG(
2802  dbgs() << "IssueCycles="
2803  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2804  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2805  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2806  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2807  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2808  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2809 }
2810 
2812  Rem.CriticalPath = DAG->ExitSU.getDepth();
2813 
2814  // Some roots may not feed into ExitSU. Check all of them in case.
2815  for (const SUnit *SU : Bot.Available) {
2816  if (SU->getDepth() > Rem.CriticalPath)
2817  Rem.CriticalPath = SU->getDepth();
2818  }
2819  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2820  if (DumpCriticalPathLength) {
2821  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2822  }
2823 
2824  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2825  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2826  checkAcyclicLatency();
2827  }
2828 }
2829 
2830 namespace llvm {
2831 bool tryPressure(const PressureChange &TryP,
2832  const PressureChange &CandP,
2836  const TargetRegisterInfo *TRI,
2837  const MachineFunction &MF) {
2838  // If one candidate decreases and the other increases, go with it.
2839  // Invalid candidates have UnitInc==0.
2840  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2841  Reason)) {
2842  return true;
2843  }
2844  // Do not compare the magnitude of pressure changes between top and bottom
2845  // boundary.
2846  if (Cand.AtTop != TryCand.AtTop)
2847  return false;
2848 
2849  // If both candidates affect the same set in the same boundary, go with the
2850  // smallest increase.
2851  unsigned TryPSet = TryP.getPSetOrMax();
2852  unsigned CandPSet = CandP.getPSetOrMax();
2853  if (TryPSet == CandPSet) {
2854  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2855  Reason);
2856  }
2857 
2858  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2860 
2861  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2863 
2864  // If the candidates are decreasing pressure, reverse priority.
2865  if (TryP.getUnitInc() < 0)
2866  std::swap(TryRank, CandRank);
2867  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2868 }
2869 
2870 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2871  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2872 }
2873 
2874 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2875 /// their physreg def/use.
2876 ///
2877 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2878 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2879 /// with the operation that produces or consumes the physreg. We'll do this when
2880 /// regalloc has support for parallel copies.
2881 int biasPhysReg(const SUnit *SU, bool isTop) {
2882  const MachineInstr *MI = SU->getInstr();
2883 
2884  if (MI->isCopy()) {
2885  unsigned ScheduledOper = isTop ? 1 : 0;
2886  unsigned UnscheduledOper = isTop ? 0 : 1;
2887  // If we have already scheduled the physreg produce/consumer, immediately
2888  // schedule the copy.
2890  MI->getOperand(ScheduledOper).getReg()))
2891  return 1;
2892  // If the physreg is at the boundary, defer it. Otherwise schedule it
2893  // immediately to free the dependent. We can hoist the copy later.
2894  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2896  MI->getOperand(UnscheduledOper).getReg()))
2897  return AtBoundary ? -1 : 1;
2898  }
2899 
2900  if (MI->isMoveImmediate()) {
2901  // If we have a move immediate and all successors have been assigned, bias
2902  // towards scheduling this later. Make sure all register defs are to
2903  // physical registers.
2904  bool DoBias = true;
2905  for (const MachineOperand &Op : MI->defs()) {
2906  if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
2907  DoBias = false;
2908  break;
2909  }
2910  }
2911 
2912  if (DoBias)
2913  return isTop ? -1 : 1;
2914  }
2915 
2916  return 0;
2917 }
2918 } // end namespace llvm
2919 
2921  bool AtTop,
2922  const RegPressureTracker &RPTracker,
2923  RegPressureTracker &TempTracker) {
2924  Cand.SU = SU;
2925  Cand.AtTop = AtTop;
2926  if (DAG->isTrackingPressure()) {
2927  if (AtTop) {
2928  TempTracker.getMaxDownwardPressureDelta(
2929  Cand.SU->getInstr(),
2930  Cand.RPDelta,
2931  DAG->getRegionCriticalPSets(),
2933  } else {
2934  if (VerifyScheduling) {
2935  TempTracker.getMaxUpwardPressureDelta(
2936  Cand.SU->getInstr(),
2937  &DAG->getPressureDiff(Cand.SU),
2938  Cand.RPDelta,
2939  DAG->getRegionCriticalPSets(),
2941  } else {
2942  RPTracker.getUpwardPressureDelta(
2943  Cand.SU->getInstr(),
2944  DAG->getPressureDiff(Cand.SU),
2945  Cand.RPDelta,
2946  DAG->getRegionCriticalPSets(),
2948  }
2949  }
2950  }
2951  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2952  << " Try SU(" << Cand.SU->NodeNum << ") "
2953  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2954  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2955 }
2956 
2957 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2958 /// hierarchical. This may be more efficient than a graduated cost model because
2959 /// we don't need to evaluate all aspects of the model for each node in the
2960 /// queue. But it's really done to make the heuristics easier to debug and
2961 /// statistically analyze.
2962 ///
2963 /// \param Cand provides the policy and current best candidate.
2964 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2965 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2966 // if Cand is from a different zone than TryCand.
2968  SchedCandidate &TryCand,
2969  SchedBoundary *Zone) const {
2970  // Initialize the candidate if needed.
2971  if (!Cand.isValid()) {
2972  TryCand.Reason = NodeOrder;
2973  return;
2974  }
2975 
2976  // Bias PhysReg Defs and copies to their uses and defined respectively.
2977  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
2978  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
2979  return;
2980 
2981  // Avoid exceeding the target's limit.
2982  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2983  Cand.RPDelta.Excess,
2984  TryCand, Cand, RegExcess, TRI,
2985  DAG->MF))
2986  return;
2987 
2988  // Avoid increasing the max critical pressure in the scheduled region.
2989  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2990  Cand.RPDelta.CriticalMax,
2991  TryCand, Cand, RegCritical, TRI,
2992  DAG->MF))
2993  return;
2994 
2995  // We only compare a subset of features when comparing nodes between
2996  // Top and Bottom boundary. Some properties are simply incomparable, in many
2997  // other instances we should only override the other boundary if something
2998  // is a clear good pick on one boundary. Skip heuristics that are more
2999  // "tie-breaking" in nature.
3000  bool SameBoundary = Zone != nullptr;
3001  if (SameBoundary) {
3002  // For loops that are acyclic path limited, aggressively schedule for
3003  // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
3004  // heuristics to take precedence.
3005  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3006  tryLatency(TryCand, Cand, *Zone))
3007  return;
3008 
3009  // Prioritize instructions that read unbuffered resources by stall cycles.
3010  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3011  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3012  return;
3013  }
3014 
3015  // Keep clustered nodes together to encourage downstream peephole
3016  // optimizations which may reduce resource requirements.
3017  //
3018  // This is a best effort to set things up for a post-RA pass. Optimizations
3019  // like generating loads of multiple registers should ideally be done within
3020  // the scheduler pass by combining the loads during DAG postprocessing.
3021  const SUnit *CandNextClusterSU =
3022  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3023  const SUnit *TryCandNextClusterSU =
3024  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3025  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3026  Cand.SU == CandNextClusterSU,
3027  TryCand, Cand, Cluster))
3028  return;
3029 
3030  if (SameBoundary) {
3031  // Weak edges are for clustering and other constraints.
3032  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3033  getWeakLeft(Cand.SU, Cand.AtTop),
3034  TryCand, Cand, Weak))
3035  return;
3036  }
3037 
3038  // Avoid increasing the max pressure of the entire region.
3039  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3040  Cand.RPDelta.CurrentMax,
3041  TryCand, Cand, RegMax, TRI,
3042  DAG->MF))
3043  return;
3044 
3045  if (SameBoundary) {
3046  // Avoid critical resource consumption and balance the schedule.
3047  TryCand.initResourceDelta(DAG, SchedModel);
3049  TryCand, Cand, ResourceReduce))
3050  return;
3053  TryCand, Cand, ResourceDemand))
3054  return;
3055 
3056  // Avoid serializing long latency dependence chains.
3057  // For acyclic path limited loops, latency was already checked above.
3058  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3059  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3060  return;
3061 
3062  // Fall through to original instruction order.
3063  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3064  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3065  TryCand.Reason = NodeOrder;
3066  }
3067  }
3068 }
3069 
3070 /// Pick the best candidate from the queue.
3071 ///
3072 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3073 /// DAG building. To adjust for the current scheduling location we need to
3074 /// maintain the number of vreg uses remaining to be top-scheduled.
3076  const CandPolicy &ZonePolicy,
3077  const RegPressureTracker &RPTracker,
3078  SchedCandidate &Cand) {
3079  // getMaxPressureDelta temporarily modifies the tracker.
3080  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3081 
3082  ReadyQueue &Q = Zone.Available;
3083  for (SUnit *SU : Q) {
3084 
3085  SchedCandidate TryCand(ZonePolicy);
3086  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3087  // Pass SchedBoundary only when comparing nodes from the same boundary.
3088  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3089  tryCandidate(Cand, TryCand, ZoneArg);
3090  if (TryCand.Reason != NoCand) {
3091  // Initialize resource delta if needed in case future heuristics query it.
3092  if (TryCand.ResDelta == SchedResourceDelta())
3093  TryCand.initResourceDelta(DAG, SchedModel);
3094  Cand.setBest(TryCand);
3095  LLVM_DEBUG(traceCandidate(Cand));
3096  }
3097  }
3098 }
3099 
3100 /// Pick the best candidate node from either the top or bottom queue.
3102  // Schedule as far as possible in the direction of no choice. This is most
3103  // efficient, but also provides the best heuristics for CriticalPSets.
3104  if (SUnit *SU = Bot.pickOnlyChoice()) {
3105  IsTopNode = false;
3106  tracePick(Only1, false);
3107  return SU;
3108  }
3109  if (SUnit *SU = Top.pickOnlyChoice()) {
3110  IsTopNode = true;
3111  tracePick(Only1, true);
3112  return SU;
3113  }
3114  // Set the bottom-up policy based on the state of the current bottom zone and
3115  // the instructions outside the zone, including the top zone.
3116  CandPolicy BotPolicy;
3117  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3118  // Set the top-down policy based on the state of the current top zone and
3119  // the instructions outside the zone, including the bottom zone.
3120  CandPolicy TopPolicy;
3121  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3122 
3123  // See if BotCand is still valid (because we previously scheduled from Top).
3124  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3125  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3126  BotCand.Policy != BotPolicy) {
3127  BotCand.reset(CandPolicy());
3128  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3129  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3130  } else {
3131  LLVM_DEBUG(traceCandidate(BotCand));
3132 #ifndef NDEBUG
3133  if (VerifyScheduling) {
3134  SchedCandidate TCand;
3135  TCand.reset(CandPolicy());
3136  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3137  assert(TCand.SU == BotCand.SU &&
3138  "Last pick result should correspond to re-picking right now");
3139  }
3140 #endif
3141  }
3142 
3143  // Check if the top Q has a better candidate.
3144  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3145  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3146  TopCand.Policy != TopPolicy) {
3147  TopCand.reset(CandPolicy());
3148  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3149  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3150  } else {
3151  LLVM_DEBUG(traceCandidate(TopCand));
3152 #ifndef NDEBUG
3153  if (VerifyScheduling) {
3154  SchedCandidate TCand;
3155  TCand.reset(CandPolicy());
3156  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3157  assert(TCand.SU == TopCand.SU &&
3158  "Last pick result should correspond to re-picking right now");
3159  }
3160 #endif
3161  }
3162 
3163  // Pick best from BotCand and TopCand.
3164  assert(BotCand.isValid());
3165  assert(TopCand.isValid());
3166  SchedCandidate Cand = BotCand;
3167  TopCand.Reason = NoCand;
3168  tryCandidate(Cand, TopCand, nullptr);
3169  if (TopCand.Reason != NoCand) {
3170  Cand.setBest(TopCand);
3171  LLVM_DEBUG(traceCandidate(Cand));
3172  }
3173 
3174  IsTopNode = Cand.AtTop;
3175  tracePick(Cand);
3176  return Cand.SU;
3177 }
3178 
3179 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3181  if (DAG->top() == DAG->bottom()) {
3182  assert(Top.Available.empty() && Top.Pending.empty() &&
3183  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3184  return nullptr;
3185  }
3186  SUnit *SU;
3187  do {
3188  if (RegionPolicy.OnlyTopDown) {
3189  SU = Top.pickOnlyChoice();
3190  if (!SU) {
3191  CandPolicy NoPolicy;
3192  TopCand.reset(NoPolicy);
3193  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3194  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3195  tracePick(TopCand);
3196  SU = TopCand.SU;
3197  }
3198  IsTopNode = true;
3199  } else if (RegionPolicy.OnlyBottomUp) {
3200  SU = Bot.pickOnlyChoice();
3201  if (!SU) {
3202  CandPolicy NoPolicy;
3203  BotCand.reset(NoPolicy);
3204  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3205  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3206  tracePick(BotCand);
3207  SU = BotCand.SU;
3208  }
3209  IsTopNode = false;
3210  } else {
3211  SU = pickNodeBidirectional(IsTopNode);
3212  }
3213  } while (SU->isScheduled);
3214 
3215  if (SU->isTopReady())
3216  Top.removeReady(SU);
3217  if (SU->isBottomReady())
3218  Bot.removeReady(SU);
3219 
3220  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3221  << *SU->getInstr());
3222  return SU;
3223 }
3224 
3226  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3227  if (!isTop)
3228  ++InsertPos;
3229  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3230 
3231  // Find already scheduled copies with a single physreg dependence and move
3232  // them just above the scheduled instruction.
3233  for (SDep &Dep : Deps) {
3234  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3235  continue;
3236  SUnit *DepSU = Dep.getSUnit();
3237  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3238  continue;
3239  MachineInstr *Copy = DepSU->getInstr();
3240  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3241  continue;
3242  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3243  DAG->dumpNode(*Dep.getSUnit()));
3244  DAG->moveInstruction(Copy, InsertPos);
3245  }
3246 }
3247 
3248 /// Update the scheduler's state after scheduling a node. This is the same node
3249 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3250 /// update it's state based on the current cycle before MachineSchedStrategy
3251 /// does.
3252 ///
3253 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3254 /// them here. See comments in biasPhysReg.
3255 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3256  if (IsTopNode) {
3257  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3258  Top.bumpNode(SU);
3259  if (SU->hasPhysRegUses)
3260  reschedulePhysReg(SU, true);
3261  } else {
3262  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3263  Bot.bumpNode(SU);
3264  if (SU->hasPhysRegDefs)
3265  reschedulePhysReg(SU, false);
3266  }
3267 }
3268 
3269 /// Create the standard converging machine scheduler. This will be used as the
3270 /// default scheduler if the target does not set a default.
3272  ScheduleDAGMILive *DAG =
3273  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3274  // Register DAG post-processors.
3275  //
3276  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3277  // data and pass it to later mutations. Have a single mutation that gathers
3278  // the interesting nodes in one pass.
3280  return DAG;
3281 }
3282 
3284  return createGenericSchedLive(C);
3285 }
3286 
3287 static MachineSchedRegistry
3288 GenericSchedRegistry("converge", "Standard converging scheduler.",
3290 
3291 //===----------------------------------------------------------------------===//
3292 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3293 //===----------------------------------------------------------------------===//
3294 
3296  DAG = Dag;
3297  SchedModel = DAG->getSchedModel();
3298  TRI = DAG->TRI;
3299 
3300  Rem.init(DAG, SchedModel);
3301  Top.init(DAG, SchedModel, &Rem);
3302  BotRoots.clear();
3303 
3304  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3305  // or are disabled, then these HazardRecs will be disabled.
3306  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3307  if (!Top.HazardRec) {
3308  Top.HazardRec =
3310  Itin, DAG);
3311  }
3312 }
3313 
3315  Rem.CriticalPath = DAG->ExitSU.getDepth();
3316 
3317  // Some roots may not feed into ExitSU. Check all of them in case.
3318  for (const SUnit *SU : BotRoots) {
3319  if (SU->getDepth() > Rem.CriticalPath)
3320  Rem.CriticalPath = SU->getDepth();
3321  }
3322  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3323  if (DumpCriticalPathLength) {
3324  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3325  }
3326 }
3327 
3328 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3329 ///
3330 /// \param Cand provides the policy and current best candidate.
3331 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3333  SchedCandidate &TryCand) {
3334  // Initialize the candidate if needed.
3335  if (!Cand.isValid()) {
3336  TryCand.Reason = NodeOrder;
3337  return;
3338  }
3339 
3340  // Prioritize instructions that read unbuffered resources by stall cycles.
3341  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3342  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3343  return;
3344 
3345  // Keep clustered nodes together.
3346  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3347  Cand.SU == DAG->getNextClusterSucc(),
3348  TryCand, Cand, Cluster))
3349  return;
3350 
3351  // Avoid critical resource consumption and balance the schedule.
3352  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3353  TryCand, Cand, ResourceReduce))
3354  return;
3356  Cand.ResDelta.DemandedResources,
3357  TryCand, Cand, ResourceDemand))
3358  return;
3359 
3360  // Avoid serializing long latency dependence chains.
3361  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3362  return;
3363  }
3364 
3365  // Fall through to original instruction order.
3366  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3367  TryCand.Reason = NodeOrder;
3368 }
3369 
3371  ReadyQueue &Q = Top.Available;
3372  for (SUnit *SU : Q) {
3373  SchedCandidate TryCand(Cand.Policy);
3374  TryCand.SU = SU;
3375  TryCand.AtTop = true;
3376  TryCand.initResourceDelta(DAG, SchedModel);
3377  tryCandidate(Cand, TryCand);
3378  if (TryCand.Reason != NoCand) {
3379  Cand.setBest(TryCand);
3380  LLVM_DEBUG(traceCandidate(Cand));
3381  }
3382  }
3383 }
3384 
3385 /// Pick the next node to schedule.
3387  if (DAG->top() == DAG->bottom()) {
3388  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3389  return nullptr;
3390  }
3391  SUnit *SU;
3392  do {
3393  SU = Top.pickOnlyChoice();
3394  if (SU) {
3395  tracePick(Only1, true);
3396  } else {
3397  CandPolicy NoPolicy;
3398  SchedCandidate TopCand(NoPolicy);
3399  // Set the top-down policy based on the state of the current top zone and
3400  // the instructions outside the zone, including the bottom zone.
3401  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3402  pickNodeFromQueue(TopCand);
3403  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3404  tracePick(TopCand);
3405  SU = TopCand.SU;
3406  }
3407  } while (SU->isScheduled);
3408 
3409  IsTopNode = true;
3410  Top.removeReady(SU);
3411 
3412  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3413  << *SU->getInstr());
3414  return SU;
3415 }
3416 
3417 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3418 /// scheduled/remaining flags in the DAG nodes.
3419 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3420  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3421  Top.bumpNode(SU);
3422 }
3423 
3425  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3426  /*RemoveKillFlags=*/true);
3427 }
3428 
3429 //===----------------------------------------------------------------------===//
3430 // ILP Scheduler. Currently for experimental analysis of heuristics.
3431 //===----------------------------------------------------------------------===//
3432 
3433 namespace {
3434 
3435 /// Order nodes by the ILP metric.
3436 struct ILPOrder {
3437  const SchedDFSResult *DFSResult = nullptr;
3438  const BitVector *ScheduledTrees = nullptr;
3439  bool MaximizeILP;
3440 
3441  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3442 
3443  /// Apply a less-than relation on node priority.
3444  ///
3445  /// (Return true if A comes after B in the Q.)
3446  bool operator()(const SUnit *A, const SUnit *B) const {
3447  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3448  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3449  if (SchedTreeA != SchedTreeB) {
3450  // Unscheduled trees have lower priority.
3451  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3452  return ScheduledTrees->test(SchedTreeB);
3453 
3454  // Trees with shallower connections have have lower priority.
3455  if (DFSResult->getSubtreeLevel(SchedTreeA)
3456  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3457  return DFSResult->getSubtreeLevel(SchedTreeA)
3458  < DFSResult->getSubtreeLevel(SchedTreeB);
3459  }
3460  }
3461  if (MaximizeILP)
3462  return DFSResult->getILP(A) < DFSResult->getILP(B);
3463  else
3464  return DFSResult->getILP(A) > DFSResult->getILP(B);
3465  }
3466 };
3467 
3468 /// Schedule based on the ILP metric.
3469 class ILPScheduler : public MachineSchedStrategy {
3470  ScheduleDAGMILive *DAG = nullptr;
3471  ILPOrder Cmp;
3472 
3473  std::vector<SUnit*> ReadyQ;
3474 
3475 public:
3476  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3477 
3478  void initialize(ScheduleDAGMI *dag) override {
3479  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3480  DAG = static_cast<ScheduleDAGMILive*>(dag);
3481  DAG->computeDFSResult();
3482  Cmp.DFSResult = DAG->getDFSResult();
3483  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3484  ReadyQ.clear();
3485  }
3486 
3487  void registerRoots() override {
3488  // Restore the heap in ReadyQ with the updated DFS results.
3489  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3490  }
3491 
3492  /// Implement MachineSchedStrategy interface.
3493  /// -----------------------------------------
3494 
3495  /// Callback to select the highest priority node from the ready Q.
3496  SUnit *pickNode(bool &IsTopNode) override {
3497  if (ReadyQ.empty()) return nullptr;
3498  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3499  SUnit *SU = ReadyQ.back();
3500  ReadyQ.pop_back();
3501  IsTopNode = false;
3502  LLVM_DEBUG(dbgs() << "Pick node "
3503  << "SU(" << SU->NodeNum << ") "
3504  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3505  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3506  << " @"
3507  << DAG->getDFSResult()->getSubtreeLevel(
3508  DAG->getDFSResult()->getSubtreeID(SU))
3509  << '\n'
3510  << "Scheduling " << *SU->getInstr());
3511  return SU;
3512  }
3513 
3514  /// Scheduler callback to notify that a new subtree is scheduled.
3515  void scheduleTree(unsigned SubtreeID) override {
3516  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3517  }
3518 
3519  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3520  /// DFSResults, and resort the priority Q.
3521  void schedNode(SUnit *SU, bool IsTopNode) override {
3522  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3523  }
3524 
3525  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3526 
3527  void releaseBottomNode(SUnit *SU) override {
3528  ReadyQ.push_back(SU);
3529  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3530  }
3531 };
3532 
3533 } // end anonymous namespace
3534 
3536  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3537 }
3539  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3540 }
3541 
3543  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3545  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3546 
3547 //===----------------------------------------------------------------------===//
3548 // Machine Instruction Shuffler for Correctness Testing
3549 //===----------------------------------------------------------------------===//
3550 
3551 #ifndef NDEBUG
3552 namespace {
3553 
3554 /// Apply a less-than relation on the node order, which corresponds to the
3555 /// instruction order prior to scheduling. IsReverse implements greater-than.
3556 template<bool IsReverse>
3557 struct SUnitOrder {
3558  bool operator()(SUnit *A, SUnit *B) const {
3559  if (IsReverse)
3560  return A->NodeNum > B->NodeNum;
3561  else
3562  return A->NodeNum < B->NodeNum;
3563  }
3564 };
3565 
3566 /// Reorder instructions as much as possible.
3567 class InstructionShuffler : public MachineSchedStrategy {
3568  bool IsAlternating;
3569  bool IsTopDown;
3570 
3571  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3572  // gives nodes with a higher number higher priority causing the latest
3573  // instructions to be scheduled first.
3574  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3575  TopQ;
3576 
3577  // When scheduling bottom-up, use greater-than as the queue priority.
3579  BottomQ;
3580 
3581 public:
3582  InstructionShuffler(bool alternate, bool topdown)
3583  : IsAlternating(alternate), IsTopDown(topdown) {}
3584 
3585  void initialize(ScheduleDAGMI*) override {
3586  TopQ.clear();
3587  BottomQ.clear();
3588  }
3589 
3590  /// Implement MachineSchedStrategy interface.
3591  /// -----------------------------------------
3592 
3593  SUnit *pickNode(bool &IsTopNode) override {
3594  SUnit *SU;
3595  if (IsTopDown) {
3596  do {
3597  if (TopQ.empty()) return nullptr;
3598  SU = TopQ.top();
3599  TopQ.pop();
3600  } while (SU->isScheduled);
3601  IsTopNode = true;
3602  } else {
3603  do {
3604  if (BottomQ.empty()) return nullptr;
3605  SU = BottomQ.top();
3606  BottomQ.pop();
3607  } while (SU->isScheduled);
3608  IsTopNode = false;
3609  }
3610  if (IsAlternating)
3611  IsTopDown = !IsTopDown;
3612  return SU;
3613  }
3614 
3615  void schedNode(SUnit *SU, bool IsTopNode) override {}
3616 
3617  void releaseTopNode(SUnit *SU) override {
3618  TopQ.push(SU);
3619  }
3620  void releaseBottomNode(SUnit *SU) override {
3621  BottomQ.push(SU);
3622  }
3623 };
3624 
3625 } // end anonymous namespace
3626 
3628  bool Alternate = !ForceTopDown && !ForceBottomUp;
3629  bool TopDown = !ForceBottomUp;
3630  assert((TopDown || !ForceTopDown) &&
3631  "-misched-topdown incompatible with -misched-bottomup");
3632  return new ScheduleDAGMILive(
3633  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3634 }
3635 
3637  "shuffle", "Shuffle machine instructions alternating directions",
3639 #endif // !NDEBUG
3640 
3641 //===----------------------------------------------------------------------===//
3642 // GraphWriter support for ScheduleDAGMILive.
3643 //===----------------------------------------------------------------------===//
3644 
3645 #ifndef NDEBUG
3646 namespace llvm {
3647 
3648 template<> struct GraphTraits<
3650 
3651 template<>
3654 
3655  static std::string getGraphName(const ScheduleDAG *G) {
3656  return G->MF.getName();
3657  }
3658 
3659  static bool renderGraphFromBottomUp() {
3660  return true;
3661  }
3662 
3663  static bool isNodeHidden(const SUnit *Node) {
3664  if (ViewMISchedCutoff == 0)
3665  return false;
3666  return (Node->Preds.size() > ViewMISchedCutoff
3667  || Node->Succs.size() > ViewMISchedCutoff);
3668  }
3669 
3670  /// If you want to override the dot attributes printed for a particular
3671  /// edge, override this method.
3672  static std::string getEdgeAttributes(const SUnit *Node,
3673  SUnitIterator EI,
3674  const ScheduleDAG *Graph) {
3675  if (EI.isArtificialDep())
3676  return "color=cyan,style=dashed";
3677  if (EI.isCtrlDep())
3678  return "color=blue,style=dashed";
3679  return "";
3680  }
3681 
3682  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3683  std::string Str;
3684  raw_string_ostream SS(Str);
3685  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3686  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3687  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3688  SS << "SU:" << SU->NodeNum;
3689  if (DFS)
3690  SS << " I:" << DFS->getNumInstrs(SU);
3691  return SS.str();
3692  }
3693 
3694  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3695  return G->getGraphNodeLabel(SU);
3696  }
3697 
3698  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3699  std::string Str("shape=Mrecord");
3700  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3701  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3702  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3703  if (DFS) {
3704  Str += ",style=filled,fillcolor=\"#";
3705  Str += DOT::getColorString(DFS->getSubtreeID(N));
3706  Str += '"';
3707  }
3708  return Str;
3709  }
3710 };
3711 
3712 } // end namespace llvm
3713 #endif // NDEBUG
3714 
3715 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3716 /// rendered using 'dot'.
3717 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3718 #ifndef NDEBUG
3719  ViewGraph(this, Name, false, Title);
3720 #else
3721  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3722  << "systems with Graphviz or gv!\n";
3723 #endif // NDEBUG
3724 }
3725 
3726 /// Out-of-line implementation with no arguments is handy for gdb.
3728  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3729 }
uint64_t CallInst * C
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:119
unsigned findMaxLatency(ArrayRef< SUnit *> ReadySUs)
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:75
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:146
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
A common definition of LaneBitmask for use in TableGen and CodeGen.
unsigned getZoneCritResIdx() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
static MachinePassRegistry< ScheduleDAGCtor > Registry
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
void dumpSchedule() const
dump the scheduled Sequence.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:328
Each Scheduling boundary is associated with ready queues.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
This class represents lattice values for constants.
Definition: AllocatorList.h:24
void addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
Segments::iterator iterator
Definition: LiveInterval.h:208
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:218
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:638
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:402
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
unsigned getReg() const
getReg - Returns the register number.
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void dump() const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
#define DEBUG_TYPE
unsigned Reg
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
virtual const TargetLowering * getTargetLowering() const
void traceCandidate(const SchedCandidate &Cand)
reverse_iterator rbegin() const
Definition: ArrayRef.h:140
bool test(unsigned Idx) const
Definition: BitVector.h:502
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Mutate the DAG as a postpass after normal DAG building.
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
virtual std::string getGraphNodeLabel(const SUnit *SU) const =0
Returns a label for an SUnit node in a visualization of the ScheduleDAG.
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
unsigned getDependentLatency() const
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
unsigned const TargetRegisterInfo * TRI
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:94
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Summarize the unscheduled region.
void reset(const CandPolicy &NewPolicy)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegisterPassParser class - Handle the addition of new machine passes.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
MachineSchedRegistry provides a selection of available machine instruction schedulers.
bool isBottomReady() const
Definition: ScheduleDAG.h:453
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:304
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:260
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
static cl::opt< bool > PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs"))
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:700
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
*ViewGraph Emit a dot run run gv on the postscript *then cleanup For use from the debugger *void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:367
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:564
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:288
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
ArrayRef< SUnit * > elements()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const RegPressureTracker & getBotRPTracker() const
amdgpu Simplify well known AMD library false Value Value const Twine & Name
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:273
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
An individual mapping from virtual register number to SUnit.
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
iterator end()
Definition: LiveInterval.h:212
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
Result of a LiveRange query.
Definition: LiveInterval.h:90
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:283
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1186
StringRef getName() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:284
PressureDiff & getPressureDiff(const SUnit *SU)
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
static const char * getReasonStr(SIScheduleCandReason Reason)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:66
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:303
static const unsigned InvalidCycle
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
static cl::opt< bool > VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling"))
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:293
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
SlotIndexes pass.
Definition: SlotIndexes.h:331
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:255
MachineBasicBlock::iterator top() const
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
unsigned WeakSuccsLeft
of weak succs not scheduled.
Definition: ScheduleDAG.h:275
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
static std::string getGraphName(const ScheduleDAG *G)
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:162
Itinerary data supplied by a subtarget to be used by a target.
unsigned NumPredsLeft
of preds not scheduled.
Definition: ScheduleDAG.h:272
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
bool isTopReady() const
Definition: ScheduleDAG.h:450
void releasePending()
Release pending ready nodes in to the available queue.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
MachinePassRegistry - Track the registration of machine passes.
List of registers defined and used by a machine instruction.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:484
std::vector< SUnit * >::iterator iterator
void collectVRegUses(SUnit &SU)
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
const SUnit * getNextClusterSucc() const
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:170
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
void incExecutedResources(unsigned PIdx, unsigned Count)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:529
TargetInstrInfo - Interface to description of machine instruction set.
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Scheduling dependency.
Definition: ScheduleDAG.h:50
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:292
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:820
#define P(N)
unsigned getPSetOrMax() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isCall
Is a function call.
Definition: ScheduleDAG.h:279
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:380
unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
CandReason
Represent the type of SchedCandidate found within a single queue.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:377
Helpers for implementing custom MachineSchedStrategy classes.
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
unsigned const MachineRegisterInfo * MRI
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:277
static const unsigned MinSubtreeSize
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos...
int getNumOccurrences() const
Definition: CommandLine.h:384
virtual bool doMBBSchedRegionsTopDown() const
If this method returns true, handling of the scheduling regions themselves (in case of a scheduling b...
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Represent the analysis usage information of a pass.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:481
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Policy for scheduling the next instruction in the candidate&#39;s zone.
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line...
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
List of PressureChanges in order of increasing, unique PSetID.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:74
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction...
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getWeakLeft(const SUnit *SU, bool isTop)
size_t size() const
Definition: SmallVector.h:53
bool isDebugInstr() const
Definition: MachineInstr.h:999
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU&#39;s successors.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream...
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:499
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned WeakPredsLeft
of weak preds not scheduled.
Definition: ScheduleDAG.h:274
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to &#39;dot...
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:198
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:299
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
MachineInstrBuilder MachineInstrBuilder & DefMI
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void releaseNode(SUnit *SU, unsigned ReadyCycle)
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
CHAIN = SC CHAIN, Imm128 - System call.
const RegPressureTracker & getTopRPTracker() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:71
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
bool isResourceLimited() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
LiveInterval & getInterval(unsigned Reg)
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void dumpPolicy() const override
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:417
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
PressureChange CriticalMax
virtual void schedule()=0
Orders nodes according to selected style.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
void dumpNode(const SUnit &SU) const override
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler&#39;s state after scheduling a node.
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU&#39;s predecessors.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:181
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
virtual SUnit * pickNode(bool &IsTopNode)=0
Pick the next node to schedule, or return NULL.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:410
void reschedulePhysReg(SUnit *SU, bool isTop)
A ScheduleDAG for scheduling lists of MachineInstr.
reverse_iterator rend() const
Definition: ArrayRef.h:141
Representation of each machine instruction.
Definition: MachineInstr.h:64
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:568
void initializePostMachineSchedulerPass(PassRegistry &)
void dump(const TargetRegisterInfo &TRI) const
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:563
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Status of an instruction&#39;s critical resource consumption.
~ScheduleDAGMI() override
LiveIntervals * getLIS() const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
cl::opt< bool > ForceBottomUp
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
void dumpScheduledState() const
MachineBasicBlock::iterator bottom() const
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Capture a change in pressure for a single pressure set.
virtual const TargetFrameLowering * getFrameLowering() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
const SUnit * getNextClusterPred() const
static ScheduleDAGInstrs * createConveringSched(MachineSchedContext *C)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit&#39;s ready time and the current cycle.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:562
static bool isNodeHidden(const SUnit *Node)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:490
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConveringSched)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool enablePostRAScheduler() const
True if the subtarget should run a scheduler after register allocation.
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists...
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:268
const std::vector< PressureChange > & getRegionCriticalPSets() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:656
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
iterator begin()
Definition: LiveInterval.h:211
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:807
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:373
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:506
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
nonconst_iterator getNonConstIterator() const
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:261
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:207
static const Function * getParent(const Value *V)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:195
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits ...
Machine Instruction Scheduler
IRTranslator LLVM IR MI
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:566
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
void finishBlock() override
Cleans up after scheduling in the given block.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
virtual unsigned getRegPressureSetScore(const MachineFunction &MF, unsigned PSetID) const
Return a heuristic for the machine scheduler to compare the profitability of increasing one register ...
void pickNodeFromQueue(SchedCandidate &Cand)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:246
This file describes how to lower LLVM code to machine code.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
cl::opt< bool > ForceTopDown
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:159
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
bool isArtificialDep() const
Definition: ScheduleDAG.h:659
void resize(size_type N)
Definition: SmallVector.h:351