ScheduleDAGRRList.cpp (LLVM 8.0.1)
1 //===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "ScheduleDAGSDNodes.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/CodeGen/ISDOpcodes.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/ScheduleDAG.h"
30 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
31 #include "llvm/CodeGen/SchedulerRegistry.h"
32 #include "llvm/CodeGen/SelectionDAGISel.h"
33 #include "llvm/CodeGen/SelectionDAGNodes.h"
34 #include "llvm/CodeGen/TargetInstrInfo.h"
35 #include "llvm/CodeGen/TargetLowering.h"
36 #include "llvm/CodeGen/TargetOpcodes.h"
37 #include "llvm/CodeGen/TargetRegisterInfo.h"
38 #include "llvm/CodeGen/TargetSubtargetInfo.h"
39 #include "llvm/Config/llvm-config.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/MC/MCInstrDesc.h"
42 #include "llvm/MC/MCRegisterInfo.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/CodeGen.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/Compiler.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <cstdlib>
55 #include <iterator>
56 #include <limits>
57 #include <memory>
58 #include <utility>
59 #include <vector>
60 
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "pre-RA-sched"
64 
65 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
66 STATISTIC(NumUnfolds, "Number of nodes unfolded");
67 STATISTIC(NumDups, "Number of duplicated nodes");
68 STATISTIC(NumPRCopies, "Number of physical register copies");
69 
70 static RegisterScheduler
71  burrListDAGScheduler("list-burr",
72  "Bottom-up register reduction list scheduling",
73  createBURRListDAGScheduler);
74 
75 static RegisterScheduler
76  sourceListDAGScheduler("source",
77  "Similar to list-burr but schedules in source "
78  "order when possible",
79  createSourceListDAGScheduler);
80 
81 static RegisterScheduler
82  hybridListDAGScheduler("list-hybrid",
83  "Bottom-up register pressure aware list scheduling "
84  "which tries to balance latency and register pressure",
85  createHybridListDAGScheduler);
86 
87 static RegisterScheduler
88  ILPListDAGScheduler("list-ilp",
89  "Bottom-up register pressure aware list scheduling "
90  "which tries to balance ILP and register pressure",
91  createILPListDAGScheduler);
92 
93 static cl::opt<bool> DisableSchedCycles(
94  "disable-sched-cycles", cl::Hidden, cl::init(false),
95  cl::desc("Disable cycle-level precision during preRA scheduling"));
96 
97 // Temporary sched=list-ilp flags until the heuristics are robust.
98 // Some options are also available under sched=list-hybrid.
99 static cl::opt<bool> DisableSchedRegPressure(
100  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
101  cl::desc("Disable regpressure priority in sched=list-ilp"));
102 static cl::opt<bool> DisableSchedLiveUses(
103  "disable-sched-live-uses", cl::Hidden, cl::init(true),
104  cl::desc("Disable live use priority in sched=list-ilp"));
105 static cl::opt<bool> DisableSchedVRegCycle(
106  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
107  cl::desc("Disable virtual register cycle interference checks"));
108 static cl::opt<bool> DisableSchedPhysRegJoin(
109  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
110  cl::desc("Disable physreg def-use affinity"));
111 static cl::opt<bool> DisableSchedStalls(
112  "disable-sched-stalls", cl::Hidden, cl::init(true),
113  cl::desc("Disable no-stall priority in sched=list-ilp"));
114 static cl::opt<bool> DisableSchedCriticalPath(
115  "disable-sched-critical-path", cl::Hidden, cl::init(false),
116  cl::desc("Disable critical path priority in sched=list-ilp"));
117 static cl::opt<bool> DisableSchedHeight(
118  "disable-sched-height", cl::Hidden, cl::init(false),
119  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 static cl::opt<bool> Disable2AddrHack(
121  "disable-2addr-hack", cl::Hidden, cl::init(true),
122  cl::desc("Disable scheduler's two-address hack"));
123 
124 static cl::opt<int> MaxReorderWindow(
125  "max-sched-reorder", cl::Hidden, cl::init(6),
126  cl::desc("Number of instructions to allow ahead of the critical path "
127  "in sched=list-ilp"));
128 
129 static cl::opt<unsigned> AvgIPC(
130  "sched-avg-ipc", cl::Hidden, cl::init(1),
131  cl::desc("Average inst/cycle when no target itinerary exists."));
132 
133 namespace {
134 
135 //===----------------------------------------------------------------------===//
136 /// ScheduleDAGRRList - The actual register reduction list scheduler
137 /// implementation. This supports both top-down and bottom-up scheduling.
138 ///
139 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
140 private:
141  /// NeedLatency - True if the scheduler will make use of latency information.
142  bool NeedLatency;
143 
144  /// AvailableQueue - The priority queue to use for the available SUnits.
145  SchedulingPriorityQueue *AvailableQueue;
146 
147  /// PendingQueue - This contains all of the instructions whose operands have
148  /// been issued, but their results are not ready yet (due to the latency of
149  /// the operation). Once the operands become available, the instruction is
150  /// added to the AvailableQueue.
151  std::vector<SUnit *> PendingQueue;
152 
153  /// HazardRec - The hazard recognizer to use.
154  ScheduleHazardRecognizer *HazardRec;
155 
156  /// CurCycle - The current scheduler state corresponds to this cycle.
157  unsigned CurCycle = 0;
158 
159  /// MinAvailableCycle - Cycle of the soonest available instruction.
160  unsigned MinAvailableCycle;
161 
162  /// IssueCount - Count instructions issued in this cycle
163  /// Currently valid only for bottom-up scheduling.
164  unsigned IssueCount;
165 
166  /// LiveRegDefs - A set of physical registers and their definitions
167  /// that are "live". These nodes must be scheduled before any other nodes that
168  /// modify the registers can be scheduled.
169  unsigned NumLiveRegs;
170  std::unique_ptr<SUnit*[]> LiveRegDefs;
171  std::unique_ptr<SUnit*[]> LiveRegGens;
172 
173  // Collect interferences between physical register use/defs.
174  // Each interference is an SUnit and set of physical registers.
175  SmallVector<SUnit*, 4> Interferences;
176 
178 
177  using LRegsMapT = DenseMap<SUnit*, SmallVector<unsigned, 4>>;
179  LRegsMapT LRegsMap;
180 
181  /// Topo - A topological ordering for SUnits which permits fast IsReachable
182  /// and similar queries.
183  ScheduleDAGTopologicalSort Topo;
184 
185  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
186  // DAG crawling.
187  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
188 
189 public:
190  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
191  SchedulingPriorityQueue *availqueue,
192  CodeGenOpt::Level OptLevel)
193  : ScheduleDAGSDNodes(mf),
194  NeedLatency(needlatency), AvailableQueue(availqueue),
195  Topo(SUnits, nullptr) {
196  const TargetSubtargetInfo &STI = mf.getSubtarget();
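 // If cycle-accurate scheduling is disabled or latency information is not
 // needed, the base ScheduleHazardRecognizer (which never reports a hazard)
 // is used; otherwise the target supplies its own recognizer.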
197  if (DisableSchedCycles || !NeedLatency)
198  HazardRec = new ScheduleHazardRecognizer();
199  else
200  HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
201  }
202 
203  ~ScheduleDAGRRList() override {
204  delete HazardRec;
205  delete AvailableQueue;
206  }
207 
208  void Schedule() override;
209 
210  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
211 
212  /// IsReachable - Checks if SU is reachable from TargetSU.
213  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
214  return Topo.IsReachable(SU, TargetSU);
215  }
216 
217  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
218  /// create a cycle.
219  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
220  return Topo.WillCreateCycle(SU, TargetSU);
221  }
222 
223  /// AddPred - adds a predecessor edge to SUnit SU.
224  /// Updates the topological ordering if required.
226  void AddPred(SUnit *SU, const SDep &D) {
227  Topo.AddPred(SU, D.getSUnit());
228  SU->addPred(D);
229  }
230 
231  /// RemovePred - removes a predecessor edge from SUnit SU.
232  /// Updates the topological ordering if required.
234  void RemovePred(SUnit *SU, const SDep &D) {
235  Topo.RemovePred(SU, D.getSUnit());
236  SU->removePred(D);
237  }
238 
239 private:
240  bool isReady(SUnit *SU) {
241  return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
242  AvailableQueue->isReady(SU);
243  }
244 
245  void ReleasePred(SUnit *SU, const SDep *PredEdge);
246  void ReleasePredecessors(SUnit *SU);
247  void ReleasePending();
248  void AdvanceToCycle(unsigned NextCycle);
249  void AdvancePastStalls(SUnit *SU);
250  void EmitNode(SUnit *SU);
251  void ScheduleNodeBottomUp(SUnit*);
252  void CapturePred(SDep *PredEdge);
253  void UnscheduleNodeBottomUp(SUnit*);
254  void RestoreHazardCheckerBottomUp();
255  void BacktrackBottomUp(SUnit*, SUnit*);
256  SUnit *TryUnfoldSU(SUnit *);
257  SUnit *CopyAndMoveSuccessors(SUnit*);
258  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
259  const TargetRegisterClass*,
260  const TargetRegisterClass*,
261  SmallVectorImpl<SUnit*>&);
262  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
263 
264  void releaseInterferences(unsigned Reg = 0);
265 
266  SUnit *PickNodeToScheduleBottomUp();
267  void ListScheduleBottomUp();
268 
269  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
270  /// Updates the topological ordering if required.
271  SUnit *CreateNewSUnit(SDNode *N) {
272  unsigned NumSUnits = SUnits.size();
273  SUnit *NewNode = newSUnit(N);
274  // Update the topological ordering.
275  if (NewNode->NodeNum >= NumSUnits)
276  Topo.InitDAGTopologicalSorting();
277  return NewNode;
278  }
279 
280  /// CreateClone - Creates a new SUnit from an existing one.
281  /// Updates the topological ordering if required.
282  SUnit *CreateClone(SUnit *N) {
283  unsigned NumSUnits = SUnits.size();
284  SUnit *NewNode = Clone(N);
285  // Update the topological ordering.
286  if (NewNode->NodeNum >= NumSUnits)
287  Topo.InitDAGTopologicalSorting();
288  return NewNode;
289  }
290 
291  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
292  /// need actual latency information but the hybrid scheduler does.
293  bool forceUnitLatencies() const override {
294  return !NeedLatency;
295  }
296 };
297 
298 } // end anonymous namespace
299 
300 /// GetCostForDef - Looks up the register class and cost for a given definition.
301 /// Typically this just means looking up the representative register class,
302 /// but for untyped values (MVT::Untyped) it means inspecting the node's
303 /// opcode to determine what register class is being generated.
304 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
305  const TargetLowering *TLI,
306  const TargetInstrInfo *TII,
307  const TargetRegisterInfo *TRI,
308  unsigned &RegClass, unsigned &Cost,
309  const MachineFunction &MF) {
310  MVT VT = RegDefPos.GetValue();
311 
312  // Special handling for untyped values. These values can only come from
313  // the expansion of custom DAG-to-DAG patterns.
314  if (VT == MVT::Untyped) {
315  const SDNode *Node = RegDefPos.GetNode();
316 
317  // Special handling for CopyFromReg of untyped values.
318  if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
319  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
320  const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
321  RegClass = RC->getID();
322  Cost = 1;
323  return;
324  }
325 
326  unsigned Opcode = Node->getMachineOpcode();
327  if (Opcode == TargetOpcode::REG_SEQUENCE) {
328  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
329  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
330  RegClass = RC->getID();
331  Cost = 1;
332  return;
333  }
334 
335  unsigned Idx = RegDefPos.GetIdx();
336  const MCInstrDesc Desc = TII->get(Opcode);
337  const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
338  RegClass = RC->getID();
339  // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
340  // better way to determine it.
341  Cost = 1;
342  } else {
343  RegClass = TLI->getRepRegClassFor(VT)->getID();
344  Cost = TLI->getRepRegClassCostFor(VT);
345  }
346 }
347 
348 /// Schedule - Schedule the DAG using list scheduling.
349 void ScheduleDAGRRList::Schedule() {
350  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
351  << " '" << BB->getName() << "' **********\n");
352 
353  CurCycle = 0;
354  IssueCount = 0;
355  MinAvailableCycle =
356  DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
357  NumLiveRegs = 0;
358  // Allocate slots for each physical register, plus one for a special register
359  // to track the virtual resource of a calling sequence.
360  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
361  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
362  CallSeqEndForStart.clear();
363  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
364 
365  // Build the scheduling graph.
366  BuildSchedGraph(nullptr);
367 
368  LLVM_DEBUG(dump());
369  Topo.InitDAGTopologicalSorting();
370 
371  AvailableQueue->initNodes(SUnits);
372 
373  HazardRec->Reset();
374 
375  // Execute the actual scheduling loop.
376  ListScheduleBottomUp();
377 
378  AvailableQueue->releaseState();
379 
380  LLVM_DEBUG({
381  dbgs() << "*** Final schedule ***\n";
382  dumpSchedule();
383  dbgs() << '\n';
384  });
385 }
386 
387 //===----------------------------------------------------------------------===//
388 // Bottom-Up Scheduling
389 //===----------------------------------------------------------------------===//
390 
391 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
392 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
393 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
394  SUnit *PredSU = PredEdge->getSUnit();
395 
396 #ifndef NDEBUG
397  if (PredSU->NumSuccsLeft == 0) {
398  dbgs() << "*** Scheduling failed! ***\n";
399  dumpNode(*PredSU);
400  dbgs() << " has been released too many times!\n";
401  llvm_unreachable(nullptr);
402  }
403 #endif
404  --PredSU->NumSuccsLeft;
405 
406  if (!forceUnitLatencies()) {
407  // Updating predecessor's height. This is now the cycle when the
408  // predecessor can be scheduled without causing a pipeline stall.
409  PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
410  }
411 
412  // If all the node's successors are scheduled, this node is ready
413  // to be scheduled. Ignore the special EntrySU node.
414  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
415  PredSU->isAvailable = true;
416 
417  unsigned Height = PredSU->getHeight();
418  if (Height < MinAvailableCycle)
419  MinAvailableCycle = Height;
420 
421  if (isReady(PredSU)) {
422  AvailableQueue->push(PredSU);
423  }
424  // CapturePred and others may have left the node in the pending queue, avoid
425  // adding it twice.
426  else if (!PredSU->isPending) {
427  PredSU->isPending = true;
428  PendingQueue.push_back(PredSU);
429  }
430  }
431 }
432 
433 /// IsChainDependent - Test if Outer is reachable from Inner through
434 /// chain dependencies.
435 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
436  unsigned NestLevel,
437  const TargetInstrInfo *TII) {
438  SDNode *N = Outer;
439  while (true) {
440  if (N == Inner)
441  return true;
442  // For a TokenFactor, examine each operand. There may be multiple ways
443  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
444  // most nesting in order to ensure that we find the corresponding match.
445  if (N->getOpcode() == ISD::TokenFactor) {
446  for (const SDValue &Op : N->op_values())
447  if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
448  return true;
449  return false;
450  }
451  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
452  if (N->isMachineOpcode()) {
453  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
454  ++NestLevel;
455  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
456  if (NestLevel == 0)
457  return false;
458  --NestLevel;
459  }
460  }
461  // Otherwise, find the chain and continue climbing.
462  for (const SDValue &Op : N->op_values())
463  if (Op.getValueType() == MVT::Other) {
464  N = Op.getNode();
465  goto found_chain_operand;
466  }
467  return false;
468  found_chain_operand:;
469  if (N->getOpcode() == ISD::EntryToken)
470  return false;
471  }
472 }
473 
474 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
475 /// the corresponding (lowered) CALLSEQ_BEGIN node.
476 ///
477 /// NestLevel and MaxNest are used in recursion to indicate the current level
478 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
479 /// level seen so far.
480 ///
481 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
482 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
483 static SDNode *
484 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
485  const TargetInstrInfo *TII) {
486  while (true) {
487  // For a TokenFactor, examine each operand. There may be multiple ways
488  // to get to the CALLSEQ_BEGIN, but we need to find the path with the
489  // most nesting in order to ensure that we find the corresponding match.
490  if (N->getOpcode() == ISD::TokenFactor) {
491  SDNode *Best = nullptr;
492  unsigned BestMaxNest = MaxNest;
493  for (const SDValue &Op : N->op_values()) {
494  unsigned MyNestLevel = NestLevel;
495  unsigned MyMaxNest = MaxNest;
496  if (SDNode *New = FindCallSeqStart(Op.getNode(),
497  MyNestLevel, MyMaxNest, TII))
498  if (!Best || (MyMaxNest > BestMaxNest)) {
499  Best = New;
500  BestMaxNest = MyMaxNest;
501  }
502  }
503  assert(Best);
504  MaxNest = BestMaxNest;
505  return Best;
506  }
507  // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
508  if (N->isMachineOpcode()) {
509  if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
510  ++NestLevel;
511  MaxNest = std::max(MaxNest, NestLevel);
512  } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
513  assert(NestLevel != 0);
514  --NestLevel;
515  if (NestLevel == 0)
516  return N;
517  }
518  }
519  // Otherwise, find the chain and continue climbing.
520  for (const SDValue &Op : N->op_values())
521  if (Op.getValueType() == MVT::Other) {
522  N = Op.getNode();
523  goto found_chain_operand;
524  }
525  return nullptr;
526  found_chain_operand:;
527  if (N->getOpcode() == ISD::EntryToken)
528  return nullptr;
529  }
530 }
531 
532 /// Call ReleasePred for each predecessor, then update register live def/gen.
533 /// Always update LiveRegDefs for a register dependence even if the current SU
534 /// also defines the register. This effectively creates one large live range
535 /// across a sequence of two-address nodes. This is important because the
536 /// entire chain must be scheduled together. Example:
537 ///
538 /// flags = (3) add
539 /// flags = (2) addc flags
540 /// flags = (1) addc flags
541 ///
542 /// results in
543 ///
544 /// LiveRegDefs[flags] = 3
545 /// LiveRegGens[flags] = 1
546 ///
547 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
548 /// interference on flags.
549 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
550  // Bottom up: release predecessors
551  for (SDep &Pred : SU->Preds) {
552  ReleasePred(SU, &Pred);
553  if (Pred.isAssignedRegDep()) {
554  // This is a physical register dependency and it's impossible or
555  // expensive to copy the register. Make sure nothing that can
556  // clobber the register is scheduled between the predecessor and
557  // this node.
558  SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
559  assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
560  "interference on register dependence");
561  LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
562  if (!LiveRegGens[Pred.getReg()]) {
563  ++NumLiveRegs;
564  LiveRegGens[Pred.getReg()] = SU;
565  }
566  }
567  }
568 
569  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
570  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
571  // these nodes, to prevent other calls from being interscheduled with them.
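 // The slot at index TRI->getNumRegs() is the extra entry allocated in
 // Schedule(); it models the call sequence as a single virtual resource.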
572  unsigned CallResource = TRI->getNumRegs();
573  if (!LiveRegDefs[CallResource])
574  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
575  if (Node->isMachineOpcode() &&
576  Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
577  unsigned NestLevel = 0;
578  unsigned MaxNest = 0;
579  SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
580  assert(N && "Must find call sequence start");
581 
582  SUnit *Def = &SUnits[N->getNodeId()];
583  CallSeqEndForStart[Def] = SU;
584 
585  ++NumLiveRegs;
586  LiveRegDefs[CallResource] = Def;
587  LiveRegGens[CallResource] = SU;
588  break;
589  }
590 }
591 
592 /// Check to see if any of the pending instructions are ready to issue. If
593 /// so, add them to the available queue.
594 void ScheduleDAGRRList::ReleasePending() {
595  if (DisableSchedCycles) {
596  assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
597  return;
598  }
599 
600  // If the available queue is empty, it is safe to reset MinAvailableCycle.
601  if (AvailableQueue->empty())
602  MinAvailableCycle = std::numeric_limits<unsigned>::max();
603 
604  // Check to see if any of the pending instructions are ready to issue. If
605  // so, add them to the available queue.
606  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
607  unsigned ReadyCycle = PendingQueue[i]->getHeight();
608  if (ReadyCycle < MinAvailableCycle)
609  MinAvailableCycle = ReadyCycle;
610 
611  if (PendingQueue[i]->isAvailable) {
612  if (!isReady(PendingQueue[i]))
613  continue;
614  AvailableQueue->push(PendingQueue[i]);
615  }
616  PendingQueue[i]->isPending = false;
617  PendingQueue[i] = PendingQueue.back();
618  PendingQueue.pop_back();
619  --i; --e;
620  }
621 }
622 
623 /// Move the scheduler state forward by the specified number of Cycles.
624 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
625  if (NextCycle <= CurCycle)
626  return;
627 
628  IssueCount = 0;
629  AvailableQueue->setCurCycle(NextCycle);
630  if (!HazardRec->isEnabled()) {
631  // Bypass lots of virtual calls in case of long latency.
632  CurCycle = NextCycle;
633  }
634  else {
635  for (; CurCycle != NextCycle; ++CurCycle) {
636  HazardRec->RecedeCycle();
637  }
638  }
639  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
640  // available Q to release pending nodes at least once before popping.
641  ReleasePending();
642 }
643 
644 /// Move the scheduler state forward until the specified node's dependents are
645 /// ready and can be scheduled with no resource conflicts.
646 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
647  if (DisableSchedCycles)
648  return;
649 
650  // FIXME: Nodes such as CopyFromReg probably should not advance the current
651  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
652  // has predecessors the cycle will be advanced when they are scheduled.
653  // But given the crude nature of modeling latency through such nodes, we
654  // currently need to treat these nodes like real instructions.
655  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
656 
657  unsigned ReadyCycle = SU->getHeight();
658 
659  // Bump CurCycle to account for latency. We assume the latency of other
660  // available instructions may be hidden by the stall (not a full pipe stall).
661  // This updates the hazard recognizer's cycle before reserving resources for
662  // this instruction.
663  AdvanceToCycle(ReadyCycle);
664 
665  // Calls are scheduled in their preceding cycle, so don't conflict with
666  // hazards from instructions after the call. EmitNode will reset the
667  // scoreboard state before emitting the call.
668  if (SU->isCall)
669  return;
670 
671  // FIXME: For resource conflicts in very long non-pipelined stages, we
672  // should probably skip ahead here to avoid useless scoreboard checks.
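 // Probe the hazard recognizer at increasing stall counts until it reports
 // no hazard; the count is passed as a negative value because this is
 // bottom-up scheduling.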
673  int Stalls = 0;
674  while (true) {
675  ScheduleHazardRecognizer::HazardType HT =
676  HazardRec->getHazardType(SU, -Stalls);
677 
678  if (HT == ScheduleHazardRecognizer::NoHazard)
679  break;
680 
681  ++Stalls;
682  }
683  AdvanceToCycle(CurCycle + Stalls);
684 }
685 
686 /// Record this SUnit in the HazardRecognizer.
687 /// Does not update CurCycle.
688 void ScheduleDAGRRList::EmitNode(SUnit *SU) {
689  if (!HazardRec->isEnabled())
690  return;
691 
692  // Check for phys reg copy.
693  if (!SU->getNode())
694  return;
695 
696  switch (SU->getNode()->getOpcode()) {
697  default:
698  assert(SU->getNode()->isMachineOpcode() &&
699  "This target-independent node should not be scheduled.");
700  break;
701  case ISD::MERGE_VALUES:
702  case ISD::TokenFactor:
703  case ISD::LIFETIME_START:
704  case ISD::LIFETIME_END:
705  case ISD::CopyToReg:
706  case ISD::CopyFromReg:
707  case ISD::EH_LABEL:
708  // Noops don't affect the scoreboard state. Copies are likely to be
709  // removed.
710  return;
711  case ISD::INLINEASM:
712  // For inline asm, clear the pipeline state.
713  HazardRec->Reset();
714  return;
715  }
716  if (SU->isCall) {
717  // Calls are scheduled with their preceding instructions. For bottom-up
718  // scheduling, clear the pipeline state before emitting.
719  HazardRec->Reset();
720  }
721 
722  HazardRec->EmitInstruction(SU);
723 }
724 
725 static void resetVRegCycle(SUnit *SU);
726 
727 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
728 /// count of its predecessors. If a predecessor pending count is zero, add it to
729 /// the Available queue.
730 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
731  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
732  LLVM_DEBUG(dumpNode(*SU));
733 
734 #ifndef NDEBUG
735  if (CurCycle < SU->getHeight())
736  LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
737  << "] pipeline stall!\n");
738 #endif
739 
740  // FIXME: Do not modify node height. It may interfere with
741  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
742  // node its ready cycle can aid heuristics, and after scheduling it can
743  // indicate the scheduled cycle.
744  SU->setHeightToAtLeast(CurCycle);
745 
746  // Reserve resources for the scheduled instruction.
747  EmitNode(SU);
748 
749  Sequence.push_back(SU);
750 
751  AvailableQueue->scheduledNode(SU);
752 
753  // If HazardRec is disabled, and each inst counts as one cycle, then
754  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
755  // PendingQueue for schedulers that implement HasReadyFilter.
756  if (!HazardRec->isEnabled() && AvgIPC < 2)
757  AdvanceToCycle(CurCycle + 1);
758 
759  // Update liveness of predecessors before successors to avoid treating a
760  // two-address node as a live range def.
761  ReleasePredecessors(SU);
762 
763  // Release all the implicit physical register defs that are live.
764  for (SDep &Succ : SU->Succs) {
765  // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
766  if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
767  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
768  --NumLiveRegs;
769  LiveRegDefs[Succ.getReg()] = nullptr;
770  LiveRegGens[Succ.getReg()] = nullptr;
771  releaseInterferences(Succ.getReg());
772  }
773  }
774  // Release the special call resource dependence, if this is the beginning
775  // of a call.
776  unsigned CallResource = TRI->getNumRegs();
777  if (LiveRegDefs[CallResource] == SU)
778  for (const SDNode *SUNode = SU->getNode(); SUNode;
779  SUNode = SUNode->getGluedNode()) {
780  if (SUNode->isMachineOpcode() &&
781  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
782  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
783  --NumLiveRegs;
784  LiveRegDefs[CallResource] = nullptr;
785  LiveRegGens[CallResource] = nullptr;
786  releaseInterferences(CallResource);
787  }
788  }
789 
790  resetVRegCycle(SU);
791 
792  SU->isScheduled = true;
793 
794  // Conditions under which the scheduler should eagerly advance the cycle:
795  // (1) No available instructions
796  // (2) All pipelines full, so available instructions must have hazards.
797  //
798  // If HazardRec is disabled, the cycle was pre-advanced before calling
799  // ReleasePredecessors. In that case, IssueCount should remain 0.
800  //
801  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
802  if (HazardRec->isEnabled() || AvgIPC > 1) {
803  if (SU->getNode() && SU->getNode()->isMachineOpcode())
804  ++IssueCount;
805  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
806  || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
807  AdvanceToCycle(CurCycle + 1);
808  }
809 }
810 
811 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
812 /// unscheduled, increase the succ left count of its predecessors. Remove
813 /// them from AvailableQueue if necessary.
814 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
815  SUnit *PredSU = PredEdge->getSUnit();
816  if (PredSU->isAvailable) {
817  PredSU->isAvailable = false;
818  if (!PredSU->isPending)
819  AvailableQueue->remove(PredSU);
820  }
821 
822  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
823  "NumSuccsLeft will overflow!");
824  ++PredSU->NumSuccsLeft;
825 }
826 
827 /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
828 /// state and its predecessors' states to reflect the change.
829 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
830  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
831  LLVM_DEBUG(dumpNode(*SU));
832 
833  for (SDep &Pred : SU->Preds) {
834  CapturePred(&Pred);
835  if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
836  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
837  assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
838  "Physical register dependency violated?");
839  --NumLiveRegs;
840  LiveRegDefs[Pred.getReg()] = nullptr;
841  LiveRegGens[Pred.getReg()] = nullptr;
842  releaseInterferences(Pred.getReg());
843  }
844  }
845 
846  // Reclaim the special call resource dependence, if this is the beginning
847  // of a call.
848  unsigned CallResource = TRI->getNumRegs();
849  for (const SDNode *SUNode = SU->getNode(); SUNode;
850  SUNode = SUNode->getGluedNode()) {
851  if (SUNode->isMachineOpcode() &&
852  SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
853  SUnit *SeqEnd = CallSeqEndForStart[SU];
854  assert(SeqEnd && "Call sequence start/end must be known");
855  assert(!LiveRegDefs[CallResource]);
856  assert(!LiveRegGens[CallResource]);
857  ++NumLiveRegs;
858  LiveRegDefs[CallResource] = SU;
859  LiveRegGens[CallResource] = SeqEnd;
860  }
861  }
862 
863  // Release the special call resource dependence, if this is the end
864  // of a call.
865  if (LiveRegGens[CallResource] == SU)
866  for (const SDNode *SUNode = SU->getNode(); SUNode;
867  SUNode = SUNode->getGluedNode()) {
868  if (SUNode->isMachineOpcode() &&
869  SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
870  assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
871  assert(LiveRegDefs[CallResource]);
872  assert(LiveRegGens[CallResource]);
873  --NumLiveRegs;
874  LiveRegDefs[CallResource] = nullptr;
875  LiveRegGens[CallResource] = nullptr;
876  releaseInterferences(CallResource);
877  }
878  }
879 
880  for (auto &Succ : SU->Succs) {
881  if (Succ.isAssignedRegDep()) {
882  auto Reg = Succ.getReg();
883  if (!LiveRegDefs[Reg])
884  ++NumLiveRegs;
885  // This becomes the nearest def. Note that an earlier def may still be
886  // pending if this is a two-address node.
887  LiveRegDefs[Reg] = SU;
888 
889  // Update LiveRegGens[Reg] only if it was empty before this unscheduling.
890  // This avoids incorrectly overwriting a generator recorded in a previous run.
891  if (!LiveRegGens[Reg]) {
892  // Find the successor with the lowest height.
893  LiveRegGens[Reg] = Succ.getSUnit();
894  for (auto &Succ2 : SU->Succs) {
895  if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
896  Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
897  LiveRegGens[Reg] = Succ2.getSUnit();
898  }
899  }
900  }
901  }
902  if (SU->getHeight() < MinAvailableCycle)
903  MinAvailableCycle = SU->getHeight();
904 
905  SU->setHeightDirty();
906  SU->isScheduled = false;
907  SU->isAvailable = true;
908  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
909  // Don't make available until backtracking is complete.
910  SU->isPending = true;
911  PendingQueue.push_back(SU);
912  }
913  else {
914  AvailableQueue->push(SU);
915  }
916  AvailableQueue->unscheduledNode(SU);
917 }
918 
919 /// After backtracking, the hazard checker needs to be restored to a state
920 /// corresponding the current cycle.
921 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
922  HazardRec->Reset();
923 
924  unsigned LookAhead = std::min((unsigned)Sequence.size(),
925  HazardRec->getMaxLookAhead());
926  if (LookAhead == 0)
927  return;
928 
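 // Re-emit the most recently scheduled nodes (up to the recognizer's maximum
 // lookahead) so the scoreboard again reflects the current cycle window.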
929  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
930  unsigned HazardCycle = (*I)->getHeight();
931  for (auto E = Sequence.end(); I != E; ++I) {
932  SUnit *SU = *I;
933  for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
934  HazardRec->RecedeCycle();
935  }
936  EmitNode(SU);
937  }
938 }
939 
940 /// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
941 /// including BtSU, so that SU can be scheduled.
942 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
943  SUnit *OldSU = Sequence.back();
944  while (true) {
945  Sequence.pop_back();
946  // FIXME: use ready cycle instead of height
947  CurCycle = OldSU->getHeight();
948  UnscheduleNodeBottomUp(OldSU);
949  AvailableQueue->setCurCycle(CurCycle);
950  if (OldSU == BtSU)
951  break;
952  OldSU = Sequence.back();
953  }
954 
955  assert(!SU->isSucc(OldSU) && "Something is wrong!");
956 
957  RestoreHazardCheckerBottomUp();
958 
959  ReleasePending();
960 
961  ++NumBacktracks;
962 }
963 
964 static bool isOperandOf(const SUnit *SU, SDNode *N) {
965  for (const SDNode *SUNode = SU->getNode(); SUNode;
966  SUNode = SUNode->getGluedNode()) {
967  if (SUNode->isOperandOf(N))
968  return true;
969  }
970  return false;
971 }
972 
973 /// TryUnfoldSU - Attempt to unfold SU's folded load; returns the SUnit to schedule in its place, or nullptr if unfolding is not possible.
974 SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
975  SDNode *N = SU->getNode();
976  // Use while over if to ease fall through.
977  SmallVector<SDNode *, 2> NewNodes;
978  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
979  return nullptr;
980 
981  // unfolding an x86 DEC64m operation results in store, dec, load which
982  // can't be handled here so quit
983  if (NewNodes.size() == 3)
984  return nullptr;
985 
986  assert(NewNodes.size() == 2 && "Expected a load folding node!");
987 
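 // NewNodes[0] is the newly created load; NewNodes[1] is the operation that
 // previously had the load folded into it.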
988  N = NewNodes[1];
989  SDNode *LoadNode = NewNodes[0];
990  unsigned NumVals = N->getNumValues();
991  unsigned OldNumVals = SU->getNode()->getNumValues();
992 
993  // LoadNode may already exist. This can happen when there is another
994  // load from the same location and producing the same type of value
995  // but it has different alignment or volatileness.
996  bool isNewLoad = true;
997  SUnit *LoadSU;
998  if (LoadNode->getNodeId() != -1) {
999  LoadSU = &SUnits[LoadNode->getNodeId()];
1000  // If LoadSU has already been scheduled, we should clone it but
1001  // this would negate the benefit to unfolding so just return SU.
1002  if (LoadSU->isScheduled)
1003  return SU;
1004  isNewLoad = false;
1005  } else {
1006  LoadSU = CreateNewSUnit(LoadNode);
1007  LoadNode->setNodeId(LoadSU->NodeNum);
1008 
1009  InitNumRegDefsLeft(LoadSU);
1010  computeLatency(LoadSU);
1011  }
1012 
1013  bool isNewN = true;
1014  SUnit *NewSU;
1015  // This can only happen when isNewLoad is false.
1016  if (N->getNodeId() != -1) {
1017  NewSU = &SUnits[N->getNodeId()];
1018  // If NewSU has already been scheduled, we need to clone it, but this
1019  // negates the benefit to unfolding so just return SU.
1020  if (NewSU->isScheduled)
1021  return SU;
1022  isNewN = false;
1023  } else {
1024  NewSU = CreateNewSUnit(N);
1025  N->setNodeId(NewSU->NodeNum);
1026 
1027  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1028  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1029  if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1030  NewSU->isTwoAddress = true;
1031  break;
1032  }
1033  }
1034  if (MCID.isCommutable())
1035  NewSU->isCommutable = true;
1036 
1037  InitNumRegDefsLeft(NewSU);
1038  computeLatency(NewSU);
1039  }
1040 
1041  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1042 
1043  // Now that we are committed to unfolding replace DAG Uses.
1044  for (unsigned i = 0; i != NumVals; ++i)
1045  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1046  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1047  SDValue(LoadNode, 1));
1048 
1049  // Record all the edges to and from the old SU, by category.
1050  SmallVector<SDep, 4> ChainPreds;
1051  SmallVector<SDep, 4> ChainSuccs;
1052  SmallVector<SDep, 4> LoadPreds;
1053  SmallVector<SDep, 4> NodePreds;
1054  SmallVector<SDep, 4> NodeSuccs;
1055  for (SDep &Pred : SU->Preds) {
1056  if (Pred.isCtrl())
1057  ChainPreds.push_back(Pred);
1058  else if (isOperandOf(Pred.getSUnit(), LoadNode))
1059  LoadPreds.push_back(Pred);
1060  else
1061  NodePreds.push_back(Pred);
1062  }
1063  for (SDep &Succ : SU->Succs) {
1064  if (Succ.isCtrl())
1065  ChainSuccs.push_back(Succ);
1066  else
1067  NodeSuccs.push_back(Succ);
1068  }
1069 
1070  // Now assign edges to the newly-created nodes.
1071  for (const SDep &Pred : ChainPreds) {
1072  RemovePred(SU, Pred);
1073  if (isNewLoad)
1074  AddPred(LoadSU, Pred);
1075  }
1076  for (const SDep &Pred : LoadPreds) {
1077  RemovePred(SU, Pred);
1078  if (isNewLoad)
1079  AddPred(LoadSU, Pred);
1080  }
1081  for (const SDep &Pred : NodePreds) {
1082  RemovePred(SU, Pred);
1083  AddPred(NewSU, Pred);
1084  }
1085  for (SDep D : NodeSuccs) {
1086  SUnit *SuccDep = D.getSUnit();
1087  D.setSUnit(SU);
1088  RemovePred(SuccDep, D);
1089  D.setSUnit(NewSU);
1090  AddPred(SuccDep, D);
1091  // Balance register pressure.
1092  if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1093  !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1094  --NewSU->NumRegDefsLeft;
1095  }
1096  for (SDep D : ChainSuccs) {
1097  SUnit *SuccDep = D.getSUnit();
1098  D.setSUnit(SU);
1099  RemovePred(SuccDep, D);
1100  if (isNewLoad) {
1101  D.setSUnit(LoadSU);
1102  AddPred(SuccDep, D);
1103  }
1104  }
1105 
1106  // Add a data dependency to reflect that NewSU reads the value defined
1107  // by LoadSU.
1108  SDep D(LoadSU, SDep::Data, 0);
1109  D.setLatency(LoadSU->Latency);
1110  AddPred(NewSU, D);
1111 
1112  if (isNewLoad)
1113  AvailableQueue->addNode(LoadSU);
1114  if (isNewN)
1115  AvailableQueue->addNode(NewSU);
1116 
1117  ++NumUnfolds;
1118 
1119  if (NewSU->NumSuccsLeft == 0)
1120  NewSU->isAvailable = true;
1121 
1122  return NewSU;
1123 }
1124 
1125 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1126 /// successors to the newly created node.
1127 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1128  SDNode *N = SU->getNode();
1129  if (!N)
1130  return nullptr;
1131 
1132  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1133  LLVM_DEBUG(dumpNode(*SU));
1134 
1135  if (N->getGluedNode() &&
1136  !TII->canCopyGluedNodeDuringSchedule(N)) {
1137  LLVM_DEBUG(
1138  dbgs()
1139  << "Giving up because it has incoming glue and the target does not "
1140  "want to copy it\n");
1141  return nullptr;
1142  }
1143 
1144  SUnit *NewSU;
1145  bool TryUnfold = false;
1146  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1147  MVT VT = N->getSimpleValueType(i);
1148  if (VT == MVT::Glue) {
1149  LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1150  return nullptr;
1151  } else if (VT == MVT::Other)
1152  TryUnfold = true;
1153  }
1154  for (const SDValue &Op : N->op_values()) {
1155  MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1156  if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1157  LLVM_DEBUG(
1158  dbgs() << "Giving up because one of the operands is glue and "
1159  "the target does not want to copy it\n");
1160  return nullptr;
1161  }
1162  }
1163 
1164  // If possible unfold instruction.
1165  if (TryUnfold) {
1166  SUnit *UnfoldSU = TryUnfoldSU(SU);
1167  if (!UnfoldSU)
1168  return nullptr;
1169  SU = UnfoldSU;
1170  N = SU->getNode();
1171  // If this can be scheduled don't bother duplicating and just return
1172  if (SU->NumSuccsLeft == 0)
1173  return SU;
1174  }
1175 
1176  LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1177  NewSU = CreateClone(SU);
1178 
1179  // New SUnit has the exact same predecessors.
1180  for (SDep &Pred : SU->Preds)
1181  if (!Pred.isArtificial())
1182  AddPred(NewSU, Pred);
1183 
1184  // Only copy scheduled successors. Cut them from old node's successor
1185  // list and move them over.
1186  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1187  for (SDep &Succ : SU->Succs) {
1188  if (Succ.isArtificial())
1189  continue;
1190  SUnit *SuccSU = Succ.getSUnit();
1191  if (SuccSU->isScheduled) {
1192  SDep D = Succ;
1193  D.setSUnit(NewSU);
1194  AddPred(SuccSU, D);
1195  D.setSUnit(SU);
1196  DelDeps.push_back(std::make_pair(SuccSU, D));
1197  }
1198  }
1199  for (auto &DelDep : DelDeps)
1200  RemovePred(DelDep.first, DelDep.second);
1201 
1202  AvailableQueue->updateNode(SU);
1203  AvailableQueue->addNode(NewSU);
1204 
1205  ++NumDups;
1206  return NewSU;
1207 }
1208 
1209 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
1210 /// scheduled successors of the given SUnit to the last copy.
1211 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1212  const TargetRegisterClass *DestRC,
1213  const TargetRegisterClass *SrcRC,
1214  SmallVectorImpl<SUnit*> &Copies) {
1215  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1216  CopyFromSU->CopySrcRC = SrcRC;
1217  CopyFromSU->CopyDstRC = DestRC;
1218 
1219  SUnit *CopyToSU = CreateNewSUnit(nullptr);
1220  CopyToSU->CopySrcRC = DestRC;
1221  CopyToSU->CopyDstRC = SrcRC;
1222 
1223  // Only copy scheduled successors. Cut them from old node's successor
1224  // list and move them over.
1225  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1226  for (SDep &Succ : SU->Succs) {
1227  if (Succ.isArtificial())
1228  continue;
1229  SUnit *SuccSU = Succ.getSUnit();
1230  if (SuccSU->isScheduled) {
1231  SDep D = Succ;
1232  D.setSUnit(CopyToSU);
1233  AddPred(SuccSU, D);
1234  DelDeps.push_back(std::make_pair(SuccSU, Succ));
1235  }
1236  else {
1237  // Avoid scheduling the def-side copy before other successors. Otherwise
1238  // we could introduce another physreg interference on the copy and
1239  // continue inserting copies indefinitely.
1240  AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1241  }
1242  }
1243  for (auto &DelDep : DelDeps)
1244  RemovePred(DelDep.first, DelDep.second);
1245 
1246  SDep FromDep(SU, SDep::Data, Reg);
1247  FromDep.setLatency(SU->Latency);
1248  AddPred(CopyFromSU, FromDep);
1249  SDep ToDep(CopyFromSU, SDep::Data, 0);
1250  ToDep.setLatency(CopyFromSU->Latency);
1251  AddPred(CopyToSU, ToDep);
1252 
1253  AvailableQueue->updateNode(SU);
1254  AvailableQueue->addNode(CopyFromSU);
1255  AvailableQueue->addNode(CopyToSU);
1256  Copies.push_back(CopyFromSU);
1257  Copies.push_back(CopyToSU);
1258 
1259  ++NumPRCopies;
1260 }
1261 
1262 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
1263 /// definition of the specified node.
1264 /// FIXME: Move to SelectionDAG?
1265 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1266  const TargetInstrInfo *TII) {
1267  unsigned NumRes;
1268  if (N->getOpcode() == ISD::CopyFromReg) {
1269  // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1270  NumRes = 1;
1271  } else {
1272  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1273  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1274  NumRes = MCID.getNumDefs();
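 // Reg is an implicit def; skip past the explicit defs and any implicit defs
 // that precede it so NumRes indexes the corresponding result value.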
1275  for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1276  if (Reg == *ImpDef)
1277  break;
1278  ++NumRes;
1279  }
1280  }
1281  return N->getSimpleValueType(NumRes);
1282 }
1283 
1284 /// CheckForLiveRegDef - Add to LRegs any currently "live" registers that the
1285 /// specified register def of the specified SUnit would clobber.
1286 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1287  SUnit **LiveRegDefs,
1288  SmallSet<unsigned, 4> &RegAdded,
1289  SmallVectorImpl<unsigned> &LRegs,
1290  const TargetRegisterInfo *TRI) {
1291  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1292 
1293  // Check if this alias of Reg is live.
1294  if (!LiveRegDefs[*AliasI]) continue;
1295 
1296  // Allow multiple uses of the same def.
1297  if (LiveRegDefs[*AliasI] == SU) continue;
1298 
1299  // Add Reg to the set of interfering live regs.
1300  if (RegAdded.insert(*AliasI).second) {
1301  LRegs.push_back(*AliasI);
1302  }
1303  }
1304 }
1305 
1306 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1307 /// by RegMask, and add them to LRegs.
1308 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1309  ArrayRef<SUnit*> LiveRegDefs,
1310  SmallSet<unsigned, 4> &RegAdded,
1311  SmallVectorImpl<unsigned> &LRegs) {
1312  // Look at all live registers. Skip Reg0 and the special CallResource.
1313  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1314  if (!LiveRegDefs[i]) continue;
1315  if (LiveRegDefs[i] == SU) continue;
1316  if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1317  if (RegAdded.insert(i).second)
1318  LRegs.push_back(i);
1319  }
1320 }
1321 
1322 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1323 static const uint32_t *getNodeRegMask(const SDNode *N) {
1324  for (const SDValue &Op : N->op_values())
1325  if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1326  return RegOp->getRegMask();
1327  return nullptr;
1328 }
1329 
1330 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1331 /// scheduling of the given node to satisfy live physical register dependencies.
1332 /// If the specific node is the last one that's available to schedule, do
1333 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1334 bool ScheduleDAGRRList::
1335 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1336  if (NumLiveRegs == 0)
1337  return false;
1338 
1339  SmallSet<unsigned, 4> RegAdded;
1340  // If this node would clobber any "live" register, then it's not ready.
1341  //
1342  // If SU is the currently live definition of the same register that it uses,
1343  // then we are free to schedule it.
1344  for (SDep &Pred : SU->Preds) {
1345  if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1346  CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1347  RegAdded, LRegs, TRI);
1348  }
1349 
1350  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1351  if (Node->getOpcode() == ISD::INLINEASM) {
1352  // Inline asm can clobber physical defs.
1353  unsigned NumOps = Node->getNumOperands();
1354  if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1355  --NumOps; // Ignore the glue operand.
1356 
1357  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1358  unsigned Flags =
1359  cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1360  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1361 
1362  ++i; // Skip the ID value.
1363  if (InlineAsm::isRegDefKind(Flags) ||
1364  InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1365  InlineAsm::isClobberKind(Flags)) {
1366  // Check for def of register or earlyclobber register.
1367  for (; NumVals; --NumVals, ++i) {
1368  unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1369  if (TargetRegisterInfo::isPhysicalRegister(Reg))
1370  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1371  }
1372  } else
1373  i += NumVals;
1374  }
1375  continue;
1376  }
1377 
1378  if (!Node->isMachineOpcode())
1379  continue;
1380  // If we're in the middle of scheduling a call, don't begin scheduling
1381  // another call. Also, don't allow any physical registers to be live across
1382  // the call.
1383  if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1384  // Check the special calling-sequence resource.
1385  unsigned CallResource = TRI->getNumRegs();
1386  if (LiveRegDefs[CallResource]) {
1387  SDNode *Gen = LiveRegGens[CallResource]->getNode();
1388  while (SDNode *Glued = Gen->getGluedNode())
1389  Gen = Glued;
1390  if (!IsChainDependent(Gen, Node, 0, TII) &&
1391  RegAdded.insert(CallResource).second)
1392  LRegs.push_back(CallResource);
1393  }
1394  }
1395  if (const uint32_t *RegMask = getNodeRegMask(Node))
1396  CheckForLiveRegDefMasked(SU, RegMask,
1397  makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1398  RegAdded, LRegs);
1399 
1400  const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1401  if (MCID.hasOptionalDef()) {
1402  // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1403  // This operand can be either a def of CPSR, if the S bit is set; or a use
1404  // of %noreg. When the OptionalDef is set to a valid register, we need to
1405  // handle it in the same way as an ImplicitDef.
1406  for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1407  if (MCID.OpInfo[i].isOptionalDef()) {
1408  const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1409  unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1410  CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1411  }
1412  }
1413  if (!MCID.ImplicitDefs)
1414  continue;
1415  for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
1416  CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1417  }
1418 
1419  return !LRegs.empty();
1420 }
1421 
1422 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1423  // Add the nodes that aren't ready back onto the available list.
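 // Iterate in reverse so the swap-with-back erasure below does not skip
 // elements.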
1424  for (unsigned i = Interferences.size(); i > 0; --i) {
1425  SUnit *SU = Interferences[i-1];
1426  LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1427  if (Reg) {
1428  SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1429  if (!is_contained(LRegs, Reg))
1430  continue;
1431  }
1432  SU->isPending = false;
1433  // The interfering node may no longer be available due to backtracking.
1434  // Furthermore, it may have been made available again, in which case it is
1435  // now already in the AvailableQueue.
1436  if (SU->isAvailable && !SU->NodeQueueId) {
1437  LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1438  AvailableQueue->push(SU);
1439  }
1440  if (i < Interferences.size())
1441  Interferences[i-1] = Interferences.back();
1442  Interferences.pop_back();
1443  LRegsMap.erase(LRegsPos);
1444  }
1445 }
1446 
1447 /// Return a node that can be scheduled in this cycle. Requirements:
1448 /// (1) Ready: latency has been satisfied
1449 /// (2) No Hazards: resources are available
1450 /// (3) No Interferences: may unschedule to break register interferences.
1451 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1452  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1453  auto FindAvailableNode = [&]() {
1454  while (CurSU) {
1455  SmallVector<unsigned, 4> LRegs;
1456  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1457  break;
1458  LLVM_DEBUG(dbgs() << " Interfering reg ";
1459  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1460  else dbgs() << printReg(LRegs[0], TRI);
1461  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1462  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1463  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1464  if (LRegsPair.second) {
1465  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1466  Interferences.push_back(CurSU);
1467  }
1468  else {
1469  assert(CurSU->isPending && "Interferences are pending");
1470  // Update the interference with current live regs.
1471  LRegsPair.first->second = LRegs;
1472  }
1473  CurSU = AvailableQueue->pop();
1474  }
1475  };
1476  FindAvailableNode();
1477  if (CurSU)
1478  return CurSU;
1479 
1480  // All candidates are delayed due to live physical reg dependencies.
1481  // Try backtracking, code duplication, or inserting cross class copies
1482  // to resolve it.
1483  for (SUnit *TrySU : Interferences) {
1484  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1485 
1486  // Try unscheduling up to the point where it's safe to schedule
1487  // this node.
1488  SUnit *BtSU = nullptr;
1489  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1490  for (unsigned Reg : LRegs) {
1491  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1492  BtSU = LiveRegGens[Reg];
1493  LiveCycle = BtSU->getHeight();
1494  }
1495  }
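 // BtSU is now the interfering live-range generator with the smallest height,
 // i.e. the earliest point (bottom-up) to which we would have to backtrack.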
1496  if (!WillCreateCycle(TrySU, BtSU)) {
1497  // BacktrackBottomUp mutates Interferences!
1498  BacktrackBottomUp(TrySU, BtSU);
1499 
1500  // Force the current node to be scheduled before the node that
1501  // requires the physical reg dep.
1502  if (BtSU->isAvailable) {
1503  BtSU->isAvailable = false;
1504  if (!BtSU->isPending)
1505  AvailableQueue->remove(BtSU);
1506  }
1507  LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1508  << ") to SU(" << TrySU->NodeNum << ")\n");
1509  AddPred(TrySU, SDep(BtSU, SDep::Artificial));
1510 
1511  // If one or more successors has been unscheduled, then the current
1512  // node is no longer available.
1513  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1514  LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1515  CurSU = AvailableQueue->pop();
1516  } else {
1517  LLVM_DEBUG(dbgs() << "TrySU available\n");
1518  // Available and in AvailableQueue
1519  AvailableQueue->remove(TrySU);
1520  CurSU = TrySU;
1521  }
1522  FindAvailableNode();
1523  // Interferences has been mutated. We must break.
1524  break;
1525  }
1526  }
1527 
1528  if (!CurSU) {
1529  // Can't backtrack. If it's too expensive to copy the value, then try to
1530  // duplicate the nodes that produce these "too expensive to copy"
1531  // values to break the dependency. In case even that doesn't work,
1532  // insert cross class copies.
1533  // If it's not too expensive, i.e. cost != -1, issue copies.
1534  SUnit *TrySU = Interferences[0];
1535  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1536  assert(LRegs.size() == 1 && "Can't handle this yet!");
1537  unsigned Reg = LRegs[0];
1538  SUnit *LRDef = LiveRegDefs[Reg];
1539  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1540  const TargetRegisterClass *RC =
1541  TRI->getMinimalPhysRegClass(Reg, VT);
1542  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1543 
1544  // If cross copy register class is the same as RC, then it must be possible
1545  // to copy the value directly. Do not try to duplicate the def.
1546  // If cross copy register class is not the same as RC, then it's possible to
1547  // copy the value but it requires cross register class copies and it is
1548  // expensive.
1549  // If cross copy register class is null, then it's not possible to copy
1550  // the value at all.
1551  SUnit *NewDef = nullptr;
1552  if (DestRC != RC) {
1553  NewDef = CopyAndMoveSuccessors(LRDef);
1554  if (!DestRC && !NewDef)
1555  report_fatal_error("Can't handle live physical register dependency!");
1556  }
1557  if (!NewDef) {
1558  // Issue copies, these can be expensive cross register class copies.
1559  SmallVector<SUnit*, 2> Copies;
1560  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1561  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1562  << " to SU #" << Copies.front()->NodeNum << "\n");
1563  AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1564  NewDef = Copies.back();
1565  }
1566 
1567  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1568  << " to SU #" << TrySU->NodeNum << "\n");
1569  LiveRegDefs[Reg] = NewDef;
1570  AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1571  TrySU->isAvailable = false;
1572  CurSU = NewDef;
1573  }
1574  assert(CurSU && "Unable to resolve live physical register dependencies!");
1575  return CurSU;
1576 }
1577 
1578 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1579 /// schedulers.
1580 void ScheduleDAGRRList::ListScheduleBottomUp() {
1581  // Release any predecessors of the special Exit node.
1582  ReleasePredecessors(&ExitSU);
1583 
1584  // Add root to Available queue.
1585  if (!SUnits.empty()) {
1586  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1587  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1588  RootSU->isAvailable = true;
1589  AvailableQueue->push(RootSU);
1590  }
1591 
1592  // While Available queue is not empty, grab the node with the highest
1593  // priority. If it is not ready put it back. Schedule the node.
1594  Sequence.reserve(SUnits.size());
1595  while (!AvailableQueue->empty() || !Interferences.empty()) {
1596  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1597  AvailableQueue->dump(this));
1598 
1599  // Pick the best node to schedule taking all constraints into
1600  // consideration.
1601  SUnit *SU = PickNodeToScheduleBottomUp();
1602 
1603  AdvancePastStalls(SU);
1604 
1605  ScheduleNodeBottomUp(SU);
1606 
1607  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1608  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1609  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1610  "MinAvailableCycle uninitialized");
1611  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1612  }
1613  }
1614 
1615  // Reverse the order if it is bottom up.
1616  std::reverse(Sequence.begin(), Sequence.end());
1617 
1618 #ifndef NDEBUG
1619  VerifyScheduledSequence(/*isBottomUp=*/true);
1620 #endif
1621 }
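// Added note (not in the original source): the outer loop above runs while
// either the available queue or the interference list is non-empty. When every
// ready node is still sitting in PendingQueue, the inner loop advances the
// current cycle to MinAvailableCycle (asserted above to be initialized) so that
// pending nodes can be released and scheduling keeps making forward progress.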
1622 
1623 namespace {
1624 
1625 class RegReductionPQBase;
1626 
1627 struct queue_sort {
1628  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1629 };
1630 
1631 #ifndef NDEBUG
1632 template<class SF>
1633 struct reverse_sort : public queue_sort {
1634  SF &SortFunc;
1635 
1636  reverse_sort(SF &sf) : SortFunc(sf) {}
1637 
1638  bool operator()(SUnit* left, SUnit* right) const {
1639  // reverse left/right rather than simply !SortFunc(left, right)
1640  // to expose different paths in the comparison logic.
1641  return SortFunc(right, left);
1642  }
1643 };
1644 #endif // NDEBUG
1645 
1646 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1647 /// reduction scheduler.
1648 struct bu_ls_rr_sort : public queue_sort {
1649  enum {
1650  IsBottomUp = true,
1651  HasReadyFilter = false
1652  };
1653 
1654  RegReductionPQBase *SPQ;
1655 
1656  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1657 
1658  bool operator()(SUnit* left, SUnit* right) const;
1659 };
1660 
1661 // src_ls_rr_sort - Priority function for source order scheduler.
1662 struct src_ls_rr_sort : public queue_sort {
1663  enum {
1664  IsBottomUp = true,
1665  HasReadyFilter = false
1666  };
1667 
1668  RegReductionPQBase *SPQ;
1669 
1670  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1671 
1672  bool operator()(SUnit* left, SUnit* right) const;
1673 };
1674 
1675 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1676 struct hybrid_ls_rr_sort : public queue_sort {
1677  enum {
1678  IsBottomUp = true,
1679  HasReadyFilter = false
1680  };
1681 
1682  RegReductionPQBase *SPQ;
1683 
1684  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1685 
1686  bool isReady(SUnit *SU, unsigned CurCycle) const;
1687 
1688  bool operator()(SUnit* left, SUnit* right) const;
1689 };
1690 
1691 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1692 // scheduler.
1693 struct ilp_ls_rr_sort : public queue_sort {
1694  enum {
1695  IsBottomUp = true,
1696  HasReadyFilter = false
1697  };
1698 
1699  RegReductionPQBase *SPQ;
1700 
1701  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1702 
1703  bool isReady(SUnit *SU, unsigned CurCycle) const;
1704 
1705  bool operator()(SUnit* left, SUnit* right) const;
1706 };
1707 
1708 class RegReductionPQBase : public SchedulingPriorityQueue {
1709 protected:
1710  std::vector<SUnit *> Queue;
1711  unsigned CurQueueId = 0;
1712  bool TracksRegPressure;
1713  bool SrcOrder;
1714 
1715  // SUnits - The SUnits for the current graph.
1716  std::vector<SUnit> *SUnits;
1717 
1718  MachineFunction &MF;
1719  const TargetInstrInfo *TII;
1720  const TargetRegisterInfo *TRI;
1721  const TargetLowering *TLI;
1722  ScheduleDAGRRList *scheduleDAG = nullptr;
1723 
1724  // SethiUllmanNumbers - The SethiUllman number for each node.
1725  std::vector<unsigned> SethiUllmanNumbers;
1726 
1727  /// RegPressure - Tracking current reg pressure per register class.
1728  std::vector<unsigned> RegPressure;
1729 
1730  /// RegLimit - Tracking the number of allocatable registers per register
1731  /// class.
1732  std::vector<unsigned> RegLimit;
1733 
1734 public:
1735  RegReductionPQBase(MachineFunction &mf,
1736  bool hasReadyFilter,
1737  bool tracksrp,
1738  bool srcorder,
1739  const TargetInstrInfo *tii,
1740  const TargetRegisterInfo *tri,
1741  const TargetLowering *tli)
1742  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1743  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1744  if (TracksRegPressure) {
1745  unsigned NumRC = TRI->getNumRegClasses();
1746  RegLimit.resize(NumRC);
1747  RegPressure.resize(NumRC);
1748  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1749  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1750  for (const TargetRegisterClass *RC : TRI->regclasses())
1751  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1752  }
1753  }
1754 
1755  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1756  scheduleDAG = scheduleDag;
1757  }
1758 
1759  ScheduleHazardRecognizer* getHazardRec() {
1760  return scheduleDAG->getHazardRec();
1761  }
1762 
1763  void initNodes(std::vector<SUnit> &sunits) override;
1764 
1765  void addNode(const SUnit *SU) override;
1766 
1767  void updateNode(const SUnit *SU) override;
1768 
1769  void releaseState() override {
1770  SUnits = nullptr;
1771  SethiUllmanNumbers.clear();
1772  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1773  }
1774 
1775  unsigned getNodePriority(const SUnit *SU) const;
1776 
1777  unsigned getNodeOrdering(const SUnit *SU) const {
1778  if (!SU->getNode()) return 0;
1779 
1780  return SU->getNode()->getIROrder();
1781  }
1782 
1783  bool empty() const override { return Queue.empty(); }
1784 
1785  void push(SUnit *U) override {
1786  assert(!U->NodeQueueId && "Node in the queue already");
1787  U->NodeQueueId = ++CurQueueId;
1788  Queue.push_back(U);
1789  }
1790 
1791  void remove(SUnit *SU) override {
1792  assert(!Queue.empty() && "Queue is empty!");
1793  assert(SU->NodeQueueId != 0 && "Not in queue!");
1794  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1795  if (I != std::prev(Queue.end()))
1796  std::swap(*I, Queue.back());
1797  Queue.pop_back();
1798  SU->NodeQueueId = 0;
1799  }
1800 
1801  bool tracksRegPressure() const override { return TracksRegPressure; }
1802 
1803  void dumpRegPressure() const;
1804 
1805  bool HighRegPressure(const SUnit *SU) const;
1806 
1807  bool MayReduceRegPressure(SUnit *SU) const;
1808 
1809  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1810 
1811  void scheduledNode(SUnit *SU) override;
1812 
1813  void unscheduledNode(SUnit *SU) override;
1814 
1815 protected:
1816  bool canClobber(const SUnit *SU, const SUnit *Op);
1817  void AddPseudoTwoAddrDeps();
1818  void PrescheduleNodesWithMultipleUses();
1819  void CalculateSethiUllmanNumbers();
1820 };
1821 
1822 template<class SF>
1823 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1824  std::vector<SUnit *>::iterator Best = Q.begin();
1825  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1826  if (Picker(*Best, *I))
1827  Best = I;
1828  SUnit *V = *Best;
1829  if (Best != std::prev(Q.end()))
1830  std::swap(*Best, Q.back());
1831  Q.pop_back();
1832  return V;
1833 }
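// Added note (not in the original source): the Picker functors return true when
// their second argument should be scheduled with higher priority, so the scan
// above leaves the best candidate in Best. It is then removed in O(1) by
// swapping it with the last element before pop_back(), which is why the queue
// can be kept as an unsorted vector.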
1834 
1835 template<class SF>
1836 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1837 #ifndef NDEBUG
1838  if (DAG->StressSched) {
1839  reverse_sort<SF> RPicker(Picker);
1840  return popFromQueueImpl(Q, RPicker);
1841  }
1842 #endif
1843  (void)DAG;
1844  return popFromQueueImpl(Q, Picker);
1845 }
1846 
1847 //===----------------------------------------------------------------------===//
1848 // RegReductionPriorityQueue Definition
1849 //===----------------------------------------------------------------------===//
1850 //
1851 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1852 // to reduce register pressure.
1853 //
1854 template<class SF>
1855 class RegReductionPriorityQueue : public RegReductionPQBase {
1856  SF Picker;
1857 
1858 public:
1859  RegReductionPriorityQueue(MachineFunction &mf,
1860  bool tracksrp,
1861  bool srcorder,
1862  const TargetInstrInfo *tii,
1863  const TargetRegisterInfo *tri,
1864  const TargetLowering *tli)
1865  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1866  tii, tri, tli),
1867  Picker(this) {}
1868 
1869  bool isBottomUp() const override { return SF::IsBottomUp; }
1870 
1871  bool isReady(SUnit *U) const override {
1872  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1873  }
1874 
1875  SUnit *pop() override {
1876  if (Queue.empty()) return nullptr;
1877 
1878  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1879  V->NodeQueueId = 0;
1880  return V;
1881  }
1882 
1883 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1884  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1885  // Emulate pop() without clobbering NodeQueueIds.
1886  std::vector<SUnit *> DumpQueue = Queue;
1887  SF DumpPicker = Picker;
1888  while (!DumpQueue.empty()) {
1889  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1890  dbgs() << "Height " << SU->getHeight() << ": ";
1891  DAG->dumpNode(*SU);
1892  }
1893  }
1894 #endif
1895 };
1896 
1897 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1898 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1899 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1900 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1901 
1902 } // end anonymous namespace
1903 
1904 //===----------------------------------------------------------------------===//
1905 // Static Node Priority for Register Pressure Reduction
1906 //===----------------------------------------------------------------------===//
1907 
1908 // Check for special nodes that bypass scheduling heuristics.
1909 // Currently this pushes TokenFactor nodes down, but may be used for other
1910 // pseudo-ops as well.
1911 //
1912 // Return -1 to schedule right above left, 1 for left above right.
1913 // Return 0 if no bias exists.
1914 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1915  bool LSchedLow = left->isScheduleLow;
1916  bool RSchedLow = right->isScheduleLow;
1917  if (LSchedLow != RSchedLow)
1918  return LSchedLow < RSchedLow ? 1 : -1;
1919  return 0;
1920 }
1921 
1922 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1923 /// Smaller number is the higher priority.
1924 static unsigned
1925 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1926  if (SUNumbers[SU->NodeNum] != 0)
1927  return SUNumbers[SU->NodeNum];
1928 
1929  // Use WorkList to avoid stack overflow on excessively large IRs.
1930  struct WorkState {
1931  WorkState(const SUnit *SU) : SU(SU) {}
1932  const SUnit *SU;
1933  unsigned PredsProcessed = 0;
1934  };
1935 
1936  SmallVector<WorkState, 16> WorkList;
1937  WorkList.push_back(SU);
1938  while (!WorkList.empty()) {
1939  auto &Temp = WorkList.back();
1940  auto *TempSU = Temp.SU;
1941  bool AllPredsKnown = true;
1942  // Try to find a non-evaluated pred and push it into the processing stack.
1943  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1944  auto &Pred = TempSU->Preds[P];
1945  if (Pred.isCtrl()) continue; // ignore chain preds
1946  SUnit *PredSU = Pred.getSUnit();
1947  if (SUNumbers[PredSU->NodeNum] == 0) {
1948 #ifndef NDEBUG
1949  // In debug mode, check that we don't have such an element in the stack.
1950  for (auto It : WorkList)
1951  assert(It.SU != PredSU && "Trying to push an element twice?");
1952 #endif
1953  // Next time, start processing this one from the next pred.
1954  Temp.PredsProcessed = P + 1;
1955  WorkList.push_back(PredSU);
1956  AllPredsKnown = false;
1957  break;
1958  }
1959  }
1960 
1961  if (!AllPredsKnown)
1962  continue;
1963 
1964  // Once all preds are known, we can calculate the answer for this one.
1965  unsigned SethiUllmanNumber = 0;
1966  unsigned Extra = 0;
1967  for (const SDep &Pred : TempSU->Preds) {
1968  if (Pred.isCtrl()) continue; // ignore chain preds
1969  SUnit *PredSU = Pred.getSUnit();
1970  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1971  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1972  if (PredSethiUllman > SethiUllmanNumber) {
1973  SethiUllmanNumber = PredSethiUllman;
1974  Extra = 0;
1975  } else if (PredSethiUllman == SethiUllmanNumber)
1976  ++Extra;
1977  }
1978 
1979  SethiUllmanNumber += Extra;
1980  if (SethiUllmanNumber == 0)
1981  SethiUllmanNumber = 1;
1982  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1983  WorkList.pop_back();
1984  }
1985 
1986  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
1987  return SUNumbers[SU->NodeNum];
1988 }
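// Illustrative example (added commentary, not in the original source): leaves
// with no data predecessors get the minimum number 1. A node whose two data
// predecessors both have number 1 sees a maximum of 1 with one tie (Extra == 1)
// and therefore gets 2, while a node whose predecessors have numbers 2 and 1
// simply inherits the maximum, 2. This mirrors the classic Sethi-Ullman
// labeling, where only ties among subtrees require an extra register.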
1989 
1990 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1991 /// scheduling units.
1992 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1993  SethiUllmanNumbers.assign(SUnits->size(), 0);
1994 
1995  for (const SUnit &SU : *SUnits)
1996  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
1997 }
1998 
1999 void RegReductionPQBase::addNode(const SUnit *SU) {
2000  unsigned SUSize = SethiUllmanNumbers.size();
2001  if (SUnits->size() > SUSize)
2002  SethiUllmanNumbers.resize(SUSize*2, 0);
2003  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2004 }
2005 
2006 void RegReductionPQBase::updateNode(const SUnit *SU) {
2007  SethiUllmanNumbers[SU->NodeNum] = 0;
2008  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2009 }
2010 
2011 // Lower priority means schedule further down. For bottom-up scheduling, lower
2012 // priority SUs are scheduled before higher priority SUs.
2013 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2014  assert(SU->NodeNum < SethiUllmanNumbers.size());
2015  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2016  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2017  // CopyToReg should be close to its uses to facilitate coalescing and
2018  // avoid spilling.
2019  return 0;
2020  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2021  Opc == TargetOpcode::SUBREG_TO_REG ||
2022  Opc == TargetOpcode::INSERT_SUBREG)
2023  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2024  // close to their uses to facilitate coalescing.
2025  return 0;
2026  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2027  // If SU does not have a register use, i.e. it doesn't produce a value
2028  // that would be consumed (e.g. store), then it terminates a chain of
2029  // computation. Give it a large SethiUllman number so it will be
2030  // scheduled right before its predecessors so that it doesn't lengthen
2031  // their live ranges.
2032  return 0xffff;
2033  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2034  // If SU does not have a register def, schedule it close to its uses
2035  // because it does not lengthen any live ranges.
2036  return 0;
2037 #if 1
2038  return SethiUllmanNumbers[SU->NodeNum];
2039 #else
2040  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2041  if (SU->isCallOp) {
2042  // FIXME: This assumes all of the defs are used as call operands.
2043  int NP = (int)Priority - SU->getNode()->getNumValues();
2044  return (NP > 0) ? NP : 0;
2045  }
2046  return Priority;
2047 #endif
2048 }
2049 
2050 //===----------------------------------------------------------------------===//
2051 // Register Pressure Tracking
2052 //===----------------------------------------------------------------------===//
2053 
2054 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2055 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2056  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2057  unsigned Id = RC->getID();
2058  unsigned RP = RegPressure[Id];
2059  if (!RP) continue;
2060  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2061  << RegLimit[Id] << '\n');
2062  }
2063 }
2064 #endif
2065 
2066 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2067  if (!TLI)
2068  return false;
2069 
2070  for (const SDep &Pred : SU->Preds) {
2071  if (Pred.isCtrl())
2072  continue;
2073  SUnit *PredSU = Pred.getSUnit();
2074  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2075  // to cover the number of registers defined (they are all live).
2076  if (PredSU->NumRegDefsLeft == 0) {
2077  continue;
2078  }
2079  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2080  RegDefPos.IsValid(); RegDefPos.Advance()) {
2081  unsigned RCId, Cost;
2082  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2083 
2084  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2085  return true;
2086  }
2087  }
2088  return false;
2089 }
2090 
2091 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2092  const SDNode *N = SU->getNode();
2093 
2094  if (!N->isMachineOpcode() || !SU->NumSuccs)
2095  return false;
2096 
2097  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2098  for (unsigned i = 0; i != NumDefs; ++i) {
2099  MVT VT = N->getSimpleValueType(i);
2100  if (!N->hasAnyUseOfValue(i))
2101  continue;
2102  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2103  if (RegPressure[RCId] >= RegLimit[RCId])
2104  return true;
2105  }
2106  return false;
2107 }
2108 
2109 // Compute the register pressure contribution by this instruction by counting up
2110 // for uses that are not live and down for defs. Only count register classes
2111 // that are already under high pressure. As a side effect, compute the number of
2112 // uses of registers that are already live.
2113 //
2114 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2115 // so could probably be factored.
2116 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2117  LiveUses = 0;
2118  int PDiff = 0;
2119  for (const SDep &Pred : SU->Preds) {
2120  if (Pred.isCtrl())
2121  continue;
2122  SUnit *PredSU = Pred.getSUnit();
2123  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2124  // to cover the number of registers defined (they are all live).
2125  if (PredSU->NumRegDefsLeft == 0) {
2126  if (PredSU->getNode()->isMachineOpcode())
2127  ++LiveUses;
2128  continue;
2129  }
2130  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2131  RegDefPos.IsValid(); RegDefPos.Advance()) {
2132  MVT VT = RegDefPos.GetValue();
2133  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2134  if (RegPressure[RCId] >= RegLimit[RCId])
2135  ++PDiff;
2136  }
2137  }
2138  const SDNode *N = SU->getNode();
2139 
2140  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2141  return PDiff;
2142 
2143  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2144  for (unsigned i = 0; i != NumDefs; ++i) {
2145  MVT VT = N->getSimpleValueType(i);
2146  if (!N->hasAnyUseOfValue(i))
2147  continue;
2148  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2149  if (RegPressure[RCId] >= RegLimit[RCId])
2150  --PDiff;
2151  }
2152  return PDiff;
2153 }
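// Added note (not in the original source): only register classes that are
// already at or above their RegLimit contribute to PDiff in either direction,
// so the result is a signed count of high-pressure effects rather than a full
// pressure model, and LiveUses separately counts operands whose defining nodes
// have no register defs left outstanding.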
2154 
2155 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2156  if (!TracksRegPressure)
2157  return;
2158 
2159  if (!SU->getNode())
2160  return;
2161 
2162  for (const SDep &Pred : SU->Preds) {
2163  if (Pred.isCtrl())
2164  continue;
2165  SUnit *PredSU = Pred.getSUnit();
2166  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2167  // to cover the number of registers defined (they are all live).
2168  if (PredSU->NumRegDefsLeft == 0) {
2169  continue;
2170  }
2171  // FIXME: The ScheduleDAG currently loses information about which of a
2172  // node's values is consumed by each dependence. Consequently, if the node
2173  // defines multiple register classes, we don't know which to pressurize
2174  // here. Instead the following loop consumes the register defs in an
2175  // arbitrary order. At least it handles the common case of clustered loads
2176  // to the same class. For precise liveness, each SDep needs to indicate the
2177  // result number. But that tightly couples the ScheduleDAG with the
2178  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2179  // value type or register class to SDep.
2180  //
2181  // The most important aspect of register tracking is balancing the increase
2182  // here with the reduction further below. Note that this SU may use multiple
2183  // defs in PredSU. They can't be determined here, but we've already
2184  // compensated by reducing NumRegDefsLeft in PredSU during
2185  // ScheduleDAGSDNodes::AddSchedEdges.
2186  --PredSU->NumRegDefsLeft;
2187  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2188  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2189  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2190  if (SkipRegDefs)
2191  continue;
2192 
2193  unsigned RCId, Cost;
2194  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2195  RegPressure[RCId] += Cost;
2196  break;
2197  }
2198  }
2199 
2200  // We should have this assert, but there may be dead SDNodes that never
2201  // materialize as SUnits, so they don't appear to generate liveness.
2202  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2203  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2204  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2205  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2206  if (SkipRegDefs > 0)
2207  continue;
2208  unsigned RCId, Cost;
2209  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2210  if (RegPressure[RCId] < Cost) {
2211  // Register pressure tracking is imprecise. This can happen. But we try
2212  // hard not to let it happen because it likely results in poor scheduling.
2213  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2214  << ") has too many regdefs\n");
2215  RegPressure[RCId] = 0;
2216  }
2217  else {
2218  RegPressure[RCId] -= Cost;
2219  }
2220  }
2221  LLVM_DEBUG(dumpRegPressure());
2222 }
2223 
2224 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2225  if (!TracksRegPressure)
2226  return;
2227 
2228  const SDNode *N = SU->getNode();
2229  if (!N) return;
2230 
2231  if (!N->isMachineOpcode()) {
2232  if (N->getOpcode() != ISD::CopyToReg)
2233  return;
2234  } else {
2235  unsigned Opc = N->getMachineOpcode();
2236  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2237  Opc == TargetOpcode::INSERT_SUBREG ||
2238  Opc == TargetOpcode::SUBREG_TO_REG ||
2239  Opc == TargetOpcode::REG_SEQUENCE ||
2240  Opc == TargetOpcode::IMPLICIT_DEF)
2241  return;
2242  }
2243 
2244  for (const SDep &Pred : SU->Preds) {
2245  if (Pred.isCtrl())
2246  continue;
2247  SUnit *PredSU = Pred.getSUnit();
2248  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2249  // counts data deps.
2250  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2251  continue;
2252  const SDNode *PN = PredSU->getNode();
2253  if (!PN->isMachineOpcode()) {
2254  if (PN->getOpcode() == ISD::CopyFromReg) {
2255  MVT VT = PN->getSimpleValueType(0);
2256  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2257  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2258  }
2259  continue;
2260  }
2261  unsigned POpc = PN->getMachineOpcode();
2262  if (POpc == TargetOpcode::IMPLICIT_DEF)
2263  continue;
2264  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2265  POpc == TargetOpcode::INSERT_SUBREG ||
2266  POpc == TargetOpcode::SUBREG_TO_REG) {
2267  MVT VT = PN->getSimpleValueType(0);
2268  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2269  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2270  continue;
2271  }
2272  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2273  for (unsigned i = 0; i != NumDefs; ++i) {
2274  MVT VT = PN->getSimpleValueType(i);
2275  if (!PN->hasAnyUseOfValue(i))
2276  continue;
2277  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2278  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2279  // Register pressure tracking is imprecise. This can happen.
2280  RegPressure[RCId] = 0;
2281  else
2282  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2283  }
2284  }
2285 
2286  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2287  // may transfer data dependencies to CopyToReg.
2288  if (SU->NumSuccs && N->isMachineOpcode()) {
2289  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2290  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2291  MVT VT = N->getSimpleValueType(i);
2292  if (VT == MVT::Glue || VT == MVT::Other)
2293  continue;
2294  if (!N->hasAnyUseOfValue(i))
2295  continue;
2296  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2297  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2298  }
2299  }
2300 
2301  LLVM_DEBUG(dumpRegPressure());
2302 }
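// Added note (not in the original source): this is the inverse bookkeeping of
// scheduledNode above, used when a node is un-scheduled during backtracking; it
// re-applies the pressure adjustments in the opposite direction and, like the
// scheduling path, clamps to zero when the imprecise tracking would underflow.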
2303 
2304 //===----------------------------------------------------------------------===//
2305 // Dynamic Node Priority for Register Pressure Reduction
2306 //===----------------------------------------------------------------------===//
2307 
2308 /// closestSucc - Returns the scheduled cycle of the successor which is
2309 /// closest to the current cycle.
2310 static unsigned closestSucc(const SUnit *SU) {
2311  unsigned MaxHeight = 0;
2312  for (const SDep &Succ : SU->Succs) {
2313  if (Succ.isCtrl()) continue; // ignore chain succs
2314  unsigned Height = Succ.getSUnit()->getHeight();
2315  // If there are a bunch of CopyToRegs stacked up, they should be considered
2316  // to be at the same position.
2317  if (Succ.getSUnit()->getNode() &&
2318  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2319  Height = closestSucc(Succ.getSUnit())+1;
2320  if (Height > MaxHeight)
2321  MaxHeight = Height;
2322  }
2323  return MaxHeight;
2324 }
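// Added note (not in the original source): despite the name, the value computed
// here is the maximum height over the node's data successors, with chains of
// CopyToReg successors looked through recursively so that stacked copies count
// as a single position.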
2325 
2326 /// calcMaxScratches - Returns a cost estimate of the worst case requirement
2327 /// for scratch registers, i.e. number of data dependencies.
2328 static unsigned calcMaxScratches(const SUnit *SU) {
2329  unsigned Scratches = 0;
2330  for (const SDep &Pred : SU->Preds) {
2331  if (Pred.isCtrl()) continue; // ignore chain preds
2332  Scratches++;
2333  }
2334  return Scratches;
2335 }
2336 
2337 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2338 /// CopyFromReg from a virtual register.
2339 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2340  bool RetVal = false;
2341  for (const SDep &Pred : SU->Preds) {
2342  if (Pred.isCtrl()) continue;
2343  const SUnit *PredSU = Pred.getSUnit();
2344  if (PredSU->getNode() &&
2345  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2346  unsigned Reg =
2347  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2348  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2349  RetVal = true;
2350  continue;
2351  }
2352  }
2353  return false;
2354  }
2355  return RetVal;
2356 }
2357 
2358 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2359 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2360 /// it has no other use. It should be scheduled closer to the terminator.
2361 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2362  bool RetVal = false;
2363  for (const SDep &Succ : SU->Succs) {
2364  if (Succ.isCtrl()) continue;
2365  const SUnit *SuccSU = Succ.getSUnit();
2366  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2367  unsigned Reg =
2368  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2369  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2370  RetVal = true;
2371  continue;
2372  }
2373  }
2374  return false;
2375  }
2376  return RetVal;
2377 }
2378 
2379 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2380 // set isVRegCycle for its CopyFromReg operands.
2381 //
2382 // This is only relevant for single-block loops, in which case the VRegCycle
2383 // node is likely an induction variable in which the operand and target virtual
2384 // registers should be coalesced (e.g. pre/post increment values). Setting the
2385 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2386 // CopyFromReg so that this node becomes the virtual register "kill". This
2387 // avoids interference between the values live in and out of the block and
2388 // eliminates a copy inside the loop.
2389 static void initVRegCycle(SUnit *SU) {
2390  if (DisableSchedVRegCycle)
2391  return;
2392 
2393  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2394  return;
2395 
2396  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2397 
2398  SU->isVRegCycle = true;
2399 
2400  for (const SDep &Pred : SU->Preds) {
2401  if (Pred.isCtrl()) continue;
2402  Pred.getSUnit()->isVRegCycle = true;
2403  }
2404 }
2405 
2406 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2407 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2408 static void resetVRegCycle(SUnit *SU) {
2409  if (!SU->isVRegCycle)
2410  return;
2411 
2412  for (const SDep &Pred : SU->Preds) {
2413  if (Pred.isCtrl()) continue; // ignore chain preds
2414  SUnit *PredSU = Pred.getSUnit();
2415  if (PredSU->isVRegCycle) {
2416  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2417  "VRegCycle def must be CopyFromReg");
2418  Pred.getSUnit()->isVRegCycle = false;
2419  }
2420  }
2421 }
2422 
2423 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2424 // means a node that defines the VRegCycle has not been scheduled yet.
2425 static bool hasVRegCycleUse(const SUnit *SU) {
2426  // If this SU also defines the VReg, don't hoist it as a "use".
2427  if (SU->isVRegCycle)
2428  return false;
2429 
2430  for (const SDep &Pred : SU->Preds) {
2431  if (Pred.isCtrl()) continue; // ignore chain preds
2432  if (Pred.getSUnit()->isVRegCycle &&
2433  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2434  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2435  return true;
2436  }
2437  }
2438  return false;
2439 }
2440 
2441 // Check for either a dependence (latency) or resource (hazard) stall.
2442 //
2443 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2444 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2445  if ((int)SPQ->getCurCycle() < Height) return true;
2446  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2447  != ScheduleHazardRecognizer::NoHazard)
2448  return true;
2449  return false;
2450 }
2451 
2452 // Return -1 if left has higher priority, 1 if right has higher priority.
2453 // Return 0 if latency-based priority is equivalent.
2454 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2455  RegReductionPQBase *SPQ) {
2456  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2457  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2458  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2459  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2460  int LHeight = (int)left->getHeight() + LPenalty;
2461  int RHeight = (int)right->getHeight() + RPenalty;
2462 
2463  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2464  BUHasStall(left, LHeight, SPQ);
2465  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2466  BUHasStall(right, RHeight, SPQ);
2467 
2468  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2469  // If scheduling both of the nodes will cause pipeline stalls, sort
2470  // them according to their height.
2471  if (LStall) {
2472  if (!RStall)
2473  return 1;
2474  if (LHeight != RHeight)
2475  return LHeight > RHeight ? 1 : -1;
2476  } else if (RStall)
2477  return -1;
2478 
2479  // If either node is scheduling for latency, sort them by height/depth
2480  // and latency.
2481  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2482  right->SchedulingPref == Sched::ILP)) {
2483  // If neither instruction stalls (!LStall && !RStall) and the HazardRecognizer
2484  // is enabled (grouping instructions by cycle), then height is already
2485  // covered, so only depth matters. We also reach this point if both stall
2486  // but have the same height.
2487  if (!SPQ->getHazardRec()->isEnabled()) {
2488  if (LHeight != RHeight)
2489  return LHeight > RHeight ? 1 : -1;
2490  }
2491  int LDepth = left->getDepth() - LPenalty;
2492  int RDepth = right->getDepth() - RPenalty;
2493  if (LDepth != RDepth) {
2494  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2495  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2496  << ") depth " << RDepth << "\n");
2497  return LDepth < RDepth ? 1 : -1;
2498  }
2499  if (left->Latency != right->Latency)
2500  return left->Latency > right->Latency ? 1 : -1;
2501  }
2502  return 0;
2503 }
2504 
2505 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2506  // Schedule physical register definitions close to their use. This is
2507  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2508  // long as shortening physreg live ranges is generally good, we can defer
2509  // creating a subtarget hook.
2510  if (!DisableSchedPhysRegJoin) {
2511  bool LHasPhysReg = left->hasPhysRegDefs;
2512  bool RHasPhysReg = right->hasPhysRegDefs;
2513  if (LHasPhysReg != RHasPhysReg) {
2514  #ifndef NDEBUG
2515  static const char *const PhysRegMsg[] = { " has no physreg",
2516  " defines a physreg" };
2517  #endif
2518  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2519  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2520  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2521  return LHasPhysReg < RHasPhysReg;
2522  }
2523  }
2524 
2525  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2526  unsigned LPriority = SPQ->getNodePriority(left);
2527  unsigned RPriority = SPQ->getNodePriority(right);
2528 
2529  // Be really careful about hoisting call operands above previous calls.
2530  // Only allow it if it would reduce register pressure.
2531  if (left->isCall && right->isCallOp) {
2532  unsigned RNumVals = right->getNode()->getNumValues();
2533  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2534  }
2535  if (right->isCall && left->isCallOp) {
2536  unsigned LNumVals = left->getNode()->getNumValues();
2537  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2538  }
2539 
2540  if (LPriority != RPriority)
2541  return LPriority > RPriority;
2542 
2543  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2544  // the same, then keep source order.
2545  if (left->isCall || right->isCall) {
2546  unsigned LOrder = SPQ->getNodeOrdering(left);
2547  unsigned ROrder = SPQ->getNodeOrdering(right);
2548 
2549  // Prefer an ordering where the lower the non-zero order number, the higher
2550  // the preference.
2551  if ((LOrder || ROrder) && LOrder != ROrder)
2552  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2553  }
2554 
2555  // Try to schedule def + use closer together when Sethi-Ullman numbers are the same.
2556  // e.g.
2557  // t1 = op t2, c1
2558  // t3 = op t4, c2
2559  //
2560  // and the following instructions are both ready.
2561  // t2 = op c3
2562  // t4 = op c4
2563  //
2564  // Then schedule t2 = op first.
2565  // i.e.
2566  // t4 = op c4
2567  // t2 = op c3
2568  // t1 = op t2, c1
2569  // t3 = op t4, c2
2570  //
2571  // This creates more short live intervals.
2572  unsigned LDist = closestSucc(left);
2573  unsigned RDist = closestSucc(right);
2574  if (LDist != RDist)
2575  return LDist < RDist;
2576 
2577  // How many registers become live when the node is scheduled.
2578  unsigned LScratch = calcMaxScratches(left);
2579  unsigned RScratch = calcMaxScratches(right);
2580  if (LScratch != RScratch)
2581  return LScratch > RScratch;
2582 
2583  // Comparing latency against a call makes little sense unless the node
2584  // is register pressure-neutral.
2585  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2586  return (left->NodeQueueId > right->NodeQueueId);
2587 
2588  // Do not compare latencies when one or both of the nodes are calls.
2589  if (!DisableSchedCycles &&
2590  !(left->isCall || right->isCall)) {
2591  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2592  if (result != 0)
2593  return result > 0;
2594  }
2595  else {
2596  if (left->getHeight() != right->getHeight())
2597  return left->getHeight() > right->getHeight();
2598 
2599  if (left->getDepth() != right->getDepth())
2600  return left->getDepth() < right->getDepth();
2601  }
2602 
2603  assert(left->NodeQueueId && right->NodeQueueId &&
2604  "NodeQueueId cannot be zero");
2605  return (left->NodeQueueId > right->NodeQueueId);
2606 }
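// Added summary (not in the original source): the tie-breakers above are tried
// in order -- physreg def affinity, Sethi-Ullman priority (adjusted for call
// operands), source order when calls are involved, distance to the closest
// successor, scratch-register count, then latency or height/depth, and finally
// NodeQueueId as a stable last resort.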
2607 
2608 // Bottom up
2609 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2610  if (int res = checkSpecialNodes(left, right))
2611  return res > 0;
2612 
2613  return BURRSort(left, right, SPQ);
2614 }
2615 
2616 // Source order, otherwise bottom up.
2617 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2618  if (int res = checkSpecialNodes(left, right))
2619  return res > 0;
2620 
2621  unsigned LOrder = SPQ->getNodeOrdering(left);
2622  unsigned ROrder = SPQ->getNodeOrdering(right);
2623 
2624  // Prefer an ordering where the lower the non-zero order number, the higher
2625  // the preference.
2626  if ((LOrder || ROrder) && LOrder != ROrder)
2627  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2628 
2629  return BURRSort(left, right, SPQ);
2630 }
2631 
2632 // If the time between now and when the instruction will be ready can cover
2633 // the spill code, then avoid adding it to the ready queue. This gives long
2634 // stalls highest priority and allows hoisting across calls. It should also
2635 // speed up processing the available queue.
2636 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2637  static const unsigned ReadyDelay = 3;
2638 
2639  if (SPQ->MayReduceRegPressure(SU)) return true;
2640 
2641  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2642 
2643  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2644  != ScheduleHazardRecognizer::NoHazard)
2645  return false;
2646 
2647  return true;
2648 }
2649 
2650 // Return true if right should be scheduled with higher priority than left.
2651 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2652  if (int res = checkSpecialNodes(left, right))
2653  return res > 0;
2654 
2655  if (left->isCall || right->isCall)
2656  // No way to compute latency of calls.
2657  return BURRSort(left, right, SPQ);
2658 
2659  bool LHigh = SPQ->HighRegPressure(left);
2660  bool RHigh = SPQ->HighRegPressure(right);
2661  // Avoid causing spills. If register pressure is high, schedule for
2662  // register pressure reduction.
2663  if (LHigh && !RHigh) {
2664  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2665  << right->NodeNum << ")\n");
2666  return true;
2667  }
2668  else if (!LHigh && RHigh) {
2669  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2670  << left->NodeNum << ")\n");
2671  return false;
2672  }
2673  if (!LHigh && !RHigh) {
2674  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2675  if (result != 0)
2676  return result > 0;
2677  }
2678  return BURRSort(left, right, SPQ);
2679 }
2680 
2681 // Schedule as many instructions in each cycle as possible. So don't make an
2682 // instruction available unless it is ready in the current cycle.
2683 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2684  if (SU->getHeight() > CurCycle) return false;
2685 
2686  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2687  != ScheduleHazardRecognizer::NoHazard)
2688  return false;
2689 
2690  return true;
2691 }
2692 
2693 static bool canEnableCoalescing(SUnit *SU) {
2694  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2695  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2696  // CopyToReg should be close to its uses to facilitate coalescing and
2697  // avoid spilling.
2698  return true;
2699 
2700  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2701  Opc == TargetOpcode::SUBREG_TO_REG ||
2702  Opc == TargetOpcode::INSERT_SUBREG)
2703  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2704  // close to their uses to facilitate coalescing.
2705  return true;
2706 
2707  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2708  // If SU does not have a register def, schedule it close to its uses
2709  // because it does not lengthen any live ranges.
2710  return true;
2711 
2712  return false;
2713 }
2714 
2715 // list-ilp is currently an experimental scheduler that allows various
2716 // heuristics to be enabled prior to the normal register reduction logic.
2717 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2718  if (int res = checkSpecialNodes(left, right))
2719  return res > 0;
2720 
2721  if (left->isCall || right->isCall)
2722  // No way to compute latency of calls.
2723  return BURRSort(left, right, SPQ);
2724 
2725  unsigned LLiveUses = 0, RLiveUses = 0;
2726  int LPDiff = 0, RPDiff = 0;
2727  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2728  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2729  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2730  }
2731  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2732  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2733  << "): " << LPDiff << " != SU(" << right->NodeNum
2734  << "): " << RPDiff << "\n");
2735  return LPDiff > RPDiff;
2736  }
2737 
2738  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2739  bool LReduce = canEnableCoalescing(left);
2740  bool RReduce = canEnableCoalescing(right);
2741  if (LReduce && !RReduce) return false;
2742  if (RReduce && !LReduce) return true;
2743  }
2744 
2745  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2746  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2747  << " != SU(" << right->NodeNum << "): " << RLiveUses
2748  << "\n");
2749  return LLiveUses < RLiveUses;
2750  }
2751 
2752  if (!DisableSchedStalls) {
2753  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2754  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2755  if (LStall != RStall)
2756  return left->getHeight() > right->getHeight();
2757  }
2758 
2759  if (!DisableSchedCriticalPath) {
2760  int spread = (int)left->getDepth() - (int)right->getDepth();
2761  if (std::abs(spread) > MaxReorderWindow) {
2762  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2763  << left->getDepth() << " != SU(" << right->NodeNum
2764  << "): " << right->getDepth() << "\n");
2765  return left->getDepth() < right->getDepth();
2766  }
2767  }
2768 
2769  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2770  int spread = (int)left->getHeight() - (int)right->getHeight();
2771  if (std::abs(spread) > MaxReorderWindow)
2772  return left->getHeight() > right->getHeight();
2773  }
2774 
2775  return BURRSort(left, right, SPQ);
2776 }
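// Added note (not in the original source): each list-ilp heuristic above is
// guarded by one of the DisableSched* flags, so individual heuristics can be
// switched off from the command line while the remaining comparisons fall
// through to BURRSort.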
2777 
2778 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2779  SUnits = &sunits;
2780  // Add pseudo dependency edges for two-address nodes.
2781  if (!Disable2AddrHack)
2782  AddPseudoTwoAddrDeps();
2783  // Reroute edges to nodes with multiple uses.
2784  if (!TracksRegPressure && !SrcOrder)
2785  PrescheduleNodesWithMultipleUses();
2786  // Calculate node priorities.
2787  CalculateSethiUllmanNumbers();
2788 
2789  // For single block loops, mark nodes that look like canonical IV increments.
2790  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2791  for (SUnit &SU : sunits)
2792  initVRegCycle(&SU);
2793 }
2794 
2795 //===----------------------------------------------------------------------===//
2796 // Preschedule for Register Pressure
2797 //===----------------------------------------------------------------------===//
2798 
2799 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2800  if (SU->isTwoAddress) {
2801  unsigned Opc = SU->getNode()->getMachineOpcode();
2802  const MCInstrDesc &MCID = TII->get(Opc);
2803  unsigned NumRes = MCID.getNumDefs();
2804  unsigned NumOps = MCID.getNumOperands() - NumRes;
2805  for (unsigned i = 0; i != NumOps; ++i) {
2806  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2807  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2808  if (DU->getNodeId() != -1 &&
2809  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2810  return true;
2811  }
2812  }
2813  }
2814  return false;
2815 }
2816 
2817 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2818 /// successor's explicit physregs whose definition can reach DepSU.
2819 /// i.e. DepSU should not be scheduled above SU.
2820 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2821  ScheduleDAGRRList *scheduleDAG,
2822  const TargetInstrInfo *TII,
2823  const TargetRegisterInfo *TRI) {
2824  const MCPhysReg *ImpDefs
2825  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2826  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2827  if(!ImpDefs && !RegMask)
2828  return false;
2829 
2830  for (const SDep &Succ : SU->Succs) {
2831  SUnit *SuccSU = Succ.getSUnit();
2832  for (const SDep &SuccPred : SuccSU->Preds) {
2833  if (!SuccPred.isAssignedRegDep())
2834  continue;
2835 
2836  if (RegMask &&
2837  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2838  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2839  return true;
2840 
2841  if (ImpDefs)
2842  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2843  // Return true if SU clobbers this physical register use and the
2844  // definition of the register reaches from DepSU. IsReachable queries
2845  // a topological forward sort of the DAG (following the successors).
2846  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2847  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2848  return true;
2849  }
2850  }
2851  return false;
2852 }
2853 
2854 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2855 /// physical register defs.
2856 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2857  const TargetInstrInfo *TII,
2858  const TargetRegisterInfo *TRI) {
2859  SDNode *N = SuccSU->getNode();
2860  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2861  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2862  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2863  for (const SDNode *SUNode = SU->getNode(); SUNode;
2864  SUNode = SUNode->getGluedNode()) {
2865  if (!SUNode->isMachineOpcode())
2866  continue;
2867  const MCPhysReg *SUImpDefs =
2868  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2869  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2870  if (!SUImpDefs && !SURegMask)
2871  continue;
2872  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2873  MVT VT = N->getSimpleValueType(i);
2874  if (VT == MVT::Glue || VT == MVT::Other)
2875  continue;
2876  if (!N->hasAnyUseOfValue(i))
2877  continue;
2878  unsigned Reg = ImpDefs[i - NumDefs];
2879  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2880  return true;
2881  if (!SUImpDefs)
2882  continue;
2883  for (;*SUImpDefs; ++SUImpDefs) {
2884  unsigned SUReg = *SUImpDefs;
2885  if (TRI->regsOverlap(Reg, SUReg))
2886  return true;
2887  }
2888  }
2889  }
2890  return false;
2891 }
2892 
2893 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2894 /// are not handled well by the general register pressure reduction
2895 /// heuristics. When presented with code like this:
2896 ///
2897 ///   N
2898 ///  / |
2899 /// /  |
2900 /// U  store
2901 /// |
2902 /// ...
2903 ///
2904 /// the heuristics tend to push the store up, but since the
2905 /// operand of the store has another use (U), this would increase
2906 /// the length of that other use's live range (the U->N edge).
2907 ///
2908 /// This function transforms code like the above to route U's
2909 /// dependence through the store when possible, like this:
2910 ///
2911 /// N
2912 /// ||
2913 /// ||
2914 /// store
2915 /// |
2916 /// U
2917 /// |
2918 /// ...
2919 ///
2920 /// This results in the store being scheduled immediately
2921 /// after N, which shortens the U->N live range, reducing
2922 /// register pressure.
2923 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2924  // Visit all the nodes in topological order, working top-down.
2925  for (SUnit &SU : *SUnits) {
2926  // For now, only look at nodes with no data successors, such as stores.
2927  // These are especially important, due to the heuristics in
2928  // getNodePriority for nodes with no data successors.
2929  if (SU.NumSuccs != 0)
2930  continue;
2931  // For now, only look at nodes with exactly one data predecessor.
2932  if (SU.NumPreds != 1)
2933  continue;
2934  // Avoid prescheduling copies to virtual registers, which don't behave
2935  // like other nodes from the perspective of scheduling heuristics.
2936  if (SDNode *N = SU.getNode())
2937  if (N->getOpcode() == ISD::CopyToReg &&
2938  TargetRegisterInfo::isVirtualRegister
2939  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2940  continue;
2941 
2942  // Locate the single data predecessor.
2943  SUnit *PredSU = nullptr;
2944  for (const SDep &Pred : SU.Preds)
2945  if (!Pred.isCtrl()) {
2946  PredSU = Pred.getSUnit();
2947  break;
2948  }
2949  assert(PredSU);
2950 
2951  // Don't rewrite edges that carry physregs, because that requires additional
2952  // support infrastructure.
2953  if (PredSU->hasPhysRegDefs)
2954  continue;
2955  // Short-circuit the case where SU is PredSU's only data successor.
2956  if (PredSU->NumSuccs == 1)
2957  continue;
2958  // Avoid prescheduling to copies from virtual registers, which don't behave
2959  // like other nodes from the perspective of scheduling heuristics.
2960  if (SDNode *N = SU.getNode())
2961  if (N->getOpcode() == ISD::CopyFromReg &&
2962  TargetRegisterInfo::isVirtualRegister
2963  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2964  continue;
2965 
2966  // Perform checks on the successors of PredSU.
2967  for (const SDep &PredSucc : PredSU->Succs) {
2968  SUnit *PredSuccSU = PredSucc.getSUnit();
2969  if (PredSuccSU == &SU) continue;
2970  // If PredSU has another successor with no data successors, for
2971  // now don't attempt to choose either over the other.
2972  if (PredSuccSU->NumSuccs == 0)
2973  goto outer_loop_continue;
2974  // Don't break physical register dependencies.
2975  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2976  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
2977  goto outer_loop_continue;
2978  // Don't introduce graph cycles.
2979  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
2980  goto outer_loop_continue;
2981  }
2982 
2983  // Ok, the transformation is safe and the heuristics suggest it is
2984  // profitable. Update the graph.
2985  LLVM_DEBUG(
2986  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
2987  << PredSU->NodeNum
2988  << " to guide scheduling in the presence of multiple uses\n");
2989  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2990  SDep Edge = PredSU->Succs[i];
2991  assert(!Edge.isAssignedRegDep());
2992  SUnit *SuccSU = Edge.getSUnit();
2993  if (SuccSU != &SU) {
2994  Edge.setSUnit(PredSU);
2995  scheduleDAG->RemovePred(SuccSU, Edge);
2996  scheduleDAG->AddPred(&SU, Edge);
2997  Edge.setSUnit(&SU);
2998  scheduleDAG->AddPred(SuccSU, Edge);
2999  --i;
3000  }
3001  }
3002  outer_loop_continue:;
3003  }
3004 }
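// Added note (not in the original source): in the rewrite loop above, RemovePred
// also erases the corresponding entry from PredSU->Succs, which is being walked
// by index, so the index is decremented to compensate; each non-SU successor
// edge is then re-added so that its dependence is routed through SU instead.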
3005 
3006 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3007 /// it as a def&use operand, add a pseudo control edge from it to the other
3008 /// node (if it won't create a cycle) so the two-address one will be scheduled
3009 /// first (lower in the schedule). If both nodes are two-address, favor the
3010 /// one that has a CopyToReg use (more likely to be a loop induction update).
3011 /// If both are two-address, but one is commutable while the other is not
3012 /// commutable, favor the one that's not commutable.
3013 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3014  for (SUnit &SU : *SUnits) {
3015  if (!SU.isTwoAddress)
3016  continue;
3017 
3018  SDNode *Node = SU.getNode();
3019  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3020  continue;
3021 
3022  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3023  unsigned Opc = Node->getMachineOpcode();
3024  const MCInstrDesc &MCID = TII->get(Opc);
3025  unsigned NumRes = MCID.getNumDefs();
3026  unsigned NumOps = MCID.getNumOperands() - NumRes;
3027  for (unsigned j = 0; j != NumOps; ++j) {
3028  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3029  continue;
3030  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3031  if (DU->getNodeId() == -1)
3032  continue;
3033  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3034  if (!DUSU)
3035  continue;
3036  for (const SDep &Succ : DUSU->Succs) {
3037  if (Succ.isCtrl())
3038  continue;
3039  SUnit *SuccSU = Succ.getSUnit();
3040  if (SuccSU == &SU)
3041  continue;
3042  // Be conservative. Ignore if nodes aren't at roughly the same
3043  // depth and height.
3044  if (SuccSU->getHeight() < SU.getHeight() &&
3045  (SU.getHeight() - SuccSU->getHeight()) > 1)
3046  continue;
3047  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3048  // constrains whatever is using the copy, instead of the copy
3049  // itself. In the case that the copy is coalesced, this
3050  // preserves the intent of the pseudo two-address heuristics.
3051  while (SuccSU->Succs.size() == 1 &&
3052  SuccSU->getNode()->isMachineOpcode() &&
3053  SuccSU->getNode()->getMachineOpcode() ==
3054  TargetOpcode::COPY_TO_REGCLASS)
3055  SuccSU = SuccSU->Succs.front().getSUnit();
3056  // Don't constrain non-instruction nodes.
3057  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3058  continue;
3059  // Don't constrain nodes with physical register defs if the
3060  // predecessor can clobber them.
3061  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3062  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3063  continue;
3064  }
3065  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3066  // these may be coalesced away. We want them close to their uses.
3067  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3068  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3069  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3070  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3071  continue;
3072  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3073  (!canClobber(SuccSU, DUSU) ||
3074  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3075  (!SU.isCommutable && SuccSU->isCommutable)) &&
3076  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3077  LLVM_DEBUG(dbgs()
3078  << " Adding a pseudo-two-addr edge from SU #"
3079  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3080  scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
3081  }
3082  }
3083  }
3084  }
3085 }
3086 
3087 //===----------------------------------------------------------------------===//
3088 // Public Constructor Functions
3089 //===----------------------------------------------------------------------===//
3090 
3091 llvm::ScheduleDAGSDNodes *
3092 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3093  CodeGenOpt::Level OptLevel) {
3094  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3095  const TargetInstrInfo *TII = STI.getInstrInfo();
3096  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3097 
3098  BURegReductionPriorityQueue *PQ =
3099  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3100  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3101  PQ->setScheduleDAG(SD);
3102  return SD;
3103 }
3104 
3105 llvm::ScheduleDAGSDNodes *
3106 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3107  CodeGenOpt::Level OptLevel) {
3108  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3109  const TargetInstrInfo *TII = STI.getInstrInfo();
3110  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3111 
3112  SrcRegReductionPriorityQueue *PQ =
3113  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3114  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3115  PQ->setScheduleDAG(SD);
3116  return SD;
3117 }
3118 
3119 llvm::ScheduleDAGSDNodes *
3120 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3121  CodeGenOpt::Level OptLevel) {
3122  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3123  const TargetInstrInfo *TII = STI.getInstrInfo();
3124  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3125  const TargetLowering *TLI = IS->TLI;
3126 
3127  HybridBURRPriorityQueue *PQ =
3128  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3129 
3130  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3131  PQ->setScheduleDAG(SD);
3132  return SD;
3133 }
3134 
3135 llvm::ScheduleDAGSDNodes *
3136 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3137  CodeGenOpt::Level OptLevel) {
3138  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3139  const TargetInstrInfo *TII = STI.getInstrInfo();
3140  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3141  const TargetLowering *TLI = IS->TLI;
3142 
3143  ILPBURRPriorityQueue *PQ =
3144  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3145  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3146  PQ->setScheduleDAG(SD);
3147  return SD;
3148 }
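// All four factories above share the same shape: construct a priority queue,
// construct the ScheduleDAGRRList that consumes it, and point the queue back
// at the scheduler so its heuristics can query the DAG. A hypothetical helper
// (not part of this file) capturing that shared pattern might look like the
// sketch below; makeRRListScheduler and its parameters are illustrative names
// only. The registered schedulers are typically selected on the command line,
// e.g. llc -pre-RA-sched=list-ilp.
template <typename PQTy>
static ScheduleDAGSDNodes *
makeRRListScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel,
                    bool NeedLatency, PQTy *PQ) {
  // Build the list scheduler around the given queue, then give the queue a
  // back pointer to the scheduler before handing it to the caller.
  ScheduleDAGRRList *SD =
      new ScheduleDAGRRList(*IS->MF, NeedLatency, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}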