LLVM  8.0.1
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // This SMS implementation is a target-independent back-end pass. When enabled,
13 // the pass runs just prior to the register allocation pass, while the machine
14 // IR is in SSA form. If software pipelining is successful, then the original
15 // loop is replaced by the optimized loop. The optimized loop contains one or
16 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
17 // the instructions cannot be scheduled in a given MII, we increase the MII by
18 // one and try again.
19 //
20 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
21 // represent loop carried dependences in the DAG as order edges to the Phi
22 // nodes. We also perform several passes over the DAG to eliminate unnecessary
23 // edges that inhibit the ability to pipeline. The implementation uses the
24 // DFAPacketizer class to compute the minimum initiation interval and the check
25 // where an instruction may be inserted in the pipelined schedule.
26 //
27 // In order for the SMS pass to work, several target specific hooks need to be
28 // implemented to get information about the loop structure and to rewrite
29 // instructions.
30 //
31 //===----------------------------------------------------------------------===//
32 
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/BitVector.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/MapVector.h"
37 #include "llvm/ADT/PriorityQueue.h"
38 #include "llvm/ADT/SetVector.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
66 #include "llvm/Config/llvm-config.h"
67 #include "llvm/IR/Attributes.h"
68 #include "llvm/IR/DebugLoc.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/MC/LaneBitmask.h"
71 #include "llvm/MC/MCInstrDesc.h"
73 #include "llvm/MC/MCRegisterInfo.h"
74 #include "llvm/Pass.h"
76 #include "llvm/Support/Compiler.h"
77 #include "llvm/Support/Debug.h"
80 #include <algorithm>
81 #include <cassert>
82 #include <climits>
83 #include <cstdint>
84 #include <deque>
85 #include <functional>
86 #include <iterator>
87 #include <map>
88 #include <memory>
89 #include <tuple>
90 #include <utility>
91 #include <vector>
92 
93 using namespace llvm;
94 
95 #define DEBUG_TYPE "pipeliner"
96 
97 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
98 STATISTIC(NumPipelined, "Number of loops software pipelined");
99 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
100 
101 /// A command line option to turn software pipelining on or off.
102 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
104  cl::desc("Enable Software Pipelining"));
105 
106 /// A command line option to enable SWP at -Os.
107 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
108  cl::desc("Enable SWP at Os."), cl::Hidden,
109  cl::init(false));
110 
111 /// A command line argument to limit minimum initial interval for pipelining.
112 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
113  cl::desc("Size limit for the MII."),
114  cl::Hidden, cl::init(27));
115 
116 /// A command line argument to limit the number of stages in the pipeline.
117 static cl::opt<int>
118  SwpMaxStages("pipeliner-max-stages",
119  cl::desc("Maximum stages allowed in the generated scheduled."),
120  cl::Hidden, cl::init(3));
121 
122 /// A command line option to disable the pruning of chain dependences due to
123 /// an unrelated Phi.
124 static cl::opt<bool>
125  SwpPruneDeps("pipeliner-prune-deps",
126  cl::desc("Prune dependences between unrelated Phi nodes."),
127  cl::Hidden, cl::init(true));
128 
129 /// A command line option to disable the pruning of loop carried order
130 /// dependences.
131 static cl::opt<bool>
132  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
133  cl::desc("Prune loop carried order dependences."),
134  cl::Hidden, cl::init(true));
135 
136 #ifndef NDEBUG
137 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
138 #endif
139 
140 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
141  cl::ReallyHidden, cl::init(false),
142  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
143 
144 namespace llvm {
145 
146 // A command line option to enable the CopyToPhi DAG mutation.
148  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
149  cl::init(true), cl::ZeroOrMore,
150  cl::desc("Enable CopyToPhi DAG Mutation"));
151 
152 } // end namespace llvm
153 
154 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
155 char MachinePipeliner::ID = 0;
156 #ifndef NDEBUG
158 #endif
160 
162  "Modulo Software Pipelining", false, false)
168  "Modulo Software Pipelining", false, false)
169 
170 /// The "main" function for implementing Swing Modulo Scheduling.
171 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
172  if (skipFunction(mf.getFunction()))
173  return false;
174 
175  if (!EnableSWP)
176  return false;
177 
178  if (mf.getFunction().getAttributes().hasAttribute(
180  !EnableSWPOptSize.getPosition())
181  return false;
182 
183  MF = &mf;
184  MLI = &getAnalysis<MachineLoopInfo>();
185  MDT = &getAnalysis<MachineDominatorTree>();
186  TII = MF->getSubtarget().getInstrInfo();
187  RegClassInfo.runOnMachineFunction(*MF);
188 
189  for (auto &L : *MLI)
190  scheduleLoop(*L);
191 
192  return false;
193 }
194 
195 /// Attempt to perform the SMS algorithm on the specified loop. This function is
196 /// the main entry point for the algorithm. The function identifies candidate
197 /// loops, calculates the minimum initiation interval, and attempts to schedule
198 /// the loop.
199 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
200  bool Changed = false;
201  for (auto &InnerLoop : L)
202  Changed |= scheduleLoop(*InnerLoop);
203 
204 #ifndef NDEBUG
205  // Stop trying after reaching the limit (if any).
206  int Limit = SwpLoopLimit;
207  if (Limit >= 0) {
208  if (NumTries >= SwpLoopLimit)
209  return Changed;
210  NumTries++;
211  }
212 #endif
213 
214  if (!canPipelineLoop(L))
215  return Changed;
216 
217  ++NumTrytoPipeline;
218 
219  Changed = swingModuloScheduler(L);
220 
221  return Changed;
222 }
223 
224 /// Return true if the loop can be software pipelined. The algorithm is
225 /// restricted to loops with a single basic block. Make sure that the
226 /// branch in the loop can be analyzed.
227 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
228  if (L.getNumBlocks() != 1)
229  return false;
230 
231  // Check if the branch can't be understood because we can't do pipelining
232  // if that's the case.
233  LI.TBB = nullptr;
234  LI.FBB = nullptr;
235  LI.BrCond.clear();
236  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
237  return false;
238 
239  LI.LoopInductionVar = nullptr;
240  LI.LoopCompare = nullptr;
242  return false;
243 
244  if (!L.getLoopPreheader())
245  return false;
246 
247  // Remove any subregisters from inputs to phi nodes.
248  preprocessPhiNodes(*L.getHeader());
249  return true;
250 }
251 
252 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
254  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
255 
256  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
257  MachineOperand &DefOp = PI.getOperand(0);
258  assert(DefOp.getSubReg() == 0);
259  auto *RC = MRI.getRegClass(DefOp.getReg());
260 
261  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
262  MachineOperand &RegOp = PI.getOperand(i);
263  if (RegOp.getSubReg() == 0)
264  continue;
265 
266  // If the operand uses a subregister, replace it with a new register
267  // without subregisters, and generate a copy to the new register.
268  unsigned NewReg = MRI.createVirtualRegister(RC);
269  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
271  const DebugLoc &DL = PredB.findDebugLoc(At);
272  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
273  .addReg(RegOp.getReg(), getRegState(RegOp),
274  RegOp.getSubReg());
275  Slots.insertMachineInstrInMaps(*Copy);
276  RegOp.setReg(NewReg);
277  RegOp.setSubReg(0);
278  }
279  }
280 }
281 
282 /// The SMS algorithm consists of the following main steps:
283 /// 1. Computation and analysis of the dependence graph.
284 /// 2. Ordering of the nodes (instructions).
285 /// 3. Attempt to Schedule the loop.
286 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
287  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
288 
289  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
290 
291  MachineBasicBlock *MBB = L.getHeader();
292  // The kernel should not include any terminator instructions. These
293  // will be added back later.
294  SMS.startBlock(MBB);
295 
296  // Compute the number of 'real' instructions in the basic block by
297  // ignoring terminators.
298  unsigned size = MBB->size();
300  E = MBB->instr_end();
301  I != E; ++I, --size)
302  ;
303 
304  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
305  SMS.schedule();
306  SMS.exitRegion();
307 
308  SMS.finishBlock();
309  return SMS.hasNewSchedule();
310 }
311 
312 /// We override the schedule function in ScheduleDAGInstrs to implement the
313 /// scheduling part of the Swing Modulo Scheduling algorithm.
315  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
316  buildSchedGraph(AA);
317  addLoopCarriedDependences(AA);
318  updatePhiDependences();
319  Topo.InitDAGTopologicalSorting();
320  changeDependences();
321  postprocessDAG();
322  LLVM_DEBUG(dump());
323 
324  NodeSetType NodeSets;
325  findCircuits(NodeSets);
326  NodeSetType Circuits = NodeSets;
327 
328  // Calculate the MII.
329  unsigned ResMII = calculateResMII();
330  unsigned RecMII = calculateRecMII(NodeSets);
331 
332  fuseRecs(NodeSets);
333 
334  // This flag is used for testing and can cause correctness problems.
335  if (SwpIgnoreRecMII)
336  RecMII = 0;
337 
338  MII = std::max(ResMII, RecMII);
339  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
340  << ", res=" << ResMII << ")\n");
341 
342  // Can't schedule a loop without a valid MII.
343  if (MII == 0)
344  return;
345 
346  // Don't pipeline large loops.
347  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
348  return;
349 
350  computeNodeFunctions(NodeSets);
351 
352  registerPressureFilter(NodeSets);
353 
354  colocateNodeSets(NodeSets);
355 
356  checkNodeSets(NodeSets);
357 
358  LLVM_DEBUG({
359  for (auto &I : NodeSets) {
360  dbgs() << " Rec NodeSet ";
361  I.dump();
362  }
363  });
364 
365  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
366 
367  groupRemainingNodes(NodeSets);
368 
369  removeDuplicateNodes(NodeSets);
370 
371  LLVM_DEBUG({
372  for (auto &I : NodeSets) {
373  dbgs() << " NodeSet ";
374  I.dump();
375  }
376  });
377 
378  computeNodeOrder(NodeSets);
379 
380  // check for node order issues
381  checkValidNodeOrder(Circuits);
382 
383  SMSchedule Schedule(Pass.MF);
384  Scheduled = schedulePipeline(Schedule);
385 
386  if (!Scheduled)
387  return;
388 
389  unsigned numStages = Schedule.getMaxStageCount();
390  // No need to generate pipeline if there are no overlapped iterations.
391  if (numStages == 0)
392  return;
393 
394  // Check that the maximum stage count is less than user-defined limit.
395  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
396  return;
397 
398  generatePipelinedLoop(Schedule);
399  ++NumPipelined;
400 }
401 
402 /// Clean up after the software pipeliner runs.
404  for (MachineInstr *I : NewMIs)
406  NewMIs.clear();
407 
408  // Call the superclass.
410 }
411 
412 /// Return the register values for the operands of a Phi instruction.
413 /// This function assume the instruction is a Phi.
415  unsigned &InitVal, unsigned &LoopVal) {
416  assert(Phi.isPHI() && "Expecting a Phi.");
417 
418  InitVal = 0;
419  LoopVal = 0;
420  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
421  if (Phi.getOperand(i + 1).getMBB() != Loop)
422  InitVal = Phi.getOperand(i).getReg();
423  else
424  LoopVal = Phi.getOperand(i).getReg();
425 
426  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
427 }
428 
429 /// Return the Phi register value that comes from the incoming block.
430 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
431  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
432  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
433  return Phi.getOperand(i).getReg();
434  return 0;
435 }
436 
437 /// Return the Phi register value that comes the loop block.
438 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
439  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
440  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
441  return Phi.getOperand(i).getReg();
442  return 0;
443 }
444 
445 /// Return true if SUb can be reached from SUa following the chain edges.
446 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
447  SmallPtrSet<SUnit *, 8> Visited;
448  SmallVector<SUnit *, 8> Worklist;
449  Worklist.push_back(SUa);
450  while (!Worklist.empty()) {
451  const SUnit *SU = Worklist.pop_back_val();
452  for (auto &SI : SU->Succs) {
453  SUnit *SuccSU = SI.getSUnit();
454  if (SI.getKind() == SDep::Order) {
455  if (Visited.count(SuccSU))
456  continue;
457  if (SuccSU == SUb)
458  return true;
459  Worklist.push_back(SuccSU);
460  Visited.insert(SuccSU);
461  }
462  }
463  }
464  return false;
465 }
466 
467 /// Return true if the instruction causes a chain between memory
468 /// references before and after it.
470  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
471  (MI.hasOrderedMemoryRef() &&
472  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
473 }
474 
475 /// Return the underlying objects for the memory references of an instruction.
476 /// This function calls the code in ValueTracking, but first checks that the
477 /// instruction has a memory operand.
480  const DataLayout &DL) {
481  if (!MI->hasOneMemOperand())
482  return;
484  if (!MM->getValue())
485  return;
486  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
487  for (Value *V : Objs) {
488  if (!isIdentifiedObject(V)) {
489  Objs.clear();
490  return;
491  }
492  Objs.push_back(V);
493  }
494 }
495 
496 /// Add a chain edge between a load and store if the store can be an
497 /// alias of the load on a subsequent iteration, i.e., a loop carried
498 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
499 /// but that code doesn't create loop carried dependences.
500 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
502  Value *UnknownValue =
504  for (auto &SU : SUnits) {
505  MachineInstr &MI = *SU.getInstr();
506  if (isDependenceBarrier(MI, AA))
507  PendingLoads.clear();
508  else if (MI.mayLoad()) {
510  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
511  if (Objs.empty())
512  Objs.push_back(UnknownValue);
513  for (auto V : Objs) {
514  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
515  SUs.push_back(&SU);
516  }
517  } else if (MI.mayStore()) {
519  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
520  if (Objs.empty())
521  Objs.push_back(UnknownValue);
522  for (auto V : Objs) {
524  PendingLoads.find(V);
525  if (I == PendingLoads.end())
526  continue;
527  for (auto Load : I->second) {
528  if (isSuccOrder(Load, &SU))
529  continue;
530  MachineInstr &LdMI = *Load->getInstr();
531  // First, perform the cheaper check that compares the base register.
532  // If they are the same and the load offset is less than the store
533  // offset, then mark the dependence as loop carried potentially.
534  MachineOperand *BaseOp1, *BaseOp2;
535  int64_t Offset1, Offset2;
536  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
537  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
538  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
539  (int)Offset1 < (int)Offset2) {
540  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
541  "What happened to the chain edge?");
542  SDep Dep(Load, SDep::Barrier);
543  Dep.setLatency(1);
544  SU.addPred(Dep);
545  continue;
546  }
547  }
548  // Second, the more expensive check that uses alias analysis on the
549  // base registers. If they alias, and the load offset is less than
550  // the store offset, the mark the dependence as loop carried.
551  if (!AA) {
552  SDep Dep(Load, SDep::Barrier);
553  Dep.setLatency(1);
554  SU.addPred(Dep);
555  continue;
556  }
557  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
558  MachineMemOperand *MMO2 = *MI.memoperands_begin();
559  if (!MMO1->getValue() || !MMO2->getValue()) {
560  SDep Dep(Load, SDep::Barrier);
561  Dep.setLatency(1);
562  SU.addPred(Dep);
563  continue;
564  }
565  if (MMO1->getValue() == MMO2->getValue() &&
566  MMO1->getOffset() <= MMO2->getOffset()) {
567  SDep Dep(Load, SDep::Barrier);
568  Dep.setLatency(1);
569  SU.addPred(Dep);
570  continue;
571  }
572  AliasResult AAResult = AA->alias(
574  MMO1->getAAInfo()),
576  MMO2->getAAInfo()));
577 
578  if (AAResult != NoAlias) {
579  SDep Dep(Load, SDep::Barrier);
580  Dep.setLatency(1);
581  SU.addPred(Dep);
582  }
583  }
584  }
585  }
586  }
587 }
588 
589 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
590 /// processes dependences for PHIs. This function adds true dependences
591 /// from a PHI to a use, and a loop carried dependence from the use to the
592 /// PHI. The loop carried dependence is represented as an anti dependence
593 /// edge. This function also removes chain dependences between unrelated
594 /// PHIs.
595 void SwingSchedulerDAG::updatePhiDependences() {
596  SmallVector<SDep, 4> RemoveDeps;
598 
599  // Iterate over each DAG node.
600  for (SUnit &I : SUnits) {
601  RemoveDeps.clear();
602  // Set to true if the instruction has an operand defined by a Phi.
603  unsigned HasPhiUse = 0;
604  unsigned HasPhiDef = 0;
605  MachineInstr *MI = I.getInstr();
606  // Iterate over each operand, and we process the definitions.
608  MOE = MI->operands_end();
609  MOI != MOE; ++MOI) {
610  if (!MOI->isReg())
611  continue;
612  unsigned Reg = MOI->getReg();
613  if (MOI->isDef()) {
614  // If the register is used by a Phi, then create an anti dependence.
616  UI = MRI.use_instr_begin(Reg),
617  UE = MRI.use_instr_end();
618  UI != UE; ++UI) {
619  MachineInstr *UseMI = &*UI;
620  SUnit *SU = getSUnit(UseMI);
621  if (SU != nullptr && UseMI->isPHI()) {
622  if (!MI->isPHI()) {
623  SDep Dep(SU, SDep::Anti, Reg);
624  Dep.setLatency(1);
625  I.addPred(Dep);
626  } else {
627  HasPhiDef = Reg;
628  // Add a chain edge to a dependent Phi that isn't an existing
629  // predecessor.
630  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
631  I.addPred(SDep(SU, SDep::Barrier));
632  }
633  }
634  }
635  } else if (MOI->isUse()) {
636  // If the register is defined by a Phi, then create a true dependence.
637  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
638  if (DefMI == nullptr)
639  continue;
640  SUnit *SU = getSUnit(DefMI);
641  if (SU != nullptr && DefMI->isPHI()) {
642  if (!MI->isPHI()) {
643  SDep Dep(SU, SDep::Data, Reg);
644  Dep.setLatency(0);
645  ST.adjustSchedDependency(SU, &I, Dep);
646  I.addPred(Dep);
647  } else {
648  HasPhiUse = Reg;
649  // Add a chain edge to a dependent Phi that isn't an existing
650  // predecessor.
651  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
652  I.addPred(SDep(SU, SDep::Barrier));
653  }
654  }
655  }
656  }
657  // Remove order dependences from an unrelated Phi.
658  if (!SwpPruneDeps)
659  continue;
660  for (auto &PI : I.Preds) {
661  MachineInstr *PMI = PI.getSUnit()->getInstr();
662  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
663  if (I.getInstr()->isPHI()) {
664  if (PMI->getOperand(0).getReg() == HasPhiUse)
665  continue;
666  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
667  continue;
668  }
669  RemoveDeps.push_back(PI);
670  }
671  }
672  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
673  I.removePred(RemoveDeps[i]);
674  }
675 }
676 
677 /// Iterate over each DAG node and see if we can change any dependences
678 /// in order to reduce the recurrence MII.
679 void SwingSchedulerDAG::changeDependences() {
680  // See if an instruction can use a value from the previous iteration.
681  // If so, we update the base and offset of the instruction and change
682  // the dependences.
683  for (SUnit &I : SUnits) {
684  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
685  int64_t NewOffset = 0;
686  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
687  NewOffset))
688  continue;
689 
690  // Get the MI and SUnit for the instruction that defines the original base.
691  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
692  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
693  if (!DefMI)
694  continue;
695  SUnit *DefSU = getSUnit(DefMI);
696  if (!DefSU)
697  continue;
698  // Get the MI and SUnit for the instruction that defins the new base.
699  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
700  if (!LastMI)
701  continue;
702  SUnit *LastSU = getSUnit(LastMI);
703  if (!LastSU)
704  continue;
705 
706  if (Topo.IsReachable(&I, LastSU))
707  continue;
708 
709  // Remove the dependence. The value now depends on a prior iteration.
711  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
712  ++P)
713  if (P->getSUnit() == DefSU)
714  Deps.push_back(*P);
715  for (int i = 0, e = Deps.size(); i != e; i++) {
716  Topo.RemovePred(&I, Deps[i].getSUnit());
717  I.removePred(Deps[i]);
718  }
719  // Remove the chain dependence between the instructions.
720  Deps.clear();
721  for (auto &P : LastSU->Preds)
722  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
723  Deps.push_back(P);
724  for (int i = 0, e = Deps.size(); i != e; i++) {
725  Topo.RemovePred(LastSU, Deps[i].getSUnit());
726  LastSU->removePred(Deps[i]);
727  }
728 
729  // Add a dependence between the new instruction and the instruction
730  // that defines the new base.
731  SDep Dep(&I, SDep::Anti, NewBase);
732  Topo.AddPred(LastSU, &I);
733  LastSU->addPred(Dep);
734 
735  // Remember the base and offset information so that we can update the
736  // instruction during code generation.
737  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
738  }
739 }
740 
741 namespace {
742 
743 // FuncUnitSorter - Comparison operator used to sort instructions by
744 // the number of functional unit choices.
745 struct FuncUnitSorter {
748 
749  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
750 
751  // Compute the number of functional unit alternatives needed
752  // at each stage, and take the minimum value. We prioritize the
753  // instructions by the least number of choices first.
754  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
755  unsigned schedClass = Inst->getDesc().getSchedClass();
756  unsigned min = UINT_MAX;
757  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
758  *IE = InstrItins->endStage(schedClass);
759  IS != IE; ++IS) {
760  unsigned funcUnits = IS->getUnits();
761  unsigned numAlternatives = countPopulation(funcUnits);
762  if (numAlternatives < min) {
763  min = numAlternatives;
764  F = funcUnits;
765  }
766  }
767  return min;
768  }
769 
770  // Compute the critical resources needed by the instruction. This
771  // function records the functional units needed by instructions that
772  // must use only one functional unit. We use this as a tie breaker
773  // for computing the resource MII. The instrutions that require
774  // the same, highly used, functional unit have high priority.
775  void calcCriticalResources(MachineInstr &MI) {
776  unsigned SchedClass = MI.getDesc().getSchedClass();
777  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
778  *IE = InstrItins->endStage(SchedClass);
779  IS != IE; ++IS) {
780  unsigned FuncUnits = IS->getUnits();
781  if (countPopulation(FuncUnits) == 1)
782  Resources[FuncUnits]++;
783  }
784  }
785 
786  /// Return true if IS1 has less priority than IS2.
787  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
788  unsigned F1 = 0, F2 = 0;
789  unsigned MFUs1 = minFuncUnits(IS1, F1);
790  unsigned MFUs2 = minFuncUnits(IS2, F2);
791  if (MFUs1 == 1 && MFUs2 == 1)
792  return Resources.lookup(F1) < Resources.lookup(F2);
793  return MFUs1 > MFUs2;
794  }
795 };
796 
797 } // end anonymous namespace
798 
799 /// Calculate the resource constrained minimum initiation interval for the
800 /// specified loop. We use the DFA to model the resources needed for
801 /// each instruction, and we ignore dependences. A different DFA is created
802 /// for each cycle that is required. When adding a new instruction, we attempt
803 /// to add it to each existing DFA, until a legal space is found. If the
804 /// instruction cannot be reserved in an existing DFA, we create a new one.
805 unsigned SwingSchedulerDAG::calculateResMII() {
809 
810  // Sort the instructions by the number of available choices for scheduling,
811  // least to most. Use the number of critical resources as the tie breaker.
812  FuncUnitSorter FUS =
813  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
815  E = MBB->getFirstTerminator();
816  I != E; ++I)
817  FUS.calcCriticalResources(*I);
819  FuncUnitOrder(FUS);
820 
822  E = MBB->getFirstTerminator();
823  I != E; ++I)
824  FuncUnitOrder.push(&*I);
825 
826  while (!FuncUnitOrder.empty()) {
827  MachineInstr *MI = FuncUnitOrder.top();
828  FuncUnitOrder.pop();
829  if (TII->isZeroCost(MI->getOpcode()))
830  continue;
831  // Attempt to reserve the instruction in an existing DFA. At least one
832  // DFA is needed for each cycle.
833  unsigned NumCycles = getSUnit(MI)->Latency;
834  unsigned ReservedCycles = 0;
837  for (unsigned C = 0; C < NumCycles; ++C)
838  while (RI != RE) {
839  if ((*RI++)->canReserveResources(*MI)) {
840  ++ReservedCycles;
841  break;
842  }
843  }
844  // Start reserving resources using existing DFAs.
845  for (unsigned C = 0; C < ReservedCycles; ++C) {
846  --RI;
847  (*RI)->reserveResources(*MI);
848  }
849  // Add new DFAs, if needed, to reserve resources.
850  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
851  DFAPacketizer *NewResource =
853  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
854  NewResource->reserveResources(*MI);
855  Resources.push_back(NewResource);
856  }
857  }
858  int Resmii = Resources.size();
859  // Delete the memory for each of the DFAs that were created earlier.
860  for (DFAPacketizer *RI : Resources) {
861  DFAPacketizer *D = RI;
862  delete D;
863  }
864  Resources.clear();
865  return Resmii;
866 }
867 
868 /// Calculate the recurrence-constrainted minimum initiation interval.
869 /// Iterate over each circuit. Compute the delay(c) and distance(c)
870 /// for each circuit. The II needs to satisfy the inequality
871 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
872 /// II that satisfies the inequality, and the RecMII is the maximum
873 /// of those values.
874 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
875  unsigned RecMII = 0;
876 
877  for (NodeSet &Nodes : NodeSets) {
878  if (Nodes.empty())
879  continue;
880 
881  unsigned Delay = Nodes.getLatency();
882  unsigned Distance = 1;
883 
884  // ii = ceil(delay / distance)
885  unsigned CurMII = (Delay + Distance - 1) / Distance;
886  Nodes.setRecMII(CurMII);
887  if (CurMII > RecMII)
888  RecMII = CurMII;
889  }
890 
891  return RecMII;
892 }
893 
894 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
895 /// but we do this to find the circuits, and then change them back.
896 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
898  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
899  SUnit *SU = &SUnits[i];
900  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
901  IP != EP; ++IP) {
902  if (IP->getKind() != SDep::Anti)
903  continue;
904  DepsAdded.push_back(std::make_pair(SU, *IP));
905  }
906  }
907  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
908  E = DepsAdded.end();
909  I != E; ++I) {
910  // Remove this anti dependency and add one in the reverse direction.
911  SUnit *SU = I->first;
912  SDep &D = I->second;
913  SUnit *TargetSU = D.getSUnit();
914  unsigned Reg = D.getReg();
915  unsigned Lat = D.getLatency();
916  SU->removePred(D);
917  SDep Dep(SU, SDep::Anti, Reg);
918  Dep.setLatency(Lat);
919  TargetSU->addPred(Dep);
920  }
921 }
922 
923 /// Create the adjacency structure of the nodes in the graph.
924 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
925  SwingSchedulerDAG *DAG) {
926  BitVector Added(SUnits.size());
927  DenseMap<int, int> OutputDeps;
928  for (int i = 0, e = SUnits.size(); i != e; ++i) {
929  Added.reset();
930  // Add any successor to the adjacency matrix and exclude duplicates.
931  for (auto &SI : SUnits[i].Succs) {
932  // Only create a back-edge on the first and last nodes of a dependence
933  // chain. This records any chains and adds them later.
934  if (SI.getKind() == SDep::Output) {
935  int N = SI.getSUnit()->NodeNum;
936  int BackEdge = i;
937  auto Dep = OutputDeps.find(BackEdge);
938  if (Dep != OutputDeps.end()) {
939  BackEdge = Dep->second;
940  OutputDeps.erase(Dep);
941  }
942  OutputDeps[N] = BackEdge;
943  }
944  // Do not process a boundary node, an artificial node.
945  // A back-edge is processed only if it goes to a Phi.
946  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
947  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
948  continue;
949  int N = SI.getSUnit()->NodeNum;
950  if (!Added.test(N)) {
951  AdjK[i].push_back(N);
952  Added.set(N);
953  }
954  }
955  // A chain edge between a store and a load is treated as a back-edge in the
956  // adjacency matrix.
957  for (auto &PI : SUnits[i].Preds) {
958  if (!SUnits[i].getInstr()->mayStore() ||
959  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
960  continue;
961  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
962  int N = PI.getSUnit()->NodeNum;
963  if (!Added.test(N)) {
964  AdjK[i].push_back(N);
965  Added.set(N);
966  }
967  }
968  }
969  }
970  // Add back-edges in the adjacency matrix for the output dependences.
971  for (auto &OD : OutputDeps)
972  if (!Added.test(OD.second)) {
973  AdjK[OD.first].push_back(OD.second);
974  Added.set(OD.second);
975  }
976 }
977 
978 /// Identify an elementary circuit in the dependence graph starting at the
979 /// specified node.
980 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
981  bool HasBackedge) {
982  SUnit *SV = &SUnits[V];
983  bool F = false;
984  Stack.insert(SV);
985  Blocked.set(V);
986 
987  for (auto W : AdjK[V]) {
988  if (NumPaths > MaxPaths)
989  break;
990  if (W < S)
991  continue;
992  if (W == S) {
993  if (!HasBackedge)
994  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
995  F = true;
996  ++NumPaths;
997  break;
998  } else if (!Blocked.test(W)) {
999  if (circuit(W, S, NodeSets,
1000  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1001  F = true;
1002  }
1003  }
1004 
1005  if (F)
1006  unblock(V);
1007  else {
1008  for (auto W : AdjK[V]) {
1009  if (W < S)
1010  continue;
1011  if (B[W].count(SV) == 0)
1012  B[W].insert(SV);
1013  }
1014  }
1015  Stack.pop_back();
1016  return F;
1017 }
1018 
1019 /// Unblock a node in the circuit finding algorithm.
1020 void SwingSchedulerDAG::Circuits::unblock(int U) {
1021  Blocked.reset(U);
1022  SmallPtrSet<SUnit *, 4> &BU = B[U];
1023  while (!BU.empty()) {
1025  assert(SI != BU.end() && "Invalid B set.");
1026  SUnit *W = *SI;
1027  BU.erase(W);
1028  if (Blocked.test(W->NodeNum))
1029  unblock(W->NodeNum);
1030  }
1031 }
1032 
1033 /// Identify all the elementary circuits in the dependence graph using
1034 /// Johnson's circuit algorithm.
1035 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1036  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1037  // but we do this to find the circuits, and then change them back.
1038  swapAntiDependences(SUnits);
1039 
1040  Circuits Cir(SUnits, Topo);
1041  // Create the adjacency structure.
1042  Cir.createAdjacencyStructure(this);
1043  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1044  Cir.reset();
1045  Cir.circuit(i, i, NodeSets);
1046  }
1047 
1048  // Change the dependences back so that we've created a DAG again.
1049  swapAntiDependences(SUnits);
1050 }
1051 
1052 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1053 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1054 // additional copies that are needed across iterations. An artificial dependence
1055 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1056 
1057 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1058 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1059 // PHI-------True-Dep------> USEOfPhi
1060 
1061 // The mutation creates
1062 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1063 
1064 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1065 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1066 // late to avoid additional copies across iterations. The possible scheduling
1067 // order would be
1068 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1069 
1071  for (SUnit &SU : DAG->SUnits) {
1072  // Find the COPY/REG_SEQUENCE instruction.
1073  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1074  continue;
1075 
1076  // Record the loop carried PHIs.
1077  SmallVector<SUnit *, 4> PHISUs;
1078  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1079  SmallVector<SUnit *, 4> SrcSUs;
1080 
1081  for (auto &Dep : SU.Preds) {
1082  SUnit *TmpSU = Dep.getSUnit();
1083  MachineInstr *TmpMI = TmpSU->getInstr();
1084  SDep::Kind DepKind = Dep.getKind();
1085  // Save the loop carried PHI.
1086  if (DepKind == SDep::Anti && TmpMI->isPHI())
1087  PHISUs.push_back(TmpSU);
1088  // Save the source of COPY/REG_SEQUENCE.
1089  // If the source has no pre-decessors, we will end up creating cycles.
1090  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1091  SrcSUs.push_back(TmpSU);
1092  }
1093 
1094  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1095  continue;
1096 
1097  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1098  // SUnit to the container.
1099  SmallVector<SUnit *, 8> UseSUs;
1100  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1101  for (auto &Dep : (*I)->Succs) {
1102  if (Dep.getKind() != SDep::Data)
1103  continue;
1104 
1105  SUnit *TmpSU = Dep.getSUnit();
1106  MachineInstr *TmpMI = TmpSU->getInstr();
1107  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1108  PHISUs.push_back(TmpSU);
1109  continue;
1110  }
1111  UseSUs.push_back(TmpSU);
1112  }
1113  }
1114 
1115  if (UseSUs.size() == 0)
1116  continue;
1117 
1118  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1119  // Add the artificial dependencies if it does not form a cycle.
1120  for (auto I : UseSUs) {
1121  for (auto Src : SrcSUs) {
1122  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1123  Src->addPred(SDep(I, SDep::Artificial));
1124  SDAG->Topo.AddPred(Src, I);
1125  }
1126  }
1127  }
1128  }
1129 }
1130 
1131 /// Return true for DAG nodes that we ignore when computing the cost functions.
1132 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1133 /// in the calculation of the ASAP, ALAP, etc functions.
1134 static bool ignoreDependence(const SDep &D, bool isPred) {
1135  if (D.isArtificial())
1136  return true;
1137  return D.getKind() == SDep::Anti && isPred;
1138 }
1139 
1140 /// Compute several functions need to order the nodes for scheduling.
1141 /// ASAP - Earliest time to schedule a node.
1142 /// ALAP - Latest time to schedule a node.
1143 /// MOV - Mobility function, difference between ALAP and ASAP.
1144 /// D - Depth of each node.
1145 /// H - Height of each node.
1146 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1147  ScheduleInfo.resize(SUnits.size());
1148 
1149  LLVM_DEBUG({
1150  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1151  E = Topo.end();
1152  I != E; ++I) {
1153  const SUnit &SU = SUnits[*I];
1154  dumpNode(SU);
1155  }
1156  });
1157 
1158  int maxASAP = 0;
1159  // Compute ASAP and ZeroLatencyDepth.
1160  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1161  E = Topo.end();
1162  I != E; ++I) {
1163  int asap = 0;
1164  int zeroLatencyDepth = 0;
1165  SUnit *SU = &SUnits[*I];
1166  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1167  EP = SU->Preds.end();
1168  IP != EP; ++IP) {
1169  SUnit *pred = IP->getSUnit();
1170  if (IP->getLatency() == 0)
1171  zeroLatencyDepth =
1172  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1173  if (ignoreDependence(*IP, true))
1174  continue;
1175  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1176  getDistance(pred, SU, *IP) * MII));
1177  }
1178  maxASAP = std::max(maxASAP, asap);
1179  ScheduleInfo[*I].ASAP = asap;
1180  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1181  }
1182 
1183  // Compute ALAP, ZeroLatencyHeight, and MOV.
1185  E = Topo.rend();
1186  I != E; ++I) {
1187  int alap = maxASAP;
1188  int zeroLatencyHeight = 0;
1189  SUnit *SU = &SUnits[*I];
1190  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1191  ES = SU->Succs.end();
1192  IS != ES; ++IS) {
1193  SUnit *succ = IS->getSUnit();
1194  if (IS->getLatency() == 0)
1195  zeroLatencyHeight =
1196  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1197  if (ignoreDependence(*IS, true))
1198  continue;
1199  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1200  getDistance(SU, succ, *IS) * MII));
1201  }
1202 
1203  ScheduleInfo[*I].ALAP = alap;
1204  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1205  }
1206 
1207  // After computing the node functions, compute the summary for each node set.
1208  for (NodeSet &I : NodeSets)
1209  I.computeNodeSetInfo(this);
1210 
1211  LLVM_DEBUG({
1212  for (unsigned i = 0; i < SUnits.size(); i++) {
1213  dbgs() << "\tNode " << i << ":\n";
1214  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1215  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1216  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1217  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1218  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1219  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1220  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1221  }
1222  });
1223 }
1224 
1225 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1226 /// as the predecessors of the elements of NodeOrder that are not also in
1227 /// NodeOrder.
1230  const NodeSet *S = nullptr) {
1231  Preds.clear();
1232  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1233  I != E; ++I) {
1234  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1235  PI != PE; ++PI) {
1236  if (S && S->count(PI->getSUnit()) == 0)
1237  continue;
1238  if (ignoreDependence(*PI, true))
1239  continue;
1240  if (NodeOrder.count(PI->getSUnit()) == 0)
1241  Preds.insert(PI->getSUnit());
1242  }
1243  // Back-edges are predecessors with an anti-dependence.
1244  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1245  ES = (*I)->Succs.end();
1246  IS != ES; ++IS) {
1247  if (IS->getKind() != SDep::Anti)
1248  continue;
1249  if (S && S->count(IS->getSUnit()) == 0)
1250  continue;
1251  if (NodeOrder.count(IS->getSUnit()) == 0)
1252  Preds.insert(IS->getSUnit());
1253  }
1254  }
1255  return !Preds.empty();
1256 }
1257 
1258 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1259 /// as the successors of the elements of NodeOrder that are not also in
1260 /// NodeOrder.
1263  const NodeSet *S = nullptr) {
1264  Succs.clear();
1265  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1266  I != E; ++I) {
1267  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1268  SI != SE; ++SI) {
1269  if (S && S->count(SI->getSUnit()) == 0)
1270  continue;
1271  if (ignoreDependence(*SI, false))
1272  continue;
1273  if (NodeOrder.count(SI->getSUnit()) == 0)
1274  Succs.insert(SI->getSUnit());
1275  }
1276  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1277  PE = (*I)->Preds.end();
1278  PI != PE; ++PI) {
1279  if (PI->getKind() != SDep::Anti)
1280  continue;
1281  if (S && S->count(PI->getSUnit()) == 0)
1282  continue;
1283  if (NodeOrder.count(PI->getSUnit()) == 0)
1284  Succs.insert(PI->getSUnit());
1285  }
1286  }
1287  return !Succs.empty();
1288 }
1289 
1290 /// Return true if there is a path from the specified node to any of the nodes
1291 /// in DestNodes. Keep track and return the nodes in any path.
1292 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1293  SetVector<SUnit *> &DestNodes,
1294  SetVector<SUnit *> &Exclude,
1295  SmallPtrSet<SUnit *, 8> &Visited) {
1296  if (Cur->isBoundaryNode())
1297  return false;
1298  if (Exclude.count(Cur) != 0)
1299  return false;
1300  if (DestNodes.count(Cur) != 0)
1301  return true;
1302  if (!Visited.insert(Cur).second)
1303  return Path.count(Cur) != 0;
1304  bool FoundPath = false;
1305  for (auto &SI : Cur->Succs)
1306  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1307  for (auto &PI : Cur->Preds)
1308  if (PI.getKind() == SDep::Anti)
1309  FoundPath |=
1310  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1311  if (FoundPath)
1312  Path.insert(Cur);
1313  return FoundPath;
1314 }
1315 
1316 /// Return true if Set1 is a subset of Set2.
1317 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1318  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1319  if (Set2.count(*I) == 0)
1320  return false;
1321  return true;
1322 }
1323 
1324 /// Compute the live-out registers for the instructions in a node-set.
1325 /// The live-out registers are those that are defined in the node-set,
1326 /// but not used. Except for use operands of Phis.
1328  NodeSet &NS) {
1332  SmallSet<unsigned, 4> Uses;
1333  for (SUnit *SU : NS) {
1334  const MachineInstr *MI = SU->getInstr();
1335  if (MI->isPHI())
1336  continue;
1337  for (const MachineOperand &MO : MI->operands())
1338  if (MO.isReg() && MO.isUse()) {
1339  unsigned Reg = MO.getReg();
1341  Uses.insert(Reg);
1342  else if (MRI.isAllocatable(Reg))
1343  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1344  Uses.insert(*Units);
1345  }
1346  }
1347  for (SUnit *SU : NS)
1348  for (const MachineOperand &MO : SU->getInstr()->operands())
1349  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1350  unsigned Reg = MO.getReg();
1352  if (!Uses.count(Reg))
1353  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1355  } else if (MRI.isAllocatable(Reg)) {
1356  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1357  if (!Uses.count(*Units))
1358  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1360  }
1361  }
1362  RPTracker.addLiveRegs(LiveOutRegs);
1363 }
1364 
1365 /// A heuristic to filter nodes in recurrent node-sets if the register
1366 /// pressure of a set is too high.
1367 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1368  for (auto &NS : NodeSets) {
1369  // Skip small node-sets since they won't cause register pressure problems.
1370  if (NS.size() <= 2)
1371  continue;
1372  IntervalPressure RecRegPressure;
1373  RegPressureTracker RecRPTracker(RecRegPressure);
1374  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1375  computeLiveOuts(MF, RecRPTracker, NS);
1376  RecRPTracker.closeBottom();
1377 
1378  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1379  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1380  return A->NodeNum > B->NodeNum;
1381  });
1382 
1383  for (auto &SU : SUnits) {
1384  // Since we're computing the register pressure for a subset of the
1385  // instructions in a block, we need to set the tracker for each
1386  // instruction in the node-set. The tracker is set to the instruction
1387  // just after the one we're interested in.
1388  MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1389  RecRPTracker.setPos(std::next(CurInstI));
1390 
1391  RegPressureDelta RPDelta;
1392  ArrayRef<PressureChange> CriticalPSets;
1393  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1394  CriticalPSets,
1395  RecRegPressure.MaxSetPressure);
1396  if (RPDelta.Excess.isValid()) {
1397  LLVM_DEBUG(
1398  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1399  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1400  << ":" << RPDelta.Excess.getUnitInc());
1401  NS.setExceedPressure(SU);
1402  break;
1403  }
1404  RecRPTracker.recede();
1405  }
1406  }
1407 }
1408 
1409 /// A heuristic to colocate node sets that have the same set of
1410 /// successors.
1411 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1412  unsigned Colocate = 0;
1413  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1414  NodeSet &N1 = NodeSets[i];
1416  if (N1.empty() || !succ_L(N1, S1))
1417  continue;
1418  for (int j = i + 1; j < e; ++j) {
1419  NodeSet &N2 = NodeSets[j];
1420  if (N1.compareRecMII(N2) != 0)
1421  continue;
1423  if (N2.empty() || !succ_L(N2, S2))
1424  continue;
1425  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1426  N1.setColocate(++Colocate);
1427  N2.setColocate(Colocate);
1428  break;
1429  }
1430  }
1431  }
1432 }
1433 
1434 /// Check if the existing node-sets are profitable. If not, then ignore the
1435 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1436 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1437 /// then it's best to try to schedule all instructions together instead of
1438 /// starting with the recurrent node-sets.
1439 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1440  // Look for loops with a large MII.
1441  if (MII < 17)
1442  return;
1443  // Check if the node-set contains only a simple add recurrence.
1444  for (auto &NS : NodeSets) {
1445  if (NS.getRecMII() > 2)
1446  return;
1447  if (NS.getMaxDepth() > MII)
1448  return;
1449  }
1450  NodeSets.clear();
1451  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1452  return;
1453 }
1454 
1455 /// Add the nodes that do not belong to a recurrence set into groups
1456 /// based upon connected componenets.
1457 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1458  SetVector<SUnit *> NodesAdded;
1459  SmallPtrSet<SUnit *, 8> Visited;
1460  // Add the nodes that are on a path between the previous node sets and
1461  // the current node set.
1462  for (NodeSet &I : NodeSets) {
1464  // Add the nodes from the current node set to the previous node set.
1465  if (succ_L(I, N)) {
1466  SetVector<SUnit *> Path;
1467  for (SUnit *NI : N) {
1468  Visited.clear();
1469  computePath(NI, Path, NodesAdded, I, Visited);
1470  }
1471  if (!Path.empty())
1472  I.insert(Path.begin(), Path.end());
1473  }
1474  // Add the nodes from the previous node set to the current node set.
1475  N.clear();
1476  if (succ_L(NodesAdded, N)) {
1477  SetVector<SUnit *> Path;
1478  for (SUnit *NI : N) {
1479  Visited.clear();
1480  computePath(NI, Path, I, NodesAdded, Visited);
1481  }
1482  if (!Path.empty())
1483  I.insert(Path.begin(), Path.end());
1484  }
1485  NodesAdded.insert(I.begin(), I.end());
1486  }
1487 
1488  // Create a new node set with the connected nodes of any successor of a node
1489  // in a recurrent set.
1490  NodeSet NewSet;
1492  if (succ_L(NodesAdded, N))
1493  for (SUnit *I : N)
1494  addConnectedNodes(I, NewSet, NodesAdded);
1495  if (!NewSet.empty())
1496  NodeSets.push_back(NewSet);
1497 
1498  // Create a new node set with the connected nodes of any predecessor of a node
1499  // in a recurrent set.
1500  NewSet.clear();
1501  if (pred_L(NodesAdded, N))
1502  for (SUnit *I : N)
1503  addConnectedNodes(I, NewSet, NodesAdded);
1504  if (!NewSet.empty())
1505  NodeSets.push_back(NewSet);
1506 
1507  // Create new nodes sets with the connected nodes any remaining node that
1508  // has no predecessor.
1509  for (unsigned i = 0; i < SUnits.size(); ++i) {
1510  SUnit *SU = &SUnits[i];
1511  if (NodesAdded.count(SU) == 0) {
1512  NewSet.clear();
1513  addConnectedNodes(SU, NewSet, NodesAdded);
1514  if (!NewSet.empty())
1515  NodeSets.push_back(NewSet);
1516  }
1517  }
1518 }
1519 
1520 /// Add the node to the set, and add all is its connected nodes to the set.
1521 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1522  SetVector<SUnit *> &NodesAdded) {
1523  NewSet.insert(SU);
1524  NodesAdded.insert(SU);
1525  for (auto &SI : SU->Succs) {
1526  SUnit *Successor = SI.getSUnit();
1527  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1528  addConnectedNodes(Successor, NewSet, NodesAdded);
1529  }
1530  for (auto &PI : SU->Preds) {
1531  SUnit *Predecessor = PI.getSUnit();
1532  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1533  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1534  }
1535 }
1536 
1537 /// Return true if Set1 contains elements in Set2. The elements in common
1538 /// are returned in a different container.
1539 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1540  SmallSetVector<SUnit *, 8> &Result) {
1541  Result.clear();
1542  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1543  SUnit *SU = Set1[i];
1544  if (Set2.count(SU) != 0)
1545  Result.insert(SU);
1546  }
1547  return !Result.empty();
1548 }
1549 
1550 /// Merge the recurrence node sets that have the same initial node.
1551 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1552  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1553  ++I) {
1554  NodeSet &NI = *I;
1555  for (NodeSetType::iterator J = I + 1; J != E;) {
1556  NodeSet &NJ = *J;
1557  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1558  if (NJ.compareRecMII(NI) > 0)
1559  NI.setRecMII(NJ.getRecMII());
1560  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1561  ++NII)
1562  I->insert(*NII);
1563  NodeSets.erase(J);
1564  E = NodeSets.end();
1565  } else {
1566  ++J;
1567  }
1568  }
1569  }
1570 }
1571 
1572 /// Remove nodes that have been scheduled in previous NodeSets.
1573 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1574  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1575  ++I)
1576  for (NodeSetType::iterator J = I + 1; J != E;) {
1577  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1578 
1579  if (J->empty()) {
1580  NodeSets.erase(J);
1581  E = NodeSets.end();
1582  } else {
1583  ++J;
1584  }
1585  }
1586 }
1587 
1588 /// Compute an ordered list of the dependence graph nodes, which
1589 /// indicates the order that the nodes will be scheduled. This is a
1590 /// two-level algorithm. First, a partial order is created, which
1591 /// consists of a list of sets ordered from highest to lowest priority.
1592 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1594  NodeOrder.clear();
1595 
1596  for (auto &Nodes : NodeSets) {
1597  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1598  OrderKind Order;
1600  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
1601  R.insert(N.begin(), N.end());
1602  Order = BottomUp;
1603  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1604  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
1605  R.insert(N.begin(), N.end());
1606  Order = TopDown;
1607  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1608  } else if (isIntersect(N, Nodes, R)) {
1609  // If some of the successors are in the existing node-set, then use the
1610  // top-down ordering.
1611  Order = TopDown;
1612  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1613  } else if (NodeSets.size() == 1) {
1614  for (auto &N : Nodes)
1615  if (N->Succs.size() == 0)
1616  R.insert(N);
1617  Order = BottomUp;
1618  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1619  } else {
1620  // Find the node with the highest ASAP.
1621  SUnit *maxASAP = nullptr;
1622  for (SUnit *SU : Nodes) {
1623  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1624  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1625  maxASAP = SU;
1626  }
1627  R.insert(maxASAP);
1628  Order = BottomUp;
1629  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1630  }
1631 
1632  while (!R.empty()) {
1633  if (Order == TopDown) {
1634  // Choose the node with the maximum height. If more than one, choose
1635  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1636  // choose the node with the lowest MOV.
1637  while (!R.empty()) {
1638  SUnit *maxHeight = nullptr;
1639  for (SUnit *I : R) {
1640  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1641  maxHeight = I;
1642  else if (getHeight(I) == getHeight(maxHeight) &&
1643  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
1644  maxHeight = I;
1645  else if (getHeight(I) == getHeight(maxHeight) &&
1646  getZeroLatencyHeight(I) ==
1647  getZeroLatencyHeight(maxHeight) &&
1648  getMOV(I) < getMOV(maxHeight))
1649  maxHeight = I;
1650  }
1651  NodeOrder.insert(maxHeight);
1652  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1653  R.remove(maxHeight);
1654  for (const auto &I : maxHeight->Succs) {
1655  if (Nodes.count(I.getSUnit()) == 0)
1656  continue;
1657  if (NodeOrder.count(I.getSUnit()) != 0)
1658  continue;
1659  if (ignoreDependence(I, false))
1660  continue;
1661  R.insert(I.getSUnit());
1662  }
1663  // Back-edges are predecessors with an anti-dependence.
1664  for (const auto &I : maxHeight->Preds) {
1665  if (I.getKind() != SDep::Anti)
1666  continue;
1667  if (Nodes.count(I.getSUnit()) == 0)
1668  continue;
1669  if (NodeOrder.count(I.getSUnit()) != 0)
1670  continue;
1671  R.insert(I.getSUnit());
1672  }
1673  }
1674  Order = BottomUp;
1675  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1677  if (pred_L(NodeOrder, N, &Nodes))
1678  R.insert(N.begin(), N.end());
1679  } else {
1680  // Choose the node with the maximum depth. If more than one, choose
1681  // the node with the maximum ZeroLatencyDepth. If still more than one,
1682  // choose the node with the lowest MOV.
1683  while (!R.empty()) {
1684  SUnit *maxDepth = nullptr;
1685  for (SUnit *I : R) {
1686  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1687  maxDepth = I;
1688  else if (getDepth(I) == getDepth(maxDepth) &&
1689  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
1690  maxDepth = I;
1691  else if (getDepth(I) == getDepth(maxDepth) &&
1692  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1693  getMOV(I) < getMOV(maxDepth))
1694  maxDepth = I;
1695  }
1696  NodeOrder.insert(maxDepth);
1697  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1698  R.remove(maxDepth);
1699  if (Nodes.isExceedSU(maxDepth)) {
1700  Order = TopDown;
1701  R.clear();
1702  R.insert(Nodes.getNode(0));
1703  break;
1704  }
1705  for (const auto &I : maxDepth->Preds) {
1706  if (Nodes.count(I.getSUnit()) == 0)
1707  continue;
1708  if (NodeOrder.count(I.getSUnit()) != 0)
1709  continue;
1710  R.insert(I.getSUnit());
1711  }
1712  // Back-edges are predecessors with an anti-dependence.
1713  for (const auto &I : maxDepth->Succs) {
1714  if (I.getKind() != SDep::Anti)
1715  continue;
1716  if (Nodes.count(I.getSUnit()) == 0)
1717  continue;
1718  if (NodeOrder.count(I.getSUnit()) != 0)
1719  continue;
1720  R.insert(I.getSUnit());
1721  }
1722  }
1723  Order = TopDown;
1724  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1726  if (succ_L(NodeOrder, N, &Nodes))
1727  R.insert(N.begin(), N.end());
1728  }
1729  }
1730  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1731  }
1732 
1733  LLVM_DEBUG({
1734  dbgs() << "Node order: ";
1735  for (SUnit *I : NodeOrder)
1736  dbgs() << " " << I->NodeNum << " ";
1737  dbgs() << "\n";
1738  });
1739 }
1740 
1741 /// Process the nodes in the computed order and create the pipelined schedule
1742 /// of the instructions, if possible. Return true if a schedule is found.
1743 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1744  if (NodeOrder.empty())
1745  return false;
1746 
1747  bool scheduleFound = false;
1748  // Keep increasing II until a valid schedule is found.
1749  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
1750  Schedule.reset();
1751  Schedule.setInitiationInterval(II);
1752  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1753 
1756  do {
1757  SUnit *SU = *NI;
1758 
1759  // Compute the schedule time for the instruction, which is based
1760  // upon the scheduled time for any predecessors/successors.
1761  int EarlyStart = INT_MIN;
1762  int LateStart = INT_MAX;
1763  // These values are set when the size of the schedule window is limited
1764  // due to chain dependences.
1765  int SchedEnd = INT_MAX;
1766  int SchedStart = INT_MIN;
1767  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1768  II, this);
1769  LLVM_DEBUG({
1770  dbgs() << "Inst (" << SU->NodeNum << ") ";
1771  SU->getInstr()->dump();
1772  dbgs() << "\n";
1773  });
1774  LLVM_DEBUG({
1775  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
1776  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
1777  });
1778 
1779  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
1780  SchedStart > LateStart)
1781  scheduleFound = false;
1782  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
1783  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
1784  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1785  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
1786  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
1787  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
1788  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
1789  SchedEnd =
1790  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
1791  // When scheduling a Phi it is better to start at the late cycle and go
1792  // backwards. The default order may insert the Phi too far away from
1793  // its first dependence.
1794  if (SU->getInstr()->isPHI())
1795  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
1796  else
1797  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1798  } else {
1799  int FirstCycle = Schedule.getFirstCycle();
1800  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
1801  FirstCycle + getASAP(SU) + II - 1, II);
1802  }
1803  // Even if we find a schedule, make sure the schedule doesn't exceed the
1804  // allowable number of stages. We keep trying if this happens.
1805  if (scheduleFound)
1806  if (SwpMaxStages > -1 &&
1807  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
1808  scheduleFound = false;
1809 
1810  LLVM_DEBUG({
1811  if (!scheduleFound)
1812  dbgs() << "\tCan't schedule\n";
1813  });
1814  } while (++NI != NE && scheduleFound);
1815 
1816  // If a schedule is found, check if it is a valid schedule too.
1817  if (scheduleFound)
1818  scheduleFound = Schedule.isValidSchedule(this);
1819  }
1820 
1821  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
1822 
1823  if (scheduleFound)
1824  Schedule.finalizeSchedule(this);
1825  else
1826  Schedule.reset();
1827 
1828  return scheduleFound && Schedule.getMaxStageCount() > 0;
1829 }
1830 
1831 /// Given a schedule for the loop, generate a new version of the loop,
1832 /// and replace the old version. This function generates a prolog
1833 /// that contains the initial iterations in the pipeline, and kernel
1834 /// loop, and the epilogue that contains the code for the final
1835 /// iterations.
1836 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
1837  // Create a new basic block for the kernel and add it to the CFG.
1838  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1839 
1840  unsigned MaxStageCount = Schedule.getMaxStageCount();
1841 
1842  // Remember the registers that are used in different stages. The index is
1843  // the iteration, or stage, that the instruction is scheduled in. This is
1844  // a map between register names in the original block and the names created
1845  // in each stage of the pipelined loop.
1846  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
1847  InstrMapTy InstrMap;
1848 
1850  // Generate the prolog instructions that set up the pipeline.
1851  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
1852  MF.insert(BB->getIterator(), KernelBB);
1853 
1854  // Rearrange the instructions to generate the new, pipelined loop,
1855  // and update register names as needed.
1856  for (int Cycle = Schedule.getFirstCycle(),
1857  LastCycle = Schedule.getFinalCycle();
1858  Cycle <= LastCycle; ++Cycle) {
1859  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
1860  // This inner loop schedules each instruction in the cycle.
1861  for (SUnit *CI : CycleInstrs) {
1862  if (CI->getInstr()->isPHI())
1863  continue;
1864  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
1865  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
1866  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
1867  KernelBB->push_back(NewMI);
1868  InstrMap[NewMI] = CI->getInstr();
1869  }
1870  }
1871 
1872  // Copy any terminator instructions to the new kernel, and update
1873  // names as needed.
1874  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
1875  E = BB->instr_end();
1876  I != E; ++I) {
1877  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
1878  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
1879  KernelBB->push_back(NewMI);
1880  InstrMap[NewMI] = &*I;
1881  }
1882 
1883  KernelBB->transferSuccessors(BB);
1884  KernelBB->replaceSuccessor(BB, KernelBB);
1885 
1886  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
1887  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
1888  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
1889  InstrMap, MaxStageCount, MaxStageCount, false);
1890 
1891  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
1892 
1894  // Generate the epilog instructions to complete the pipeline.
1895  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
1896  PrologBBs);
1897 
1898  // We need this step because the register allocation doesn't handle some
1899  // situations well, so we insert copies to help out.
1900  splitLifetimes(KernelBB, EpilogBBs, Schedule);
1901 
1902  // Remove dead instructions due to loop induction variables.
1903  removeDeadInstructions(KernelBB, EpilogBBs);
1904 
1905  // Add branches between prolog and epilog blocks.
1906  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
1907 
1908  // Remove the original loop since it's no longer referenced.
1909  for (auto &I : *BB)
1910  LIS.RemoveMachineInstrFromMaps(I);
1911  BB->clear();
1912  BB->eraseFromParent();
1913 
1914  delete[] VRMap;
1915 }
1916 
1917 /// Generate the pipeline prolog code.
1918 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
1919  MachineBasicBlock *KernelBB,
1920  ValueMapTy *VRMap,
1921  MBBVectorTy &PrologBBs) {
1922  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
1923  assert(PreheaderBB != nullptr &&
1924  "Need to add code to handle loops w/o preheader");
1925  MachineBasicBlock *PredBB = PreheaderBB;
1926  InstrMapTy InstrMap;
1927 
1928  // Generate a basic block for each stage, not including the last stage,
1929  // which will be generated in the kernel. Each basic block may contain
1930  // instructions from multiple stages/iterations.
1931  for (unsigned i = 0; i < LastStage; ++i) {
1932  // Create and insert the prolog basic block prior to the original loop
1933  // basic block. The original loop is removed later.
1934  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1935  PrologBBs.push_back(NewBB);
1936  MF.insert(BB->getIterator(), NewBB);
1937  NewBB->transferSuccessors(PredBB);
1938  PredBB->addSuccessor(NewBB);
1939  PredBB = NewBB;
1940 
1941  // Generate instructions for each appropriate stage. Process instructions
1942  // in original program order.
1943  for (int StageNum = i; StageNum >= 0; --StageNum) {
1944  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
1945  BBE = BB->getFirstTerminator();
1946  BBI != BBE; ++BBI) {
1947  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
1948  if (BBI->isPHI())
1949  continue;
1950  MachineInstr *NewMI =
1951  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
1952  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
1953  VRMap);
1954  NewBB->push_back(NewMI);
1955  InstrMap[NewMI] = &*BBI;
1956  }
1957  }
1958  }
1959  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
1960  LLVM_DEBUG({
1961  dbgs() << "prolog:\n";
1962  NewBB->dump();
1963  });
1964  }
1965 
1966  PredBB->replaceSuccessor(BB, KernelBB);
1967 
1968  // Check if we need to remove the branch from the preheader to the original
1969  // loop, and replace it with a branch to the new loop.
1970  unsigned numBranches = TII->removeBranch(*PreheaderBB);
1971  if (numBranches) {
1973  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
1974  }
1975 }
1976 
1977 /// Generate the pipeline epilog code. The epilog code finishes the iterations
1978 /// that were started in either the prolog or the kernel. We create a basic
1979 /// block for each stage that needs to complete.
1980 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
1981  MachineBasicBlock *KernelBB,
1982  ValueMapTy *VRMap,
1983  MBBVectorTy &EpilogBBs,
1984  MBBVectorTy &PrologBBs) {
1985  // We need to change the branch from the kernel to the first epilog block, so
1986  // this call to analyze branch uses the kernel rather than the original BB.
1987  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1989  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
1990  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
1991  if (checkBranch)
1992  return;
1993 
1994  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
1995  if (*LoopExitI == KernelBB)
1996  ++LoopExitI;
1997  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
1998  MachineBasicBlock *LoopExitBB = *LoopExitI;
1999 
2000  MachineBasicBlock *PredBB = KernelBB;
2001  MachineBasicBlock *EpilogStart = LoopExitBB;
2002  InstrMapTy InstrMap;
2003 
2004  // Generate a basic block for each stage, not including the last stage,
2005  // which was generated for the kernel. Each basic block may contain
2006  // instructions from multiple stages/iterations.
2007  int EpilogStage = LastStage + 1;
2008  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2010  EpilogBBs.push_back(NewBB);
2011  MF.insert(BB->getIterator(), NewBB);
2012 
2013  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2014  NewBB->addSuccessor(LoopExitBB);
2015 
2016  if (EpilogStart == LoopExitBB)
2017  EpilogStart = NewBB;
2018 
2019  // Add instructions to the epilog depending on the current block.
2020  // Process instructions in original program order.
2021  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2022  for (auto &BBI : *BB) {
2023  if (BBI.isPHI())
2024  continue;
2025  MachineInstr *In = &BBI;
2026  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2027  // Instructions with memoperands in the epilog are updated with
2028  // conservative values.
2029  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2030  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2031  NewBB->push_back(NewMI);
2032  InstrMap[NewMI] = In;
2033  }
2034  }
2035  }
2036  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2037  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2038  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2039  InstrMap, LastStage, EpilogStage, i == 1);
2040  PredBB = NewBB;
2041 
2042  LLVM_DEBUG({
2043  dbgs() << "epilog:\n";
2044  NewBB->dump();
2045  });
2046  }
2047 
2048  // Fix any Phi nodes in the loop exit block.
2049  for (MachineInstr &MI : *LoopExitBB) {
2050  if (!MI.isPHI())
2051  break;
2052  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2053  MachineOperand &MO = MI.getOperand(i);
2054  if (MO.getMBB() == BB)
2055  MO.setMBB(PredBB);
2056  }
2057  }
2058 
2059  // Create a branch to the new epilog from the kernel.
2060  // Remove the original branch and add a new branch to the epilog.
2061  TII->removeBranch(*KernelBB);
2062  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2063  // Add a branch to the loop exit.
2064  if (EpilogBBs.size() > 0) {
2065  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2067  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2068  }
2069 }
2070 
2071 /// Replace all uses of FromReg that appear outside the specified
2072 /// basic block with ToReg.
2073 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2074  MachineBasicBlock *MBB,
2076  LiveIntervals &LIS) {
2077  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2078  E = MRI.use_end();
2079  I != E;) {
2080  MachineOperand &O = *I;
2081  ++I;
2082  if (O.getParent()->getParent() != MBB)
2083  O.setReg(ToReg);
2084  }
2085  if (!LIS.hasInterval(ToReg))
2086  LIS.createEmptyInterval(ToReg);
2087 }
2088 
2089 /// Return true if the register has a use that occurs outside the
2090 /// specified loop.
2091 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2094  E = MRI.use_end();
2095  I != E; ++I)
2096  if (I->getParent()->getParent() != BB)
2097  return true;
2098  return false;
2099 }
2100 
2101 /// Generate Phis for the specific block in the generated pipelined code.
2102 /// This function looks at the Phis from the original code to guide the
2103 /// creation of new Phis.
2104 void SwingSchedulerDAG::generateExistingPhis(
2106  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2107  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2108  bool IsLast) {
2109  // Compute the stage number for the initial value of the Phi, which
2110  // comes from the prolog. The prolog to use depends on to which kernel/
2111  // epilog that we're adding the Phi.
2112  unsigned PrologStage = 0;
2113  unsigned PrevStage = 0;
2114  bool InKernel = (LastStageNum == CurStageNum);
2115  if (InKernel) {
2116  PrologStage = LastStageNum - 1;
2117  PrevStage = CurStageNum;
2118  } else {
2119  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2120  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2121  }
2122 
2123  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2124  BBE = BB->getFirstNonPHI();
2125  BBI != BBE; ++BBI) {
2126  unsigned Def = BBI->getOperand(0).getReg();
2127 
2128  unsigned InitVal = 0;
2129  unsigned LoopVal = 0;
2130  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2131 
2132  unsigned PhiOp1 = 0;
2133  // The Phi value from the loop body typically is defined in the loop, but
2134  // not always. So, we need to check if the value is defined in the loop.
2135  unsigned PhiOp2 = LoopVal;
2136  if (VRMap[LastStageNum].count(LoopVal))
2137  PhiOp2 = VRMap[LastStageNum][LoopVal];
2138 
2139  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2140  int LoopValStage =
2141  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2142  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2143  if (NumStages == 0) {
2144  // We don't need to generate a Phi anymore, but we need to rename any uses
2145  // of the Phi value.
2146  unsigned NewReg = VRMap[PrevStage][LoopVal];
2147  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2148  Def, InitVal, NewReg);
2149  if (VRMap[CurStageNum].count(LoopVal))
2150  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2151  }
2152  // Adjust the number of Phis needed depending on the number of prologs left,
2153  // and the distance from where the Phi is first scheduled. The number of
2154  // Phis cannot exceed the number of prolog stages. Each stage can
2155  // potentially define two values.
2156  unsigned MaxPhis = PrologStage + 2;
2157  if (!InKernel && (int)PrologStage <= LoopValStage)
2158  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2159  unsigned NumPhis = std::min(NumStages, MaxPhis);
2160 
2161  unsigned NewReg = 0;
2162  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2163  // In the epilog, we may need to look back one stage to get the correct
2164  // Phi name because the epilog and prolog blocks execute the same stage.
2165  // The correct name is from the previous block only when the Phi has
2166  // been completely scheduled prior to the epilog, and Phi value is not
2167  // needed in multiple stages.
2168  int StageDiff = 0;
2169  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2170  NumPhis == 1)
2171  StageDiff = 1;
2172  // Adjust the computations below when the phi and the loop definition
2173  // are scheduled in different stages.
2174  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2175  StageDiff = StageScheduled - LoopValStage;
2176  for (unsigned np = 0; np < NumPhis; ++np) {
2177  // If the Phi hasn't been scheduled, then use the initial Phi operand
2178  // value. Otherwise, use the scheduled version of the instruction. This
2179  // is a little complicated when a Phi references another Phi.
2180  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2181  PhiOp1 = InitVal;
2182  // Check if the Phi has already been scheduled in a prolog stage.
2183  else if (PrologStage >= AccessStage + StageDiff + np &&
2184  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2185  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2186  // Check if the Phi has already been scheduled, but the loop instruction
2187  // is either another Phi, or doesn't occur in the loop.
2188  else if (PrologStage >= AccessStage + StageDiff + np) {
2189  // If the Phi references another Phi, we need to examine the other
2190  // Phi to get the correct value.
2191  PhiOp1 = LoopVal;
2192  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2193  int Indirects = 1;
2194  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2195  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2196  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2197  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2198  else
2199  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2200  InstOp1 = MRI.getVRegDef(PhiOp1);
2201  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2202  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2203  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2204  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2205  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2206  break;
2207  }
2208  ++Indirects;
2209  }
2210  } else
2211  PhiOp1 = InitVal;
2212  // If this references a generated Phi in the kernel, get the Phi operand
2213  // from the incoming block.
2214  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2215  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2216  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2217 
2218  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2219  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2220  // In the epilog, a map lookup is needed to get the value from the kernel,
2221  // or previous epilog block. How is does this depends on if the
2222  // instruction is scheduled in the previous block.
2223  if (!InKernel) {
2224  int StageDiffAdj = 0;
2225  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2226  StageDiffAdj = StageScheduled - LoopValStage;
2227  // Use the loop value defined in the kernel, unless the kernel
2228  // contains the last definition of the Phi.
2229  if (np == 0 && PrevStage == LastStageNum &&
2230  (StageScheduled != 0 || LoopValStage != 0) &&
2231  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2232  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2233  // Use the value defined by the Phi. We add one because we switch
2234  // from looking at the loop value to the Phi definition.
2235  else if (np > 0 && PrevStage == LastStageNum &&
2236  VRMap[PrevStage - np + 1].count(Def))
2237  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2238  // Use the loop value defined in the kernel.
2239  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2240  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2241  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2242  // Use the value defined by the Phi, unless we're generating the first
2243  // epilog and the Phi refers to a Phi in a different stage.
2244  else if (VRMap[PrevStage - np].count(Def) &&
2245  (!LoopDefIsPhi || PrevStage != LastStageNum))
2246  PhiOp2 = VRMap[PrevStage - np][Def];
2247  }
2248 
2249  // Check if we can reuse an existing Phi. This occurs when a Phi
2250  // references another Phi, and the other Phi is scheduled in an
2251  // earlier stage. We can try to reuse an existing Phi up until the last
2252  // stage of the current Phi.
2253  if (LoopDefIsPhi) {
2254  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2255  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2256  int StageDiff = (StageScheduled - LoopValStage);
2257  LVNumStages -= StageDiff;
2258  // Make sure the loop value Phi has been processed already.
2259  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2260  NewReg = PhiOp2;
2261  unsigned ReuseStage = CurStageNum;
2262  if (Schedule.isLoopCarried(this, *PhiInst))
2263  ReuseStage -= LVNumStages;
2264  // Check if the Phi to reuse has been generated yet. If not, then
2265  // there is nothing to reuse.
2266  if (VRMap[ReuseStage - np].count(LoopVal)) {
2267  NewReg = VRMap[ReuseStage - np][LoopVal];
2268 
2269  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2270  &*BBI, Def, NewReg);
2271  // Update the map with the new Phi name.
2272  VRMap[CurStageNum - np][Def] = NewReg;
2273  PhiOp2 = NewReg;
2274  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2275  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2276 
2277  if (IsLast && np == NumPhis - 1)
2278  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2279  continue;
2280  }
2281  }
2282  }
2283  if (InKernel && StageDiff > 0 &&
2284  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2285  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2286  }
2287 
2288  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2289  NewReg = MRI.createVirtualRegister(RC);
2290 
2291  MachineInstrBuilder NewPhi =
2292  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2293  TII->get(TargetOpcode::PHI), NewReg);
2294  NewPhi.addReg(PhiOp1).addMBB(BB1);
2295  NewPhi.addReg(PhiOp2).addMBB(BB2);
2296  if (np == 0)
2297  InstrMap[NewPhi] = &*BBI;
2298 
2299  // We define the Phis after creating the new pipelined code, so
2300  // we need to rename the Phi values in scheduled instructions.
2301 
2302  unsigned PrevReg = 0;
2303  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2304  PrevReg = VRMap[PrevStage - np][LoopVal];
2305  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2306  Def, NewReg, PrevReg);
2307  // If the Phi has been scheduled, use the new name for rewriting.
2308  if (VRMap[CurStageNum - np].count(Def)) {
2309  unsigned R = VRMap[CurStageNum - np][Def];
2310  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2311  R, NewReg);
2312  }
2313 
2314  // Check if we need to rename any uses that occurs after the loop. The
2315  // register to replace depends on whether the Phi is scheduled in the
2316  // epilog.
2317  if (IsLast && np == NumPhis - 1)
2318  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2319 
2320  // In the kernel, a dependent Phi uses the value from this Phi.
2321  if (InKernel)
2322  PhiOp2 = NewReg;
2323 
2324  // Update the map with the new Phi name.
2325  VRMap[CurStageNum - np][Def] = NewReg;
2326  }
2327 
2328  while (NumPhis++ < NumStages) {
2329  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2330  &*BBI, Def, NewReg, 0);
2331  }
2332 
2333  // Check if we need to rename a Phi that has been eliminated due to
2334  // scheduling.
2335  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2336  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2337  }
2338 }
2339 
2340 /// Generate Phis for the specified block in the generated pipelined code.
2341 /// These are new Phis needed because the definition is scheduled after the
2342 /// use in the pipelined sequence.
2343 void SwingSchedulerDAG::generatePhis(
2345  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2346  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2347  bool IsLast) {
2348  // Compute the stage number that contains the initial Phi value, and
2349  // the Phi from the previous stage.
2350  unsigned PrologStage = 0;
2351  unsigned PrevStage = 0;
2352  unsigned StageDiff = CurStageNum - LastStageNum;
2353  bool InKernel = (StageDiff == 0);
2354  if (InKernel) {
2355  PrologStage = LastStageNum - 1;
2356  PrevStage = CurStageNum;
2357  } else {
2358  PrologStage = LastStageNum - StageDiff;
2359  PrevStage = LastStageNum + StageDiff - 1;
2360  }
2361 
2362  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2363  BBE = BB->instr_end();
2364  BBI != BBE; ++BBI) {
2365  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2366  MachineOperand &MO = BBI->getOperand(i);
2367  if (!MO.isReg() || !MO.isDef() ||
2369  continue;
2370 
2371  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2372  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2373  unsigned Def = MO.getReg();
2374  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2375  // An instruction scheduled in stage 0 and is used after the loop
2376  // requires a phi in the epilog for the last definition from either
2377  // the kernel or prolog.
2378  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2379  hasUseAfterLoop(Def, BB, MRI))
2380  NumPhis = 1;
2381  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2382  continue;
2383 
2384  unsigned PhiOp2 = VRMap[PrevStage][Def];
2385  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2386  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2387  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2388  // The number of Phis can't exceed the number of prolog stages. The
2389  // prolog stage number is zero based.
2390  if (NumPhis > PrologStage + 1 - StageScheduled)
2391  NumPhis = PrologStage + 1 - StageScheduled;
2392  for (unsigned np = 0; np < NumPhis; ++np) {
2393  unsigned PhiOp1 = VRMap[PrologStage][Def];
2394  if (np <= PrologStage)
2395  PhiOp1 = VRMap[PrologStage - np][Def];
2396  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2397  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2398  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2399  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2400  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2401  }
2402  if (!InKernel)
2403  PhiOp2 = VRMap[PrevStage - np][Def];
2404 
2405  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2406  unsigned NewReg = MRI.createVirtualRegister(RC);
2407 
2408  MachineInstrBuilder NewPhi =
2409  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2410  TII->get(TargetOpcode::PHI), NewReg);
2411  NewPhi.addReg(PhiOp1).addMBB(BB1);
2412  NewPhi.addReg(PhiOp2).addMBB(BB2);
2413  if (np == 0)
2414  InstrMap[NewPhi] = &*BBI;
2415 
2416  // Rewrite uses and update the map. The actions depend upon whether
2417  // we generating code for the kernel or epilog blocks.
2418  if (InKernel) {
2419  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2420  &*BBI, PhiOp1, NewReg);
2421  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2422  &*BBI, PhiOp2, NewReg);
2423 
2424  PhiOp2 = NewReg;
2425  VRMap[PrevStage - np - 1][Def] = NewReg;
2426  } else {
2427  VRMap[CurStageNum - np][Def] = NewReg;
2428  if (np == NumPhis - 1)
2429  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2430  &*BBI, Def, NewReg);
2431  }
2432  if (IsLast && np == NumPhis - 1)
2433  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2434  }
2435  }
2436  }
2437 }
2438 
2439 /// Remove instructions that generate values with no uses.
2440 /// Typically, these are induction variable operations that generate values
2441 /// used in the loop itself. A dead instruction has a definition with
2442 /// no uses, or uses that occur in the original loop only.
2443 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2444  MBBVectorTy &EpilogBBs) {
2445  // For each epilog block, check that the value defined by each instruction
2446  // is used. If not, delete it.
2447  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2448  MBE = EpilogBBs.rend();
2449  MBB != MBE; ++MBB)
2450  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2451  ME = (*MBB)->instr_rend();
2452  MI != ME;) {
2453  // From DeadMachineInstructionElem. Don't delete inline assembly.
2454  if (MI->isInlineAsm()) {
2455  ++MI;
2456  continue;
2457  }
2458  bool SawStore = false;
2459  // Check if it's safe to remove the instruction due to side effects.
2460  // We can, and want to, remove Phis here.
2461  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2462  ++MI;
2463  continue;
2464  }
2465  bool used = true;
2466  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2467  MOE = MI->operands_end();
2468  MOI != MOE; ++MOI) {
2469  if (!MOI->isReg() || !MOI->isDef())
2470  continue;
2471  unsigned reg = MOI->getReg();
2472  // Assume physical registers are used, unless they are marked dead.
2474  used = !MOI->isDead();
2475  if (used)
2476  break;
2477  continue;
2478  }
2479  unsigned realUses = 0;
2480  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2481  EI = MRI.use_end();
2482  UI != EI; ++UI) {
2483  // Check if there are any uses that occur only in the original
2484  // loop. If so, that's not a real use.
2485  if (UI->getParent()->getParent() != BB) {
2486  realUses++;
2487  used = true;
2488  break;
2489  }
2490  }
2491  if (realUses > 0)
2492  break;
2493  used = false;
2494  }
2495  if (!used) {
2496  LIS.RemoveMachineInstrFromMaps(*MI);
2497  MI++->eraseFromParent();
2498  continue;
2499  }
2500  ++MI;
2501  }
2502  // In the kernel block, check if we can remove a Phi that generates a value
2503  // used in an instruction removed in the epilog block.
2504  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2505  BBE = KernelBB->getFirstNonPHI();
2506  BBI != BBE;) {
2507  MachineInstr *MI = &*BBI;
2508  ++BBI;
2509  unsigned reg = MI->getOperand(0).getReg();
2510  if (MRI.use_begin(reg) == MRI.use_end()) {
2511  LIS.RemoveMachineInstrFromMaps(*MI);
2512  MI->eraseFromParent();
2513  }
2514  }
2515 }
2516 
2517 /// For loop carried definitions, we split the lifetime of a virtual register
2518 /// that has uses past the definition in the next iteration. A copy with a new
2519 /// virtual register is inserted before the definition, which helps with
2520 /// generating a better register assignment.
2521 ///
2522 /// v1 = phi(a, v2) v1 = phi(a, v2)
2523 /// v2 = phi(b, v3) v2 = phi(b, v3)
2524 /// v3 = .. v4 = copy v1
2525 /// .. = V1 v3 = ..
2526 /// .. = v4
2527 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2528  MBBVectorTy &EpilogBBs,
2529  SMSchedule &Schedule) {
2531  for (auto &PHI : KernelBB->phis()) {
2532  unsigned Def = PHI.getOperand(0).getReg();
2533  // Check for any Phi definition that used as an operand of another Phi
2534  // in the same block.
2535  for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
2536  E = MRI.use_instr_end();
2537  I != E; ++I) {
2538  if (I->isPHI() && I->getParent() == KernelBB) {
2539  // Get the loop carried definition.
2540  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
2541  if (!LCDef)
2542  continue;
2543  MachineInstr *MI = MRI.getVRegDef(LCDef);
2544  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2545  continue;
2546  // Search through the rest of the block looking for uses of the Phi
2547  // definition. If one occurs, then split the lifetime.
2548  unsigned SplitReg = 0;
2549  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2550  KernelBB->instr_end()))
2551  if (BBJ.readsRegister(Def)) {
2552  // We split the lifetime when we find the first use.
2553  if (SplitReg == 0) {
2554  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2555  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2556  TII->get(TargetOpcode::COPY), SplitReg)
2557  .addReg(Def);
2558  }
2559  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2560  }
2561  if (!SplitReg)
2562  continue;
2563  // Search through each of the epilog blocks for any uses to be renamed.
2564  for (auto &Epilog : EpilogBBs)
2565  for (auto &I : *Epilog)
2566  if (I.readsRegister(Def))
2567  I.substituteRegister(Def, SplitReg, 0, *TRI);
2568  break;
2569  }
2570  }
2571  }
2572 }
2573 
2574 /// Remove the incoming block from the Phis in a basic block.
2575 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2576  for (MachineInstr &MI : *BB) {
2577  if (!MI.isPHI())
2578  break;
2579  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2580  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2581  MI.RemoveOperand(i + 1);
2582  MI.RemoveOperand(i);
2583  break;
2584  }
2585  }
2586 }
2587 
2588 /// Create branches from each prolog basic block to the appropriate epilog
2589 /// block. These edges are needed if the loop ends before reaching the
2590 /// kernel.
2591 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
2592  MachineBasicBlock *KernelBB,
2593  MBBVectorTy &EpilogBBs,
2594  SMSchedule &Schedule, ValueMapTy *VRMap) {
2595  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2596  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2597  MachineInstr *Cmp = Pass.LI.LoopCompare;
2598  MachineBasicBlock *LastPro = KernelBB;
2599  MachineBasicBlock *LastEpi = KernelBB;
2600 
2601  // Start from the blocks connected to the kernel and work "out"
2602  // to the first prolog and the last epilog blocks.
2604  unsigned MaxIter = PrologBBs.size() - 1;
2605  unsigned LC = UINT_MAX;
2606  unsigned LCMin = UINT_MAX;
2607  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
2608  // Add branches to the prolog that go to the corresponding
2609  // epilog, and the fall-thru prolog/kernel block.
2610  MachineBasicBlock *Prolog = PrologBBs[j];
2611  MachineBasicBlock *Epilog = EpilogBBs[i];
2612  // We've executed one iteration, so decrement the loop count and check for
2613  // the loop end.
2615  // Check if the LOOP0 has already been removed. If so, then there is no need
2616  // to reduce the trip count.
2617  if (LC != 0)
2618  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
2619  MaxIter);
2620 
2621  // Record the value of the first trip count, which is used to determine if
2622  // branches and blocks can be removed for constant trip counts.
2623  if (LCMin == UINT_MAX)
2624  LCMin = LC;
2625 
2626  unsigned numAdded = 0;
2628  Prolog->addSuccessor(Epilog);
2629  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
2630  } else if (j >= LCMin) {
2631  Prolog->addSuccessor(Epilog);
2632  Prolog->removeSuccessor(LastPro);
2633  LastEpi->removeSuccessor(Epilog);
2634  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
2635  removePhis(Epilog, LastEpi);
2636  // Remove the blocks that are no longer referenced.
2637  if (LastPro != LastEpi) {
2638  LastEpi->clear();
2639  LastEpi->eraseFromParent();
2640  }
2641  LastPro->clear();
2642  LastPro->eraseFromParent();
2643  } else {
2644  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
2645  removePhis(Epilog, Prolog);
2646  }
2647  LastPro = Prolog;
2648  LastEpi = Epilog;
2650  E = Prolog->instr_rend();
2651  I != E && numAdded > 0; ++I, --numAdded)
2652  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
2653  }
2654 }
2655 
2656 /// Return true if we can compute the amount the instruction changes
2657 /// during each iteration. Set Delta to the amount of the change.
2658 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2660  MachineOperand *BaseOp;
2661  int64_t Offset;
2662  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2663  return false;
2664 
2665  if (!BaseOp->isReg())
2666  return false;
2667 
2668  unsigned BaseReg = BaseOp->getReg();
2669 
2671  // Check if there is a Phi. If so, get the definition in the loop.
2672  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2673  if (BaseDef && BaseDef->isPHI()) {
2674  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2675  BaseDef = MRI.getVRegDef(BaseReg);
2676  }
2677  if (!BaseDef)
2678  return false;
2679 
2680  int D = 0;
2681  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2682  return false;
2683 
2684  Delta = D;
2685  return true;
2686 }
2687 
2688 /// Update the memory operand with a new offset when the pipeliner
2689 /// generates a new copy of the instruction that refers to a
2690 /// different memory location.
2691 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
2692  MachineInstr &OldMI, unsigned Num) {
2693  if (Num == 0)
2694  return;
2695  // If the instruction has memory operands, then adjust the offset
2696  // when the instruction appears in different stages.
2697  if (NewMI.memoperands_empty())
2698  return;
2700  for (MachineMemOperand *MMO : NewMI.memoperands()) {
2701  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
2702  (!MMO->getValue())) {
2703  NewMMOs.push_back(MMO);
2704  continue;
2705  }
2706  unsigned Delta;
2707  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
2708  int64_t AdjOffset = Delta * Num;
2709  NewMMOs.push_back(
2710  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
2711  } else {
2712  NewMMOs.push_back(
2714  }
2715  }
2716  NewMI.setMemRefs(MF, NewMMOs);
2717 }
2718 
2719 /// Clone the instruction for the new pipelined loop and update the
2720 /// memory operands, if needed.
2721 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
2722  unsigned CurStageNum,
2723  unsigned InstStageNum) {
2724  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2725  // Check for tied operands in inline asm instructions. This should be handled
2726  // elsewhere, but I'm not sure of the best solution.
2727  if (OldMI->isInlineAsm())
2728  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
2729  const auto &MO = OldMI->getOperand(i);
2730  if (MO.isReg() && MO.isUse())
2731  break;
2732  unsigned UseIdx;
2733  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
2734  NewMI->tieOperands(i, UseIdx);
2735  }
2736  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2737  return NewMI;
2738 }
2739 
2740 /// Clone the instruction for the new pipelined loop. If needed, this
2741 /// function updates the instruction using the values saved in the
2742 /// InstrChanges structure.
2743 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
2744  unsigned CurStageNum,
2745  unsigned InstStageNum,
2746  SMSchedule &Schedule) {
2747  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2749  InstrChanges.find(getSUnit(OldMI));
2750  if (It != InstrChanges.end()) {
2751  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2752  unsigned BasePos, OffsetPos;
2753  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
2754  return nullptr;
2755  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
2756  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
2757  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
2758  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
2759  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2760  }
2761  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2762  return NewMI;
2763 }
2764 
2765 /// Update the machine instruction with new virtual registers. This
2766 /// function may change the defintions and/or uses.
2767 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
2768  unsigned CurStageNum,
2769  unsigned InstrStageNum,
2770  SMSchedule &Schedule,
2771  ValueMapTy *VRMap) {
2772  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
2773  MachineOperand &MO = NewMI->getOperand(i);
2775  continue;
2776  unsigned reg = MO.getReg();
2777  if (MO.isDef()) {
2778  // Create a new virtual register for the definition.
2779  const TargetRegisterClass *RC = MRI.getRegClass(reg);
2780  unsigned NewReg = MRI.createVirtualRegister(RC);
2781  MO.setReg(NewReg);
2782  VRMap[CurStageNum][reg] = NewReg;
2783  if (LastDef)
2784  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
2785  } else if (MO.isUse()) {
2786  MachineInstr *Def = MRI.getVRegDef(reg);
2787  // Compute the stage that contains the last definition for instruction.
2788  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
2789  unsigned StageNum = CurStageNum;
2790  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
2791  // Compute the difference in stages between the defintion and the use.
2792  unsigned StageDiff = (InstrStageNum - DefStageNum);
2793  // Make an adjustment to get the last definition.
2794  StageNum -= StageDiff;
2795  }
2796  if (VRMap[StageNum].count(reg))
2797  MO.setReg(VRMap[StageNum][reg]);
2798  }
2799  }
2800 }
2801 
2802 /// Return the instruction in the loop that defines the register.
2803 /// If the definition is a Phi, then follow the Phi operand to
2804 /// the instruction in the loop.
2805 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
2807  MachineInstr *Def = MRI.getVRegDef(Reg);
2808  while (Def->isPHI()) {
2809  if (!Visited.insert(Def).second)
2810  break;
2811  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2812  if (Def->getOperand(i + 1).getMBB() == BB) {
2813  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2814  break;
2815  }
2816  }
2817  return Def;
2818 }
2819 
2820 /// Return the new name for the value from the previous stage.
2821 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
2822  unsigned LoopVal, unsigned LoopStage,
2823  ValueMapTy *VRMap,
2824  MachineBasicBlock *BB) {
2825  unsigned PrevVal = 0;
2826  if (StageNum > PhiStage) {
2827  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
2828  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
2829  // The name is defined in the previous stage.
2830  PrevVal = VRMap[StageNum - 1][LoopVal];
2831  else if (VRMap[StageNum].count(LoopVal))
2832  // The previous name is defined in the current stage when the instruction
2833  // order is swapped.
2834  PrevVal = VRMap[StageNum][LoopVal];
2835  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
2836  // The loop value hasn't yet been scheduled.
2837  PrevVal = LoopVal;
2838  else if (StageNum == PhiStage + 1)
2839  // The loop value is another phi, which has not been scheduled.
2840  PrevVal = getInitPhiReg(*LoopInst, BB);
2841  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
2842  // The loop value is another phi, which has been scheduled.
2843  PrevVal =
2844  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
2845  LoopStage, VRMap, BB);
2846  }
2847  return PrevVal;
2848 }
2849 
2850 /// Rewrite the Phi values in the specified block to use the mappings
2851 /// from the initial operand. Once the Phi is scheduled, we switch
2852 /// to using the loop value instead of the Phi value, so those names
2853 /// do not need to be rewritten.
2854 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
2855  unsigned StageNum,
2856  SMSchedule &Schedule,
2857  ValueMapTy *VRMap,
2858  InstrMapTy &InstrMap) {
2859  for (auto &PHI : BB->phis()) {
2860  unsigned InitVal = 0;
2861  unsigned LoopVal = 0;
2862  getPhiRegs(PHI, BB, InitVal, LoopVal);
2863  unsigned PhiDef = PHI.getOperand(0).getReg();
2864 
2865  unsigned PhiStage =
2866  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
2867  unsigned LoopStage =
2868  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2869  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
2870  if (NumPhis > StageNum)
2871  NumPhis = StageNum;
2872  for (unsigned np = 0; np <= NumPhis; ++np) {
2873  unsigned NewVal =
2874  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
2875  if (!NewVal)
2876  NewVal = InitVal;
2877  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
2878  PhiDef, NewVal);
2879  }
2880  }
2881 }
2882 
2883 /// Rewrite a previously scheduled instruction to use the register value
2884 /// from the new instruction. Make sure the instruction occurs in the
2885 /// basic block, and we don't change the uses in the new instruction.
2886 void SwingSchedulerDAG::rewriteScheduledInstr(
2887  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
2888  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
2889  unsigned NewReg, unsigned PrevReg) {
2890  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
2891  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
2892  // Rewrite uses that have been scheduled already to use the new
2893  // Phi register.
2894  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
2895  EI = MRI.use_end();
2896  UI != EI;) {
2897  MachineOperand &UseOp = *UI;
2898  MachineInstr *UseMI = UseOp.getParent();
2899  ++UI;
2900  if (UseMI->getParent() != BB)
2901  continue;
2902  if (UseMI->isPHI()) {
2903  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
2904  continue;
2905  if (getLoopPhiReg(*UseMI, BB) != OldReg)
2906  continue;
2907  }
2908  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
2909  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
2910  SUnit *OrigMISU = getSUnit(OrigInstr->second);
2911  int StageSched = Schedule.stageScheduled(OrigMISU);
2912  int CycleSched = Schedule.cycleScheduled(OrigMISU);
2913  unsigned ReplaceReg = 0;
2914  // This is the stage for the scheduled instruction.
2915  if (StagePhi == StageSched && Phi->isPHI()) {
2916  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
2917  if (PrevReg && InProlog)
2918  ReplaceReg = PrevReg;
2919  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
2920  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
2921  ReplaceReg = PrevReg;
2922  else
2923  ReplaceReg = NewReg;
2924  }
2925  // The scheduled instruction occurs before the scheduled Phi, and the
2926  // Phi is not loop carried.
2927  if (!InProlog && StagePhi + 1 == StageSched &&
2928  !Schedule.isLoopCarried(this, *Phi))
2929  ReplaceReg = NewReg;
2930  if (StagePhi > StageSched && Phi->isPHI())
2931  ReplaceReg = NewReg;
2932  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
2933  ReplaceReg = NewReg;
2934  if (ReplaceReg) {
2935  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
2936  UseOp.setReg(ReplaceReg);
2937  }
2938  }
2939 }
2940 
2941 /// Check if we can change the instruction to use an offset value from the
2942 /// previous iteration. If so, return true and set the base and offset values
2943 /// so that we can rewrite the load, if necessary.
2944 /// v1 = Phi(v0, v3)
2945 /// v2 = load v1, 0
2946 /// v3 = post_store v1, 4, x
2947 /// This function enables the load to be rewritten as v2 = load v3, 4.
2948 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2949  unsigned &BasePos,
2950  unsigned &OffsetPos,
2951  unsigned &NewBase,
2952  int64_t &Offset) {
2953  // Get the load instruction.
2954  if (TII->isPostIncrement(*MI))
2955  return false;
2956  unsigned BasePosLd, OffsetPosLd;
2957  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2958  return false;
2959  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
2960 
2961  // Look for the Phi instruction.
2963  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2964  if (!Phi || !Phi->isPHI())
2965  return false;
2966  // Get the register defined in the loop block.
2967  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2968  if (!PrevReg)
2969  return false;
2970 
2971  // Check for the post-increment load/store instruction.
2972  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2973  if (!PrevDef || PrevDef == MI)
2974  return false;
2975 
2976  if (!TII->isPostIncrement(*PrevDef))
2977  return false;
2978 
2979  unsigned BasePos1 = 0, OffsetPos1 = 0;
2980  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2981  return false;
2982 
2983  // Make sure that the instructions do not access the same memory location in
2984  // the next iteration.
2985  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2986  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2987  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2988  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2989  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2990  MF.DeleteMachineInstr(NewMI);
2991  if (!Disjoint)
2992  return false;
2993 
2994  // Set the return value once we determine that we return true.
2995  BasePos = BasePosLd;
2996  OffsetPos = OffsetPosLd;
2997  NewBase = PrevReg;
2998  Offset = StoreOffset;
2999  return true;
3000 }
3001 
3002 /// Apply changes to the instruction if needed. The changes are need
3003 /// to improve the scheduling and depend up on the final schedule.
3005  SMSchedule &Schedule) {
3006  SUnit *SU = getSUnit(MI);
3008  InstrChanges.find(SU);
3009  if (It != InstrChanges.end()) {
3010  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3011  unsigned BasePos, OffsetPos;
3012  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3013  return;
3014  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3015  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3016  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3017  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3018  int BaseStageNum = Schedule.stageScheduled(SU);
3019  int BaseCycleNum = Schedule.cycleScheduled(SU);
3020  if (BaseStageNum < DefStageNum) {
3021  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3022  int OffsetDiff = DefStageNum - BaseStageNum;
3023  if (DefCycleNum < BaseCycleNum) {
3024  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3025  if (OffsetDiff > 0)
3026  --OffsetDiff;
3027  }
3028  int64_t NewOffset =
3029  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3030  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3031  SU->setInstr(NewMI);
3032  MISUnitMap[NewMI] = SU;
3033  NewMIs.insert(NewMI);
3034  }
3035  }
3036 }
3037 
3038 /// Return true for an order or output dependence that is loop carried
3039 /// potentially. A dependence is loop carried if the destination defines a valu
3040 /// that may be used or defined by the source in a subsequent iteration.
3042  bool isSucc) {
3043  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3044  Dep.isArtificial())
3045  return false;
3046 
3047  if (!SwpPruneLoopCarried)
3048  return true;
3049 
3050  if (Dep.getKind() == SDep::Output)
3051  return true;
3052 
3053  MachineInstr *SI = Source->getInstr();
3054  MachineInstr *DI = Dep.getSUnit()->getInstr();
3055  if (!isSucc)
3056  std::swap(SI, DI);
3057  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3058 
3059  // Assume ordered loads and stores may have a loop carried dependence.
3060  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3062  return true;
3063 
3064  // Only chain dependences between a load and store can be loop carried.
3065  if (!DI->mayStore() || !SI->mayLoad())
3066  return false;
3067 
3068  unsigned DeltaS, DeltaD;
3069  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3070  return true;
3071 
3072  MachineOperand *BaseOpS, *BaseOpD;
3073  int64_t OffsetS, OffsetD;
3075  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
3076  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
3077  return true;
3078 
3079  if (!BaseOpS->isIdenticalTo(*BaseOpD))
3080  return true;
3081 
3082  // Check that the base register is incremented by a constant value for each
3083  // iteration.
3084  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
3085  if (!Def || !Def->isPHI())
3086  return true;
3087  unsigned InitVal = 0;
3088  unsigned LoopVal = 0;
3089  getPhiRegs(*Def, BB, InitVal, LoopVal);
3090  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3091  int D = 0;
3092  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3093  return true;
3094 
3095  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3096  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3097 
3098  // This is the main test, which checks the offset values and the loop
3099  // increment value to determine if the accesses may be loop carried.
3100  if (OffsetS >= OffsetD)
3101  return OffsetS + AccessSizeS > DeltaS;
3102  else
3103  return OffsetD + AccessSizeD > DeltaD;
3104 
3105  return true;
3106 }
3107 
3108 void SwingSchedulerDAG::postprocessDAG() {
3109  for (auto &M : Mutations)
3110  M->apply(this);
3111 }
3112 
3113 /// Try to schedule the node at the specified StartCycle and continue
3114 /// until the node is schedule or the EndCycle is reached. This function
3115 /// returns true if the node is scheduled. This routine may search either
3116 /// forward or backward for a place to insert the instruction based upon
3117 /// the relative values of StartCycle and EndCycle.
3118 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3119  bool forward = true;
3120  if (StartCycle > EndCycle)
3121  forward = false;
3122 
3123  // The terminating condition depends on the direction.
3124  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3125  for (int curCycle = StartCycle; curCycle != termCycle;
3126  forward ? ++curCycle : --curCycle) {
3127 
3128  // Add the already scheduled instructions at the specified cycle to the DFA.
3129  Resources->clearResources();
3130  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3131  checkCycle <= LastCycle; checkCycle += II) {
3132  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3133 
3134  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3135  E = cycleInstrs.end();
3136  I != E; ++I) {
3137  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3138  continue;
3139  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3140  "These instructions have already been scheduled.");
3141  Resources->reserveResources(*(*I)->getInstr());
3142  }
3143  }
3144  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3145  Resources->canReserveResources(*SU->getInstr())) {
3146  LLVM_DEBUG({
3147  dbgs() << "\tinsert at cycle " << curCycle << " ";
3148  SU->getInstr()->dump();
3149  });
3150 
3151  ScheduledInstrs[curCycle].push_back(SU);
3152  InstrToCycle.insert(std::make_pair(SU, curCycle));
3153  if (curCycle > LastCycle)
3154  LastCycle = curCycle;
3155  if (curCycle < FirstCycle)
3156  FirstCycle = curCycle;
3157  return true;
3158  }
3159  LLVM_DEBUG({
3160  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3161  SU->getInstr()->dump();
3162  });
3163  }
3164  return false;
3165 }
3166 
3167 // Return the cycle of the earliest scheduled instruction in the chain.
3169  SmallPtrSet<SUnit *, 8> Visited;
3170  SmallVector<SDep, 8> Worklist;
3171  Worklist.push_back(Dep);
3172  int EarlyCycle = INT_MAX;
3173  while (!Worklist.empty()) {
3174  const SDep &Cur = Worklist.pop_back_val();
3175  SUnit *PrevSU = Cur.getSUnit();
3176  if (Visited.count(PrevSU))
3177  continue;
3178  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3179  if (it == InstrToCycle.end())
3180  continue;
3181  EarlyCycle = std::min(EarlyCycle, it->second);
3182  for (const auto &PI : PrevSU->Preds)
3183  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3184  Worklist.push_back(PI);
3185  Visited.insert(PrevSU);
3186  }
3187  return EarlyCycle;
3188 }
3189 
3190 // Return the cycle of the latest scheduled instruction in the chain.
3192  SmallPtrSet<SUnit *, 8> Visited;
3193  SmallVector<SDep, 8> Worklist;
3194  Worklist.push_back(Dep);
3195  int LateCycle = INT_MIN;
3196  while (!Worklist.empty()) {
3197  const SDep &Cur = Worklist.pop_back_val();
3198  SUnit *SuccSU = Cur.getSUnit();
3199  if (Visited.count(SuccSU))
3200  continue;
3201  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3202  if (it == InstrToCycle.end())
3203  continue;
3204  LateCycle = std::max(LateCycle, it->second);
3205  for (const auto &SI : SuccSU->Succs)
3206  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3207  Worklist.push_back(SI);
3208  Visited.insert(SuccSU);
3209  }
3210  return LateCycle;
3211 }
3212 
3213 /// If an instruction has a use that spans multiple iterations, then
3214 /// return true. These instructions are characterized by having a back-ege
3215 /// to a Phi, which contains a reference to another Phi.
3217  for (auto &P : SU->Preds)
3218  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3219  for (auto &S : P.getSUnit()->Succs)
3220  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3221  return P.getSUnit();
3222  return nullptr;
3223 }
3224 
3225 /// Compute the scheduling start slot for the instruction. The start slot
3226 /// depends on any predecessor or successor nodes scheduled already.
3227 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3228  int *MinEnd, int *MaxStart, int II,
3229  SwingSchedulerDAG *DAG) {
3230  // Iterate over each instruction that has been scheduled already. The start
3231  // slot computation depends on whether the previously scheduled instruction
3232  // is a predecessor or successor of the specified instruction.
3233  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3234 
3235  // Iterate over each instruction in the current cycle.
3236  for (SUnit *I : getInstructions(cycle)) {
3237  // Because we're processing a DAG for the dependences, we recognize
3238  // the back-edge in recurrences by anti dependences.
3239  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3240  const SDep &Dep = SU->Preds[i];
3241  if (Dep.getSUnit() == I) {
3242  if (!DAG->isBackedge(SU, Dep)) {
3243  int EarlyStart = cycle + Dep.getLatency() -
3244  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3245  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3246  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3247  int End = earliestCycleInChain(Dep) + (II - 1);
3248  *MinEnd = std::min(*MinEnd, End);
3249  }
3250  } else {
3251  int LateStart = cycle - Dep.getLatency() +
3252  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3253  *MinLateStart = std::min(*MinLateStart, LateStart);
3254  }
3255  }
3256  // For instruction that requires multiple iterations, make sure that
3257  // the dependent instruction is not scheduled past the definition.
3258  SUnit *BE = multipleIterations(I, DAG);
3259  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3260  !SU->isPred(I))
3261  *MinLateStart = std::min(*MinLateStart, cycle);
3262  }
3263  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3264  if (SU->Succs[i].getSUnit() == I) {
3265  const SDep &Dep = SU->Succs[i];
3266  if (!DAG->isBackedge(SU, Dep)) {
3267  int LateStart = cycle - Dep.getLatency() +
3268  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3269  *MinLateStart = std::min(*MinLateStart, LateStart);
3270  if (DAG->isLoopCarriedDep(SU, Dep)) {
3271  int Start = latestCycleInChain(Dep) + 1 - II;
3272  *MaxStart = std::max(*MaxStart, Start);
3273  }
3274  } else {
3275  int EarlyStart = cycle + Dep.getLatency() -
3276  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3277  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3278  }
3279  }
3280  }
3281  }
3282  }
3283 }
3284 
3285 /// Order the instructions within a cycle so that the definitions occur
3286 /// before the uses. Returns true if the instruction is added to the start
3287 /// of the list, or false if added to the end.
3289  std::deque<SUnit *> &Insts) {
3290  MachineInstr *MI = SU->getInstr();
3291  bool OrderBeforeUse = false;
3292  bool OrderAfterDef = false;
3293  bool OrderBeforeDef = false;
3294  unsigned MoveDef = 0;
3295  unsigned MoveUse = 0;
3296  int StageInst1 = stageScheduled(SU);
3297 
3298  unsigned Pos = 0;
3299  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3300  ++I, ++Pos) {
3301  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3302  MachineOperand &MO = MI->getOperand(i);
3304  continue;
3305 
3306  unsigned Reg = MO.getReg();
3307  unsigned BasePos, OffsetPos;
3308  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3309  if (MI->getOperand(BasePos).getReg() == Reg)
3310  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3311  Reg = NewReg;
3312  bool Reads, Writes;
3313  std::tie(Reads, Writes) =
3314  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3315  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3316  OrderBeforeUse = true;
3317  if (MoveUse == 0)
3318  MoveUse = Pos;
3319  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3320  // Add the instruction after the scheduled instruction.
3321  OrderAfterDef = true;
3322  MoveDef = Pos;
3323  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3324  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3325  OrderBeforeUse = true;
3326  if (MoveUse == 0)
3327  MoveUse = Pos;
3328  } else {
3329  OrderAfterDef = true;
3330  MoveDef = Pos;
3331  }
3332  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3333  OrderBeforeUse = true;
3334  if (MoveUse == 0)
3335  MoveUse = Pos;
3336  if (MoveUse != 0) {
3337  OrderAfterDef = true;
3338  MoveDef = Pos - 1;
3339  }
3340  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3341  // Add the instruction before the scheduled instruction.
3342  OrderBeforeUse = true;
3343  if (MoveUse == 0)
3344  MoveUse = Pos;
3345  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3346  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3347  if (MoveUse == 0) {
3348  OrderBeforeDef = true;
3349  MoveUse = Pos;
3350  }
3351  }
3352  }
3353  // Check for order dependences between instructions. Make sure the source
3354  // is ordered before the destination.
3355  for (auto &S : SU->Succs) {
3356  if (S.getSUnit() != *I)
3357  continue;
3358  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3359  OrderBeforeUse = true;
3360  if (Pos < MoveUse)
3361  MoveUse = Pos;
3362  }
3363  }
3364  for (auto &P : SU->Preds) {
3365  if (P.getSUnit() != *I)
3366  continue;
3367  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3368  OrderAfterDef = true;
3369  MoveDef = Pos;
3370  }
3371  }
3372  }
3373 
3374  // A circular dependence.
3375  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3376  OrderBeforeUse = false;
3377 
3378  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3379  // to a loop-carried dependence.
3380  if (OrderBeforeDef)
3381  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3382 
3383  // The uncommon case when the instruction order needs to be updated because
3384  // there is both a use and def.
3385  if (OrderBeforeUse && OrderAfterDef) {
3386  SUnit *UseSU = Insts.at(MoveUse);
3387  SUnit *DefSU = Insts.at(MoveDef);
3388  if (MoveUse > MoveDef) {
3389  Insts.erase(Insts.begin() + MoveUse);
3390  Insts.erase(Insts.begin() + MoveDef);
3391  } else {
3392  Insts.erase(Insts.begin() + MoveDef);
3393  Insts.erase(Insts.begin() + MoveUse);
3394  }
3395  orderDependence(SSD, UseSU, Insts);
3396  orderDependence(SSD, SU, Insts);
3397  orderDependence(SSD, DefSU, Insts);
3398  return;
3399  }
3400  // Put the new instruction first if there is a use in the list. Otherwise,
3401  // put it at the end of the list.
3402  if (OrderBeforeUse)
3403  Insts.push_front(SU);
3404  else
3405  Insts.push_back(SU);
3406 }
3407 
3408 /// Return true if the scheduled Phi has a loop carried operand.
3410  if (!Phi.isPHI())
3411  return false;
3412  assert(Phi.isPHI() && "Expecting a Phi.");
3413  SUnit *DefSU = SSD->getSUnit(&Phi);
3414  unsigned DefCycle = cycleScheduled(DefSU);
3415  int DefStage = stageScheduled(DefSU);
3416 
3417  unsigned InitVal = 0;
3418  unsigned LoopVal = 0;
3419  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3420  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3421  if (!UseSU)
3422  return true;
3423  if (UseSU->getInstr()->isPHI())
3424  return true;
3425  unsigned LoopCycle = cycleScheduled(UseSU);
3426  int LoopStage = stageScheduled(UseSU);
3427  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3428 }
3429 
3430 /// Return true if the instruction is a definition that is loop carried
3431 /// and defines the use on the next iteration.
3432 /// v1 = phi(v2, v3)
3433 /// (Def) v3 = op v1
3434 /// (MO) = v1
3435 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3436 /// register.
3439  if (!MO.isReg())
3440  return false;
3441  if (Def->isPHI())
3442  return false;
3443  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3444  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3445  return false;
3446  if (!isLoopCarried(SSD, *Phi))
3447  return false;
3448  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3449  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3450  MachineOperand &DMO = Def->getOperand(i);
3451  if (!DMO.isReg() || !DMO.isDef())
3452  continue;
3453  if (DMO.getReg() == LoopReg)
3454  return true;
3455  }
3456  return false;
3457 }
3458 
3459 // Check if the generated schedule is valid. This function checks if
3460 // an instruction that uses a physical register is scheduled in a
3461 // different stage than the definition. The pipeliner does not handle
3462 // physical register values that may cross a basic block boundary.
3464  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3465  SUnit &SU = SSD->SUnits[i];
3466  if (!SU.hasPhysRegDefs)
3467  continue;
3468  int StageDef = stageScheduled(&SU);
3469  assert(StageDef != -1 && "Instruction should have been scheduled.");
3470  for (auto &SI : SU.Succs)
3471  if (SI.isAssignedRegDep())
3472  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3473  if (stageScheduled(SI.getSUnit()) != StageDef)
3474  return false;
3475  }
3476  return true;
3477 }
3478 
3479 /// A property of the node order in swing-modulo-scheduling is
3480 /// that for nodes outside circuits the following holds:
3481 /// none of them is scheduled after both a successor and a
3482 /// predecessor.
3483 /// The method below checks whether the property is met.
3484 /// If not, debug information is printed and statistics information updated.
3485 /// Note that we do not use an assert statement.
3486 /// The reason is that although an invalid node oder may prevent
3487 /// the pipeliner from finding a pipelined schedule for arbitrary II,
3488 /// it does not lead to the generation of incorrect code.
3489 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3490 
3491  // a sorted vector that maps each SUnit to its index in the NodeOrder
3492  typedef std::pair<SUnit *, unsigned> UnitIndex;
3493  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3494 
3495  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3496  Indices.push_back(std::make_pair(NodeOrder[i], i));
3497 
3498  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3499  return std::get<0>(i1) < std::get<0>(i2);
3500  };
3501 
3502  // sort, so that we can perform a binary search
3503  llvm::sort(Indices, CompareKey);
3504 
3505  bool Valid = true;
3506  (void)Valid;
3507  // for each SUnit in the NodeOrder, check whether
3508  // it appears after both a successor and a predecessor
3509  // of the SUnit. If this is the case, and the SUnit
3510  // is not part of circuit, then the NodeOrder is not
3511  // valid.
3512  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3513  SUnit *SU = NodeOrder[i];
3514  unsigned Index = i;
3515 
3516  bool PredBefore = false;
3517  bool SuccBefore = false;
3518 
3519  SUnit *Succ;
3520  SUnit *Pred;
3521  (void)Succ;
3522  (void)Pred;
3523 
3524  for (SDep &PredEdge : SU->Preds) {
3525  SUnit *PredSU = PredEdge.getSUnit();
3526  unsigned PredIndex =
3527  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
3528  std::make_pair(PredSU, 0), CompareKey));
3529  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3530  PredBefore = true;
3531  Pred = PredSU;
3532  break;
3533  }
3534  }
3535 
3536  for (SDep &SuccEdge : SU->Succs) {
3537  SUnit *SuccSU = SuccEdge.getSUnit();
3538  unsigned SuccIndex =
3539  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
3540  std::make_pair(SuccSU, 0), CompareKey));
3541  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3542  SuccBefore = true;
3543  Succ = SuccSU;
3544  break;
3545  }
3546  }
3547 
3548  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3549  // instructions in circuits are allowed to be scheduled
3550  // after both a successor and predecessor.
3551  bool InCircuit = std::any_of(
3552  Circuits.begin(), Circuits.end(),
3553  [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3554  if (InCircuit)
3555  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3556  else {
3557  Valid = false;
3558  NumNodeOrderIssues++;
3559  LLVM_DEBUG(dbgs() << "Predecessor ";);
3560  }
3561  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3562  << " are scheduled before node " << SU->NodeNum
3563  << "\n";);
3564  }
3565  }
3566 
3567  LLVM_DEBUG({
3568  if (!Valid)
3569  dbgs() << "Invalid node order found!\n";
3570  });
3571 }
3572 
3573 /// Attempt to fix the degenerate cases when the instruction serialization
3574 /// causes the register lifetimes to overlap. For example,
3575 /// p' = store_pi(p, b)
3576 /// = load p, offset
3577 /// In this case p and p' overlap, which means that two registers are needed.
3578 /// Instead, this function changes the load to use p' and updates the offset.
3579 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3580  unsigned OverlapReg = 0;
3581  unsigned NewBaseReg = 0;
3582  for (SUnit *SU : Instrs) {
3583  MachineInstr *MI = SU->getInstr();
3584  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3585  const MachineOperand &MO = MI->getOperand(i);
3586  // Look for an instruction that uses p. The instruction occurs in the
3587  // same cycle but occurs later in the serialized order.
3588  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3589  // Check that the instruction appears in the InstrChanges structure,
3590  // which contains instructions that can have the offset updated.
3592  InstrChanges.find(SU);
3593  if (It != InstrChanges.end()) {
3594  unsigned BasePos, OffsetPos;
3595  // Update the base register and adjust the offset.
3596  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3597  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3598  NewMI->getOperand(BasePos).setReg(NewBaseReg);
3599  int64_t NewOffset =
3600  MI->getOperand(OffsetPos).getImm() - It->second.second;
3601  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3602  SU->setInstr(NewMI);
3603  MISUnitMap[NewMI] = SU;
3604  NewMIs.insert(NewMI);
3605  }
3606  }
3607  OverlapReg = 0;
3608  NewBaseReg = 0;
3609  break;
3610  }
3611  // Look for an instruction of the form p' = op(p), which uses and defines
3612  // two virtual registers that get allocated to the same physical register.
3613  unsigned TiedUseIdx = 0;
3614  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3615  // OverlapReg is p in the example above.
3616  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3617  // NewBaseReg is p' in the example above.
3618  NewBaseReg = MI->getOperand(i).getReg();
3619  break;
3620  }
3621  }
3622  }
3623 }
3624 
3625 /// After the schedule has been formed, call this function to combine
3626 /// the instructions from the different stages/cycles. That is, this
3627 /// function creates a schedule that represents a single iteration.
3629  // Move all instructions to the first stage from later stages.
3630  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3631  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3632  ++stage) {
3633  std::deque<SUnit *> &cycleInstrs =
3634  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3635  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3636  E = cycleInstrs.rend();
3637  I != E; ++I)
3638  ScheduledInstrs[cycle].push_front(*I);
3639  }
3640  }
3641  // Iterate over the definitions in each instruction, and compute the
3642  // stage difference for each use. Keep the maximum value.
3643  for (auto &I : InstrToCycle) {
3644  int DefStage = stageScheduled(I.first);
3645  MachineInstr *MI = I.first->getInstr();
3646  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3647  MachineOperand &Op = MI->getOperand(i);
3648  if (!Op.isReg() || !Op.isDef())
3649  continue;
3650 
3651  unsigned Reg = Op.getReg();
3652  unsigned MaxDiff = 0;
3653  bool PhiIsSwapped = false;
3654  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3655  EI = MRI.use_end();
3656  UI != EI; ++UI) {
3657  MachineOperand &UseOp = *UI;
3658  MachineInstr *UseMI = UseOp.getParent();
3659  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3660  int UseStage = stageScheduled(SUnitUse);
3661  unsigned Diff = 0;
3662  if (UseStage != -1 && UseStage >= DefStage)
3663  Diff = UseStage - DefStage;
3664  if (MI->isPHI()) {
3665  if (isLoopCarried(SSD, *MI))
3666  ++Diff;
3667  else
3668  PhiIsSwapped = true;
3669  }
3670  MaxDiff = std::max(Diff, MaxDiff);
3671  }
3672  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3673  }
3674  }
3675 
3676  // Erase all the elements in the later stages. Only one iteration should
3677  // remain in the scheduled list, and it contains all the instructions.
3678  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3679  ScheduledInstrs.erase(cycle);
3680 
3681  // Change the registers in instruction as specified in the InstrChanges
3682  // map. We need to use the new registers to create the correct order.
3683  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3684  SUnit *SU = &SSD->SUnits[i];
3685  SSD->applyInstrChange(SU->getInstr(), *this);
3686  }
3687 
3688  // Reorder the instructions in each cycle to fix and improve the
3689  // generated code.
3690  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3691  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3692  std::deque<SUnit *> newOrderPhi;
3693  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3694  SUnit *SU = cycleInstrs[i];
3695  if (SU->getInstr()->isPHI())
3696  newOrderPhi.push_back(SU);
3697  }
3698  std::deque<SUnit *> newOrderI;
3699  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3700  SUnit *SU = cycleInstrs[i];
3701  if (!SU->getInstr()->isPHI())
3702  orderDependence(SSD, SU, newOrderI);
3703  }
3704  // Replace the old order with the new order.
3705  cycleInstrs.swap(newOrderPhi);
3706  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3707  SSD->fixupRegisterOverlaps(cycleInstrs);
3708  }
3709 
3710  LLVM_DEBUG(dump(););
3711 }
3712 
3713 void NodeSet::print(raw_ostream &os) const {
3714  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3715  << " depth " << MaxDepth << " col " << Colocate << "\n";
3716  for (const auto &I : Nodes)
3717  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3718  os << "\n";
3719 }
3720 
3721 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3722 /// Print the schedule information to the given output.
3724  // Iterate over each cycle.
3725  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3726  // Iterate over each instruction in the cycle.
3727  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3728  for (SUnit *CI : cycleInstrs->second) {
3729  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3730  os << "(" << CI->NodeNum << ") ";
3731  CI->getInstr()->print(os);
3732  os << "\n";
3733  }
3734  }
3735 }
3736 
3737 /// Utility function used for debugging to print the schedule.
3740 
3741 #endif
3742 
3743 
3744 
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:754
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:454
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:89
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:633
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:270
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
bool empty() const
cl::opt< bool > SwpEnableCopyToPhi
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool isValidSchedule(SwingSchedulerDAG *SSD)
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
const MachineLoopInfo * MLI
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool isInlineAsm() const
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
block Block Frequency true
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
bool isPHI() const
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:264
SUnit * getNode(unsigned i) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:260
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
virtual bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:263
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:370
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:265
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1186
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:284
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:100
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
SlotIndexes pass.
Definition: SlotIndexes.h:331
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
void dump() const
Definition: Pass.cpp:130
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1282
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
SUnit * getSUnit() const
Definition: ScheduleDAG.h:484
unsigned count(SUnit *SU) const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:148
bool insert(SUnit *SU)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:348
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1252
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:50
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:583
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:577
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:820
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:377
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
MachineFunction * MF
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:516
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
MachineInstrBuilder & UseMI
unsigned getStagesForPhi(int Reg)
The number of stages for a Phi is a little different than other instructions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:549
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:181
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
LLVM_DUMP_METHOD void dump() const
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:57
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:70
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:520
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:747
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
This class represents the scheduled code.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:266
unsigned getStagesForReg(int Reg, unsigned CurStage)
Return the max.
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:64
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
iterator begin() const
Definition: SmallPtrSet.h:397
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:124
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:490
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
iterator end() const
Definition: SmallPtrSet.h:402
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:268
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:807
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:546
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:72
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr&#39;s memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:453
A vector that has set insertion semantics.
Definition: SetVector.h:41
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:261
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void setPos(MachineBasicBlock::const_iterator Pos)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:566
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:246
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:435