LLVM  8.0.1
LoopInterchange.cpp
Go to the documentation of this file.
1 //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This Pass handles loop interchange transform.
11 // This pass interchanges loops to provide a more cache-friendly memory access
12 // patterns.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DiagnosticInfo.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/InstrTypes.h"
32 #include "llvm/IR/Instruction.h"
33 #include "llvm/IR/Instructions.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/User.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Pass.h"
38 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/Debug.h"
43 #include "llvm/Transforms/Scalar.h"
44 #include "llvm/Transforms/Utils.h"
47 #include <cassert>
48 #include <utility>
49 #include <vector>
50 
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "loop-interchange"
54 
55 STATISTIC(LoopsInterchanged, "Number of loops interchanged");
56 
58  "loop-interchange-threshold", cl::init(0), cl::Hidden,
59  cl::desc("Interchange if you gain more than this number"));
60 
61 namespace {
62 
63 using LoopVector = SmallVector<Loop *, 8>;
64 
65 // TODO: Check if we can use a sparse matrix here.
66 using CharMatrix = std::vector<std::vector<char>>;
67 
68 } // end anonymous namespace
69 
70 // Maximum number of dependencies that can be handled in the dependency matrix.
71 static const unsigned MaxMemInstrCount = 100;
72 
73 // Maximum loop depth supported.
74 static const unsigned MaxLoopNestDepth = 10;
75 
76 #ifdef DUMP_DEP_MATRICIES
77 static void printDepMatrix(CharMatrix &DepMatrix) {
78  for (auto &Row : DepMatrix) {
79  for (auto D : Row)
80  LLVM_DEBUG(dbgs() << D << " ");
81  LLVM_DEBUG(dbgs() << "\n");
82  }
83 }
84 #endif
85 
86 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
87  Loop *L, DependenceInfo *DI) {
88  using ValueVector = SmallVector<Value *, 16>;
89 
90  ValueVector MemInstr;
91 
92  // For each block.
93  for (BasicBlock *BB : L->blocks()) {
94  // Scan the BB and collect legal loads and stores.
95  for (Instruction &I : *BB) {
96  if (!isa<Instruction>(I))
97  return false;
98  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
99  if (!Ld->isSimple())
100  return false;
101  MemInstr.push_back(&I);
102  } else if (auto *St = dyn_cast<StoreInst>(&I)) {
103  if (!St->isSimple())
104  return false;
105  MemInstr.push_back(&I);
106  }
107  }
108  }
109 
110  LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
111  << " Loads and Stores to analyze\n");
112 
113  ValueVector::iterator I, IE, J, JE;
114 
115  for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
116  for (J = I, JE = MemInstr.end(); J != JE; ++J) {
117  std::vector<char> Dep;
118  Instruction *Src = cast<Instruction>(*I);
119  Instruction *Dst = cast<Instruction>(*J);
120  if (Src == Dst)
121  continue;
122  // Ignore Input dependencies.
123  if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
124  continue;
125  // Track Output, Flow, and Anti dependencies.
126  if (auto D = DI->depends(Src, Dst, true)) {
127  assert(D->isOrdered() && "Expected an output, flow or anti dep.");
128  LLVM_DEBUG(StringRef DepType =
129  D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
130  dbgs() << "Found " << DepType
131  << " dependency between Src and Dst\n"
132  << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
133  unsigned Levels = D->getLevels();
134  char Direction;
135  for (unsigned II = 1; II <= Levels; ++II) {
136  const SCEV *Distance = D->getDistance(II);
137  const SCEVConstant *SCEVConst =
138  dyn_cast_or_null<SCEVConstant>(Distance);
139  if (SCEVConst) {
140  const ConstantInt *CI = SCEVConst->getValue();
141  if (CI->isNegative())
142  Direction = '<';
143  else if (CI->isZero())
144  Direction = '=';
145  else
146  Direction = '>';
147  Dep.push_back(Direction);
148  } else if (D->isScalar(II)) {
149  Direction = 'S';
150  Dep.push_back(Direction);
151  } else {
152  unsigned Dir = D->getDirection(II);
153  if (Dir == Dependence::DVEntry::LT ||
155  Direction = '<';
156  else if (Dir == Dependence::DVEntry::GT ||
158  Direction = '>';
159  else if (Dir == Dependence::DVEntry::EQ)
160  Direction = '=';
161  else
162  Direction = '*';
163  Dep.push_back(Direction);
164  }
165  }
166  while (Dep.size() != Level) {
167  Dep.push_back('I');
168  }
169 
170  DepMatrix.push_back(Dep);
171  if (DepMatrix.size() > MaxMemInstrCount) {
172  LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
173  << " dependencies inside loop\n");
174  return false;
175  }
176  }
177  }
178  }
179 
180  return true;
181 }
182 
183 // A loop is moved from index 'from' to an index 'to'. Update the Dependence
184 // matrix by exchanging the two columns.
185 static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
186  unsigned ToIndx) {
187  unsigned numRows = DepMatrix.size();
188  for (unsigned i = 0; i < numRows; ++i) {
189  char TmpVal = DepMatrix[i][ToIndx];
190  DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
191  DepMatrix[i][FromIndx] = TmpVal;
192  }
193 }
194 
195 // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is
196 // '>'
197 static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
198  unsigned Column) {
199  for (unsigned i = 0; i <= Column; ++i) {
200  if (DepMatrix[Row][i] == '<')
201  return false;
202  if (DepMatrix[Row][i] == '>')
203  return true;
204  }
205  // All dependencies were '=','S' or 'I'
206  return false;
207 }
208 
209 // Checks if no dependence exist in the dependency matrix in Row before Column.
210 static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
211  unsigned Column) {
212  for (unsigned i = 0; i < Column; ++i) {
213  if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
214  DepMatrix[Row][i] != 'I')
215  return false;
216  }
217  return true;
218 }
219 
220 static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
221  unsigned OuterLoopId, char InnerDep,
222  char OuterDep) {
223  if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
224  return false;
225 
226  if (InnerDep == OuterDep)
227  return true;
228 
229  // It is legal to interchange if and only if after interchange no row has a
230  // '>' direction as the leftmost non-'='.
231 
232  if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
233  return true;
234 
235  if (InnerDep == '<')
236  return true;
237 
238  if (InnerDep == '>') {
239  // If OuterLoopId represents outermost loop then interchanging will make the
240  // 1st dependency as '>'
241  if (OuterLoopId == 0)
242  return false;
243 
244  // If all dependencies before OuterloopId are '=','S'or 'I'. Then
245  // interchanging will result in this row having an outermost non '='
246  // dependency of '>'
247  if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
248  return true;
249  }
250 
251  return false;
252 }
253 
254 // Checks if it is legal to interchange 2 loops.
255 // [Theorem] A permutation of the loops in a perfect nest is legal if and only
256 // if the direction matrix, after the same permutation is applied to its
257 // columns, has no ">" direction as the leftmost non-"=" direction in any row.
258 static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
259  unsigned InnerLoopId,
260  unsigned OuterLoopId) {
261  unsigned NumRows = DepMatrix.size();
262  // For each row check if it is valid to interchange.
263  for (unsigned Row = 0; Row < NumRows; ++Row) {
264  char InnerDep = DepMatrix[Row][InnerLoopId];
265  char OuterDep = DepMatrix[Row][OuterLoopId];
266  if (InnerDep == '*' || OuterDep == '*')
267  return false;
268  if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
269  return false;
270  }
271  return true;
272 }
273 
274 static LoopVector populateWorklist(Loop &L) {
275  LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
276  << L.getHeader()->getParent()->getName() << " Loop: %"
277  << L.getHeader()->getName() << '\n');
278  LoopVector LoopList;
279  Loop *CurrentLoop = &L;
280  const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
281  while (!Vec->empty()) {
282  // The current loop has multiple subloops in it hence it is not tightly
283  // nested.
284  // Discard all loops above it added into Worklist.
285  if (Vec->size() != 1)
286  return {};
287 
288  LoopList.push_back(CurrentLoop);
289  CurrentLoop = Vec->front();
290  Vec = &CurrentLoop->getSubLoops();
291  }
292  LoopList.push_back(CurrentLoop);
293  return LoopList;
294 }
295 
297  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
298  if (InnerIndexVar)
299  return InnerIndexVar;
300  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
301  return nullptr;
302  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
303  PHINode *PhiVar = cast<PHINode>(I);
304  Type *PhiTy = PhiVar->getType();
305  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
306  !PhiTy->isPointerTy())
307  return nullptr;
308  const SCEVAddRecExpr *AddRec =
309  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
310  if (!AddRec || !AddRec->isAffine())
311  continue;
312  const SCEV *Step = AddRec->getStepRecurrence(*SE);
313  if (!isa<SCEVConstant>(Step))
314  continue;
315  // Found the induction variable.
316  // FIXME: Handle loops with more than one induction variable. Note that,
317  // currently, legality makes sure we have only one induction variable.
318  return PhiVar;
319  }
320  return nullptr;
321 }
322 
323 namespace {
324 
325 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
326 class LoopInterchangeLegality {
327 public:
328  LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
330  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
331 
332  /// Check if the loops can be interchanged.
333  bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
334  CharMatrix &DepMatrix);
335 
336  /// Check if the loop structure is understood. We do not handle triangular
337  /// loops for now.
338  bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
339 
340  bool currentLimitations();
341 
342  const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
343  return OuterInnerReductions;
344  }
345 
346 private:
347  bool tightlyNested(Loop *Outer, Loop *Inner);
348  bool containsUnsafeInstructions(BasicBlock *BB);
349 
350  /// Discover induction and reduction PHIs in the header of \p L. Induction
351  /// PHIs are added to \p Inductions, reductions are added to
352  /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
353  /// to be passed as \p InnerLoop.
354  bool findInductionAndReductions(Loop *L,
355  SmallVector<PHINode *, 8> &Inductions,
356  Loop *InnerLoop);
357 
358  Loop *OuterLoop;
359  Loop *InnerLoop;
360 
361  ScalarEvolution *SE;
362 
363  /// Interface to emit optimization remarks.
365 
366  /// Set of reduction PHIs taking part of a reduction across the inner and
367  /// outer loop.
368  SmallPtrSet<PHINode *, 4> OuterInnerReductions;
369 };
370 
371 /// LoopInterchangeProfitability checks if it is profitable to interchange the
372 /// loop.
373 class LoopInterchangeProfitability {
374 public:
375  LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
377  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
378 
379  /// Check if the loop interchange is profitable.
380  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
381  CharMatrix &DepMatrix);
382 
383 private:
384  int getInstrOrderCost();
385 
386  Loop *OuterLoop;
387  Loop *InnerLoop;
388 
389  /// Scev analysis.
390  ScalarEvolution *SE;
391 
392  /// Interface to emit optimization remarks.
394 };
395 
396 /// LoopInterchangeTransform interchanges the loop.
397 class LoopInterchangeTransform {
398 public:
399  LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
400  LoopInfo *LI, DominatorTree *DT,
401  BasicBlock *LoopNestExit,
402  const LoopInterchangeLegality &LIL)
403  : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
404  LoopExit(LoopNestExit), LIL(LIL) {}
405 
406  /// Interchange OuterLoop and InnerLoop.
407  bool transform();
408  void restructureLoops(Loop *NewInner, Loop *NewOuter,
409  BasicBlock *OrigInnerPreHeader,
410  BasicBlock *OrigOuterPreHeader);
411  void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
412 
413 private:
414  void splitInnerLoopLatch(Instruction *);
415  void splitInnerLoopHeader();
416  bool adjustLoopLinks();
417  void adjustLoopPreheaders();
418  bool adjustLoopBranches();
419 
420  Loop *OuterLoop;
421  Loop *InnerLoop;
422 
423  /// Scev analysis.
424  ScalarEvolution *SE;
425 
426  LoopInfo *LI;
427  DominatorTree *DT;
428  BasicBlock *LoopExit;
429 
430  const LoopInterchangeLegality &LIL;
431 };
432 
433 // Main LoopInterchange Pass.
434 struct LoopInterchange : public LoopPass {
435  static char ID;
436  ScalarEvolution *SE = nullptr;
437  LoopInfo *LI = nullptr;
438  DependenceInfo *DI = nullptr;
439  DominatorTree *DT = nullptr;
440 
441  /// Interface to emit optimization remarks.
443 
444  LoopInterchange() : LoopPass(ID) {
446  }
447 
448  void getAnalysisUsage(AnalysisUsage &AU) const override {
451 
453  }
454 
455  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
456  if (skipLoop(L) || L->getParentLoop())
457  return false;
458 
459  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
460  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
461  DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
462  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
463  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
464 
465  return processLoopList(populateWorklist(*L));
466  }
467 
468  bool isComputableLoopNest(LoopVector LoopList) {
469  for (Loop *L : LoopList) {
470  const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
471  if (ExitCountOuter == SE->getCouldNotCompute()) {
472  LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
473  return false;
474  }
475  if (L->getNumBackEdges() != 1) {
476  LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
477  return false;
478  }
479  if (!L->getExitingBlock()) {
480  LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
481  return false;
482  }
483  }
484  return true;
485  }
486 
487  unsigned selectLoopForInterchange(const LoopVector &LoopList) {
488  // TODO: Add a better heuristic to select the loop to be interchanged based
489  // on the dependence matrix. Currently we select the innermost loop.
490  return LoopList.size() - 1;
491  }
492 
493  bool processLoopList(LoopVector LoopList) {
494  bool Changed = false;
495  unsigned LoopNestDepth = LoopList.size();
496  if (LoopNestDepth < 2) {
497  LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
498  return false;
499  }
500  if (LoopNestDepth > MaxLoopNestDepth) {
501  LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
502  << MaxLoopNestDepth << "\n");
503  return false;
504  }
505  if (!isComputableLoopNest(LoopList)) {
506  LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
507  return false;
508  }
509 
510  LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
511  << "\n");
512 
513  CharMatrix DependencyMatrix;
514  Loop *OuterMostLoop = *(LoopList.begin());
515  if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
516  OuterMostLoop, DI)) {
517  LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
518  return false;
519  }
520 #ifdef DUMP_DEP_MATRICIES
521  LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
522  printDepMatrix(DependencyMatrix);
523 #endif
524 
525  // Get the Outermost loop exit.
526  BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
527  if (!LoopNestExit) {
528  LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
529  return false;
530  }
531 
532  unsigned SelecLoopId = selectLoopForInterchange(LoopList);
533  // Move the selected loop outwards to the best possible position.
534  for (unsigned i = SelecLoopId; i > 0; i--) {
535  bool Interchanged =
536  processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
537  if (!Interchanged)
538  return Changed;
539  // Loops interchanged reflect the same in LoopList
540  std::swap(LoopList[i - 1], LoopList[i]);
541 
542  // Update the DependencyMatrix
543  interChangeDependencies(DependencyMatrix, i, i - 1);
544 #ifdef DUMP_DEP_MATRICIES
545  LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
546  printDepMatrix(DependencyMatrix);
547 #endif
548  Changed |= Interchanged;
549  }
550  return Changed;
551  }
552 
553  bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
554  unsigned OuterLoopId, BasicBlock *LoopNestExit,
555  std::vector<std::vector<char>> &DependencyMatrix) {
556  LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
557  << " and OuterLoopId = " << OuterLoopId << "\n");
558  Loop *InnerLoop = LoopList[InnerLoopId];
559  Loop *OuterLoop = LoopList[OuterLoopId];
560 
561  LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
562  if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
563  LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
564  return false;
565  }
566  LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
567  LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
568  if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
569  LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
570  return false;
571  }
572 
573  ORE->emit([&]() {
574  return OptimizationRemark(DEBUG_TYPE, "Interchanged",
575  InnerLoop->getStartLoc(),
576  InnerLoop->getHeader())
577  << "Loop interchanged with enclosing loop.";
578  });
579 
580  LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit,
581  LIL);
582  LIT.transform();
583  LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
584  LoopsInterchanged++;
585  return true;
586  }
587 };
588 
589 } // end anonymous namespace
590 
591 bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
592  return any_of(*BB, [](const Instruction &I) {
593  return I.mayHaveSideEffects() || I.mayReadFromMemory();
594  });
595 }
596 
597 bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
598  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
599  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
600  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
601 
602  LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
603 
604  // A perfectly nested loop will not have any branch in between the outer and
605  // inner block i.e. outer header will branch to either inner preheader and
606  // outerloop latch.
607  BranchInst *OuterLoopHeaderBI =
608  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
609  if (!OuterLoopHeaderBI)
610  return false;
611 
612  for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
613  if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
614  Succ != OuterLoopLatch)
615  return false;
616 
617  LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
618  // We do not have any basic block in between now make sure the outer header
619  // and outer loop latch doesn't contain any unsafe instructions.
620  if (containsUnsafeInstructions(OuterLoopHeader) ||
621  containsUnsafeInstructions(OuterLoopLatch))
622  return false;
623 
624  LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
625  // We have a perfect loop nest.
626  return true;
627 }
628 
629 bool LoopInterchangeLegality::isLoopStructureUnderstood(
630  PHINode *InnerInduction) {
631  unsigned Num = InnerInduction->getNumOperands();
632  BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
633  for (unsigned i = 0; i < Num; ++i) {
634  Value *Val = InnerInduction->getOperand(i);
635  if (isa<Constant>(Val))
636  continue;
638  if (!I)
639  return false;
640  // TODO: Handle triangular loops.
641  // e.g. for(int i=0;i<N;i++)
642  // for(int j=i;j<N;j++)
643  unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
644  if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
645  InnerLoopPreheader &&
646  !OuterLoop->isLoopInvariant(I)) {
647  return false;
648  }
649  }
650  return true;
651 }
652 
653 // If SV is a LCSSA PHI node with a single incoming value, return the incoming
654 // value.
655 static Value *followLCSSA(Value *SV) {
656  PHINode *PHI = dyn_cast<PHINode>(SV);
657  if (!PHI)
658  return SV;
659 
660  if (PHI->getNumIncomingValues() != 1)
661  return SV;
662  return followLCSSA(PHI->getIncomingValue(0));
663 }
664 
665 // Check V's users to see if it is involved in a reduction in L.
667  for (Value *User : V->users()) {
668  if (PHINode *PHI = dyn_cast<PHINode>(User)) {
669  if (PHI->getNumIncomingValues() == 1)
670  continue;
672  if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
673  return PHI;
674  return nullptr;
675  }
676  }
677 
678  return nullptr;
679 }
680 
681 bool LoopInterchangeLegality::findInductionAndReductions(
682  Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
683  if (!L->getLoopLatch() || !L->getLoopPredecessor())
684  return false;
685  for (PHINode &PHI : L->getHeader()->phis()) {
688  if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
689  Inductions.push_back(&PHI);
690  else {
691  // PHIs in inner loops need to be part of a reduction in the outer loop,
692  // discovered when checking the PHIs of the outer loop earlier.
693  if (!InnerLoop) {
694  if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) {
695  LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
696  "across the outer loop.\n");
697  return false;
698  }
699  } else {
700  assert(PHI.getNumIncomingValues() == 2 &&
701  "Phis in loop header should have exactly 2 incoming values");
702  // Check if we have a PHI node in the outer loop that has a reduction
703  // result from the inner loop as an incoming value.
704  Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
705  PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
706  if (!InnerRedPhi ||
707  !llvm::any_of(InnerRedPhi->incoming_values(),
708  [&PHI](Value *V) { return V == &PHI; })) {
709  LLVM_DEBUG(
710  dbgs()
711  << "Failed to recognize PHI as an induction or reduction.\n");
712  return false;
713  }
714  OuterInnerReductions.insert(&PHI);
715  OuterInnerReductions.insert(InnerRedPhi);
716  }
717  }
718  }
719  return true;
720 }
721 
722 static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
723  for (PHINode &PHI : Block->phis()) {
724  // Reduction lcssa phi will have only 1 incoming block that from loop latch.
725  if (PHI.getNumIncomingValues() > 1)
726  return false;
727  Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
728  if (!Ins)
729  return false;
730  // Incoming value for lcssa phi's in outer loop exit can only be inner loop
731  // exits lcssa phi else it would not be tightly nested.
732  if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
733  return false;
734  }
735  return true;
736 }
737 
738 // This function indicates the current limitations in the transform as a result
739 // of which we do not proceed.
740 bool LoopInterchangeLegality::currentLimitations() {
741  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
742  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
743 
744  // transform currently expects the loop latches to also be the exiting
745  // blocks.
746  if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
747  OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
748  !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
749  !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
750  LLVM_DEBUG(
751  dbgs() << "Loops where the latch is not the exiting block are not"
752  << " supported currently.\n");
753  ORE->emit([&]() {
754  return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
755  OuterLoop->getStartLoc(),
756  OuterLoop->getHeader())
757  << "Loops where the latch is not the exiting block cannot be"
758  " interchange currently.";
759  });
760  return true;
761  }
762 
763  PHINode *InnerInductionVar;
764  SmallVector<PHINode *, 8> Inductions;
765  if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
766  LLVM_DEBUG(
767  dbgs() << "Only outer loops with induction or reduction PHI nodes "
768  << "are supported currently.\n");
769  ORE->emit([&]() {
770  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
771  OuterLoop->getStartLoc(),
772  OuterLoop->getHeader())
773  << "Only outer loops with induction or reduction PHI nodes can be"
774  " interchanged currently.";
775  });
776  return true;
777  }
778 
779  // TODO: Currently we handle only loops with 1 induction variable.
780  if (Inductions.size() != 1) {
781  LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not "
782  << "supported currently.\n");
783  ORE->emit([&]() {
784  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter",
785  OuterLoop->getStartLoc(),
786  OuterLoop->getHeader())
787  << "Only outer loops with 1 induction variable can be "
788  "interchanged currently.";
789  });
790  return true;
791  }
792 
793  Inductions.clear();
794  if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) {
795  LLVM_DEBUG(
796  dbgs() << "Only inner loops with induction or reduction PHI nodes "
797  << "are supported currently.\n");
798  ORE->emit([&]() {
799  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
800  InnerLoop->getStartLoc(),
801  InnerLoop->getHeader())
802  << "Only inner loops with induction or reduction PHI nodes can be"
803  " interchange currently.";
804  });
805  return true;
806  }
807 
808  // TODO: Currently we handle only loops with 1 induction variable.
809  if (Inductions.size() != 1) {
810  LLVM_DEBUG(
811  dbgs() << "We currently only support loops with 1 induction variable."
812  << "Failed to interchange due to current limitation\n");
813  ORE->emit([&]() {
814  return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner",
815  InnerLoop->getStartLoc(),
816  InnerLoop->getHeader())
817  << "Only inner loops with 1 induction variable can be "
818  "interchanged currently.";
819  });
820  return true;
821  }
822  InnerInductionVar = Inductions.pop_back_val();
823 
824  // TODO: Triangular loops are not handled for now.
825  if (!isLoopStructureUnderstood(InnerInductionVar)) {
826  LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
827  ORE->emit([&]() {
828  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
829  InnerLoop->getStartLoc(),
830  InnerLoop->getHeader())
831  << "Inner loop structure not understood currently.";
832  });
833  return true;
834  }
835 
836  // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
837  BasicBlock *InnerExit = InnerLoop->getExitBlock();
838  if (!containsSafePHI(InnerExit, false)) {
839  LLVM_DEBUG(
840  dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
841  ORE->emit([&]() {
842  return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
843  InnerLoop->getStartLoc(),
844  InnerLoop->getHeader())
845  << "Only inner loops with LCSSA PHIs can be interchange "
846  "currently.";
847  });
848  return true;
849  }
850 
851  // TODO: Current limitation: Since we split the inner loop latch at the point
852  // were induction variable is incremented (induction.next); We cannot have
853  // more than 1 user of induction.next since it would result in broken code
854  // after split.
855  // e.g.
856  // for(i=0;i<N;i++) {
857  // for(j = 0;j<M;j++) {
858  // A[j+1][i+2] = A[j][i]+k;
859  // }
860  // }
861  Instruction *InnerIndexVarInc = nullptr;
862  if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
863  InnerIndexVarInc =
864  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
865  else
866  InnerIndexVarInc =
867  dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
868 
869  if (!InnerIndexVarInc) {
870  LLVM_DEBUG(
871  dbgs() << "Did not find an instruction to increment the induction "
872  << "variable.\n");
873  ORE->emit([&]() {
874  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner",
875  InnerLoop->getStartLoc(),
876  InnerLoop->getHeader())
877  << "The inner loop does not increment the induction variable.";
878  });
879  return true;
880  }
881 
882  // Since we split the inner loop latch on this induction variable. Make sure
883  // we do not have any instruction between the induction variable and branch
884  // instruction.
885 
886  bool FoundInduction = false;
887  for (const Instruction &I :
888  llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) {
889  if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) ||
890  isa<ZExtInst>(I))
891  continue;
892 
893  // We found an instruction. If this is not induction variable then it is not
894  // safe to split this loop latch.
895  if (!I.isIdenticalTo(InnerIndexVarInc)) {
896  LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction "
897  << "variable increment and branch.\n");
898  ORE->emit([&]() {
900  DEBUG_TYPE, "UnsupportedInsBetweenInduction",
901  InnerLoop->getStartLoc(), InnerLoop->getHeader())
902  << "Found unsupported instruction between induction variable "
903  "increment and branch.";
904  });
905  return true;
906  }
907 
908  FoundInduction = true;
909  break;
910  }
911  // The loop latch ended and we didn't find the induction variable return as
912  // current limitation.
913  if (!FoundInduction) {
914  LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n");
915  ORE->emit([&]() {
916  return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable",
917  InnerLoop->getStartLoc(),
918  InnerLoop->getHeader())
919  << "Did not find the induction variable.";
920  });
921  return true;
922  }
923  return false;
924 }
925 
926 // We currently support LCSSA PHI nodes in the outer loop exit, if their
927 // incoming values do not come from the outer loop latch or if the
928 // outer loop latch has a single predecessor. In that case, the value will
929 // be available if both the inner and outer loop conditions are true, which
930 // will still be true after interchanging. If we have multiple predecessor,
931 // that may not be the case, e.g. because the outer loop latch may be executed
932 // if the inner loop is not executed.
933 static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
934  BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
935  for (PHINode &PHI : LoopNestExit->phis()) {
936  // FIXME: We currently are not able to detect floating point reductions
937  // and have to use floating point PHIs as a proxy to prevent
938  // interchanging in the presence of floating point reductions.
939  if (PHI.getType()->isFloatingPointTy())
940  return false;
941  for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
942  Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
943  if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
944  continue;
945 
946  // The incoming value is defined in the outer loop latch. Currently we
947  // only support that in case the outer loop latch has a single predecessor.
948  // This guarantees that the outer loop latch is executed if and only if
949  // the inner loop is executed (because tightlyNested() guarantees that the
950  // outer loop header only branches to the inner loop or the outer loop
951  // latch).
952  // FIXME: We could weaken this logic and allow multiple predecessors,
953  // if the values are produced outside the loop latch. We would need
954  // additional logic to update the PHI nodes in the exit block as
955  // well.
956  if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
957  return false;
958  }
959  }
960  return true;
961 }
962 
963 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
964  unsigned OuterLoopId,
965  CharMatrix &DepMatrix) {
966  if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
967  LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
968  << " and OuterLoopId = " << OuterLoopId
969  << " due to dependence\n");
970  ORE->emit([&]() {
971  return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
972  InnerLoop->getStartLoc(),
973  InnerLoop->getHeader())
974  << "Cannot interchange loops due to dependences.";
975  });
976  return false;
977  }
978  // Check if outer and inner loop contain legal instructions only.
979  for (auto *BB : OuterLoop->blocks())
980  for (Instruction &I : BB->instructionsWithoutDebug())
981  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
982  // readnone functions do not prevent interchanging.
983  if (CI->doesNotReadMemory())
984  continue;
985  LLVM_DEBUG(
986  dbgs() << "Loops with call instructions cannot be interchanged "
987  << "safely.");
988  ORE->emit([&]() {
989  return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
990  CI->getDebugLoc(),
991  CI->getParent())
992  << "Cannot interchange loops due to call instruction.";
993  });
994 
995  return false;
996  }
997 
998  // TODO: The loops could not be interchanged due to current limitations in the
999  // transform module.
1000  if (currentLimitations()) {
1001  LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
1002  return false;
1003  }
1004 
1005  // Check if the loops are tightly nested.
1006  if (!tightlyNested(OuterLoop, InnerLoop)) {
1007  LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
1008  ORE->emit([&]() {
1009  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
1010  InnerLoop->getStartLoc(),
1011  InnerLoop->getHeader())
1012  << "Cannot interchange loops because they are not tightly "
1013  "nested.";
1014  });
1015  return false;
1016  }
1017 
1018  if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
1019  LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
1020  ORE->emit([&]() {
1021  return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1022  OuterLoop->getStartLoc(),
1023  OuterLoop->getHeader())
1024  << "Found unsupported PHI node in loop exit.";
1025  });
1026  return false;
1027  }
1028 
1029  return true;
1030 }
1031 
1032 int LoopInterchangeProfitability::getInstrOrderCost() {
1033  unsigned GoodOrder, BadOrder;
1034  BadOrder = GoodOrder = 0;
1035  for (BasicBlock *BB : InnerLoop->blocks()) {
1036  for (Instruction &Ins : *BB) {
1037  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
1038  unsigned NumOp = GEP->getNumOperands();
1039  bool FoundInnerInduction = false;
1040  bool FoundOuterInduction = false;
1041  for (unsigned i = 0; i < NumOp; ++i) {
1042  const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
1043  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
1044  if (!AR)
1045  continue;
1046 
1047  // If we find the inner induction after an outer induction e.g.
1048  // for(int i=0;i<N;i++)
1049  // for(int j=0;j<N;j++)
1050  // A[i][j] = A[i-1][j-1]+k;
1051  // then it is a good order.
1052  if (AR->getLoop() == InnerLoop) {
1053  // We found an InnerLoop induction after OuterLoop induction. It is
1054  // a good order.
1055  FoundInnerInduction = true;
1056  if (FoundOuterInduction) {
1057  GoodOrder++;
1058  break;
1059  }
1060  }
1061  // If we find the outer induction after an inner induction e.g.
1062  // for(int i=0;i<N;i++)
1063  // for(int j=0;j<N;j++)
1064  // A[j][i] = A[j-1][i-1]+k;
1065  // then it is a bad order.
1066  if (AR->getLoop() == OuterLoop) {
1067  // We found an OuterLoop induction after InnerLoop induction. It is
1068  // a bad order.
1069  FoundOuterInduction = true;
1070  if (FoundInnerInduction) {
1071  BadOrder++;
1072  break;
1073  }
1074  }
1075  }
1076  }
1077  }
1078  }
1079  return GoodOrder - BadOrder;
1080 }
1081 
1082 static bool isProfitableForVectorization(unsigned InnerLoopId,
1083  unsigned OuterLoopId,
1084  CharMatrix &DepMatrix) {
1085  // TODO: Improve this heuristic to catch more cases.
1086  // If the inner loop is loop independent or doesn't carry any dependency it is
1087  // profitable to move this to outer position.
1088  for (auto &Row : DepMatrix) {
1089  if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I')
1090  return false;
1091  // TODO: We need to improve this heuristic.
1092  if (Row[OuterLoopId] != '=')
1093  return false;
1094  }
1095  // If outer loop has dependence and inner loop is loop independent then it is
1096  // profitable to interchange to enable parallelism.
1097  // If there are no dependences, interchanging will not improve anything.
1098  return !DepMatrix.empty();
1099 }
1100 
1101 bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
1102  unsigned OuterLoopId,
1103  CharMatrix &DepMatrix) {
1104  // TODO: Add better profitability checks.
1105  // e.g
1106  // 1) Construct dependency matrix and move the one with no loop carried dep
1107  // inside to enable vectorization.
1108 
1109  // This is rough cost estimation algorithm. It counts the good and bad order
1110  // of induction variables in the instruction and allows reordering if number
1111  // of bad orders is more than good.
1112  int Cost = getInstrOrderCost();
1113  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1114  if (Cost < -LoopInterchangeCostThreshold)
1115  return true;
1116 
1117  // It is not profitable as per current cache profitability model. But check if
1118  // we can move this loop outside to improve parallelism.
1119  if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix))
1120  return true;
1121 
1122  ORE->emit([&]() {
1123  return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1124  InnerLoop->getStartLoc(),
1125  InnerLoop->getHeader())
1126  << "Interchanging loops is too costly (cost="
1127  << ore::NV("Cost", Cost) << ", threshold="
1128  << ore::NV("Threshold", LoopInterchangeCostThreshold)
1129  << ") and it does not improve parallelism.";
1130  });
1131  return false;
1132 }
1133 
1134 void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
1135  Loop *InnerLoop) {
1136  for (Loop *L : *OuterLoop)
1137  if (L == InnerLoop) {
1138  OuterLoop->removeChildLoop(L);
1139  return;
1140  }
1141  llvm_unreachable("Couldn't find loop");
1142 }
1143 
1144 /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
1145 /// new inner and outer loop after interchanging: NewInner is the original
1146 /// outer loop and NewOuter is the original inner loop.
1147 ///
1148 /// Before interchanging, we have the following structure
1149 /// Outer preheader
1150 // Outer header
1151 // Inner preheader
1152 // Inner header
1153 // Inner body
1154 // Inner latch
1155 // outer bbs
1156 // Outer latch
1157 //
1158 // After interchanging:
1159 // Inner preheader
1160 // Inner header
1161 // Outer preheader
1162 // Outer header
1163 // Inner body
1164 // outer bbs
1165 // Outer latch
1166 // Inner latch
1167 void LoopInterchangeTransform::restructureLoops(
1168  Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
1169  BasicBlock *OrigOuterPreHeader) {
1170  Loop *OuterLoopParent = OuterLoop->getParentLoop();
1171  // The original inner loop preheader moves from the new inner loop to
1172  // the parent loop, if there is one.
1173  NewInner->removeBlockFromLoop(OrigInnerPreHeader);
1174  LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
1175 
1176  // Switch the loop levels.
1177  if (OuterLoopParent) {
1178  // Remove the loop from its parent loop.
1179  removeChildLoop(OuterLoopParent, NewInner);
1180  removeChildLoop(NewInner, NewOuter);
1181  OuterLoopParent->addChildLoop(NewOuter);
1182  } else {
1183  removeChildLoop(NewInner, NewOuter);
1184  LI->changeTopLevelLoop(NewInner, NewOuter);
1185  }
1186  while (!NewOuter->empty())
1187  NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
1188  NewOuter->addChildLoop(NewInner);
1189 
1190  // BBs from the original inner loop.
1191  SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
1192 
1193  // Add BBs from the original outer loop to the original inner loop (excluding
1194  // BBs already in inner loop)
1195  for (BasicBlock *BB : NewInner->blocks())
1196  if (LI->getLoopFor(BB) == NewInner)
1197  NewOuter->addBlockEntry(BB);
1198 
1199  // Now remove inner loop header and latch from the new inner loop and move
1200  // other BBs (the loop body) to the new inner loop.
1201  BasicBlock *OuterHeader = NewOuter->getHeader();
1202  BasicBlock *OuterLatch = NewOuter->getLoopLatch();
1203  for (BasicBlock *BB : OrigInnerBBs) {
1204  // Nothing will change for BBs in child loops.
1205  if (LI->getLoopFor(BB) != NewOuter)
1206  continue;
1207  // Remove the new outer loop header and latch from the new inner loop.
1208  if (BB == OuterHeader || BB == OuterLatch)
1209  NewInner->removeBlockFromLoop(BB);
1210  else
1211  LI->changeLoopFor(BB, NewInner);
1212  }
1213 
1214  // The preheader of the original outer loop becomes part of the new
1215  // outer loop.
1216  NewOuter->addBlockEntry(OrigOuterPreHeader);
1217  LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
1218 
1219  // Tell SE that we move the loops around.
1220  SE->forgetLoop(NewOuter);
1221  SE->forgetLoop(NewInner);
1222 }
1223 
1225  bool Transformed = false;
1226  Instruction *InnerIndexVar;
1227 
1228  if (InnerLoop->getSubLoops().empty()) {
1229  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1230  LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
1231  PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
1232  if (!InductionPHI) {
1233  LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
1234  return false;
1235  }
1236 
1237  if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
1238  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
1239  else
1240  InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
1241 
1242  // Ensure that InductionPHI is the first Phi node.
1243  if (&InductionPHI->getParent()->front() != InductionPHI)
1244  InductionPHI->moveBefore(&InductionPHI->getParent()->front());
1245 
1246  // Split at the place were the induction variable is
1247  // incremented/decremented.
1248  // TODO: This splitting logic may not work always. Fix this.
1249  splitInnerLoopLatch(InnerIndexVar);
1250  LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n");
1251 
1252  // Splits the inner loops phi nodes out into a separate basic block.
1253  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1254  SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
1255  LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
1256  }
1257 
1258  Transformed |= adjustLoopLinks();
1259  if (!Transformed) {
1260  LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
1261  return false;
1262  }
1263 
1264  return true;
1265 }
1266 
1267 void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
1268  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1269  BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
1270  InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
1271 }
1272 
1273 /// \brief Move all instructions except the terminator from FromBB right before
1274 /// InsertBefore
1275 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1276  auto &ToList = InsertBefore->getParent()->getInstList();
1277  auto &FromList = FromBB->getInstList();
1278 
1279  ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(),
1280  FromBB->getTerminator()->getIterator());
1281 }
1282 
1283 static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
1284  BasicBlock *NewPred) {
1285  for (PHINode &PHI : CurrBlock->phis()) {
1286  unsigned Num = PHI.getNumIncomingValues();
1287  for (unsigned i = 0; i < Num; ++i) {
1288  if (PHI.getIncomingBlock(i) == OldPred)
1289  PHI.setIncomingBlock(i, NewPred);
1290  }
1291  }
1292 }
1293 
1294 /// Update BI to jump to NewBB instead of OldBB. Records updates to
1295 /// the dominator tree in DTUpdates, if DT should be preserved.
1296 static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
1297  BasicBlock *NewBB,
1298  std::vector<DominatorTree::UpdateType> &DTUpdates) {
1300  [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
1301  "BI must jump to OldBB at most once.");
1302  for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
1303  if (BI->getSuccessor(i) == OldBB) {
1304  BI->setSuccessor(i, NewBB);
1305 
1306  DTUpdates.push_back(
1308  DTUpdates.push_back(
1310  break;
1311  }
1312  }
1313 }
1314 
1315 // Move Lcssa PHIs to the right place.
1316 static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
1317  BasicBlock *OuterLatch) {
1318  SmallVector<PHINode *, 8> LcssaInnerExit;
1319  for (PHINode &P : InnerExit->phis())
1320  LcssaInnerExit.push_back(&P);
1321 
1322  SmallVector<PHINode *, 8> LcssaInnerLatch;
1323  for (PHINode &P : InnerLatch->phis())
1324  LcssaInnerLatch.push_back(&P);
1325 
1326  // Lcssa PHIs for values used outside the inner loop are in InnerExit.
1327  // If a PHI node has users outside of InnerExit, it has a use outside the
1328  // interchanged loop and we have to preserve it. We move these to
1329  // InnerLatch, which will become the new exit block for the innermost
1330  // loop after interchanging. For PHIs only used in InnerExit, we can just
1331  // replace them with the incoming value.
1332  for (PHINode *P : LcssaInnerExit) {
1333  bool hasUsersOutside = false;
1334  for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
1335  Use &U = *UI;
1336  ++UI;
1337  auto *Usr = cast<Instruction>(U.getUser());
1338  if (Usr->getParent() != InnerExit) {
1339  hasUsersOutside = true;
1340  continue;
1341  }
1342  U.set(P->getIncomingValueForBlock(InnerLatch));
1343  }
1344  if (hasUsersOutside)
1345  P->moveBefore(InnerLatch->getFirstNonPHI());
1346  else
1347  P->eraseFromParent();
1348  }
1349 
1350  // If the inner loop latch contains LCSSA PHIs, those come from a child loop
1351  // and we have to move them to the new inner latch.
1352  for (PHINode *P : LcssaInnerLatch)
1353  P->moveBefore(InnerExit->getFirstNonPHI());
1354 
1355  // Now adjust the incoming blocks for the LCSSA PHIs.
1356  // For PHIs moved from Inner's exit block, we need to replace Inner's latch
1357  // with the new latch.
1358  updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
1359 }
1360 
1361 bool LoopInterchangeTransform::adjustLoopBranches() {
1362  LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
1363  std::vector<DominatorTree::UpdateType> DTUpdates;
1364 
1365  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1366  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1367 
1368  assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
1369  InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
1370  InnerLoopPreHeader && "Guaranteed by loop-simplify form");
1371  // Ensure that both preheaders do not contain PHI nodes and have single
1372  // predecessors. This allows us to move them easily. We use
1373  // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
1374  // preheaders do not satisfy those conditions.
1375  if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
1376  !OuterLoopPreHeader->getUniquePredecessor())
1377  OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
1378  if (InnerLoopPreHeader == OuterLoop->getHeader())
1379  InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
1380 
1381  // Adjust the loop preheader
1382  BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1383  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1384  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
1385  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
1386  BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
1387  BasicBlock *InnerLoopLatchPredecessor =
1388  InnerLoopLatch->getUniquePredecessor();
1389  BasicBlock *InnerLoopLatchSuccessor;
1390  BasicBlock *OuterLoopLatchSuccessor;
1391 
1392  BranchInst *OuterLoopLatchBI =
1393  dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
1394  BranchInst *InnerLoopLatchBI =
1395  dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
1396  BranchInst *OuterLoopHeaderBI =
1397  dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
1398  BranchInst *InnerLoopHeaderBI =
1399  dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
1400 
1401  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
1402  !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
1403  !InnerLoopHeaderBI)
1404  return false;
1405 
1406  BranchInst *InnerLoopLatchPredecessorBI =
1407  dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
1408  BranchInst *OuterLoopPredecessorBI =
1409  dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
1410 
1411  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
1412  return false;
1413  BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
1414  if (!InnerLoopHeaderSuccessor)
1415  return false;
1416 
1417  // Adjust Loop Preheader and headers
1418  updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1419  InnerLoopPreHeader, DTUpdates);
1420  updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
1421  updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1422  InnerLoopHeaderSuccessor, DTUpdates);
1423 
1424  // Adjust reduction PHI's now that the incoming block has changed.
1425  updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
1426  OuterLoopHeader);
1427 
1428  updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
1429  OuterLoopPreHeader, DTUpdates);
1430 
1431  // -------------Adjust loop latches-----------
1432  if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
1433  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
1434  else
1435  InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
1436 
1437  updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
1438  InnerLoopLatchSuccessor, DTUpdates);
1439 
1440 
1441  if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
1442  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
1443  else
1444  OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
1445 
1446  updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
1447  OuterLoopLatchSuccessor, DTUpdates);
1448  updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
1449  DTUpdates);
1450 
1451  DT->applyUpdates(DTUpdates);
1452  restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
1453  OuterLoopPreHeader);
1454 
1455  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
1456  // For PHIs in the exit block of the outer loop, outer's latch has been
1457  // replaced by Inners'.
1458  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
1459 
1460  // Now update the reduction PHIs in the inner and outer loop headers.
1461  SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1462  for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1))
1463  InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
1464  for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1))
1465  OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
1466 
1467  auto &OuterInnerReductions = LIL.getOuterInnerReductions();
1468  (void)OuterInnerReductions;
1469 
1470  // Now move the remaining reduction PHIs from outer to inner loop header and
1471  // vice versa. The PHI nodes must be part of a reduction across the inner and
1472  // outer loop and all the remains to do is and updating the incoming blocks.
1473  for (PHINode *PHI : OuterLoopPHIs) {
1474  PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
1475  assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
1476  "Expected a reduction PHI node");
1477  }
1478  for (PHINode *PHI : InnerLoopPHIs) {
1479  PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
1480  assert(OuterInnerReductions.find(PHI) != OuterInnerReductions.end() &&
1481  "Expected a reduction PHI node");
1482  }
1483 
1484  // Update the incoming blocks for moved PHI nodes.
1485  updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
1486  updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch);
1487  updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader);
1488  updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch);
1489 
1490  return true;
1491 }
1492 
1493 void LoopInterchangeTransform::adjustLoopPreheaders() {
1494  // We have interchanged the preheaders so we need to interchange the data in
1495  // the preheader as well.
1496  // This is because the content of inner preheader was previously executed
1497  // inside the outer loop.
1498  BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
1499  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1500  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1501  BranchInst *InnerTermBI =
1502  cast<BranchInst>(InnerLoopPreHeader->getTerminator());
1503 
1504  // These instructions should now be executed inside the loop.
1505  // Move instruction into a new block after outer header.
1506  moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator());
1507  // These instructions were not executed previously in the loop so move them to
1508  // the older inner loop preheader.
1509  moveBBContents(OuterLoopPreHeader, InnerTermBI);
1510 }
1511 
1512 bool LoopInterchangeTransform::adjustLoopLinks() {
1513  // Adjust all branches in the inner and outer loop.
1514  bool Changed = adjustLoopBranches();
1515  if (Changed)
1516  adjustLoopPreheaders();
1517  return Changed;
1518 }
1519 
1520 char LoopInterchange::ID = 0;
1521 
1522 INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
1523  "Interchanges loops for cache reuse", false, false)
1527 
1528 INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
1529  "Interchanges loops for cache reuse", false, false)
1530 
1531 Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LoopVector populateWorklist(Loop &L)
Legacy pass manager pass to access dependence information.
static bool isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix)
void push_back(const T &Elt)
Definition: SmallVector.h:218
LoopT * removeChildLoop(iterator I)
This removes the specified child from being a subloop of this loop.
Definition: LoopInfo.h:340
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
BasicBlock * InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
InsertPreheaderForLoop - Once we discover that a loop doesn&#39;t have a preheader, this method is called...
auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1260
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
Hexagon Common GEP
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
DependenceInfo - This class is the main dependence-analysis driver.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:227
unsigned getNumSuccessors() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void addBlockEntry(BlockT *BB)
This adds a basic block directly to the basic block list.
Definition: LoopInfo.h:359
BlockT * getHeader() const
Definition: LoopInfo.h:100
ConstantInt * getValue() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
static cl::opt< int > LoopInterchangeCostThreshold("loop-interchange-threshold", cl::init(0), cl::Hidden, cl::desc("Interchange if you gain more than this number"))
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
Definition: Use.cpp:41
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This node represents a polynomial recurrence on the trip count of the specified loop.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:247
static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep)
static const unsigned MaxLoopNestDepth
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, BasicBlock *NewBB, std::vector< DominatorTree::UpdateType > &DTUpdates)
Update BI to jump to NewBB instead of OldBB.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
loop interchange
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
Value * getOperand(unsigned i) const
Definition: User.h:170
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
const SCEV * getCouldNotCompute()
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:190
void set(Value *Val)
Definition: Value.h:671
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Conditional or Unconditional Branch instruction.
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
Definition: BasicBlock.cpp:95
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:281
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:562
loop Interchanges loops for cache reuse
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
Definition: ilist.h:329
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
self_iterator getIterator()
Definition: ilist_node.h:82
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
Used in the streaming interface as the general argument type.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:145
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:365
iterator_range< decltype(adl_begin(std::declval< T >)))> drop_begin(T &&t, int n)
static unsigned getIncomingValueNumForOperand(unsigned i)
size_t size() const
Definition: SmallVector.h:53
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
void initializeLoopInterchangePass(PassRegistry &)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
bool isNegative() const
Definition: Constants.h:188
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:63
#define DEBUG_TYPE
BlockT * getLoopPredecessor() const
If the given loop&#39;s header has exactly one unique predecessor outside the loop, return it...
Definition: LoopInfoImpl.h:202
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:334
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
Pass * createLoopInterchangePass()
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore)
Move all instructions except the terminator from FromBB right before InsertBefore.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
iterator begin() const
Definition: LoopInfo.h:142
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static const unsigned MaxMemInstrCount
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:113
iterator_range< user_iterator > users()
Definition: Value.h:400
INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_END(LoopInterchange
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
static PHINode * findInnerReductionPhi(Loop *L, Value *V)
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:131
This class represents an analyzed expression in the program.
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:331
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch, BasicBlock *OuterLatch)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
bool mayReadFromMemory() const
Return true if this instruction may read memory.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:132
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:325
static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, unsigned Column)
void removeBlockFromLoop(BlockT *BB)
This removes the specified basic block from the current loop, updating the Blocks as appropriate...
Definition: LoopInfo.h:396
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1268
bool empty() const
Definition: LoopInfo.h:146
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx)
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static Value * followLCSSA(Value *SV)
succ_range successors(Instruction *I)
Definition: CFG.h:264
OptimizationRemarkEmitter legacy analysis pass.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, BasicBlock *NewPred)
#define LLVM_DEBUG(X)
Definition: Debug.h:123
op_range incoming_values()
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
The optimization diagnostic interface.
static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop)
loops
Definition: LoopInfo.cpp:772
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
static PHINode * getInductionVariable(Loop *L, ScalarEvolution *SE)
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:277