LLVM  8.0.1
LoopLoadElimination.cpp
Go to the documentation of this file.
1 //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implement a loop-aware load elimination pass.
11 //
12 // It uses LoopAccessAnalysis to identify loop-carried dependences with a
13 // distance of one between stores and loads. These form the candidates for the
14 // transformation. The source value of each store then propagated to the user
15 // of the corresponding load. This makes the load dead.
16 //
17 // The pass can also version the loop and add memchecks in order to prove that
18 // may-aliasing stores can't change the value in memory before it's read by the
19 // load.
20 //
21 //===----------------------------------------------------------------------===//
22 
24 #include "llvm/ADT/APInt.h"
25 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
36 #include "llvm/Analysis/LoopInfo.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PassManager.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Value.h"
49 #include "llvm/Pass.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
55 #include "llvm/Transforms/Utils.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <forward_list>
60 #include <set>
61 #include <tuple>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define LLE_OPTION "loop-load-elim"
67 #define DEBUG_TYPE LLE_OPTION
68 
70  "runtime-check-per-loop-load-elim", cl::Hidden,
71  cl::desc("Max number of memchecks allowed per eliminated load on average"),
72  cl::init(1));
73 
75  "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden,
76  cl::desc("The maximum number of SCEV checks allowed for Loop "
77  "Load Elimination"));
78 
79 STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
80 
81 namespace {
82 
83 /// Represent a store-to-forwarding candidate.
84 struct StoreToLoadForwardingCandidate {
85  LoadInst *Load;
87 
88  StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
89  : Load(Load), Store(Store) {}
90 
91  /// Return true if the dependence from the store to the load has a
92  /// distance of one. E.g. A[i+1] = A[i]
93  bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
94  Loop *L) const {
95  Value *LoadPtr = Load->getPointerOperand();
96  Value *StorePtr = Store->getPointerOperand();
97  Type *LoadPtrType = LoadPtr->getType();
98  Type *LoadType = LoadPtrType->getPointerElementType();
99 
100  assert(LoadPtrType->getPointerAddressSpace() ==
101  StorePtr->getType()->getPointerAddressSpace() &&
102  LoadType == StorePtr->getType()->getPointerElementType() &&
103  "Should be a known dependence");
104 
105  // Currently we only support accesses with unit stride. FIXME: we should be
106  // able to handle non unit stirde as well as long as the stride is equal to
107  // the dependence distance.
108  if (getPtrStride(PSE, LoadPtr, L) != 1 ||
109  getPtrStride(PSE, StorePtr, L) != 1)
110  return false;
111 
112  auto &DL = Load->getParent()->getModule()->getDataLayout();
113  unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
114 
115  auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr));
116  auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr));
117 
118  // We don't need to check non-wrapping here because forward/backward
119  // dependence wouldn't be valid if these weren't monotonic accesses.
120  auto *Dist = cast<SCEVConstant>(
121  PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
122  const APInt &Val = Dist->getAPInt();
123  return Val == TypeByteSize;
124  }
125 
126  Value *getLoadPtr() const { return Load->getPointerOperand(); }
127 
128 #ifndef NDEBUG
129  friend raw_ostream &operator<<(raw_ostream &OS,
130  const StoreToLoadForwardingCandidate &Cand) {
131  OS << *Cand.Store << " -->\n";
132  OS.indent(2) << *Cand.Load << "\n";
133  return OS;
134  }
135 #endif
136 };
137 
138 } // end anonymous namespace
139 
140 /// Check if the store dominates all latches, so as long as there is no
141 /// intervening store this value will be loaded in the next iteration.
142 static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
143  DominatorTree *DT) {
145  L->getLoopLatches(Latches);
146  return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
147  return DT->dominates(StoreBlock, Latch);
148  });
149 }
150 
151 /// Return true if the load is not executed on all paths in the loop.
152 static bool isLoadConditional(LoadInst *Load, Loop *L) {
153  return Load->getParent() != L->getHeader();
154 }
155 
156 namespace {
157 
158 /// The per-loop class that does most of the work.
159 class LoadEliminationForLoop {
160 public:
161  LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
162  DominatorTree *DT)
163  : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
164 
165  /// Look through the loop-carried and loop-independent dependences in
166  /// this loop and find store->load dependences.
167  ///
168  /// Note that no candidate is returned if LAA has failed to analyze the loop
169  /// (e.g. if it's not bottom-tested, contains volatile memops, etc.)
170  std::forward_list<StoreToLoadForwardingCandidate>
171  findStoreToLoadDependences(const LoopAccessInfo &LAI) {
172  std::forward_list<StoreToLoadForwardingCandidate> Candidates;
173 
174  const auto *Deps = LAI.getDepChecker().getDependences();
175  if (!Deps)
176  return Candidates;
177 
178  // Find store->load dependences (consequently true dep). Both lexically
179  // forward and backward dependences qualify. Disqualify loads that have
180  // other unknown dependences.
181 
182  SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence;
183 
184  for (const auto &Dep : *Deps) {
185  Instruction *Source = Dep.getSource(LAI);
186  Instruction *Destination = Dep.getDestination(LAI);
187 
188  if (Dep.Type == MemoryDepChecker::Dependence::Unknown) {
189  if (isa<LoadInst>(Source))
190  LoadsWithUnknownDepedence.insert(Source);
191  if (isa<LoadInst>(Destination))
192  LoadsWithUnknownDepedence.insert(Destination);
193  continue;
194  }
195 
196  if (Dep.isBackward())
197  // Note that the designations source and destination follow the program
198  // order, i.e. source is always first. (The direction is given by the
199  // DepType.)
200  std::swap(Source, Destination);
201  else
202  assert(Dep.isForward() && "Needs to be a forward dependence");
203 
204  auto *Store = dyn_cast<StoreInst>(Source);
205  if (!Store)
206  continue;
207  auto *Load = dyn_cast<LoadInst>(Destination);
208  if (!Load)
209  continue;
210 
211  // Only progagate the value if they are of the same type.
212  if (Store->getPointerOperandType() != Load->getPointerOperandType())
213  continue;
214 
215  Candidates.emplace_front(Load, Store);
216  }
217 
218  if (!LoadsWithUnknownDepedence.empty())
219  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) {
220  return LoadsWithUnknownDepedence.count(C.Load);
221  });
222 
223  return Candidates;
224  }
225 
226  /// Return the index of the instruction according to program order.
227  unsigned getInstrIndex(Instruction *Inst) {
228  auto I = InstOrder.find(Inst);
229  assert(I != InstOrder.end() && "No index for instruction");
230  return I->second;
231  }
232 
233  /// If a load has multiple candidates associated (i.e. different
234  /// stores), it means that it could be forwarding from multiple stores
235  /// depending on control flow. Remove these candidates.
236  ///
237  /// Here, we rely on LAA to include the relevant loop-independent dependences.
238  /// LAA is known to omit these in the very simple case when the read and the
239  /// write within an alias set always takes place using the *same* pointer.
240  ///
241  /// However, we know that this is not the case here, i.e. we can rely on LAA
242  /// to provide us with loop-independent dependences for the cases we're
243  /// interested. Consider the case for example where a loop-independent
244  /// dependece S1->S2 invalidates the forwarding S3->S2.
245  ///
246  /// A[i] = ... (S1)
247  /// ... = A[i] (S2)
248  /// A[i+1] = ... (S3)
249  ///
250  /// LAA will perform dependence analysis here because there are two
251  /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]).
252  void removeDependencesFromMultipleStores(
253  std::forward_list<StoreToLoadForwardingCandidate> &Candidates) {
254  // If Store is nullptr it means that we have multiple stores forwarding to
255  // this store.
256  using LoadToSingleCandT =
258  LoadToSingleCandT LoadToSingleCand;
259 
260  for (const auto &Cand : Candidates) {
261  bool NewElt;
262  LoadToSingleCandT::iterator Iter;
263 
264  std::tie(Iter, NewElt) =
265  LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand));
266  if (!NewElt) {
267  const StoreToLoadForwardingCandidate *&OtherCand = Iter->second;
268  // Already multiple stores forward to this load.
269  if (OtherCand == nullptr)
270  continue;
271 
272  // Handle the very basic case when the two stores are in the same block
273  // so deciding which one forwards is easy. The later one forwards as
274  // long as they both have a dependence distance of one to the load.
275  if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
276  Cand.isDependenceDistanceOfOne(PSE, L) &&
277  OtherCand->isDependenceDistanceOfOne(PSE, L)) {
278  // They are in the same block, the later one will forward to the load.
279  if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
280  OtherCand = &Cand;
281  } else
282  OtherCand = nullptr;
283  }
284  }
285 
286  Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) {
287  if (LoadToSingleCand[Cand.Load] != &Cand) {
288  LLVM_DEBUG(
289  dbgs() << "Removing from candidates: \n"
290  << Cand
291  << " The load may have multiple stores forwarding to "
292  << "it\n");
293  return true;
294  }
295  return false;
296  });
297  }
298 
299  /// Given two pointers operations by their RuntimePointerChecking
300  /// indices, return true if they require an alias check.
301  ///
302  /// We need a check if one is a pointer for a candidate load and the other is
303  /// a pointer for a possibly intervening store.
304  bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2,
305  const SmallPtrSet<Value *, 4> &PtrsWrittenOnFwdingPath,
306  const std::set<Value *> &CandLoadPtrs) {
307  Value *Ptr1 =
309  Value *Ptr2 =
311  return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) ||
312  (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1)));
313  }
314 
315  /// Return pointers that are possibly written to on the path from a
316  /// forwarding store to a load.
317  ///
318  /// These pointers need to be alias-checked against the forwarding candidates.
319  SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath(
321  // From FirstStore to LastLoad neither of the elimination candidate loads
322  // should overlap with any of the stores.
323  //
324  // E.g.:
325  //
326  // st1 C[i]
327  // ld1 B[i] <-------,
328  // ld0 A[i] <----, | * LastLoad
329  // ... | |
330  // st2 E[i] | |
331  // st3 B[i+1] -- | -' * FirstStore
332  // st0 A[i+1] ---'
333  // st4 D[i]
334  //
335  // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
336  // ld0.
337 
338  LoadInst *LastLoad =
339  std::max_element(Candidates.begin(), Candidates.end(),
340  [&](const StoreToLoadForwardingCandidate &A,
341  const StoreToLoadForwardingCandidate &B) {
342  return getInstrIndex(A.Load) < getInstrIndex(B.Load);
343  })
344  ->Load;
345  StoreInst *FirstStore =
346  std::min_element(Candidates.begin(), Candidates.end(),
347  [&](const StoreToLoadForwardingCandidate &A,
348  const StoreToLoadForwardingCandidate &B) {
349  return getInstrIndex(A.Store) <
350  getInstrIndex(B.Store);
351  })
352  ->Store;
353 
354  // We're looking for stores after the first forwarding store until the end
355  // of the loop, then from the beginning of the loop until the last
356  // forwarded-to load. Collect the pointer for the stores.
357  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath;
358 
359  auto InsertStorePtr = [&](Instruction *I) {
360  if (auto *S = dyn_cast<StoreInst>(I))
361  PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
362  };
363  const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
364  std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
365  MemInstrs.end(), InsertStorePtr);
366  std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
367  InsertStorePtr);
368 
369  return PtrsWrittenOnFwdingPath;
370  }
371 
372  /// Determine the pointer alias checks to prove that there are no
373  /// intervening stores.
376 
377  SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath =
378  findPointersWrittenOnForwardingPath(Candidates);
379 
380  // Collect the pointers of the candidate loads.
381  // FIXME: SmallPtrSet does not work with std::inserter.
382  std::set<Value *> CandLoadPtrs;
383  transform(Candidates,
384  std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
385  std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
386 
387  const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
389 
390  copy_if(AllChecks, std::back_inserter(Checks),
392  for (auto PtrIdx1 : Check.first->Members)
393  for (auto PtrIdx2 : Check.second->Members)
394  if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
395  CandLoadPtrs))
396  return true;
397  return false;
398  });
399 
400  LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size()
401  << "):\n");
403 
404  return Checks;
405  }
406 
407  /// Perform the transformation for a candidate.
408  void
409  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
410  SCEVExpander &SEE) {
411  // loop:
412  // %x = load %gep_i
413  // = ... %x
414  // store %y, %gep_i_plus_1
415  //
416  // =>
417  //
418  // ph:
419  // %x.initial = load %gep_0
420  // loop:
421  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
422  // %x = load %gep_i <---- now dead
423  // = ... %x.storeforward
424  // store %y, %gep_i_plus_1
425 
426  Value *Ptr = Cand.Load->getPointerOperand();
427  auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
428  auto *PH = L->getLoopPreheader();
429  Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
430  PH->getTerminator());
431  Value *Initial =
432  new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
433  Cand.Load->getAlignment(), PH->getTerminator());
434 
435  PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
436  &L->getHeader()->front());
437  PHI->addIncoming(Initial, PH);
438  PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
439 
440  Cand.Load->replaceAllUsesWith(PHI);
441  }
442 
443  /// Top-level driver for each loop: find store->load forwarding
444  /// candidates, add run-time checks and perform transformation.
445  bool processLoop() {
446  LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
447  << "\" checking " << *L << "\n");
448 
449  // Look for store-to-load forwarding cases across the
450  // backedge. E.g.:
451  //
452  // loop:
453  // %x = load %gep_i
454  // = ... %x
455  // store %y, %gep_i_plus_1
456  //
457  // =>
458  //
459  // ph:
460  // %x.initial = load %gep_0
461  // loop:
462  // %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
463  // %x = load %gep_i <---- now dead
464  // = ... %x.storeforward
465  // store %y, %gep_i_plus_1
466 
467  // First start with store->load dependences.
468  auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
469  if (StoreToLoadDependences.empty())
470  return false;
471 
472  // Generate an index for each load and store according to the original
473  // program order. This will be used later.
474  InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
475 
476  // To keep things simple for now, remove those where the load is potentially
477  // fed by multiple stores.
478  removeDependencesFromMultipleStores(StoreToLoadDependences);
479  if (StoreToLoadDependences.empty())
480  return false;
481 
482  // Filter the candidates further.
484  unsigned NumForwarding = 0;
485  for (const StoreToLoadForwardingCandidate Cand : StoreToLoadDependences) {
486  LLVM_DEBUG(dbgs() << "Candidate " << Cand);
487 
488  // Make sure that the stored values is available everywhere in the loop in
489  // the next iteration.
490  if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT))
491  continue;
492 
493  // If the load is conditional we can't hoist its 0-iteration instance to
494  // the preheader because that would make it unconditional. Thus we would
495  // access a memory location that the original loop did not access.
496  if (isLoadConditional(Cand.Load, L))
497  continue;
498 
499  // Check whether the SCEV difference is the same as the induction step,
500  // thus we load the value in the next iteration.
501  if (!Cand.isDependenceDistanceOfOne(PSE, L))
502  continue;
503 
504  ++NumForwarding;
505  LLVM_DEBUG(
506  dbgs()
507  << NumForwarding
508  << ". Valid store-to-load forwarding across the loop backedge\n");
509  Candidates.push_back(Cand);
510  }
511  if (Candidates.empty())
512  return false;
513 
514  // Check intervening may-alias stores. These need runtime checks for alias
515  // disambiguation.
517  collectMemchecks(Candidates);
518 
519  // Too many checks are likely to outweigh the benefits of forwarding.
520  if (Checks.size() > Candidates.size() * CheckPerElim) {
521  LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n");
522  return false;
523  }
524 
525  if (LAI.getPSE().getUnionPredicate().getComplexity() >
527  LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
528  return false;
529  }
530 
531  if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
532  if (L->getHeader()->getParent()->optForSize()) {
533  LLVM_DEBUG(
534  dbgs() << "Versioning is needed but not allowed when optimizing "
535  "for size.\n");
536  return false;
537  }
538 
539  if (!L->isLoopSimplifyForm()) {
540  LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form");
541  return false;
542  }
543 
544  // Point of no-return, start the transformation. First, version the loop
545  // if necessary.
546 
547  LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
548  LV.setAliasChecks(std::move(Checks));
549  LV.setSCEVChecks(LAI.getPSE().getUnionPredicate());
550  LV.versionLoop();
551  }
552 
553  // Next, propagate the value stored by the store to the users of the load.
554  // Also for the first iteration, generate the initial value of the load.
556  "storeforward");
557  for (const auto &Cand : Candidates)
558  propagateStoredValueToLoadUsers(Cand, SEE);
559  NumLoopLoadEliminted += NumForwarding;
560 
561  return true;
562  }
563 
564 private:
565  Loop *L;
566 
567  /// Maps the load/store instructions to their index according to
568  /// program order.
570 
571  // Analyses used.
572  LoopInfo *LI;
573  const LoopAccessInfo &LAI;
574  DominatorTree *DT;
576 };
577 
578 } // end anonymous namespace
579 
580 static bool
582  function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
583  // Build up a worklist of inner-loops to transform to avoid iterator
584  // invalidation.
585  // FIXME: This logic comes from other passes that actually change the loop
586  // nest structure. It isn't clear this is necessary (or useful) for a pass
587  // which merely optimizes the use of loads in a loop.
588  SmallVector<Loop *, 8> Worklist;
589 
590  for (Loop *TopLevelLoop : LI)
591  for (Loop *L : depth_first(TopLevelLoop))
592  // We only handle inner-most loops.
593  if (L->empty())
594  Worklist.push_back(L);
595 
596  // Now walk the identified inner loops.
597  bool Changed = false;
598  for (Loop *L : Worklist) {
599  // The actual work is performed by LoadEliminationForLoop.
600  LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
601  Changed |= LEL.processLoop();
602  }
603  return Changed;
604 }
605 
606 namespace {
607 
608 /// The pass. Most of the work is delegated to the per-loop
609 /// LoadEliminationForLoop class.
610 class LoopLoadElimination : public FunctionPass {
611 public:
612  static char ID;
613 
614  LoopLoadElimination() : FunctionPass(ID) {
616  }
617 
618  bool runOnFunction(Function &F) override {
619  if (skipFunction(F))
620  return false;
621 
622  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
623  auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
624  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
625 
626  // Process each loop nest in the function.
628  F, LI, DT,
629  [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
630  }
631 
632  void getAnalysisUsage(AnalysisUsage &AU) const override {
641  }
642 };
643 
644 } // end anonymous namespace
645 
647 
648 static const char LLE_name[] = "Loop Load Elimination";
649 
650 INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
655 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
656 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
657 
659  return new LoopLoadElimination();
660 }
661 
664  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
665  auto &LI = AM.getResult<LoopAnalysis>(F);
666  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
667  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
668  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
669  auto &AA = AM.getResult<AAManager>(F);
670  auto &AC = AM.getResult<AssumptionAnalysis>(F);
671 
672  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
673  bool Changed = eliminateLoadsAcrossLoops(
674  F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
675  LoopStandardAnalysisResults AR = {AA, AC, DT, LI,
676  SE, TLI, TTI, nullptr};
677  return LAM.getResult<LoopAccessAnalysis>(L, AR);
678  });
679 
680  if (!Changed)
681  return PreservedAnalyses::all();
682 
684  return PA;
685 }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
uint64_t CallInst * C
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, function_ref< const LoopAccessInfo &(Loop &)> GetLAI)
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
This class represents lattice values for constants.
Definition: AllocatorList.h:24
This is the interface for a simple mod/ref and alias analysis over globals.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1233
static cl::opt< unsigned > LoadElimSCEVCheckThreshold("loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Load Elimination"))
const RuntimePointerChecking * getRuntimePointerChecking() const
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
Analysis pass providing the TargetTransformInfo.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
STATISTIC(NumFunctions, "Total number of functions")
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:168
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
static bool isLoadConditional(LoadInst *Load, Loop *L)
Return true if the load is not executed on all paths in the loop.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
static const char LLE_name[]
void getLoopLatches(SmallVectorImpl< BlockT *> &LoopLatches) const
Return all loop latch blocks of this loop.
Definition: LoopInfo.h:304
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:945
This file implements a class to represent arbitrary precision integral constant values and operations...
BlockT * getHeader() const
Definition: LoopInfo.h:100
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
This header provides classes for managing per-loop analyses.
void initializeLoopLoadEliminationPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:321
#define LLE_OPTION
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This analysis provides dependence information for the memory accesses of a loop.
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const Instruction & front() const
Definition: BasicBlock.h:281
A manager for alias analyses.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:285
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
void setAliasChecks(SmallVector< RuntimePointerChecking::PointerCheck, 4 > Checks)
Sets the runtime alias checks for versioning the loop.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
char & LoopSimplifyID
A function analysis which provides an AssumptionCache.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
This header defines the LoopLoadEliminationPass object.
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:299
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
FunctionPass * createLoopLoadEliminationPass()
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
Drive the analysis of memory accesses in the loop.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
Class for arbitrary precision integers.
Definition: APInt.h:70
#define SEE(c)
Definition: regcomp.c:253
This class uses information about analyze scalars to rewrite expressions in canonical form...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
Type * getPointerOperandType() const
Definition: Instructions.h:288
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2039
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1268
bool empty() const
Definition: LoopInfo.h:146
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > CheckPerElim("runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), cl::init(1))
LLVM Value Representation.
Definition: Value.h:73
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:970
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
This pass exposes codegen information to IR-level passes.
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1179
This header defines various interfaces for pass management in LLVM.
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, DominatorTree *DT)
Check if the store dominates all latches, so as long as there is no intervening store this value will...
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
Value * getPointerOperand()
Definition: Instructions.h:413
const BasicBlock * getParent() const
Definition: Instruction.h:67
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038