LLVM  8.0.1
LoopDistribute.cpp
Go to the documentation of this file.
1 //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Loop Distribution Pass. Its main focus is to
11 // distribute loops that cannot be vectorized due to dependence cycles. It
12 // tries to isolate the offending dependences into a new loop allowing
13 // vectorization of the remaining parts.
14 //
15 // For dependence analysis, the pass uses the LoopVectorizer's
16 // LoopAccessAnalysis. Because this analysis presumes no change in the order of
17 // memory operations, special care is taken to preserve the lexical order of
18 // these operations.
19 //
20 // Similarly to the Vectorizer, the pass also supports loop versioning to
21 // run-time disambiguate potentially overlapping arrays.
22 //
23 //===----------------------------------------------------------------------===//
24 
26 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/Optional.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Twine.h"
42 #include "llvm/Analysis/LoopInfo.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DiagnosticInfo.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/IR/PassManager.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/Pass.h"
60 #include "llvm/Support/Casting.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Transforms/Scalar.h"
70 #include <cassert>
71 #include <functional>
72 #include <list>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define LDIST_NAME "loop-distribute"
79 #define DEBUG_TYPE LDIST_NAME
80 
81 /// @{
82 /// Metadata attribute names
83 static const char *const LLVMLoopDistributeFollowupAll =
84  "llvm.loop.distribute.followup_all";
85 static const char *const LLVMLoopDistributeFollowupCoincident =
86  "llvm.loop.distribute.followup_coincident";
87 static const char *const LLVMLoopDistributeFollowupSequential =
88  "llvm.loop.distribute.followup_sequential";
89 static const char *const LLVMLoopDistributeFollowupFallback =
90  "llvm.loop.distribute.followup_fallback";
91 /// @}
92 
93 static cl::opt<bool>
94  LDistVerify("loop-distribute-verify", cl::Hidden,
95  cl::desc("Turn on DominatorTree and LoopInfo verification "
96  "after Loop Distribution"),
97  cl::init(false));
98 
100  "loop-distribute-non-if-convertible", cl::Hidden,
101  cl::desc("Whether to distribute into a loop that may not be "
102  "if-convertible by the loop vectorizer"),
103  cl::init(false));
104 
106  "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
107  cl::desc("The maximum number of SCEV checks allowed for Loop "
108  "Distribution"));
109 
111  "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
112  cl::Hidden,
113  cl::desc(
114  "The maximum number of SCEV checks allowed for Loop "
115  "Distribution for loop marked with #pragma loop distribute(enable)"));
116 
118  "enable-loop-distribute", cl::Hidden,
119  cl::desc("Enable the new, experimental LoopDistribution Pass"),
120  cl::init(false));
121 
122 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
123 
124 namespace {
125 
126 /// Maintains the set of instructions of the loop for a partition before
127 /// cloning. After cloning, it hosts the new loop.
128 class InstPartition {
129  using InstructionSet = SmallPtrSet<Instruction *, 8>;
130 
131 public:
132  InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
133  : DepCycle(DepCycle), OrigLoop(L) {
134  Set.insert(I);
135  }
136 
137  /// Returns whether this partition contains a dependence cycle.
138  bool hasDepCycle() const { return DepCycle; }
139 
140  /// Adds an instruction to this partition.
141  void add(Instruction *I) { Set.insert(I); }
142 
143  /// Collection accessors.
144  InstructionSet::iterator begin() { return Set.begin(); }
145  InstructionSet::iterator end() { return Set.end(); }
146  InstructionSet::const_iterator begin() const { return Set.begin(); }
147  InstructionSet::const_iterator end() const { return Set.end(); }
148  bool empty() const { return Set.empty(); }
149 
150  /// Moves this partition into \p Other. This partition becomes empty
151  /// after this.
152  void moveTo(InstPartition &Other) {
153  Other.Set.insert(Set.begin(), Set.end());
154  Set.clear();
155  Other.DepCycle |= DepCycle;
156  }
157 
158  /// Populates the partition with a transitive closure of all the
159  /// instructions that the seeded instructions dependent on.
160  void populateUsedSet() {
161  // FIXME: We currently don't use control-dependence but simply include all
162  // blocks (possibly empty at the end) and let simplifycfg mostly clean this
163  // up.
164  for (auto *B : OrigLoop->getBlocks())
165  Set.insert(B->getTerminator());
166 
167  // Follow the use-def chains to form a transitive closure of all the
168  // instructions that the originally seeded instructions depend on.
169  SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
170  while (!Worklist.empty()) {
171  Instruction *I = Worklist.pop_back_val();
172  // Insert instructions from the loop that we depend on.
173  for (Value *V : I->operand_values()) {
174  auto *I = dyn_cast<Instruction>(V);
175  if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
176  Worklist.push_back(I);
177  }
178  }
179  }
180 
181  /// Clones the original loop.
182  ///
183  /// Updates LoopInfo and DominatorTree using the information that block \p
184  /// LoopDomBB dominates the loop.
185  Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
186  unsigned Index, LoopInfo *LI,
187  DominatorTree *DT) {
188  ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
189  VMap, Twine(".ldist") + Twine(Index),
190  LI, DT, ClonedLoopBlocks);
191  return ClonedLoop;
192  }
193 
194  /// The cloned loop. If this partition is mapped to the original loop,
195  /// this is null.
196  const Loop *getClonedLoop() const { return ClonedLoop; }
197 
198  /// Returns the loop where this partition ends up after distribution.
199  /// If this partition is mapped to the original loop then use the block from
200  /// the loop.
201  Loop *getDistributedLoop() const {
202  return ClonedLoop ? ClonedLoop : OrigLoop;
203  }
204 
205  /// The VMap that is populated by cloning and then used in
206  /// remapinstruction to remap the cloned instructions.
207  ValueToValueMapTy &getVMap() { return VMap; }
208 
209  /// Remaps the cloned instructions using VMap.
210  void remapInstructions() {
211  remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
212  }
213 
214  /// Based on the set of instructions selected for this partition,
215  /// removes the unnecessary ones.
216  void removeUnusedInsts() {
218 
219  for (auto *Block : OrigLoop->getBlocks())
220  for (auto &Inst : *Block)
221  if (!Set.count(&Inst)) {
222  Instruction *NewInst = &Inst;
223  if (!VMap.empty())
224  NewInst = cast<Instruction>(VMap[NewInst]);
225 
226  assert(!isa<BranchInst>(NewInst) &&
227  "Branches are marked used early on");
228  Unused.push_back(NewInst);
229  }
230 
231  // Delete the instructions backwards, as it has a reduced likelihood of
232  // having to update as many def-use and use-def chains.
233  for (auto *Inst : reverse(Unused)) {
234  if (!Inst->use_empty())
235  Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
236  Inst->eraseFromParent();
237  }
238  }
239 
240  void print() const {
241  if (DepCycle)
242  dbgs() << " (cycle)\n";
243  for (auto *I : Set)
244  // Prefix with the block name.
245  dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
246  }
247 
248  void printBlocks() const {
249  for (auto *BB : getDistributedLoop()->getBlocks())
250  dbgs() << *BB;
251  }
252 
253 private:
254  /// Instructions from OrigLoop selected for this partition.
255  InstructionSet Set;
256 
257  /// Whether this partition contains a dependence cycle.
258  bool DepCycle;
259 
260  /// The original loop.
261  Loop *OrigLoop;
262 
263  /// The cloned loop. If this partition is mapped to the original loop,
264  /// this is null.
265  Loop *ClonedLoop = nullptr;
266 
267  /// The blocks of ClonedLoop including the preheader. If this
268  /// partition is mapped to the original loop, this is empty.
269  SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
270 
271  /// These gets populated once the set of instructions have been
272  /// finalized. If this partition is mapped to the original loop, these are not
273  /// set.
274  ValueToValueMapTy VMap;
275 };
276 
277 /// Holds the set of Partitions. It populates them, merges them and then
278 /// clones the loops.
279 class InstPartitionContainer {
280  using InstToPartitionIdT = DenseMap<Instruction *, int>;
281 
282 public:
283  InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
284  : L(L), LI(LI), DT(DT) {}
285 
286  /// Returns the number of partitions.
287  unsigned getSize() const { return PartitionContainer.size(); }
288 
289  /// Adds \p Inst into the current partition if that is marked to
290  /// contain cycles. Otherwise start a new partition for it.
291  void addToCyclicPartition(Instruction *Inst) {
292  // If the current partition is non-cyclic. Start a new one.
293  if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
294  PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
295  else
296  PartitionContainer.back().add(Inst);
297  }
298 
299  /// Adds \p Inst into a partition that is not marked to contain
300  /// dependence cycles.
301  ///
302  // Initially we isolate memory instructions into as many partitions as
303  // possible, then later we may merge them back together.
304  void addToNewNonCyclicPartition(Instruction *Inst) {
305  PartitionContainer.emplace_back(Inst, L);
306  }
307 
308  /// Merges adjacent non-cyclic partitions.
309  ///
310  /// The idea is that we currently only want to isolate the non-vectorizable
311  /// partition. We could later allow more distribution among these partition
312  /// too.
313  void mergeAdjacentNonCyclic() {
314  mergeAdjacentPartitionsIf(
315  [](const InstPartition *P) { return !P->hasDepCycle(); });
316  }
317 
318  /// If a partition contains only conditional stores, we won't vectorize
319  /// it. Try to merge it with a previous cyclic partition.
320  void mergeNonIfConvertible() {
321  mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
322  if (Partition->hasDepCycle())
323  return true;
324 
325  // Now, check if all stores are conditional in this partition.
326  bool seenStore = false;
327 
328  for (auto *Inst : *Partition)
329  if (isa<StoreInst>(Inst)) {
330  seenStore = true;
332  return false;
333  }
334  return seenStore;
335  });
336  }
337 
338  /// Merges the partitions according to various heuristics.
339  void mergeBeforePopulating() {
340  mergeAdjacentNonCyclic();
342  mergeNonIfConvertible();
343  }
344 
345  /// Merges partitions in order to ensure that no loads are duplicated.
346  ///
347  /// We can't duplicate loads because that could potentially reorder them.
348  /// LoopAccessAnalysis provides dependency information with the context that
349  /// the order of memory operation is preserved.
350  ///
351  /// Return if any partitions were merged.
352  bool mergeToAvoidDuplicatedLoads() {
353  using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
354  using ToBeMergedT = EquivalenceClasses<InstPartition *>;
355 
356  LoadToPartitionT LoadToPartition;
357  ToBeMergedT ToBeMerged;
358 
359  // Step through the partitions and create equivalence between partitions
360  // that contain the same load. Also put partitions in between them in the
361  // same equivalence class to avoid reordering of memory operations.
362  for (PartitionContainerT::iterator I = PartitionContainer.begin(),
363  E = PartitionContainer.end();
364  I != E; ++I) {
365  auto *PartI = &*I;
366 
367  // If a load occurs in two partitions PartI and PartJ, merge all
368  // partitions (PartI, PartJ] into PartI.
369  for (Instruction *Inst : *PartI)
370  if (isa<LoadInst>(Inst)) {
371  bool NewElt;
372  LoadToPartitionT::iterator LoadToPart;
373 
374  std::tie(LoadToPart, NewElt) =
375  LoadToPartition.insert(std::make_pair(Inst, PartI));
376  if (!NewElt) {
377  LLVM_DEBUG(dbgs()
378  << "Merging partitions due to this load in multiple "
379  << "partitions: " << PartI << ", " << LoadToPart->second
380  << "\n"
381  << *Inst << "\n");
382 
383  auto PartJ = I;
384  do {
385  --PartJ;
386  ToBeMerged.unionSets(PartI, &*PartJ);
387  } while (&*PartJ != LoadToPart->second);
388  }
389  }
390  }
391  if (ToBeMerged.empty())
392  return false;
393 
394  // Merge the member of an equivalence class into its class leader. This
395  // makes the members empty.
396  for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
397  I != E; ++I) {
398  if (!I->isLeader())
399  continue;
400 
401  auto PartI = I->getData();
402  for (auto PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
403  ToBeMerged.member_end())) {
404  PartJ->moveTo(*PartI);
405  }
406  }
407 
408  // Remove the empty partitions.
409  PartitionContainer.remove_if(
410  [](const InstPartition &P) { return P.empty(); });
411 
412  return true;
413  }
414 
415  /// Sets up the mapping between instructions to partitions. If the
416  /// instruction is duplicated across multiple partitions, set the entry to -1.
417  void setupPartitionIdOnInstructions() {
418  int PartitionID = 0;
419  for (const auto &Partition : PartitionContainer) {
420  for (Instruction *Inst : Partition) {
421  bool NewElt;
422  InstToPartitionIdT::iterator Iter;
423 
424  std::tie(Iter, NewElt) =
425  InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
426  if (!NewElt)
427  Iter->second = -1;
428  }
429  ++PartitionID;
430  }
431  }
432 
433  /// Populates the partition with everything that the seeding
434  /// instructions require.
435  void populateUsedSet() {
436  for (auto &P : PartitionContainer)
437  P.populateUsedSet();
438  }
439 
440  /// This performs the main chunk of the work of cloning the loops for
441  /// the partitions.
442  void cloneLoops() {
443  BasicBlock *OrigPH = L->getLoopPreheader();
444  // At this point the predecessor of the preheader is either the memcheck
445  // block or the top part of the original preheader.
446  BasicBlock *Pred = OrigPH->getSinglePredecessor();
447  assert(Pred && "Preheader does not have a single predecessor");
448  BasicBlock *ExitBlock = L->getExitBlock();
449  assert(ExitBlock && "No single exit block");
450  Loop *NewLoop;
451 
452  assert(!PartitionContainer.empty() && "at least two partitions expected");
453  // We're cloning the preheader along with the loop so we already made sure
454  // it was empty.
455  assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
456  "preheader not empty");
457 
458  // Preserve the original loop ID for use after the transformation.
459  MDNode *OrigLoopID = L->getLoopID();
460 
461  // Create a loop for each partition except the last. Clone the original
462  // loop before PH along with adding a preheader for the cloned loop. Then
463  // update PH to point to the newly added preheader.
464  BasicBlock *TopPH = OrigPH;
465  unsigned Index = getSize() - 1;
466  for (auto I = std::next(PartitionContainer.rbegin()),
467  E = PartitionContainer.rend();
468  I != E; ++I, --Index, TopPH = NewLoop->getLoopPreheader()) {
469  auto *Part = &*I;
470 
471  NewLoop = Part->cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
472 
473  Part->getVMap()[ExitBlock] = TopPH;
474  Part->remapInstructions();
475  setNewLoopID(OrigLoopID, Part);
476  }
477  Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
478 
479  // Also set a new loop ID for the last loop.
480  setNewLoopID(OrigLoopID, &PartitionContainer.back());
481 
482  // Now go in forward order and update the immediate dominator for the
483  // preheaders with the exiting block of the previous loop. Dominance
484  // within the loop is updated in cloneLoopWithPreheader.
485  for (auto Curr = PartitionContainer.cbegin(),
486  Next = std::next(PartitionContainer.cbegin()),
487  E = PartitionContainer.cend();
488  Next != E; ++Curr, ++Next)
490  Next->getDistributedLoop()->getLoopPreheader(),
491  Curr->getDistributedLoop()->getExitingBlock());
492  }
493 
494  /// Removes the dead instructions from the cloned loops.
495  void removeUnusedInsts() {
496  for (auto &Partition : PartitionContainer)
497  Partition.removeUnusedInsts();
498  }
499 
500  /// For each memory pointer, it computes the partitionId the pointer is
501  /// used in.
502  ///
503  /// This returns an array of int where the I-th entry corresponds to I-th
504  /// entry in LAI.getRuntimePointerCheck(). If the pointer is used in multiple
505  /// partitions its entry is set to -1.
507  computePartitionSetForPointers(const LoopAccessInfo &LAI) {
508  const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
509 
510  unsigned N = RtPtrCheck->Pointers.size();
511  SmallVector<int, 8> PtrToPartitions(N);
512  for (unsigned I = 0; I < N; ++I) {
513  Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
514  auto Instructions =
515  LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
516 
517  int &Partition = PtrToPartitions[I];
518  // First set it to uninitialized.
519  Partition = -2;
520  for (Instruction *Inst : Instructions) {
521  // Note that this could be -1 if Inst is duplicated across multiple
522  // partitions.
523  int ThisPartition = this->InstToPartitionId[Inst];
524  if (Partition == -2)
525  Partition = ThisPartition;
526  // -1 means belonging to multiple partitions.
527  else if (Partition == -1)
528  break;
529  else if (Partition != (int)ThisPartition)
530  Partition = -1;
531  }
532  assert(Partition != -2 && "Pointer not belonging to any partition");
533  }
534 
535  return PtrToPartitions;
536  }
537 
538  void print(raw_ostream &OS) const {
539  unsigned Index = 0;
540  for (const auto &P : PartitionContainer) {
541  OS << "Partition " << Index++ << " (" << &P << "):\n";
542  P.print();
543  }
544  }
545 
546  void dump() const { print(dbgs()); }
547 
548 #ifndef NDEBUG
549  friend raw_ostream &operator<<(raw_ostream &OS,
550  const InstPartitionContainer &Partitions) {
551  Partitions.print(OS);
552  return OS;
553  }
554 #endif
555 
556  void printBlocks() const {
557  unsigned Index = 0;
558  for (const auto &P : PartitionContainer) {
559  dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
560  P.printBlocks();
561  }
562  }
563 
564 private:
565  using PartitionContainerT = std::list<InstPartition>;
566 
567  /// List of partitions.
568  PartitionContainerT PartitionContainer;
569 
570  /// Mapping from Instruction to partition Id. If the instruction
571  /// belongs to multiple partitions the entry contains -1.
572  InstToPartitionIdT InstToPartitionId;
573 
574  Loop *L;
575  LoopInfo *LI;
576  DominatorTree *DT;
577 
578  /// The control structure to merge adjacent partitions if both satisfy
579  /// the \p Predicate.
580  template <class UnaryPredicate>
581  void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
582  InstPartition *PrevMatch = nullptr;
583  for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
584  auto DoesMatch = Predicate(&*I);
585  if (PrevMatch == nullptr && DoesMatch) {
586  PrevMatch = &*I;
587  ++I;
588  } else if (PrevMatch != nullptr && DoesMatch) {
589  I->moveTo(*PrevMatch);
590  I = PartitionContainer.erase(I);
591  } else {
592  PrevMatch = nullptr;
593  ++I;
594  }
595  }
596  }
597 
598  /// Assign new LoopIDs for the partition's cloned loop.
599  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
601  OrigLoopID,
603  Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
604  : LLVMLoopDistributeFollowupCoincident});
605  if (PartitionID.hasValue()) {
606  Loop *NewLoop = Part->getDistributedLoop();
607  NewLoop->setLoopID(PartitionID.getValue());
608  }
609  }
610 };
611 
612 /// For each memory instruction, this class maintains difference of the
613 /// number of unsafe dependences that start out from this instruction minus
614 /// those that end here.
615 ///
616 /// By traversing the memory instructions in program order and accumulating this
617 /// number, we know whether any unsafe dependence crosses over a program point.
618 class MemoryInstructionDependences {
620 
621 public:
622  struct Entry {
623  Instruction *Inst;
624  unsigned NumUnsafeDependencesStartOrEnd = 0;
625 
626  Entry(Instruction *Inst) : Inst(Inst) {}
627  };
628 
629  using AccessesType = SmallVector<Entry, 8>;
630 
631  AccessesType::const_iterator begin() const { return Accesses.begin(); }
632  AccessesType::const_iterator end() const { return Accesses.end(); }
633 
634  MemoryInstructionDependences(
635  const SmallVectorImpl<Instruction *> &Instructions,
636  const SmallVectorImpl<Dependence> &Dependences) {
637  Accesses.append(Instructions.begin(), Instructions.end());
638 
639  LLVM_DEBUG(dbgs() << "Backward dependences:\n");
640  for (auto &Dep : Dependences)
641  if (Dep.isPossiblyBackward()) {
642  // Note that the designations source and destination follow the program
643  // order, i.e. source is always first. (The direction is given by the
644  // DepType.)
645  ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
646  --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
647 
648  LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
649  }
650  }
651 
652 private:
653  AccessesType Accesses;
654 };
655 
656 /// The actual class performing the per-loop work.
657 class LoopDistributeForLoop {
658 public:
659  LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
661  : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
662  setForced();
663  }
664 
665  /// Try to distribute an inner-most loop.
666  bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
667  assert(L->empty() && "Only process inner loops.");
668 
669  LLVM_DEBUG(dbgs() << "\nLDist: In \""
670  << L->getHeader()->getParent()->getName()
671  << "\" checking " << *L << "\n");
672 
673  if (!L->getExitBlock())
674  return fail("MultipleExitBlocks", "multiple exit blocks");
675  if (!L->isLoopSimplifyForm())
676  return fail("NotLoopSimplifyForm",
677  "loop is not in loop-simplify form");
678 
679  BasicBlock *PH = L->getLoopPreheader();
680 
681  // LAA will check that we only have a single exiting block.
682  LAI = &GetLAA(*L);
683 
684  // Currently, we only distribute to isolate the part of the loop with
685  // dependence cycles to enable partial vectorization.
686  if (LAI->canVectorizeMemory())
687  return fail("MemOpsCanBeVectorized",
688  "memory operations are safe for vectorization");
689 
690  auto *Dependences = LAI->getDepChecker().getDependences();
691  if (!Dependences || Dependences->empty())
692  return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
693 
694  InstPartitionContainer Partitions(L, LI, DT);
695 
696  // First, go through each memory operation and assign them to consecutive
697  // partitions (the order of partitions follows program order). Put those
698  // with unsafe dependences into "cyclic" partition otherwise put each store
699  // in its own "non-cyclic" partition (we'll merge these later).
700  //
701  // Note that a memory operation (e.g. Load2 below) at a program point that
702  // has an unsafe dependence (Store3->Load1) spanning over it must be
703  // included in the same cyclic partition as the dependent operations. This
704  // is to preserve the original program order after distribution. E.g.:
705  //
706  // NumUnsafeDependencesStartOrEnd NumUnsafeDependencesActive
707  // Load1 -. 1 0->1
708  // Load2 | /Unsafe/ 0 1
709  // Store3 -' -1 1->0
710  // Load4 0 0
711  //
712  // NumUnsafeDependencesActive > 0 indicates this situation and in this case
713  // we just keep assigning to the same cyclic partition until
714  // NumUnsafeDependencesActive reaches 0.
715  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
716  MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
717  *Dependences);
718 
719  int NumUnsafeDependencesActive = 0;
720  for (auto &InstDep : MID) {
721  Instruction *I = InstDep.Inst;
722  // We update NumUnsafeDependencesActive post-instruction, catch the
723  // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
724  if (NumUnsafeDependencesActive ||
725  InstDep.NumUnsafeDependencesStartOrEnd > 0)
726  Partitions.addToCyclicPartition(I);
727  else
728  Partitions.addToNewNonCyclicPartition(I);
729  NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
730  assert(NumUnsafeDependencesActive >= 0 &&
731  "Negative number of dependences active");
732  }
733 
734  // Add partitions for values used outside. These partitions can be out of
735  // order from the original program order. This is OK because if the
736  // partition uses a load we will merge this partition with the original
737  // partition of the load that we set up in the previous loop (see
738  // mergeToAvoidDuplicatedLoads).
739  auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
740  for (auto *Inst : DefsUsedOutside)
741  Partitions.addToNewNonCyclicPartition(Inst);
742 
743  LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
744  if (Partitions.getSize() < 2)
745  return fail("CantIsolateUnsafeDeps",
746  "cannot isolate unsafe dependencies");
747 
748  // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
749  // should be able to vectorize these together.
750  Partitions.mergeBeforePopulating();
751  LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
752  if (Partitions.getSize() < 2)
753  return fail("CantIsolateUnsafeDeps",
754  "cannot isolate unsafe dependencies");
755 
756  // Now, populate the partitions with non-memory operations.
757  Partitions.populateUsedSet();
758  LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
759 
760  // In order to preserve original lexical order for loads, keep them in the
761  // partition that we set up in the MemoryInstructionDependences loop.
762  if (Partitions.mergeToAvoidDuplicatedLoads()) {
763  LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
764  << Partitions);
765  if (Partitions.getSize() < 2)
766  return fail("CantIsolateUnsafeDeps",
767  "cannot isolate unsafe dependencies");
768  }
769 
770  // Don't distribute the loop if we need too many SCEV run-time checks.
771  const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
772  if (Pred.getComplexity() > (IsForced.getValueOr(false)
775  return fail("TooManySCEVRuntimeChecks",
776  "too many SCEV run-time checks needed.\n");
777 
778  if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
779  return fail("HeuristicDisabled", "distribution heuristic disabled");
780 
781  LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
782  // We're done forming the partitions set up the reverse mapping from
783  // instructions to partitions.
784  Partitions.setupPartitionIdOnInstructions();
785 
786  // To keep things simple have an empty preheader before we version or clone
787  // the loop. (Also split if this has no predecessor, i.e. entry, because we
788  // rely on PH having a predecessor.)
789  if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
790  SplitBlock(PH, PH->getTerminator(), DT, LI);
791 
792  // If we need run-time checks, version the loop now.
793  auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
794  const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
795  const auto &AllChecks = RtPtrChecking->getChecks();
796  auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
797  RtPtrChecking);
798 
799  if (!Pred.isAlwaysTrue() || !Checks.empty()) {
800  MDNode *OrigLoopID = L->getLoopID();
801 
802  LLVM_DEBUG(dbgs() << "\nPointers:\n");
804  LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
805  LVer.setAliasChecks(std::move(Checks));
806  LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
807  LVer.versionLoop(DefsUsedOutside);
808  LVer.annotateLoopWithNoAlias();
809 
810  // The unversioned loop will not be changed, so we inherit all attributes
811  // from the original loop, but remove the loop distribution metadata to
812  // avoid to distribute it again.
813  MDNode *UnversionedLoopID =
814  makeFollowupLoopID(OrigLoopID,
816  LLVMLoopDistributeFollowupFallback},
817  "llvm.loop.distribute.", true)
818  .getValue();
819  LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
820  }
821 
822  // Create identical copies of the original loop for each partition and hook
823  // them up sequentially.
824  Partitions.cloneLoops();
825 
826  // Now, we remove the instruction from each loop that don't belong to that
827  // partition.
828  Partitions.removeUnusedInsts();
829  LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
830  LLVM_DEBUG(Partitions.printBlocks());
831 
832  if (LDistVerify) {
833  LI->verify(*DT);
835  }
836 
837  ++NumLoopsDistributed;
838  // Report the success.
839  ORE->emit([&]() {
840  return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
841  L->getHeader())
842  << "distributed loop";
843  });
844  return true;
845  }
846 
847  /// Provide diagnostics then \return with false.
848  bool fail(StringRef RemarkName, StringRef Message) {
849  LLVMContext &Ctx = F->getContext();
850  bool Forced = isForced().getValueOr(false);
851 
852  LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
853 
854  // With Rpass-missed report that distribution failed.
855  ORE->emit([&]() {
856  return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
857  L->getStartLoc(), L->getHeader())
858  << "loop not distributed: use -Rpass-analysis=loop-distribute for "
859  "more "
860  "info";
861  });
862 
863  // With Rpass-analysis report why. This is on by default if distribution
864  // was requested explicitly.
865  ORE->emit(OptimizationRemarkAnalysis(
867  RemarkName, L->getStartLoc(), L->getHeader())
868  << "loop not distributed: " << Message);
869 
870  // Also issue a warning if distribution was requested explicitly but it
871  // failed.
872  if (Forced)
874  *F, L->getStartLoc(), "loop not distributed: failed "
875  "explicitly specified loop distribution"));
876 
877  return false;
878  }
879 
880  /// Return if distribution forced to be enabled/disabled for the loop.
881  ///
882  /// If the optional has a value, it indicates whether distribution was forced
883  /// to be enabled (true) or disabled (false). If the optional has no value
884  /// distribution was not forced either way.
885  const Optional<bool> &isForced() const { return IsForced; }
886 
887 private:
888  /// Filter out checks between pointers from the same partition.
889  ///
890  /// \p PtrToPartition contains the partition number for pointers. Partition
891  /// number -1 means that the pointer is used in multiple partitions. In this
892  /// case we can't safely omit the check.
894  includeOnlyCrossPartitionChecks(
896  const SmallVectorImpl<int> &PtrToPartition,
897  const RuntimePointerChecking *RtPtrChecking) {
899 
900  copy_if(AllChecks, std::back_inserter(Checks),
902  for (unsigned PtrIdx1 : Check.first->Members)
903  for (unsigned PtrIdx2 : Check.second->Members)
904  // Only include this check if there is a pair of pointers
905  // that require checking and the pointers fall into
906  // separate partitions.
907  //
908  // (Note that we already know at this point that the two
909  // pointer groups need checking but it doesn't follow
910  // that each pair of pointers within the two groups need
911  // checking as well.
912  //
913  // In other words we don't want to include a check just
914  // because there is a pair of pointers between the two
915  // pointer groups that require checks and a different
916  // pair whose pointers fall into different partitions.)
917  if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
919  PtrToPartition, PtrIdx1, PtrIdx2))
920  return true;
921  return false;
922  });
923 
924  return Checks;
925  }
926 
927  /// Check whether the loop metadata is forcing distribution to be
928  /// enabled/disabled.
929  void setForced() {
931  findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
932  if (!Value)
933  return;
934 
935  const MDOperand *Op = *Value;
936  assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
937  IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
938  }
939 
940  Loop *L;
941  Function *F;
942 
943  // Analyses used.
944  LoopInfo *LI;
945  const LoopAccessInfo *LAI = nullptr;
946  DominatorTree *DT;
947  ScalarEvolution *SE;
949 
950  /// Indicates whether distribution is forced to be enabled/disabled for
951  /// the loop.
952  ///
953  /// If the optional has a value, it indicates whether distribution was forced
954  /// to be enabled (true) or disabled (false). If the optional has no value
955  /// distribution was not forced either way.
956  Optional<bool> IsForced;
957 };
958 
959 } // end anonymous namespace
960 
961 /// Shared implementation between new and old PMs.
962 static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
964  std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
965  // Build up a worklist of inner-loops to vectorize. This is necessary as the
966  // act of distributing a loop creates new loops and can invalidate iterators
967  // across the loops.
968  SmallVector<Loop *, 8> Worklist;
969 
970  for (Loop *TopLevelLoop : *LI)
971  for (Loop *L : depth_first(TopLevelLoop))
972  // We only handle inner-most loops.
973  if (L->empty())
974  Worklist.push_back(L);
975 
976  // Now walk the identified inner loops.
977  bool Changed = false;
978  for (Loop *L : Worklist) {
979  LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
980 
981  // If distribution was forced for the specific loop to be
982  // enabled/disabled, follow that. Otherwise use the global flag.
983  if (LDL.isForced().getValueOr(EnableLoopDistribute))
984  Changed |= LDL.processLoop(GetLAA);
985  }
986 
987  // Process each loop nest in the function.
988  return Changed;
989 }
990 
991 namespace {
992 
993 /// The pass class.
994 class LoopDistributeLegacy : public FunctionPass {
995 public:
996  static char ID;
997 
998  LoopDistributeLegacy() : FunctionPass(ID) {
999  // The default is set by the caller.
1001  }
1002 
1003  bool runOnFunction(Function &F) override {
1004  if (skipFunction(F))
1005  return false;
1006 
1007  auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1008  auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
1009  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1010  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1011  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1012  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1013  [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1014 
1015  return runImpl(F, LI, DT, SE, ORE, GetLAA);
1016  }
1017 
1018  void getAnalysisUsage(AnalysisUsage &AU) const override {
1027  }
1028 };
1029 
1030 } // end anonymous namespace
1031 
1034  auto &LI = AM.getResult<LoopAnalysis>(F);
1035  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1036  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1038 
1039  // We don't directly need these analyses but they're required for loop
1040  // analyses so provide them below.
1041  auto &AA = AM.getResult<AAManager>(F);
1042  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1043  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1044  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1045 
1046  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
1047  std::function<const LoopAccessInfo &(Loop &)> GetLAA =
1048  [&](Loop &L) -> const LoopAccessInfo & {
1049  LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
1050  return LAM.getResult<LoopAccessAnalysis>(L, AR);
1051  };
1052 
1053  bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
1054  if (!Changed)
1055  return PreservedAnalyses::all();
1056  PreservedAnalyses PA;
1057  PA.preserve<LoopAnalysis>();
1059  PA.preserve<GlobalsAA>();
1060  return PA;
1061 }
1062 
1064 
1065 static const char ldist_name[] = "Loop Distribution";
1066 
1067 INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
1068  false)
1074 INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
1075 
1076 FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
static bool Check(DecodeStatus &Out, DecodeStatus In)
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:711
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
#define LDIST_NAME
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
This class represents lattice values for constants.
Definition: AllocatorList.h:24
This is the interface for a simple mod/ref and alias analysis over globals.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1233
static cl::opt< unsigned > PragmaDistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold-with-pragma", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution for loop marked with #pragma loop distribute(enable)"))
const RuntimePointerChecking * getRuntimePointerChecking() const
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
static cl::opt< bool > LDistVerify("loop-distribute-verify", cl::Hidden, cl::desc("Turn on DominatorTree and LoopInfo verification " "after Loop Distribution"), cl::init(false))
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:864
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: LoopInfoImpl.h:704
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
static const char *const LLVMLoopDistributeFollowupSequential
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Diagnostic information for optimization failures.
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:250
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:945
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:114
BlockT * getHeader() const
Definition: LoopInfo.h:100
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
static const char *const LLVMLoopDistributeFollowupCoincident
static const char *const LLVMLoopDistributeFollowupFallback
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:239
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:161
static cl::opt< bool > DistributeNonIfConvertible("loop-distribute-non-if-convertible", cl::Hidden, cl::desc("Whether to distribute into a loop that may not be " "if-convertible by the loop vectorizer"), cl::init(false))
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, std::function< const LoopAccessInfo &(Loop &)> &GetLAA)
Shared implementation between new and old PMs.
static bool runOnFunction(Function &F, bool PostInlining)
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This analysis provides dependence information for the memory accesses of a loop.
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) FunctionPass *llvm
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
This file contains the declarations for the subclasses of Constant, which represent the different fla...
A manager for alias analyses.
Diagnostic information for applied optimization remarks.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Represent the analysis usage information of a pass.
Analysis pass providing a never-invalidated alias analysis result.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:76
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:365
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static cl::opt< unsigned > DistributeSCEVCheckThreshold("loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed for Loop " "Distribution"))
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:210
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isAlwaysTrue() const override
Implementation of the SCEVPredicate interface.
const SCEVUnionPredicate & getUnionPredicate() const
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:27
void initializeLoopDistributeLegacyPass(PassRegistry &)
Drive the analysis of memory accesses in the loop.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can&#39;t ove...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:246
static const char *const LLVMLoopDistributeFollowupAll
static const char ldist_name[]
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
Analysis pass that exposes the ScalarEvolution for a function.
unsigned getComplexity() const override
We estimate the complexity of a union predicate as the size number of predicates in the union...
bool hasValue() const
Definition: Optional.h:165
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:215
This analysis provides dependence information for the memory accesses of a loop.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
Dependece between memory access instructions.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:195
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
iterator_range< value_op_iterator > operand_values()
Definition: User.h:262
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:175
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2039
bool empty() const
Definition: LoopInfo.h:146
Analysis pass providing the TargetLibraryInfo.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class represents a composition of other SCEV predicates, and is the class that most clients will...
static cl::opt< bool > EnableLoopDistribute("enable-loop-distribute", cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass"), cl::init(false))
LLVM Value Representation.
Definition: Value.h:73
OptimizationRemarkEmitter legacy analysis pass.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Loop * cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, Loop *OrigLoop, ValueToValueMapTy &VMap, const Twine &NameSuffix, LoopInfo *LI, DominatorTree *DT, SmallVectorImpl< BasicBlock *> &Blocks)
Clones a loop OrigLoop.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:970
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:327
const SmallVector< PointerCheck, 4 > & getChecks() const
Returns the checks that generateChecks created.
Dependence - This class represents a dependence between two memory memory references in a function...
#define LLVM_DEBUG(X)
Definition: Debug.h:123
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
The optimization diagnostic interface.
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles...
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
FunctionPass * createLoopDistributePass()
const BasicBlock * getParent() const
Definition: Instruction.h:67
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038