LLVM  8.0.1
LoopUnrollPass.cpp
Go to the documentation of this file.
1 //===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements a simple loop unroller. It works best when loops have
11 // been canonicalized by the -indvars pass, allowing it to determine the trip
12 // counts of loops easily.
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseMapInfo.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Analysis/LoopInfo.h"
30 #include "llvm/Analysis/LoopPass.h"
36 #include "llvm/IR/BasicBlock.h"
37 #include "llvm/IR/CFG.h"
38 #include "llvm/IR/Constant.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DiagnosticInfo.h"
41 #include "llvm/IR/Dominators.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Metadata.h"
47 #include "llvm/IR/PassManager.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
51 #include "llvm/Support/Debug.h"
54 #include "llvm/Transforms/Scalar.h"
56 #include "llvm/Transforms/Utils.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <limits>
64 #include <string>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "loop-unroll"
71 
72 static cl::opt<unsigned>
73  UnrollThreshold("unroll-threshold", cl::Hidden,
74  cl::desc("The cost threshold for loop unrolling"));
75 
77  "unroll-partial-threshold", cl::Hidden,
78  cl::desc("The cost threshold for partial loop unrolling"));
79 
81  "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
82  cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
83  "to the threshold when aggressively unrolling a loop due to the "
84  "dynamic cost savings. If completely unrolling a loop will reduce "
85  "the total runtime from X to Y, we boost the loop unroll "
86  "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
87  "X/Y). This limit avoids excessive code bloat."));
88 
90  "unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
91  cl::desc("Don't allow loop unrolling to simulate more than this number of"
92  "iterations when checking full unroll profitability"));
93 
95  "unroll-count", cl::Hidden,
96  cl::desc("Use this unroll count for all loops including those with "
97  "unroll_count pragma values, for testing purposes"));
98 
100  "unroll-max-count", cl::Hidden,
101  cl::desc("Set the max unroll count for partial and runtime unrolling, for"
102  "testing purposes"));
103 
105  "unroll-full-max-count", cl::Hidden,
106  cl::desc(
107  "Set the max unroll count for full unrolling, for testing purposes"));
108 
110  "unroll-peel-count", cl::Hidden,
111  cl::desc("Set the unroll peeling count, for testing purposes"));
112 
113 static cl::opt<bool>
114  UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
115  cl::desc("Allows loops to be partially unrolled until "
116  "-unroll-threshold loop size is reached."));
117 
119  "unroll-allow-remainder", cl::Hidden,
120  cl::desc("Allow generation of a loop remainder (extra iterations) "
121  "when unrolling a loop."));
122 
123 static cl::opt<bool>
124  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
125  cl::desc("Unroll loops with run-time trip counts"));
126 
128  "unroll-max-upperbound", cl::init(8), cl::Hidden,
129  cl::desc(
130  "The max of trip count upper bound that is considered in unrolling"));
131 
133  "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
134  cl::desc("Unrolled size limit for loops with an unroll(full) or "
135  "unroll_count pragma."));
136 
138  "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
139  cl::desc("If the runtime tripcount for the loop is lower than the "
140  "threshold, the loop is considered as flat and will be less "
141  "aggressively unrolled."));
142 
143 static cl::opt<bool>
144  UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
145  cl::desc("Allows loops to be peeled when the dynamic "
146  "trip count is known to be low."));
147 
149  "unroll-remainder", cl::Hidden,
150  cl::desc("Allow the loop remainder to be unrolled."));
151 
152 // This option isn't ever intended to be enabled, it serves to allow
153 // experiments to check the assumptions about when this kind of revisit is
154 // necessary.
156  "unroll-revisit-child-loops", cl::Hidden,
157  cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
158  "This shouldn't typically be needed as child loops (or their "
159  "clones) were already visited."));
160 
161 /// A magic value for use with the Threshold parameter to indicate
162 /// that the loop unroll should be performed regardless of how much
163 /// code expansion would result.
165 
166 /// Gather the various unrolling parameters based on the defaults, compiler
167 /// flags, TTI overrides and user specified parameters.
169  Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
170  Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
171  Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
172  Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
174 
175  // Set up the defaults
176  UP.Threshold = OptLevel > 2 ? 300 : 150;
177  UP.MaxPercentThresholdBoost = 400;
178  UP.OptSizeThreshold = 0;
179  UP.PartialThreshold = 150;
181  UP.Count = 0;
182  UP.PeelCount = 0;
186  UP.BEInsns = 2;
187  UP.Partial = false;
188  UP.Runtime = false;
189  UP.AllowRemainder = true;
190  UP.UnrollRemainder = false;
191  UP.AllowExpensiveTripCount = false;
192  UP.Force = false;
193  UP.UpperBound = false;
194  UP.AllowPeeling = true;
195  UP.UnrollAndJam = false;
197 
198  // Override with any target specific settings
199  TTI.getUnrollingPreferences(L, SE, UP);
200 
201  // Apply size attributes
202  if (L->getHeader()->getParent()->optForSize()) {
203  UP.Threshold = UP.OptSizeThreshold;
205  }
206 
207  // Apply any user values specified by cl::opt
208  if (UnrollThreshold.getNumOccurrences() > 0)
210  if (UnrollPartialThreshold.getNumOccurrences() > 0)
212  if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
214  if (UnrollMaxCount.getNumOccurrences() > 0)
216  if (UnrollFullMaxCount.getNumOccurrences() > 0)
218  if (UnrollPeelCount.getNumOccurrences() > 0)
220  if (UnrollAllowPartial.getNumOccurrences() > 0)
222  if (UnrollAllowRemainder.getNumOccurrences() > 0)
224  if (UnrollRuntime.getNumOccurrences() > 0)
225  UP.Runtime = UnrollRuntime;
226  if (UnrollMaxUpperBound == 0)
227  UP.UpperBound = false;
228  if (UnrollAllowPeeling.getNumOccurrences() > 0)
230  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
232 
233  // Apply user values provided by argument
234  if (UserThreshold.hasValue()) {
235  UP.Threshold = *UserThreshold;
236  UP.PartialThreshold = *UserThreshold;
237  }
238  if (UserCount.hasValue())
239  UP.Count = *UserCount;
240  if (UserAllowPartial.hasValue())
241  UP.Partial = *UserAllowPartial;
242  if (UserRuntime.hasValue())
243  UP.Runtime = *UserRuntime;
244  if (UserUpperBound.hasValue())
245  UP.UpperBound = *UserUpperBound;
246  if (UserAllowPeeling.hasValue())
247  UP.AllowPeeling = *UserAllowPeeling;
248 
249  return UP;
250 }
251 
252 namespace {
253 
254 /// A struct to densely store the state of an instruction after unrolling at
255 /// each iteration.
256 ///
257 /// This is designed to work like a tuple of <Instruction *, int> for the
258 /// purposes of hashing and lookup, but to be able to associate two boolean
259 /// states with each key.
260 struct UnrolledInstState {
261  Instruction *I;
262  int Iteration : 30;
263  unsigned IsFree : 1;
264  unsigned IsCounted : 1;
265 };
266 
267 /// Hashing and equality testing for a set of the instruction states.
268 struct UnrolledInstStateKeyInfo {
269  using PtrInfo = DenseMapInfo<Instruction *>;
271 
272  static inline UnrolledInstState getEmptyKey() {
273  return {PtrInfo::getEmptyKey(), 0, 0, 0};
274  }
275 
276  static inline UnrolledInstState getTombstoneKey() {
277  return {PtrInfo::getTombstoneKey(), 0, 0, 0};
278  }
279 
280  static inline unsigned getHashValue(const UnrolledInstState &S) {
281  return PairInfo::getHashValue({S.I, S.Iteration});
282  }
283 
284  static inline bool isEqual(const UnrolledInstState &LHS,
285  const UnrolledInstState &RHS) {
286  return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
287  }
288 };
289 
290 struct EstimatedUnrollCost {
291  /// The estimated cost after unrolling.
292  unsigned UnrolledCost;
293 
294  /// The estimated dynamic cost of executing the instructions in the
295  /// rolled form.
296  unsigned RolledDynamicCost;
297 };
298 
299 } // end anonymous namespace
300 
301 /// Figure out if the loop is worth full unrolling.
302 ///
303 /// Complete loop unrolling can make some loads constant, and we need to know
304 /// if that would expose any further optimization opportunities. This routine
305 /// estimates this optimization. It computes cost of unrolled loop
306 /// (UnrolledCost) and dynamic cost of the original loop (RolledDynamicCost). By
307 /// dynamic cost we mean that we won't count costs of blocks that are known not
308 /// to be executed (i.e. if we have a branch in the loop and we know that at the
309 /// given iteration its condition would be resolved to true, we won't add up the
310 /// cost of the 'false'-block).
311 /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If
312 /// the analysis failed (no benefits expected from the unrolling, or the loop is
313 /// too big to analyze), the returned value is None.
315  const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE,
316  const SmallPtrSetImpl<const Value *> &EphValues,
317  const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) {
318  // We want to be able to scale offsets by the trip count and add more offsets
319  // to them without checking for overflows, and we already don't want to
320  // analyze *massive* trip counts, so we force the max to be reasonably small.
322  (unsigned)(std::numeric_limits<int>::max() / 2) &&
323  "The unroll iterations max is too large!");
324 
325  // Only analyze inner loops. We can't properly estimate cost of nested loops
326  // and we won't visit inner loops again anyway.
327  if (!L->empty())
328  return None;
329 
330  // Don't simulate loops with a big or unknown tripcount
331  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
333  return None;
334 
337  DenseMap<Value *, Constant *> SimplifiedValues;
338  SmallVector<std::pair<Value *, Constant *>, 4> SimplifiedInputValues;
339 
340  // The estimated cost of the unrolled form of the loop. We try to estimate
341  // this by simplifying as much as we can while computing the estimate.
342  unsigned UnrolledCost = 0;
343 
344  // We also track the estimated dynamic (that is, actually executed) cost in
345  // the rolled form. This helps identify cases when the savings from unrolling
346  // aren't just exposing dead control flows, but actual reduced dynamic
347  // instructions due to the simplifications which we expect to occur after
348  // unrolling.
349  unsigned RolledDynamicCost = 0;
350 
351  // We track the simplification of each instruction in each iteration. We use
352  // this to recursively merge costs into the unrolled cost on-demand so that
353  // we don't count the cost of any dead code. This is essentially a map from
354  // <instruction, int> to <bool, bool>, but stored as a densely packed struct.
356 
357  // A small worklist used to accumulate cost of instructions from each
358  // observable and reached root in the loop.
359  SmallVector<Instruction *, 16> CostWorklist;
360 
361  // PHI-used worklist used between iterations while accumulating cost.
362  SmallVector<Instruction *, 4> PHIUsedList;
363 
364  // Helper function to accumulate cost for instructions in the loop.
365  auto AddCostRecursively = [&](Instruction &RootI, int Iteration) {
366  assert(Iteration >= 0 && "Cannot have a negative iteration!");
367  assert(CostWorklist.empty() && "Must start with an empty cost list");
368  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
369  CostWorklist.push_back(&RootI);
370  for (;; --Iteration) {
371  do {
372  Instruction *I = CostWorklist.pop_back_val();
373 
374  // InstCostMap only uses I and Iteration as a key, the other two values
375  // don't matter here.
376  auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
377  if (CostIter == InstCostMap.end())
378  // If an input to a PHI node comes from a dead path through the loop
379  // we may have no cost data for it here. What that actually means is
380  // that it is free.
381  continue;
382  auto &Cost = *CostIter;
383  if (Cost.IsCounted)
384  // Already counted this instruction.
385  continue;
386 
387  // Mark that we are counting the cost of this instruction now.
388  Cost.IsCounted = true;
389 
390  // If this is a PHI node in the loop header, just add it to the PHI set.
391  if (auto *PhiI = dyn_cast<PHINode>(I))
392  if (PhiI->getParent() == L->getHeader()) {
393  assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
394  "inherently simplify during unrolling.");
395  if (Iteration == 0)
396  continue;
397 
398  // Push the incoming value from the backedge into the PHI used list
399  // if it is an in-loop instruction. We'll use this to populate the
400  // cost worklist for the next iteration (as we count backwards).
401  if (auto *OpI = dyn_cast<Instruction>(
402  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
403  if (L->contains(OpI))
404  PHIUsedList.push_back(OpI);
405  continue;
406  }
407 
408  // First accumulate the cost of this instruction.
409  if (!Cost.IsFree) {
410  UnrolledCost += TTI.getUserCost(I);
411  LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
412  << Iteration << "): ");
413  LLVM_DEBUG(I->dump());
414  }
415 
416  // We must count the cost of every operand which is not free,
417  // recursively. If we reach a loop PHI node, simply add it to the set
418  // to be considered on the next iteration (backwards!).
419  for (Value *Op : I->operands()) {
420  // Check whether this operand is free due to being a constant or
421  // outside the loop.
422  auto *OpI = dyn_cast<Instruction>(Op);
423  if (!OpI || !L->contains(OpI))
424  continue;
425 
426  // Otherwise accumulate its cost.
427  CostWorklist.push_back(OpI);
428  }
429  } while (!CostWorklist.empty());
430 
431  if (PHIUsedList.empty())
432  // We've exhausted the search.
433  break;
434 
435  assert(Iteration > 0 &&
436  "Cannot track PHI-used values past the first iteration!");
437  CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
438  PHIUsedList.clear();
439  }
440  };
441 
442  // Ensure that we don't violate the loop structure invariants relied on by
443  // this analysis.
444  assert(L->isLoopSimplifyForm() && "Must put loop into normal form first.");
445  assert(L->isLCSSAForm(DT) &&
446  "Must have loops in LCSSA form to track live-out values.");
447 
448  LLVM_DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n");
449 
450  // Simulate execution of each iteration of the loop counting instructions,
451  // which would be simplified.
452  // Since the same load will take different values on different iterations,
453  // we literally have to go through all loop's iterations.
454  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
455  LLVM_DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n");
456 
457  // Prepare for the iteration by collecting any simplified entry or backedge
458  // inputs.
459  for (Instruction &I : *L->getHeader()) {
460  auto *PHI = dyn_cast<PHINode>(&I);
461  if (!PHI)
462  break;
463 
464  // The loop header PHI nodes must have exactly two input: one from the
465  // loop preheader and one from the loop latch.
466  assert(
467  PHI->getNumIncomingValues() == 2 &&
468  "Must have an incoming value only for the preheader and the latch.");
469 
470  Value *V = PHI->getIncomingValueForBlock(
471  Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
472  Constant *C = dyn_cast<Constant>(V);
473  if (Iteration != 0 && !C)
474  C = SimplifiedValues.lookup(V);
475  if (C)
476  SimplifiedInputValues.push_back({PHI, C});
477  }
478 
479  // Now clear and re-populate the map for the next iteration.
480  SimplifiedValues.clear();
481  while (!SimplifiedInputValues.empty())
482  SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
483 
484  UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
485 
486  BBWorklist.clear();
487  BBWorklist.insert(L->getHeader());
488  // Note that we *must not* cache the size, this loop grows the worklist.
489  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
490  BasicBlock *BB = BBWorklist[Idx];
491 
492  // Visit all instructions in the given basic block and try to simplify
493  // it. We don't change the actual IR, just count optimization
494  // opportunities.
495  for (Instruction &I : *BB) {
496  // These won't get into the final code - don't even try calculating the
497  // cost for them.
498  if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
499  continue;
500 
501  // Track this instruction's expected baseline cost when executing the
502  // rolled loop form.
503  RolledDynamicCost += TTI.getUserCost(&I);
504 
505  // Visit the instruction to analyze its loop cost after unrolling,
506  // and if the visitor returns true, mark the instruction as free after
507  // unrolling and continue.
508  bool IsFree = Analyzer.visit(I);
509  bool Inserted = InstCostMap.insert({&I, (int)Iteration,
510  (unsigned)IsFree,
511  /*IsCounted*/ false}).second;
512  (void)Inserted;
513  assert(Inserted && "Cannot have a state for an unvisited instruction!");
514 
515  if (IsFree)
516  continue;
517 
518  // Can't properly model a cost of a call.
519  // FIXME: With a proper cost model we should be able to do it.
520  if (auto *CI = dyn_cast<CallInst>(&I)) {
521  const Function *Callee = CI->getCalledFunction();
522  if (!Callee || TTI.isLoweredToCall(Callee)) {
523  LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
524  return None;
525  }
526  }
527 
528  // If the instruction might have a side-effect recursively account for
529  // the cost of it and all the instructions leading up to it.
530  if (I.mayHaveSideEffects())
531  AddCostRecursively(I, Iteration);
532 
533  // If unrolled body turns out to be too big, bail out.
534  if (UnrolledCost > MaxUnrolledLoopSize) {
535  LLVM_DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
536  << " UnrolledCost: " << UnrolledCost
537  << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
538  << "\n");
539  return None;
540  }
541  }
542 
543  Instruction *TI = BB->getTerminator();
544 
545  // Add in the live successors by first checking whether we have terminator
546  // that may be simplified based on the values simplified by this call.
547  BasicBlock *KnownSucc = nullptr;
548  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
549  if (BI->isConditional()) {
550  if (Constant *SimpleCond =
551  SimplifiedValues.lookup(BI->getCondition())) {
552  // Just take the first successor if condition is undef
553  if (isa<UndefValue>(SimpleCond))
554  KnownSucc = BI->getSuccessor(0);
555  else if (ConstantInt *SimpleCondVal =
556  dyn_cast<ConstantInt>(SimpleCond))
557  KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
558  }
559  }
560  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
561  if (Constant *SimpleCond =
562  SimplifiedValues.lookup(SI->getCondition())) {
563  // Just take the first successor if condition is undef
564  if (isa<UndefValue>(SimpleCond))
565  KnownSucc = SI->getSuccessor(0);
566  else if (ConstantInt *SimpleCondVal =
567  dyn_cast<ConstantInt>(SimpleCond))
568  KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
569  }
570  }
571  if (KnownSucc) {
572  if (L->contains(KnownSucc))
573  BBWorklist.insert(KnownSucc);
574  else
575  ExitWorklist.insert({BB, KnownSucc});
576  continue;
577  }
578 
579  // Add BB's successors to the worklist.
580  for (BasicBlock *Succ : successors(BB))
581  if (L->contains(Succ))
582  BBWorklist.insert(Succ);
583  else
584  ExitWorklist.insert({BB, Succ});
585  AddCostRecursively(*TI, Iteration);
586  }
587 
588  // If we found no optimization opportunities on the first iteration, we
589  // won't find them on later ones too.
590  if (UnrolledCost == RolledDynamicCost) {
591  LLVM_DEBUG(dbgs() << " No opportunities found.. exiting.\n"
592  << " UnrolledCost: " << UnrolledCost << "\n");
593  return None;
594  }
595  }
596 
597  while (!ExitWorklist.empty()) {
598  BasicBlock *ExitingBB, *ExitBB;
599  std::tie(ExitingBB, ExitBB) = ExitWorklist.pop_back_val();
600 
601  for (Instruction &I : *ExitBB) {
602  auto *PN = dyn_cast<PHINode>(&I);
603  if (!PN)
604  break;
605 
606  Value *Op = PN->getIncomingValueForBlock(ExitingBB);
607  if (auto *OpI = dyn_cast<Instruction>(Op))
608  if (L->contains(OpI))
609  AddCostRecursively(*OpI, TripCount - 1);
610  }
611  }
612 
613  LLVM_DEBUG(dbgs() << "Analysis finished:\n"
614  << "UnrolledCost: " << UnrolledCost << ", "
615  << "RolledDynamicCost: " << RolledDynamicCost << "\n");
616  return {{UnrolledCost, RolledDynamicCost}};
617 }
618 
619 /// ApproximateLoopSize - Approximate the size of the loop.
621  const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
622  const TargetTransformInfo &TTI,
623  const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
625  for (BasicBlock *BB : L->blocks())
626  Metrics.analyzeBasicBlock(BB, TTI, EphValues);
627  NumCalls = Metrics.NumInlineCandidates;
628  NotDuplicatable = Metrics.notDuplicatable;
629  Convergent = Metrics.convergent;
630 
631  unsigned LoopSize = Metrics.NumInsts;
632 
633  // Don't allow an estimate of size zero. This would allows unrolling of loops
634  // with huge iteration counts, which is a compile time problem even if it's
635  // not a problem for code quality. Also, the code using this size may assume
636  // that each loop has at least three instructions (likely a conditional
637  // branch, a comparison feeding that branch, and some kind of loop increment
638  // feeding that comparison instruction).
639  LoopSize = std::max(LoopSize, BEInsns + 1);
640 
641  return LoopSize;
642 }
643 
644 // Returns the loop hint metadata node with the given name (for example,
645 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
646 // returned.
648  if (MDNode *LoopID = L->getLoopID())
649  return GetUnrollMetadata(LoopID, Name);
650  return nullptr;
651 }
652 
653 // Returns true if the loop has an unroll(full) pragma.
654 static bool HasUnrollFullPragma(const Loop *L) {
655  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
656 }
657 
658 // Returns true if the loop has an unroll(enable) pragma. This metadata is used
659 // for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives.
660 static bool HasUnrollEnablePragma(const Loop *L) {
661  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable");
662 }
663 
664 // Returns true if the loop has an runtime unroll(disable) pragma.
665 static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
666  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
667 }
668 
669 // If loop has an unroll_count pragma return the (necessarily
670 // positive) value from the pragma. Otherwise return 0.
671 static unsigned UnrollCountPragmaValue(const Loop *L) {
672  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
673  if (MD) {
674  assert(MD->getNumOperands() == 2 &&
675  "Unroll count hint metadata should have two operands.");
676  unsigned Count =
677  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
678  assert(Count >= 1 && "Unroll count must be positive.");
679  return Count;
680  }
681  return 0;
682 }
683 
684 // Computes the boosting factor for complete unrolling.
685 // If fully unrolling the loop would save a lot of RolledDynamicCost, it would
686 // be beneficial to fully unroll the loop even if unrolledcost is large. We
687 // use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
688 // the unroll threshold.
689 static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
690  unsigned MaxPercentThresholdBoost) {
691  if (Cost.RolledDynamicCost >= std::numeric_limits<unsigned>::max() / 100)
692  return 100;
693  else if (Cost.UnrolledCost != 0)
694  // The boosting factor is RolledDynamicCost / UnrolledCost
695  return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
696  MaxPercentThresholdBoost);
697  else
698  return MaxPercentThresholdBoost;
699 }
700 
701 // Returns loop size estimation for unrolled loop.
702 static uint64_t getUnrolledLoopSize(
703  unsigned LoopSize,
705  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
706  return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
707 }
708 
709 // Returns true if unroll count was set explicitly.
710 // Calculates unroll count and writes it to UP.Count.
711 // Unless IgnoreUser is true, will also use metadata and command-line options
712 // that are specific to to the LoopUnroll pass (which, for instance, are
713 // irrelevant for the LoopUnrollAndJam pass).
714 // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
715 // many LoopUnroll-specific options. The shared functionality should be
716 // refactored into it own function.
718  Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
719  ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
720  OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
721  unsigned &TripMultiple, unsigned LoopSize,
722  TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
723 
724  // Check for explicit Count.
725  // 1st priority is unroll count set by "unroll-count" option.
726  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
727  if (UserUnrollCount) {
728  UP.Count = UnrollCount;
729  UP.AllowExpensiveTripCount = true;
730  UP.Force = true;
731  if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
732  return true;
733  }
734 
735  // 2nd priority is unroll count set by pragma.
736  unsigned PragmaCount = UnrollCountPragmaValue(L);
737  if (PragmaCount > 0) {
738  UP.Count = PragmaCount;
739  UP.Runtime = true;
740  UP.AllowExpensiveTripCount = true;
741  UP.Force = true;
742  if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
744  return true;
745  }
746  bool PragmaFullUnroll = HasUnrollFullPragma(L);
747  if (PragmaFullUnroll && TripCount != 0) {
748  UP.Count = TripCount;
749  if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
750  return false;
751  }
752 
753  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
754  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
755  PragmaEnableUnroll || UserUnrollCount;
756 
757  if (ExplicitUnroll && TripCount != 0) {
758  // If the loop has an unrolling pragma, we want to be more aggressive with
759  // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
760  // value which is larger than the default limits.
761  UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
762  UP.PartialThreshold =
763  std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
764  }
765 
766  // 3rd priority is full unroll count.
767  // Full unroll makes sense only when TripCount or its upper bound could be
768  // statically calculated.
769  // Also we need to check if we exceed FullUnrollMaxCount.
770  // If using the upper bound to unroll, TripMultiple should be set to 1 because
771  // we do not know when loop may exit.
772  // MaxTripCount and ExactTripCount cannot both be non zero since we only
773  // compute the former when the latter is zero.
774  unsigned ExactTripCount = TripCount;
775  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
776  "ExtractTripCount and MaxTripCount cannot both be non zero.");
777  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
778  UP.Count = FullUnrollTripCount;
779  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
780  // When computing the unrolled size, note that BEInsns are not replicated
781  // like the rest of the loop body.
782  if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
783  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
784  TripCount = FullUnrollTripCount;
785  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
786  return ExplicitUnroll;
787  } else {
788  // The loop isn't that small, but we still can fully unroll it if that
789  // helps to remove a significant number of instructions.
790  // To check that, run additional analysis on the loop.
792  L, FullUnrollTripCount, DT, SE, EphValues, TTI,
793  UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
794  unsigned Boost =
796  if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
797  UseUpperBound = (MaxTripCount == FullUnrollTripCount);
798  TripCount = FullUnrollTripCount;
799  TripMultiple = UP.UpperBound ? 1 : TripMultiple;
800  return ExplicitUnroll;
801  }
802  }
803  }
804  }
805 
806  // 4th priority is loop peeling.
807  computePeelCount(L, LoopSize, UP, TripCount, SE);
808  if (UP.PeelCount) {
809  UP.Runtime = false;
810  UP.Count = 1;
811  return ExplicitUnroll;
812  }
813 
814  // 5th priority is partial unrolling.
815  // Try partial unroll only when TripCount could be statically calculated.
816  if (TripCount) {
817  UP.Partial |= ExplicitUnroll;
818  if (!UP.Partial) {
819  LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
820  << "-unroll-allow-partial not given\n");
821  UP.Count = 0;
822  return false;
823  }
824  if (UP.Count == 0)
825  UP.Count = TripCount;
826  if (UP.PartialThreshold != NoThreshold) {
827  // Reduce unroll count to be modulo of TripCount for partial unrolling.
828  if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
829  UP.Count =
830  (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
831  (LoopSize - UP.BEInsns);
832  if (UP.Count > UP.MaxCount)
833  UP.Count = UP.MaxCount;
834  while (UP.Count != 0 && TripCount % UP.Count != 0)
835  UP.Count--;
836  if (UP.AllowRemainder && UP.Count <= 1) {
837  // If there is no Count that is modulo of TripCount, set Count to
838  // largest power-of-two factor that satisfies the threshold limit.
839  // As we'll create fixup loop, do the type of unrolling only if
840  // remainder loop is allowed.
842  while (UP.Count != 0 &&
843  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
844  UP.Count >>= 1;
845  }
846  if (UP.Count < 2) {
847  if (PragmaEnableUnroll)
848  ORE->emit([&]() {
850  "UnrollAsDirectedTooLarge",
851  L->getStartLoc(), L->getHeader())
852  << "Unable to unroll loop as directed by unroll(enable) "
853  "pragma "
854  "because unrolled size is too large.";
855  });
856  UP.Count = 0;
857  }
858  } else {
859  UP.Count = TripCount;
860  }
861  if (UP.Count > UP.MaxCount)
862  UP.Count = UP.MaxCount;
863  if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
864  UP.Count != TripCount)
865  ORE->emit([&]() {
866  return OptimizationRemarkMissed(DEBUG_TYPE,
867  "FullUnrollAsDirectedTooLarge",
868  L->getStartLoc(), L->getHeader())
869  << "Unable to fully unroll loop as directed by unroll pragma "
870  "because "
871  "unrolled size is too large.";
872  });
873  return ExplicitUnroll;
874  }
875  assert(TripCount == 0 &&
876  "All cases when TripCount is constant should be covered here.");
877  if (PragmaFullUnroll)
878  ORE->emit([&]() {
880  DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount",
881  L->getStartLoc(), L->getHeader())
882  << "Unable to fully unroll loop as directed by unroll(full) "
883  "pragma "
884  "because loop has a runtime trip count.";
885  });
886 
887  // 6th priority is runtime unrolling.
888  // Don't unroll a runtime trip count loop when it is disabled.
890  UP.Count = 0;
891  return false;
892  }
893 
894  // Check if the runtime trip count is too small when profile is available.
895  if (L->getHeader()->getParent()->hasProfileData()) {
896  if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
897  if (*ProfileTripCount < FlatLoopTripCountThreshold)
898  return false;
899  else
900  UP.AllowExpensiveTripCount = true;
901  }
902  }
903 
904  // Reduce count based on the type of unrolling and the threshold values.
905  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
906  if (!UP.Runtime) {
907  LLVM_DEBUG(
908  dbgs() << " will not try to unroll loop with runtime trip count "
909  << "-unroll-runtime not given\n");
910  UP.Count = 0;
911  return false;
912  }
913  if (UP.Count == 0)
915 
916  // Reduce unroll count to be the largest power-of-two factor of
917  // the original count which satisfies the threshold limit.
918  while (UP.Count != 0 &&
919  getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
920  UP.Count >>= 1;
921 
922 #ifndef NDEBUG
923  unsigned OrigCount = UP.Count;
924 #endif
925 
926  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
927  while (UP.Count != 0 && TripMultiple % UP.Count != 0)
928  UP.Count >>= 1;
929  LLVM_DEBUG(
930  dbgs() << "Remainder loop is restricted (that could architecture "
931  "specific or because the loop contains a convergent "
932  "instruction), so unroll count must divide the trip "
933  "multiple, "
934  << TripMultiple << ". Reducing unroll count from " << OrigCount
935  << " to " << UP.Count << ".\n");
936 
937  using namespace ore;
938 
939  if (PragmaCount > 0 && !UP.AllowRemainder)
940  ORE->emit([&]() {
942  "DifferentUnrollCountFromDirected",
943  L->getStartLoc(), L->getHeader())
944  << "Unable to unroll loop the number of times directed by "
945  "unroll_count pragma because remainder loop is restricted "
946  "(that could architecture specific or because the loop "
947  "contains a convergent instruction) and so must have an "
948  "unroll "
949  "count that divides the loop trip multiple of "
950  << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
951  << NV("UnrollCount", UP.Count) << " time(s).";
952  });
953  }
954 
955  if (UP.Count > UP.MaxCount)
956  UP.Count = UP.MaxCount;
957  LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
958  << "\n");
959  if (UP.Count < 2)
960  UP.Count = 0;
961  return ExplicitUnroll;
962 }
963 
965  Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
966  const TargetTransformInfo &TTI, AssumptionCache &AC,
967  OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
968  bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
969  Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
970  Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
971  Optional<bool> ProvidedAllowPeeling) {
972  LLVM_DEBUG(dbgs() << "Loop Unroll: F["
973  << L->getHeader()->getParent()->getName() << "] Loop %"
974  << L->getHeader()->getName() << "\n");
976  if (TM & TM_Disable)
978  if (!L->isLoopSimplifyForm()) {
979  LLVM_DEBUG(
980  dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
982  }
983 
984  // When automtatic unrolling is disabled, do not unroll unless overridden for
985  // this loop.
986  if (OnlyWhenForced && !(TM & TM_Enable))
988 
989  unsigned NumInlineCandidates;
990  bool NotDuplicatable;
991  bool Convergent;
993  L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
994  ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
995  ProvidedAllowPeeling);
996  // Exit early if unrolling is disabled.
997  if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
999 
1001  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
1002 
1003  unsigned LoopSize =
1004  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
1005  TTI, EphValues, UP.BEInsns);
1006  LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
1007  if (NotDuplicatable) {
1008  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
1009  << " instructions.\n");
1011  }
1012  if (NumInlineCandidates != 0) {
1013  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
1015  }
1016 
1017  // Find trip count and trip multiple if count is not available
1018  unsigned TripCount = 0;
1019  unsigned MaxTripCount = 0;
1020  unsigned TripMultiple = 1;
1021  // If there are multiple exiting blocks but one of them is the latch, use the
1022  // latch for the trip count estimation. Otherwise insist on a single exiting
1023  // block for the trip count estimation.
1024  BasicBlock *ExitingBlock = L->getLoopLatch();
1025  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
1026  ExitingBlock = L->getExitingBlock();
1027  if (ExitingBlock) {
1028  TripCount = SE.getSmallConstantTripCount(L, ExitingBlock);
1029  TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock);
1030  }
1031 
1032  // If the loop contains a convergent operation, the prelude we'd add
1033  // to do the first few instructions before we hit the unrolled loop
1034  // is unsafe -- it adds a control-flow dependency to the convergent
1035  // operation. Therefore restrict remainder loop (try unrollig without).
1036  //
1037  // TODO: This is quite conservative. In practice, convergent_op()
1038  // is likely to be called unconditionally in the loop. In this
1039  // case, the program would be ill-formed (on most architectures)
1040  // unless n were the same on all threads in a thread group.
1041  // Assuming n is the same on all threads, any kind of unrolling is
1042  // safe. But currently llvm's notion of convergence isn't powerful
1043  // enough to express this.
1044  if (Convergent)
1045  UP.AllowRemainder = false;
1046 
1047  // Try to find the trip count upper bound if we cannot find the exact trip
1048  // count.
1049  bool MaxOrZero = false;
1050  if (!TripCount) {
1051  MaxTripCount = SE.getSmallConstantMaxTripCount(L);
1052  MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
1053  // We can unroll by the upper bound amount if it's generally allowed or if
1054  // we know that the loop is executed either the upper bound or zero times.
1055  // (MaxOrZero unrolling keeps only the first loop test, so the number of
1056  // loop tests remains the same compared to the non-unrolled version, whereas
1057  // the generic upper bound unrolling keeps all but the last loop test so the
1058  // number of loop tests goes up which may end up being worse on targets with
1059  // constrained branch predictor resources so is controlled by an option.)
1060  // In addition we only unroll small upper bounds.
1061  if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
1062  MaxTripCount = 0;
1063  }
1064  }
1065 
1066  // computeUnrollCount() decides whether it is beneficial to use upper bound to
1067  // fully unroll the loop.
1068  bool UseUpperBound = false;
1069  bool IsCountSetExplicitly = computeUnrollCount(
1070  L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1071  TripMultiple, LoopSize, UP, UseUpperBound);
1072  if (!UP.Count)
1074  // Unroll factor (Count) must be less or equal to TripCount.
1075  if (TripCount && UP.Count > TripCount)
1076  UP.Count = TripCount;
1077 
1078  // Save loop properties before it is transformed.
1079  MDNode *OrigLoopID = L->getLoopID();
1080 
1081  // Unroll the loop.
1082  Loop *RemainderLoop = nullptr;
1083  LoopUnrollResult UnrollResult = UnrollLoop(
1084  L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
1085  UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
1086  LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1087  if (UnrollResult == LoopUnrollResult::Unmodified)
1089 
1090  if (RemainderLoop) {
1091  Optional<MDNode *> RemainderLoopID =
1094  if (RemainderLoopID.hasValue())
1095  RemainderLoop->setLoopID(RemainderLoopID.getValue());
1096  }
1097 
1098  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
1099  Optional<MDNode *> NewLoopID =
1102  if (NewLoopID.hasValue()) {
1103  L->setLoopID(NewLoopID.getValue());
1104 
1105  // Do not setLoopAlreadyUnrolled if loop attributes have been specified
1106  // explicitly.
1107  return UnrollResult;
1108  }
1109  }
1110 
1111  // If loop has an unroll count pragma or unrolled by explicitly set count
1112  // mark loop as unrolled to prevent unrolling beyond that requested.
1113  // If the loop was peeled, we already "used up" the profile information
1114  // we had, so we don't want to unroll or peel again.
1115  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
1116  (IsCountSetExplicitly || UP.PeelCount))
1118 
1119  return UnrollResult;
1120 }
1121 
1122 namespace {
1123 
1124 class LoopUnroll : public LoopPass {
1125 public:
1126  static char ID; // Pass ID, replacement for typeid
1127 
1128  int OptLevel;
1129 
1130  /// If false, use a cost model to determine whether unrolling of a loop is
1131  /// profitable. If true, only loops that explicitly request unrolling via
1132  /// metadata are considered. All other loops are skipped.
1133  bool OnlyWhenForced;
1134 
1135  Optional<unsigned> ProvidedCount;
1136  Optional<unsigned> ProvidedThreshold;
1137  Optional<bool> ProvidedAllowPartial;
1138  Optional<bool> ProvidedRuntime;
1139  Optional<bool> ProvidedUpperBound;
1140  Optional<bool> ProvidedAllowPeeling;
1141 
1142  LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
1144  Optional<unsigned> Count = None,
1145  Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
1146  Optional<bool> UpperBound = None,
1147  Optional<bool> AllowPeeling = None)
1148  : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1149  ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
1150  ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
1151  ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
1153  }
1154 
1155  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
1156  if (skipLoop(L))
1157  return false;
1158 
1159  Function &F = *L->getHeader()->getParent();
1160 
1161  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1162  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1163  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1164  const TargetTransformInfo &TTI =
1165  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1166  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1167  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
1168  // pass. Function analyses need to be preserved across loop transformations
1169  // but ORE cannot be preserved (see comment before the pass definition).
1170  OptimizationRemarkEmitter ORE(&F);
1171  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
1172 
1174  L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
1175  ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1176  ProvidedUpperBound, ProvidedAllowPeeling);
1177 
1178  if (Result == LoopUnrollResult::FullyUnrolled)
1179  LPM.markLoopAsDeleted(*L);
1180 
1181  return Result != LoopUnrollResult::Unmodified;
1182  }
1183 
1184  /// This transformation requires natural loop information & requires that
1185  /// loop preheaders be inserted into the CFG...
1186  void getAnalysisUsage(AnalysisUsage &AU) const override {
1189  // FIXME: Loop passes are required to preserve domtree, and for now we just
1190  // recreate dom info if anything gets unrolled.
1192  }
1193 };
1194 
1195 } // end anonymous namespace
1196 
1197 char LoopUnroll::ID = 0;
1198 
1199 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1203 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
1204 
1205 Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
1206  int Threshold, int Count, int AllowPartial,
1207  int Runtime, int UpperBound,
1208  int AllowPeeling) {
1209  // TODO: It would make more sense for this function to take the optionals
1210  // directly, but that's dangerous since it would silently break out of tree
1211  // callers.
1212  return new LoopUnroll(
1213  OptLevel, OnlyWhenForced,
1214  Threshold == -1 ? None : Optional<unsigned>(Threshold),
1215  Count == -1 ? None : Optional<unsigned>(Count),
1216  AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
1217  Runtime == -1 ? None : Optional<bool>(Runtime),
1218  UpperBound == -1 ? None : Optional<bool>(UpperBound),
1219  AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
1220 }
1221 
1222 Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
1223  return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
1224 }
1225 
1228  LPMUpdater &Updater) {
1229  const auto &FAM =
1230  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
1231  Function *F = L.getHeader()->getParent();
1232 
1233  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
1234  // FIXME: This should probably be optional rather than required.
1235  if (!ORE)
1237  "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not "
1238  "cached at a higher level");
1239 
1240  // Keep track of the previous loop structure so we can identify new loops
1241  // created by unrolling.
1242  Loop *ParentL = L.getParentLoop();
1243  SmallPtrSet<Loop *, 4> OldLoops;
1244  if (ParentL)
1245  OldLoops.insert(ParentL->begin(), ParentL->end());
1246  else
1247  OldLoops.insert(AR.LI.begin(), AR.LI.end());
1248 
1249  std::string LoopName = L.getName();
1250 
1251  bool Changed =
1252  tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
1253  /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
1254  /*Count*/ None,
1255  /*Threshold*/ None, /*AllowPartial*/ false,
1256  /*Runtime*/ false, /*UpperBound*/ false,
1257  /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
1258  if (!Changed)
1259  return PreservedAnalyses::all();
1260 
1261  // The parent must not be damaged by unrolling!
1262 #ifndef NDEBUG
1263  if (ParentL)
1264  ParentL->verifyLoop();
1265 #endif
1266 
1267  // Unrolling can do several things to introduce new loops into a loop nest:
1268  // - Full unrolling clones child loops within the current loop but then
1269  // removes the current loop making all of the children appear to be new
1270  // sibling loops.
1271  //
1272  // When a new loop appears as a sibling loop after fully unrolling,
1273  // its nesting structure has fundamentally changed and we want to revisit
1274  // it to reflect that.
1275  //
1276  // When unrolling has removed the current loop, we need to tell the
1277  // infrastructure that it is gone.
1278  //
1279  // Finally, we support a debugging/testing mode where we revisit child loops
1280  // as well. These are not expected to require further optimizations as either
1281  // they or the loop they were cloned from have been directly visited already.
1282  // But the debugging mode allows us to check this assumption.
1283  bool IsCurrentLoopValid = false;
1284  SmallVector<Loop *, 4> SibLoops;
1285  if (ParentL)
1286  SibLoops.append(ParentL->begin(), ParentL->end());
1287  else
1288  SibLoops.append(AR.LI.begin(), AR.LI.end());
1289  erase_if(SibLoops, [&](Loop *SibLoop) {
1290  if (SibLoop == &L) {
1291  IsCurrentLoopValid = true;
1292  return true;
1293  }
1294 
1295  // Otherwise erase the loop from the list if it was in the old loops.
1296  return OldLoops.count(SibLoop) != 0;
1297  });
1298  Updater.addSiblingLoops(SibLoops);
1299 
1300  if (!IsCurrentLoopValid) {
1301  Updater.markLoopAsDeleted(L, LoopName);
1302  } else {
1303  // We can only walk child loops if the current loop remained valid.
1305  // Walk *all* of the child loops.
1306  SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
1307  Updater.addChildLoops(ChildLoops);
1308  }
1309  }
1310 
1312 }
1313 
1314 template <typename RangeT>
1316  SmallVector<Loop *, 8> Worklist;
1317  // We use an internal worklist to build up the preorder traversal without
1318  // recursion.
1319  SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
1320 
1321  for (Loop *RootL : Loops) {
1322  assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");
1323  assert(PreOrderWorklist.empty() &&
1324  "Must start with an empty preorder walk worklist.");
1325  PreOrderWorklist.push_back(RootL);
1326  do {
1327  Loop *L = PreOrderWorklist.pop_back_val();
1328  PreOrderWorklist.append(L->begin(), L->end());
1329  PreOrderLoops.push_back(L);
1330  } while (!PreOrderWorklist.empty());
1331 
1332  Worklist.append(PreOrderLoops.begin(), PreOrderLoops.end());
1333  PreOrderLoops.clear();
1334  }
1335  return Worklist;
1336 }
1337 
1340  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
1341  auto &LI = AM.getResult<LoopAnalysis>(F);
1342  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1343  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1344  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1346 
1347  LoopAnalysisManager *LAM = nullptr;
1348  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
1349  LAM = &LAMProxy->getManager();
1350 
1351  const ModuleAnalysisManager &MAM =
1353  ProfileSummaryInfo *PSI =
1355 
1356  bool Changed = false;
1357 
1358  // The unroller requires loops to be in simplified form, and also needs LCSSA.
1359  // Since simplification may add new inner loops, it has to run before the
1360  // legality and profitability checks. This means running the loop unroller
1361  // will simplify all loops, regardless of whether anything end up being
1362  // unrolled.
1363  for (auto &L : LI) {
1364  Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
1365  Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
1366  }
1367 
1369 
1370  while (!Worklist.empty()) {
1371  // Because the LoopInfo stores the loops in RPO, we walk the worklist
1372  // from back to front so that we work forward across the CFG, which
1373  // for unrolling is only needed to get optimization remarks emitted in
1374  // a forward order.
1375  Loop &L = *Worklist.pop_back_val();
1376 #ifndef NDEBUG
1377  Loop *ParentL = L.getParentLoop();
1378 #endif
1379 
1380  // Check if the profile summary indicates that the profiled application
1381  // has a huge working set size, in which case we disable peeling to avoid
1382  // bloating it further.
1383  Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
1384  if (PSI && PSI->hasHugeWorkingSetSize())
1385  LocalAllowPeeling = false;
1386  std::string LoopName = L.getName();
1387  // The API here is quite complex to call and we allow to select some
1388  // flavors of unrolling during construction time (by setting UnrollOpts).
1390  &L, DT, &LI, SE, TTI, AC, ORE,
1391  /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1392  /*Count*/ None,
1393  /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1394  UnrollOpts.AllowUpperBound, LocalAllowPeeling);
1395  Changed |= Result != LoopUnrollResult::Unmodified;
1396 
1397  // The parent must not be damaged by unrolling!
1398 #ifndef NDEBUG
1399  if (Result != LoopUnrollResult::Unmodified && ParentL)
1400  ParentL->verifyLoop();
1401 #endif
1402 
1403  // Clear any cached analysis results for L if we removed it completely.
1404  if (LAM && Result == LoopUnrollResult::FullyUnrolled)
1405  LAM->clear(L, LoopName);
1406  }
1407 
1408  if (!Changed)
1409  return PreservedAnalyses::all();
1410 
1412 }
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop&#39;s ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
uint64_t CallInst * C
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
Definition: CodeMetrics.h:57
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:177
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
Analysis providing profile information.
The main scalar evolution driver.
#define DEBUG_TYPE
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false)
This file contains the declarations for metadata subclasses.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
An immutable pass that tracks lazily created AssumptionCache objects.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned second
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
Definition: CodeMetrics.h:78
Metadata node.
Definition: Metadata.h:864
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1069
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
Definition: CodeMetrics.h:54
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:335
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
Definition: UnrollLoop.h:41
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
Definition: LCSSA.cpp:360
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4298
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
Hexagon Hardware Loops
amdgpu Simplify well known AMD library false Value Value const Twine & Name
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
bool AllowPeeling
Allow peeling off loop iterations for loops with low dynamic tripcount.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for...
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:945
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
Definition: LoopInfo.h:100
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
Definition: LoopUtils.h:218
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:90
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:239
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:161
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:43
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
amdgpu Simplify well known AMD library false Value * Callee
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Definition: LoopInfo.h:203
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
iterator end() const
Definition: LoopInfo.h:666
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop&#39;s loop id metadata.
Definition: LoopInfo.cpp:257
Machine Trace Metrics
char & LCSSAID
Definition: LCSSA.cpp:440
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
op_range operands()
Definition: User.h:238
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:616
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:365
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:40
The transformation should not be applied.
Definition: LoopUtils.h:221
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1154
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
iterator begin() const
Definition: LoopInfo.h:142
Utility to calculate the size and a few similar metrics for a set of basic blocks.
Definition: CodeMetrics.h:42
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:48
static unsigned UnrollCountPragmaValue(const Loop *L)
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1330
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:246
iterator begin() const
Definition: LoopInfo.h:665
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:896
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:165
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:215
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
unsigned PeelCount
A forced peeling factor (the number of bodied of the original loop that should be peeled off before t...
unsigned Threshold
The cost threshold for the unrolled loop.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getName() const
Definition: LoopInfo.h:589
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
Parameters that control the generic loop unrolling transformation.
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
TransformationMode hasUnrollTransformation(Loop *L)
Definition: LoopUtils.cpp:331
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:132
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:789
iterator end() const
Definition: LoopInfo.h:143
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:212
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool empty() const
Definition: LoopInfo.h:146
Multiway switch.
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
succ_range successors(Instruction *I)
Definition: CFG.h:264
unsigned MaxPercentThresholdBoost
If complete unrolling will reduce the cost of the loop, we will boost the Threshold by a certain perc...
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
bool UpperBound
Allow using trip count upper bound to unroll loops.
void verifyLoop() const
Verify loop structure.
Definition: LoopInfoImpl.h:295
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
#define LLVM_DEBUG(X)
Definition: Debug.h:123
unsigned NumInsts
Number of instructions in the analyzed blocks.
Definition: CodeMetrics.h:63
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
The optimization diagnostic interface.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Definition: Function.h:308
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:52