70 #define DEBUG_TYPE "loop-unroll" 74 cl::desc(
"The cost threshold for loop unrolling"));
78 cl::desc(
"The cost threshold for partial loop unrolling"));
82 cl::desc(
"The maximum 'boost' (represented as a percentage >= 100) applied " 83 "to the threshold when aggressively unrolling a loop due to the " 84 "dynamic cost savings. If completely unrolling a loop will reduce " 85 "the total runtime from X to Y, we boost the loop unroll " 86 "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " 87 "X/Y). This limit avoids excessive code bloat."));
91 cl::desc(
"Don't allow loop unrolling to simulate more than this number of" 92 "iterations when checking full unroll profitability"));
96 cl::desc(
"Use this unroll count for all loops including those with " 97 "unroll_count pragma values, for testing purposes"));
101 cl::desc(
"Set the max unroll count for partial and runtime unrolling, for" 102 "testing purposes"));
107 "Set the max unroll count for full unrolling, for testing purposes"));
111 cl::desc(
"Set the unroll peeling count, for testing purposes"));
115 cl::desc(
"Allows loops to be partially unrolled until " 116 "-unroll-threshold loop size is reached."));
120 cl::desc(
"Allow generation of a loop remainder (extra iterations) " 121 "when unrolling a loop."));
125 cl::desc(
"Unroll loops with run-time trip counts"));
130 "The max of trip count upper bound that is considered in unrolling"));
134 cl::desc(
"Unrolled size limit for loops with an unroll(full) or " 135 "unroll_count pragma."));
139 cl::desc(
"If the runtime tripcount for the loop is lower than the " 140 "threshold, the loop is considered as flat and will be less " 141 "aggressively unrolled."));
145 cl::desc(
"Allows loops to be peeled when the dynamic " 146 "trip count is known to be low."));
150 cl::desc(
"Allow the loop remainder to be unrolled."));
157 cl::desc(
"Enqueue and re-visit child loops in the loop PM after unrolling. " 158 "This shouldn't typically be needed as child loops (or their " 159 "clones) were already visited."));
239 UP.
Count = *UserCount;
241 UP.
Partial = *UserAllowPartial;
260 struct UnrolledInstState {
264 unsigned IsCounted : 1;
268 struct UnrolledInstStateKeyInfo {
272 static inline UnrolledInstState getEmptyKey() {
273 return {PtrInfo::getEmptyKey(), 0, 0, 0};
276 static inline UnrolledInstState getTombstoneKey() {
277 return {PtrInfo::getTombstoneKey(), 0, 0, 0};
280 static inline unsigned getHashValue(
const UnrolledInstState &S) {
281 return PairInfo::getHashValue({S.I, S.Iteration});
284 static inline bool isEqual(
const UnrolledInstState &LHS,
285 const UnrolledInstState &RHS) {
290 struct EstimatedUnrollCost {
292 unsigned UnrolledCost;
296 unsigned RolledDynamicCost;
323 "The unroll iterations max is too large!");
342 unsigned UnrolledCost = 0;
349 unsigned RolledDynamicCost = 0;
365 auto AddCostRecursively = [&](
Instruction &RootI,
int Iteration) {
366 assert(Iteration >= 0 &&
"Cannot have a negative iteration!");
367 assert(CostWorklist.
empty() &&
"Must start with an empty cost list");
368 assert(PHIUsedList.
empty() &&
"Must start with an empty phi used list");
370 for (;; --Iteration) {
376 auto CostIter = InstCostMap.
find({
I, Iteration, 0, 0});
377 if (CostIter == InstCostMap.
end())
382 auto &Cost = *CostIter;
388 Cost.IsCounted =
true;
391 if (
auto *PhiI = dyn_cast<PHINode>(I))
392 if (PhiI->getParent() == L->
getHeader()) {
393 assert(Cost.IsFree &&
"Loop PHIs shouldn't be evaluated as they " 394 "inherently simplify during unrolling.");
401 if (
auto *OpI = dyn_cast<Instruction>(
412 << Iteration <<
"): ");
429 }
while (!CostWorklist.
empty());
431 if (PHIUsedList.
empty())
436 "Cannot track PHI-used values past the first iteration!");
446 "Must have loops in LCSSA form to track live-out values.");
448 LLVM_DEBUG(
dbgs() <<
"Starting LoopUnroll profitability analysis...\n");
454 for (
unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
455 LLVM_DEBUG(
dbgs() <<
" Analyzing iteration " << Iteration <<
"\n");
467 PHI->getNumIncomingValues() == 2 &&
468 "Must have an incoming value only for the preheader and the latch.");
470 Value *V = PHI->getIncomingValueForBlock(
473 if (Iteration != 0 && !C)
474 C = SimplifiedValues.
lookup(V);
476 SimplifiedInputValues.
push_back({PHI, C});
480 SimplifiedValues.
clear();
481 while (!SimplifiedInputValues.
empty())
489 for (
unsigned Idx = 0; Idx != BBWorklist.
size(); ++Idx) {
498 if (isa<DbgInfoIntrinsic>(
I) || EphValues.
count(&
I))
508 bool IsFree = Analyzer.
visit(
I);
509 bool Inserted = InstCostMap.
insert({&
I, (int)Iteration,
513 assert(Inserted &&
"Cannot have a state for an unvisited instruction!");
520 if (
auto *CI = dyn_cast<CallInst>(&
I)) {
530 if (
I.mayHaveSideEffects())
531 AddCostRecursively(
I, Iteration);
534 if (UnrolledCost > MaxUnrolledLoopSize) {
536 <<
" UnrolledCost: " << UnrolledCost
537 <<
", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
548 if (
BranchInst *BI = dyn_cast<BranchInst>(TI)) {
549 if (BI->isConditional()) {
551 SimplifiedValues.
lookup(BI->getCondition())) {
553 if (isa<UndefValue>(SimpleCond))
554 KnownSucc = BI->getSuccessor(0);
556 dyn_cast<ConstantInt>(SimpleCond))
557 KnownSucc = BI->getSuccessor(SimpleCondVal->isZero() ? 1 : 0);
562 SimplifiedValues.
lookup(
SI->getCondition())) {
564 if (isa<UndefValue>(SimpleCond))
565 KnownSucc =
SI->getSuccessor(0);
567 dyn_cast<ConstantInt>(SimpleCond))
568 KnownSucc =
SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
573 BBWorklist.
insert(KnownSucc);
575 ExitWorklist.
insert({BB, KnownSucc});
584 ExitWorklist.
insert({BB, Succ});
585 AddCostRecursively(*TI, Iteration);
590 if (UnrolledCost == RolledDynamicCost) {
592 <<
" UnrolledCost: " << UnrolledCost <<
"\n");
597 while (!ExitWorklist.
empty()) {
599 std::tie(ExitingBB, ExitBB) = ExitWorklist.
pop_back_val();
606 Value *
Op = PN->getIncomingValueForBlock(ExitingBB);
607 if (
auto *OpI = dyn_cast<Instruction>(Op))
609 AddCostRecursively(*OpI, TripCount - 1);
614 <<
"UnrolledCost: " << UnrolledCost <<
", " 615 <<
"RolledDynamicCost: " << RolledDynamicCost <<
"\n");
616 return {{UnrolledCost, RolledDynamicCost}};
621 const Loop *L,
unsigned &NumCalls,
bool &NotDuplicatable,
bool &
Convergent,
631 unsigned LoopSize = Metrics.
NumInsts;
639 LoopSize =
std::max(LoopSize, BEInsns + 1);
675 "Unroll count hint metadata should have two operands.");
677 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
678 assert(Count >= 1 &&
"Unroll count must be positive.");
690 unsigned MaxPercentThresholdBoost) {
693 else if (Cost.UnrolledCost != 0)
695 return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
696 MaxPercentThresholdBoost);
698 return MaxPercentThresholdBoost;
705 assert(LoopSize >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
721 unsigned &TripMultiple,
unsigned LoopSize,
726 bool UserUnrollCount =
UnrollCount.getNumOccurrences() > 0;
727 if (UserUnrollCount) {
737 if (PragmaCount > 0) {
738 UP.
Count = PragmaCount;
747 if (PragmaFullUnroll && TripCount != 0) {
748 UP.
Count = TripCount;
754 bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
755 PragmaEnableUnroll || UserUnrollCount;
757 if (ExplicitUnroll && TripCount != 0) {
774 unsigned ExactTripCount = TripCount;
775 assert((ExactTripCount == 0 || MaxTripCount == 0) &&
776 "ExtractTripCount and MaxTripCount cannot both be non zero.");
777 unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
778 UP.
Count = FullUnrollTripCount;
783 UseUpperBound = (MaxTripCount == FullUnrollTripCount);
784 TripCount = FullUnrollTripCount;
785 TripMultiple = UP.
UpperBound ? 1 : TripMultiple;
786 return ExplicitUnroll;
792 L, FullUnrollTripCount, DT, SE, EphValues, TTI,
796 if (Cost->UnrolledCost < UP.
Threshold * Boost / 100) {
797 UseUpperBound = (MaxTripCount == FullUnrollTripCount);
798 TripCount = FullUnrollTripCount;
799 TripMultiple = UP.
UpperBound ? 1 : TripMultiple;
800 return ExplicitUnroll;
811 return ExplicitUnroll;
820 <<
"-unroll-allow-partial not given\n");
825 UP.
Count = TripCount;
834 while (UP.
Count != 0 && TripCount % UP.
Count != 0)
842 while (UP.
Count != 0 &&
847 if (PragmaEnableUnroll)
850 "UnrollAsDirectedTooLarge",
852 <<
"Unable to unroll loop as directed by unroll(enable) " 854 "because unrolled size is too large.";
859 UP.
Count = TripCount;
863 if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
864 UP.
Count != TripCount)
866 return OptimizationRemarkMissed(DEBUG_TYPE,
867 "FullUnrollAsDirectedTooLarge",
868 L->getStartLoc(), L->getHeader())
869 <<
"Unable to fully unroll loop as directed by unroll pragma " 871 "unrolled size is too large.";
873 return ExplicitUnroll;
876 "All cases when TripCount is constant should be covered here.");
877 if (PragmaFullUnroll)
880 DEBUG_TYPE,
"CantFullUnrollAsDirectedRuntimeTripCount",
882 <<
"Unable to fully unroll loop as directed by unroll(full) " 884 "because loop has a runtime trip count.";
905 UP.
Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
908 dbgs() <<
" will not try to unroll loop with runtime trip count " 909 <<
"-unroll-runtime not given\n");
918 while (UP.
Count != 0 &&
923 unsigned OrigCount = UP.
Count;
927 while (UP.
Count != 0 && TripMultiple % UP.
Count != 0)
930 dbgs() <<
"Remainder loop is restricted (that could architecture " 931 "specific or because the loop contains a convergent " 932 "instruction), so unroll count must divide the trip " 934 << TripMultiple <<
". Reducing unroll count from " << OrigCount
935 <<
" to " << UP.
Count <<
".\n");
942 "DifferentUnrollCountFromDirected",
944 <<
"Unable to unroll loop the number of times directed by " 945 "unroll_count pragma because remainder loop is restricted " 946 "(that could architecture specific or because the loop " 947 "contains a convergent instruction) and so must have an " 949 "count that divides the loop trip multiple of " 950 <<
NV(
"TripMultiple", TripMultiple) <<
". Unrolling instead " 951 <<
NV(
"UnrollCount", UP.
Count) <<
" time(s).";
961 return ExplicitUnroll;
980 dbgs() <<
" Not unrolling loop which is not in loop-simplify form.\n");
989 unsigned NumInlineCandidates;
990 bool NotDuplicatable;
993 L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
994 ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
995 ProvidedAllowPeeling);
1007 if (NotDuplicatable) {
1008 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains non-duplicatable" 1009 <<
" instructions.\n");
1012 if (NumInlineCandidates != 0) {
1013 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop with inlinable calls.\n");
1018 unsigned TripCount = 0;
1019 unsigned MaxTripCount = 0;
1020 unsigned TripMultiple = 1;
1049 bool MaxOrZero =
false;
1068 bool UseUpperBound =
false;
1070 L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
1071 TripMultiple, LoopSize, UP, UseUpperBound);
1075 if (TripCount && UP.
Count > TripCount)
1076 UP.
Count = TripCount;
1082 Loop *RemainderLoop =
nullptr;
1086 LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
1090 if (RemainderLoop) {
1107 return UnrollResult;
1119 return UnrollResult;
1124 class LoopUnroll :
public LoopPass {
1133 bool OnlyWhenForced;
1142 LoopUnroll(
int OptLevel = 2,
bool OnlyWhenForced =
false,
1148 :
LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
1149 ProvidedCount(std::move(Count)), ProvidedThreshold(
Threshold),
1150 ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
1151 ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
1161 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1162 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1163 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1165 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1166 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1171 bool PreserveLCSSA = mustPreserveAnalysisID(
LCSSAID);
1174 L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
1175 ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
1176 ProvidedUpperBound, ProvidedAllowPeeling);
1206 int Threshold,
int Count,
int AllowPartial,
1207 int Runtime,
int UpperBound,
1212 return new LoopUnroll(
1213 OptLevel, OnlyWhenForced,
1231 Function *
F = L.getHeader()->getParent();
1237 "LoopFullUnrollPass: OptimizationRemarkEmitterAnalysis not " 1238 "cached at a higher level");
1242 Loop *ParentL = L.getParentLoop();
1249 std::string LoopName = L.getName();
1253 true, OptLevel, OnlyWhenForced,
1283 bool IsCurrentLoopValid =
false;
1290 if (SibLoop == &L) {
1291 IsCurrentLoopValid =
true;
1296 return OldLoops.
count(SibLoop) != 0;
1300 if (!IsCurrentLoopValid) {
1314 template <
typename RangeT>
1322 assert(PreOrderLoops.
empty() &&
"Must start with an empty preorder walk.");
1324 "Must start with an empty preorder walk worklist.");
1330 }
while (!PreOrderWorklist.
empty());
1333 PreOrderLoops.
clear();
1349 LAM = &LAMProxy->getManager();
1356 bool Changed =
false;
1363 for (
auto &L : LI) {
1364 Changed |=
simplifyLoop(L, &DT, &LI, &SE, &AC,
false );
1370 while (!Worklist.
empty()) {
1385 LocalAllowPeeling =
false;
1386 std::string LoopName = L.
getName();
1390 &L, DT, &LI, SE, TTI, AC, ORE,
1391 true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
1393 None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
1394 UnrollOpts.AllowUpperBound, LocalAllowPeeling);
1405 LAM->clear(L, LoopName);
Pass interface - Implemented by all 'passes'.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_NODISCARD T pop_back_val()
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class represents lattice values for constants.
size_type size() const
Determine the number of elements in the SetVector.
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
bool convergent
True if this function contains a call to a convergent function.
static bool HasUnrollEnablePragma(const Loop *L)
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Implements a dense probed hash-table based set.
void push_back(const T &Elt)
Analysis providing profile information.
The main scalar evolution driver.
static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost)
Pass * createSimpleLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
unsigned NumInlineCandidates
The number of calls to internal functions with a single caller.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
Analysis pass which computes a DominatorTree.
const MDOperand & getOperand(unsigned I) const
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
bool notDuplicatable
True if this function cannot be duplicated.
static cl::opt< unsigned > UnrollFullMaxCount("unroll-full-max-count", cl::Hidden, cl::desc("Set the max unroll count for full unrolling, for testing purposes"))
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
const char *const LLVMLoopUnrollFollowupUnrolled
void addChildLoops(ArrayRef< Loop *> NewChildLoops)
Loop passes should use this method to indicate they have added new child loops of the current loop...
static cl::opt< unsigned > UnrollPartialThreshold("unroll-partial-threshold", cl::Hidden, cl::desc("The cost threshold for partial loop unrolling"))
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE)
Put a loop nest into LCSSA form.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void dump() const
Support for debugging, callable in GDB: V->dump()
static cl::opt< bool > UnrollRevisitChildLoops("unroll-revisit-child-loops", cl::Hidden, cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. " "This shouldn't typically be needed as child loops (or their " "clones) were already visited."))
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Pass * createLoopUnrollPass(int OptLevel=2, bool OnlyWhenForced=false, int Threshold=-1, int Count=-1, int AllowPartial=-1, int Runtime=-1, int UpperBound=-1, int AllowPeeling=-1)
amdgpu Simplify well known AMD library false Value Value const Twine & Name
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
static cl::opt< unsigned > FlatLoopTripCountThreshold("flat-loop-tripcount-threshold", cl::init(5), cl::Hidden, cl::desc("If the runtime tripcount for the loop is lower than the " "threshold, the loop is considered as flat and will be less " "aggressively unrolled."))
static bool HasUnrollFullPragma(const Loop *L)
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
Analysis pass that exposes the LoopInfo for a function.
static cl::opt< bool > UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts"))
static Optional< EstimatedUnrollCost > analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
Figure out if the loop is worth full unrolling.
BlockT * getHeader() const
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The transformation should be applied without considering a cost model.
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
void visit(Iterator Start, Iterator End)
bool insert(const value_type &X)
Insert a new element into the SetVector.
static cl::opt< unsigned > UnrollMaxUpperBound("unroll-max-upperbound", cl::init(8), cl::Hidden, cl::desc("The max of trip count upper bound that is considered in unrolling"))
static bool isEqual(const Function &Caller, const Function &Callee)
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const T & getValue() const LLVM_LVALUE_FUNCTION
const char *const LLVMLoopUnrollFollowupRemainder
This header provides classes for managing per-loop analyses.
static cl::opt< bool > UnrollAllowRemainder("unroll-allow-remainder", cl::Hidden, cl::desc("Allow generation of a loop remainder (extra iterations) " "when unrolling a loop."))
static cl::opt< unsigned > UnrollMaxPercentThresholdBoost("unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden, cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied " "to the threshold when aggressively unrolling a loop due to the " "dynamic cost savings. If completely unrolling a loop will reduce " "the total runtime from X to Y, we boost the loop unroll " "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, " "X/Y). This limit avoids excessive code bloat."))
void initializeLoopUnrollPass(PassRegistry &)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
amdgpu Simplify well known AMD library false Value * Callee
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low."))
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
A set of analyses that are preserved following a run of a transformation pass.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
bool isLoopExiting(const BlockT *BB) const
True if terminator in the block can branch to another block that is outside of the current loop...
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
This is an important base class in LLVM.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
static cl::opt< unsigned > UnrollMaxCount("unroll-max-count", cl::Hidden, cl::desc("Set the max unroll count for partial and runtime unrolling, for" "testing purposes"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
std::pair< iterator, bool > insert(const ValueT &V)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
static cl::opt< unsigned > UnrollThreshold("unroll-threshold", cl::Hidden, cl::desc("The cost threshold for loop unrolling"))
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop's estimated trip count based on branch weight metadata.
static cl::opt< unsigned > UnrollPeelCount("unroll-peel-count", cl::Hidden, cl::desc("Set the unroll peeling count, for testing purposes"))
void addSiblingLoops(ArrayRef< Loop *> NewSibLoops)
Loop passes should use this method to indicate they have added new sibling loops to the current loop...
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name)
Loop passes should use this method to indicate they have deleted a loop from the nest.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
TargetTransformInfo & TTI
const char *const LLVMLoopUnrollFollowupAll
The transformation should not be applied.
A function analysis which provides an AssumptionCache.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
A SetVector that performs no allocations if smaller than a certain size.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
This is the shared class of boolean and integer constants.
static cl::opt< bool > UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached."))
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
static cl::opt< unsigned > PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 *1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll(full) or " "unroll_count pragma."))
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Utility to calculate the size and a few similar metrics for a set of basic blocks.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static unsigned UnrollCountPragmaValue(const Loop *L)
LLVM_NODISCARD T pop_back_val()
void markLoopAsDeleted(Loop &L)
static uint64_t getUnrolledLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void clear()
Completely clear the SetVector.
static cl::opt< bool > UnrollUnrollRemainder("unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled."))
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Analysis pass that exposes the ScalarEvolution for a function.
static const unsigned NoThreshold
A magic value for use with the Threshold parameter to indicate that the loop unroll should be perform...
LoopT * getParentLoop() const
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
LLVM_NODISCARD bool empty() const
StringRef getName() const
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
bool empty() const
Determine if the SetVector is empty or not.
iterator find(const_arg_type_t< ValueT > V)
TransformationMode hasUnrollTransformation(Loop *L)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
TransformationMode
The mode sets how eager a transformation should be applied.
static SmallVector< Loop *, 8 > appendLoopsToWorklist(RangeT &&Loops)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
static bool HasRuntimeUnrollDisablePragma(const Loop *L)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, Optional< unsigned > ProvidedCount, Optional< unsigned > ProvidedThreshold, Optional< bool > ProvidedAllowPartial, Optional< bool > ProvidedRuntime, Optional< bool > ProvidedUpperBound, Optional< bool > ProvidedAllowPeeling)
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
succ_range successors(Instruction *I)
The loop was not modified.
static cl::opt< unsigned > UnrollMaxIterationsCountToAnalyze("unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden, cl::desc("Don't allow loop unrolling to simulate more than this number of" "iterations when checking full unroll profitability"))
bool isBackedgeTakenCountMaxOrZero(const Loop *L)
Return true if the backedge taken count is either the value returned by getMaxBackedgeTakenCount or z...
void verifyLoop() const
Verify loop structure.
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
unsigned NumInsts
Number of instructions in the analyzed blocks.
iterator_range< block_iterator > blocks() const
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool hasProfileData() const
Return true if the function is annotated with profile data.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
LoopUnrollResult
Represents the result of a UnrollLoop invocation.