57 #define DEBUG_TYPE "loop-unroll-and-jam" 62 "llvm.loop.unroll_and_jam.followup_all";
64 "llvm.loop.unroll_and_jam.followup_inner";
66 "llvm.loop.unroll_and_jam.followup_outer";
68 "llvm.loop.unroll_and_jam.followup_remainder_inner";
70 "llvm.loop.unroll_and_jam.followup_remainder_outer";
75 cl::desc(
"Allows loops to be unroll-and-jammed."));
79 cl::desc(
"Use this unroll count for all loops including those with " 80 "unroll_and_jam_count pragma values, for testing purposes"));
84 cl::desc(
"Threshold to use for inner loop when doing unroll and jam."));
88 cl::desc(
"Unrolled size limit for loops with an unroll_and_jam(full) or " 89 "unroll_count pragma."));
105 assert(LoopID->getNumOperands() > 0 &&
"requires at least one operand");
106 assert(LoopID->getOperand(0) == LoopID &&
"invalid loop id");
108 for (
unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
135 "Unroll count hint metadata should have two operands.");
137 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
138 assert(Count >= 1 &&
"Unroll count must be positive.");
148 assert(LoopSize >= UP.
BEInsns &&
"LoopSize should not be less than BEInsns!");
159 unsigned OuterTripMultiple,
unsigned OuterLoopSize,
unsigned InnerTripCount,
166 unsigned MaxTripCount = 0;
167 bool UseUpperBound =
false;
169 L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
170 OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
171 if (ExplicitUnroll || UseUpperBound) {
174 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; explicit count set by " 175 "computeUnrollCount\n");
182 if (UserUnrollCount) {
194 if (PragmaCount > 0) {
195 UP.
Count = PragmaCount;
198 if ((UP.
AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
206 bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
207 bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
211 if (ExplicitUnrollAndJam)
216 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; can't create remainder and " 217 "inner loop too large\n");
233 if (ExplicitUnrollAndJam)
238 if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.
Threshold) {
239 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; small inner loop count is " 240 "being left for the unroller\n");
249 dbgs() <<
"Won't unroll-and-jam; More than one inner loop block\n");
257 unsigned NumInvariant = 0;
260 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
261 Value *V = Ld->getPointerOperand();
268 if (NumInvariant == 0) {
269 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; No loop invariant loads\n");
294 if (Latch != Exit || SubLoopLatch != SubLoopExit)
331 unsigned NumInlineCandidates;
332 bool NotDuplicatable;
336 unsigned InnerLoopSize =
338 Convergent, TTI, EphValues, UP.
BEInsns);
339 unsigned OuterLoopSize =
342 LLVM_DEBUG(
dbgs() <<
" Outer Loop Size: " << OuterLoopSize <<
"\n");
343 LLVM_DEBUG(
dbgs() <<
" Inner Loop Size: " << InnerLoopSize <<
"\n");
344 if (NotDuplicatable) {
345 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop which contains non-duplicatable " 349 if (NumInlineCandidates != 0) {
350 LLVM_DEBUG(
dbgs() <<
" Not unrolling loop with inlinable calls.\n");
355 dbgs() <<
" Not unrolling loop with convergent instructions.\n");
368 LLVMLoopUnrollAndJamFollowupRemainderInner});
369 if (NewInnerEpilogueLoopID.
hasValue())
379 L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
380 OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP);
384 if (OuterTripCount && UP.
Count > OuterTripCount)
385 UP.
Count = OuterTripCount;
387 Loop *EpilogueOuterLoop =
nullptr;
390 &SE, &DT, &AC, &ORE, &EpilogueOuterLoop);
393 if (EpilogueOuterLoop) {
396 LLVMLoopUnrollAndJamFollowupRemainderOuter});
397 if (NewOuterEpilogueLoopID.
hasValue())
403 LLVMLoopUnrollAndJamFollowupInner});
431 class LoopUnrollAndJam :
public LoopPass {
436 LoopUnrollAndJam(
int OptLevel = 2) :
LoopPass(ID), OptLevel(OptLevel) {
446 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
447 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
448 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
450 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
451 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
452 auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
482 "Unroll and Jam loops",
false,
false)
491 return new LoopUnrollAndJam(OptLevel);
499 Function *
F = L.getHeader()->getParent();
505 "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at " 511 &L, AR.
DT, &AR.
LI, AR.
SE, AR.
TTI, AR.
AC, DI, *ORE, OptLevel);
Pass interface - Implemented by all 'passes'.
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
This class represents lattice values for constants.
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
static bool HasUnrollAndJamEnablePragma(const Loop *L)
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
Legacy pass manager pass to access dependence information.
The main scalar evolution driver.
An immutable pass that tracks lazily created AssumptionCache objects.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
A cache of @llvm.assume calls within a function.
static const char *const LLVMLoopUnrollAndJamFollowupOuter
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
const MDOperand & getOperand(unsigned I) const
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
DependenceInfo - This class is the main dependence-analysis driver.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
amdgpu Simplify well known AMD library false Value Value const Twine & Name
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
TransformationMode hasUnrollAndJamTransformation(Loop *L)
static const char *const LLVMLoopUnrollAndJamFollowupInner
BlockT * getHeader() const
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const T & getValue() const LLVM_LVALUE_FUNCTION
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI)
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Pass * createLoopUnrollAndJamPass(int OptLevel=2)
StringRef getString() const
loop unroll and Unroll and Jam loops
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
A set of analyses that are preserved following a run of a transformation pass.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP)
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Represent the analysis usage information of a pass.
static unsigned UnrollAndJamCountPragmaValue(const Loop *L)
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
TargetTransformInfo & TTI
The transformation should not be applied.
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
void markLoopAsDeleted(Loop &L)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
static const char *const LLVMLoopUnrollAndJamFollowupAll
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
This class represents an analyzed expression in the program.
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
Represents a single loop in the control flow graph.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
TransformationMode
The mode sets how eager a transformation should be applied.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
The loop was not modified.
void initializeLoopUnrollAndJamPass(PassRegistry &)
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
static bool HasAnyUnrollPragma(const Loop *L, StringRef Prefix)
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)