53 #define DEBUG_TYPE "loop-unroll" 55 STATISTIC(NumPeeled,
"Number of loops peeled");
59 cl::desc(
"Max average trip count which will cause loop peeling."));
63 cl::desc(
"Force a peel count regardless of profiling information."));
108 "Non-loop Phi should not be checked for turning into invariant.");
111 auto I = IterationsToInvariance.
find(Phi);
112 if (I != IterationsToInvariance.
end())
124 else if (
PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
126 if (IncPhi->getParent() != L->
getHeader())
127 return InfiniteIterationsToInvariance;
131 IncPhi, L, BackEdge, IterationsToInvariance);
132 if (InputToInvariance != InfiniteIterationsToInvariance)
133 ToInvariance = InputToInvariance + 1u;
137 if (ToInvariance != InfiniteIterationsToInvariance)
138 IterationsToInvariance[Phi] = ToInvariance;
154 unsigned DesiredPeelCount = 0;
156 for (
auto *BB : L.
blocks()) {
158 if (!BI || BI->isUnconditional())
165 Value *Condition = BI->getCondition();
166 Value *LeftVal, *RightVal;
183 if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
184 if (isa<SCEVAddRecExpr>(RightSCEV)) {
208 unsigned NewPeelCount = DesiredPeelCount;
220 while (NewPeelCount < MaxPeelCount &&
228 if (NewPeelCount > DesiredPeelCount &&
231 DesiredPeelCount = NewPeelCount;
234 return DesiredPeelCount;
241 assert(LoopSize > 0 &&
"Zero loop size is not allowed!");
257 <<
" iterations.\n");
272 if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
279 unsigned DesiredPeelCount = TargetPeelCount;
281 assert(BackEdge &&
"Loop is not in simplified form?");
282 for (
auto BI = L->
getHeader()->
begin(); isa<PHINode>(&*BI); ++BI) {
283 PHINode *Phi = cast<PHINode>(&*BI);
285 Phi, L, BackEdge, IterationsToInvariance);
286 if (ToInvariance != InfiniteIterationsToInvariance)
287 DesiredPeelCount =
std::max(DesiredPeelCount, ToInvariance);
292 MaxPeelCount = std::min(MaxPeelCount, UP.
Threshold / LoopSize - 1);
294 DesiredPeelCount =
std::max(DesiredPeelCount,
297 if (DesiredPeelCount > 0) {
298 DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
300 assert(DesiredPeelCount > 0 &&
"Wrong loop size estimation?");
302 <<
" iteration(s) to turn" 303 <<
" some Phis into invariants.\n");
324 LLVM_DEBUG(
dbgs() <<
"Profile-based estimated trip count is " << *PeelCount
329 (LoopSize * (*PeelCount + 1) <= UP.
Threshold)) {
331 <<
" iterations.\n");
335 LLVM_DEBUG(
dbgs() <<
"Requested peel count: " << *PeelCount <<
"\n");
364 unsigned IterNumber,
unsigned AvgIters,
365 uint64_t &PeeledHeaderWeight) {
370 if (PeeledHeaderWeight) {
371 uint64_t FallThruWeight =
372 PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
373 uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
374 PeeledHeaderWeight -= ExitWeight;
376 unsigned HeaderIdx = (LatchBR->
getSuccessor(0) == Header ? 0 : 1);
379 HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
380 : MDB.createBranchWeights(FallThruWeight, ExitWeight);
444 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
446 unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
448 LatchBR->setSuccessor(1 - HeaderIdx, Exit);
461 PHINode *NewPHI = cast<PHINode>(VMap[&*
I]);
462 if (IterNumber == 0) {
467 if (LatchInst && L->
contains(LatchInst))
468 VMap[&*
I] = LVMap[LatchInst];
470 VMap[&*
I] = LatchVal;
472 cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
483 if (LatchInst && L->
contains(LatchInst))
484 LatchVal = VMap[LatchVal];
485 PHI->
addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
490 for (
const auto &KV : VMap)
491 LVMap[KV.first] = KV.second;
506 assert(PeelCount > 0 &&
"Attempt to peel out zero iterations?");
507 assert(
canPeel(L) &&
"Attempt to peel a loop which is not peelable?");
572 NewPreHeader->setName(PreHeader->
getName() +
".peel.newph");
579 cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
580 unsigned HeaderIdx = (LatchBR->
getSuccessor(0) == Header ? 0 : 1);
582 uint64_t TrueWeight, FalseWeight;
583 uint64_t ExitWeight = 0, CurHeaderWeight = 0;
585 ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
588 CurHeaderWeight = TrueWeight + FalseWeight;
592 for (
unsigned Iter = 0; Iter < PeelCount; ++Iter) {
600 if (ExitWeight < CurHeaderWeight)
601 CurHeaderWeight -= ExitWeight;
606 NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
618 #ifdef EXPENSIVE_CHECKS 623 auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
625 PeelCount, ExitWeight);
630 InsertTop = InsertBot;
636 NewBlocks[0]->getIterator(), F->
end());
645 if (LatchInst && L->
contains(LatchInst))
646 NewVal = LVMap[LatchInst];
656 uint64_t BackEdgeWeight = 0;
657 if (ExitWeight < CurHeaderWeight)
658 BackEdgeWeight = CurHeaderWeight - ExitWeight;
663 HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
664 : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
673 simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
void push_back(const T &Elt)
The main scalar evolution driver.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
A cache of @llvm.assume calls within a function.
LLVMContext & getContext() const
All values hold a context through their type.
BasicBlock * getSuccessor(unsigned i) const
bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing)
Return true if, for all loop invariant X, the predicate "LHS `Pred` X" is monotonically increasing or...
STATISTIC(NumFunctions, "Total number of functions")
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
iterator begin()
Instruction iterator methods.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
static unsigned calculateIterationsToInvariance(PHINode *Phi, Loop *L, BasicBlock *BackEdge, SmallDenseMap< PHINode *, unsigned > &IterationsToInvariance)
bool match(Val *V, const Pattern &P)
static cl::opt< unsigned > UnrollPeelMaxCount("unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling."))
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
const Loop * getLoop() const
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned IterNumber, unsigned AvgIters, uint64_t &PeeledHeaderWeight)
Update the branch weights of the latch of a peeled-off loop iteration.
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA)
Simplify each loop in a loop nest recursively.
void setName(const Twine &Name)
Change the name of the value.
RPOIterator endRPO() const
BlockT * getHeader() const
void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE)
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
iterator find(const_arg_type_t< KeyT > Val)
initializer< Ty > init(const Ty &Val)
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
static cl::opt< unsigned > UnrollForcePeelCount("unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information."))
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA)
Peel off the first PeelCount iterations of loop L.
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE)
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
void splice(iterator where, iplist_impl &L2)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
self_iterator getIterator()
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop's estimated trip count based on branch weight metadata.
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
static const unsigned InfiniteIterationsToInvariance
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Exit, SmallVectorImpl< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI)
Clones the body of the loop L, putting it between InsertTop and InsertBot.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Store the result of a depth first search within basic blocks contained by a single loop...
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LoopT * getParentLoop() const
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
void setIncomingValue(unsigned i, Value *V)
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
iterator_range< block_iterator > blocks() const
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool hasProfileData() const
Return true if the function is annotated with profile data.
const BasicBlock * getParent() const
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
void remapInstructionsInBlocks(const SmallVectorImpl< BasicBlock *> &Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.