45 #define DEBUG_TYPE "loop-unroll" 48 "Number of loops unrolled with run-time trip counts");
51 cl::desc(
"Allow runtime unrolling for loops with multiple exits, when " 52 "epilog is generated"));
85 assert(Latch &&
"Loop must have a latch");
86 BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
94 for (
PHINode &PN : Succ->phis()) {
108 NewPN->
addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
115 Value *V = PN.getIncomingValueForBlock(Latch);
129 PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
131 PN.addIncoming(NewPN, PrologExit);
145 nullptr, PreserveLCSSA);
153 assert(Count != 0 &&
"nonsensical Count!");
164 nullptr, PreserveLCSSA);
166 B.
CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
190 assert(Latch &&
"Loop must have a latch");
191 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
220 assert(PN.hasOneUse() &&
"The phi should have 1 use");
221 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
222 assert(EpilogPN->getParent() == Exit &&
"EpilogPN should be in Exit block");
227 Value *V = PN.getIncomingValueForBlock(Latch);
234 EpilogPN->addIncoming(V, EpilogLatch);
236 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
237 "EpilogPN should have EpilogPreHeader incoming block");
239 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
255 for (
PHINode &PN : Succ->phis()) {
261 NewPN->
addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
263 NewPN->
addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
267 PHINode *VPN = cast<PHINode>(VMap[&PN]);
275 assert(Exit &&
"Loop must have a single exit block only");
302 const bool UseEpilogRemainder,
const bool UnrollRemainder,
305 std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks,
307 StringRef suffix = UseEpilogRemainder ?
"epil" :
"prol";
315 NewLoops[ParentLoop] = ParentLoop;
316 if (!CreateRemainderLoop)
317 NewLoops[L] = ParentLoop;
323 NewBlocks.push_back(NewBB);
328 if (CreateRemainderLoop || LI->
getLoopFor(*BB) != L || ParentLoop)
345 DT->
addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
352 VMap.
erase((*BB)->getTerminator());
353 BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
356 if (!CreateRemainderLoop) {
357 Builder.CreateBr(InsertBot);
366 Builder.CreateIsNotNull(IdxSub, NewIdx->
getName() +
".cmp");
367 Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
371 LatchBR->eraseFromParent();
378 PHINode *NewPHI = cast<PHINode>(VMap[&*
I]);
379 if (!CreateRemainderLoop) {
380 if (UseEpilogRemainder) {
386 cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
391 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
399 if (CreateRemainderLoop) {
400 Loop *NewLoop = NewLoops[L];
402 assert(NewLoop &&
"L should have been cloned");
432 bool UseEpilogRemainder) {
442 for (
auto *BB : Exits)
451 dbgs() <<
"Bailout for multi-exit handling when latch exit has >1 " 471 bool PreserveLCSSA,
bool UseEpilogRemainder) {
476 PreserveLCSSA, UseEpilogRemainder) &&
477 "Should be safe to unroll before checking profitability!");
501 if (ExitingBlocks.
size() > 2)
508 return (OtherExits.
size() == 1 &&
509 OtherExits[0]->getTerminatingDeoptimizeCall());
555 bool AllowExpensiveTripCount,
556 bool UseEpilogRemainder,
563 LLVM_DEBUG(UseEpilogRemainder ?
dbgs() <<
"Using epilog remainder.\n" 564 :
dbgs() <<
"Using prolog remainder.\n");
582 <<
"Loop latch not terminated by a conditional branch.\n");
586 unsigned ExitIndex = LatchBR->
getSuccessor(0) == Header ? 1 : 0;
594 <<
"One of the loop latch successors must be the exit block.\n");
600 bool isMultiExitUnrollingEnabled =
602 UseEpilogRemainder) &&
606 if (!isMultiExitUnrollingEnabled &&
610 <<
"Multiple exit/exiting blocks in loop and multi-exit unrolling not " 626 if (isa<SCEVCouldNotCompute>(BECountSC) ||
635 const SCEV *TripCountSC =
637 if (isa<SCEVCouldNotCompute>(TripCountSC)) {
646 if (!AllowExpensiveTripCount &&
648 LLVM_DEBUG(
dbgs() <<
"High cost for expanding trip count scev!\n");
654 if (
Log2_32(Count) > BEWidth) {
657 <<
"Count failed constraint on overflow trip count calculation.\n");
675 if (UseEpilogRemainder) {
683 nullptr, PreserveLCSSA);
690 EpilogPreHeader =
SplitBlock(NewExit, NewExitTerminator, DT, LI);
695 PrologPreHeader =
SplitEdge(PreHeader, Header, DT, LI);
732 ModVal = B.
CreateAnd(TripCount, Count - 1,
"xtraiter");
760 BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
761 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
763 B.
CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
766 if (UseEpilogRemainder)
782 std::vector<BasicBlock *> NewBlocks;
787 bool CreateRemainderLoop = (Count != 2);
792 BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
793 BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
795 L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
796 InsertTop, InsertBot,
797 NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
802 NewBlocks[0]->getIterator(),
809 for (
auto *BB : OtherExits) {
810 for (
auto &II : *BB) {
815 if (!isa<PHINode>(II))
817 PHINode *Phi = cast<PHINode>(&II);
821 for (
unsigned i =0; i < oldNumOperands; i++){
835 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) 838 [SuccBB](
BasicBlock *EB) {
return EB == SuccBB; }) ||
839 SuccBB == LatchExit) &&
840 "Breaks the definition of dedicated exits!");
857 for (
auto *BB : L->
blocks()) {
858 auto *DomNodeBB = DT->
getNode(BB);
859 for (
auto *DomChild : DomNodeBB->getChildren()) {
860 auto *DomChildBB = DomChild->
getBlock();
865 for (
auto *BB : ChildrenToUpdate)
893 if (UseEpilogRemainder) {
897 EpilogPreHeader, NewPreHeader, VMap, DT, LI,
903 Value *TestVal = B2.CreateSub(TripCount, ModVal,
"unroll_iter");
905 B2.SetInsertPoint(LatchBR);
913 IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->
getName() +
".ncmp");
915 IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->
getName() +
".ncmp");
922 ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
923 NewPreHeader, VMap, DT, LI, PreserveLCSSA);
931 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) 939 if (OtherExits.size() > 0) {
950 if (remainderLoop && UnrollRemainder) {
953 UnrollLoop(remainderLoop, Count - 1, Count - 1,
957 0,
false, LI, SE, DT, AC,
958 nullptr, PreserveLCSSA);
962 *ResultLoop = remainderLoop;
963 NumRuntimeUnrolled++;
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string in and methods for querying it. ...
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
static bool canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can safely unroll a multi-exit/exiting loop.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return an i1 value testing if Arg is not null.
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
void push_back(const T &Elt)
The main scalar evolution driver.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool isHighCostExpansion(const SCEV *Expr, Loop *L, const Instruction *At=nullptr)
Return true for expressions that may incur non-trivial cost to evaluate at runtime.
A cache of @llvm.assume calls within a function.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
iterator begin()
Instruction iterator methods.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI)
Create a clone of the blocks in a loop and connect them together.
bool isIntegerTy() const
True if this is an instance of IntegerType.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
void setName(const Twine &Name)
Change the name of the value.
RPOIterator endRPO() const
BlockT * getHeader() const
Type * getType() const
All values are typed, get the type of this value.
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
const T & getValue() const LLVM_LVALUE_FUNCTION
const char *const LLVMLoopUnrollFollowupRemainder
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling prolog code to the original loop.
The loop was fully unrolled into straight-line code.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase * getIDom() const
Value * getIncomingValueForBlock(const BasicBlock *BB) const
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
void splice(iterator where, iplist_impl &L2)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
self_iterator getIterator()
void getExitingBlocks(SmallVectorImpl< BlockT *> &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Connect the unrolling epilog code to the original loop.
const char *const LLVMLoopUnrollFollowupAll
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Iterator for intrusive lists based on ilist_node.
Type * getType() const
Return the LLVM type of this SCEV expression.
void setIncomingBlock(unsigned i, BasicBlock *BB)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Store the result of a depth first search within basic blocks contained by a single loop...
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM...
This class uses information about analyze scalars to rewrite expressions in canonical form...
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
LoopT * getParentLoop() const
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
bool isUnconditional() const
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
void setCondition(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
succ_range successors(Instruction *I)
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
The loop was not modified.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the specified block at the specified instruction - everything before SplitPt stays in Old and e...
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
void getUniqueExitBlocks(SmallVectorImpl< BlockT *> &ExitBlocks) const
Return all unique successor blocks of this loop.
StringRef - Represent a constant reference to a string, i.e.
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock)
Get the expression for the number of loop iterations for which this loop is guaranteed not to exit vi...
void setIncomingValue(unsigned i, Value *V)
iterator_range< block_iterator > blocks() const
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
bool erase(const KeyT &Val)
void forgetTopmostLoop(const Loop *L)