43 #define DEBUG_TYPE "loop-unroll-and-jam" 45 STATISTIC(NumUnrolledAndJammed,
"Number of loops unroll and jammed");
46 STATISTIC(NumCompletelyUnrolledAndJammed,
"Number of loops unroll and jammed");
73 if (BB == SubLoopPreHeader)
96 for (
auto &Phi : Header->
phis()) {
97 Value *V = Phi.getIncomingValueForBlock(Latch);
102 while (!Worklist.
empty()) {
124 std::vector<Instruction *> Visited;
128 Visited.push_back(I);
175 Loop *L,
unsigned Count,
unsigned TripCount,
unsigned TripMultiple,
185 if (TripCount == 0 && Count < 2) {
186 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; almost nothing to do\n");
192 assert(TripCount == 0 || TripCount % TripMultiple == 0);
195 bool CompletelyUnroll = (Count == TripCount);
198 if (TripMultiple == 1 || TripMultiple % Count != 0) {
201 UnrollRemainder, LI, SE, DT, AC,
true,
203 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; remainder loop could not be " 204 "generated when assuming runtime trip count\n");
218 if (CompletelyUnroll) {
220 << Header->
getName() <<
" with trip count " << TripCount
224 <<
"completely unroll and jammed loop with " 225 <<
NV(
"UnrollCount", TripCount) <<
" iterations");
227 auto DiagBuilder = [&]() {
230 return Diag <<
"unroll and jammed loop by a factor of " 231 <<
NV(
"UnrollCount", Count);
236 if (TripMultiple != 1) {
237 LLVM_DEBUG(
dbgs() <<
" with " << TripMultiple <<
" trips per branch");
239 return DiagBuilder() <<
" with " <<
NV(
"TripMultiple", TripMultiple)
240 <<
" trips per branch";
244 ORE->
emit([&]() {
return DiagBuilder() <<
" with run-time trip count"; });
252 assert(Preheader && LatchBlock && Header);
256 bool SubLoopContinueOnTrue = SubLoop->contains(
257 SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
270 std::vector<BasicBlock *> ForeBlocksFirst;
271 std::vector<BasicBlock *> ForeBlocksLast;
272 std::vector<BasicBlock *> SubLoopBlocksFirst;
273 std::vector<BasicBlock *> SubLoopBlocksLast;
274 std::vector<BasicBlock *> AftBlocksFirst;
275 std::vector<BasicBlock *> AftBlocksLast;
276 ForeBlocksFirst.push_back(Header);
277 ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
278 SubLoopBlocksFirst.push_back(SubLoop->getHeader());
279 SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
280 AftBlocksFirst.push_back(SubLoop->getExitBlock());
287 Header, LatchBlock, SubLoop->getLoopPreheader()->
getTerminator(),
302 if (!isa<DbgInfoIntrinsic>(&
I))
304 auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
306 I.setDebugLoc(NewDIL.getValue());
309 <<
"Failed to create new discriminator: " 310 << DIL->getFilename() <<
" Line: " << DIL->getLine());
314 for (
unsigned It = 1; It != Count; ++It) {
315 std::vector<BasicBlock *> NewBlocks;
324 if (ForeBlocks.
count(*BB)) {
327 if (*BB == ForeBlocksFirst[0])
328 ForeBlocksFirst.push_back(New);
329 if (*BB == ForeBlocksLast[0])
330 ForeBlocksLast.push_back(New);
331 }
else if (SubLoopBlocks.
count(*BB)) {
332 SubLoop->addBasicBlockToLoop(New, *LI);
334 if (*BB == SubLoopBlocksFirst[0])
335 SubLoopBlocksFirst.push_back(New);
336 if (*BB == SubLoopBlocksLast[0])
337 SubLoopBlocksLast.push_back(New);
338 }
else if (AftBlocks.
count(*BB)) {
341 if (*BB == AftBlocksFirst[0])
342 AftBlocksFirst.push_back(New);
343 if (*BB == AftBlocksLast[0])
344 AftBlocksLast.push_back(New);
350 PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
351 LastValueMap[*BB] = New;
354 PrevItValueMap[
VI->second] =
355 const_cast<Value *
>(It == 1 ?
VI->first : LastValueMap[
VI->first]);
356 LastValueMap[
VI->first] =
VI->second;
359 NewBlocks.push_back(New);
362 if (*BB == ForeBlocksFirst[0])
364 else if (*BB == SubLoopBlocksFirst[0])
366 else if (*BB == AftBlocksFirst[0])
371 auto BBDomNode = DT->
getNode(*BB);
372 auto BBIDom = BBDomNode->
getIDom();
373 BasicBlock *OriginalBBIDom = BBIDom->getBlock();
375 assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
377 New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
385 if (
auto *II = dyn_cast<IntrinsicInst>(&
I))
393 for (
PHINode &Phi : ForeBlocksFirst[It]->phis()) {
394 Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
395 assert(OldValue &&
"should have incoming edge from Aft[It]");
396 Value *NewValue = OldValue;
397 if (
Value *PrevValue = PrevItValueMap[OldValue])
398 NewValue = PrevValue;
400 assert(Phi.getNumOperands() == 2);
401 Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
402 Phi.setIncomingValue(0, NewValue);
403 Phi.removeIncomingValue(1);
415 int I = Phi.getBasicBlockIndex(OldBB);
416 Phi.setIncomingBlock(I, NewBB);
425 for (
unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
426 if (Phi.getIncomingBlock(b) == OldBB) {
427 Value *OldValue = Phi.getIncomingValue(b);
428 if (
Value *LastValue = LastValueMap[OldValue])
429 Phi.setIncomingValue(b, LastValue);
430 Phi.setIncomingBlock(b, NewBB);
444 updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.
back(),
449 cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
451 ForeTerm->setSuccessor(0, Dest);
453 if (CompletelyUnroll) {
454 while (
PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->
begin())) {
456 Phi->getParent()->getInstList().erase(Phi);
460 updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
461 AftBlocksLast.back(), LastValueMap);
464 for (
unsigned It = 1; It != Count; It++) {
467 cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
474 cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
475 SubTerm->
setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
476 SubTerm->
setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
477 updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0],
478 ForeBlocksLast.back());
479 updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0],
480 SubLoopBlocksLast.back());
482 for (
unsigned It = 1; It != Count; It++) {
486 cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
490 updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It],
491 ForeBlocksLast.back());
492 updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It],
493 SubLoopBlocksLast.back());
494 movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
498 BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
499 if (CompletelyUnroll) {
503 Term->
setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
505 updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0],
506 SubLoopBlocksLast.back());
508 for (
unsigned It = 1; It != Count; It++) {
512 cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
516 updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
517 SubLoopBlocksLast.back());
518 movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
526 SubLoopBlocksFirst[0]);
528 SubLoopBlocksLast[0], AftBlocksFirst[0]);
531 ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
533 SubLoopBlocksLast.back(), AftBlocksFirst[0]);
539 MergeBlocks.
insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
540 MergeBlocks.
insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
541 MergeBlocks.
insert(AftBlocksLast.begin(), AftBlocksLast.end());
542 while (!MergeBlocks.
empty()) {
551 MergeBlocks.
erase(Dest);
553 MergeBlocks.
erase(BB);
555 MergeBlocks.
erase(BB);
564 NumCompletelyUnrolledAndJammed += CompletelyUnroll;
565 ++NumUnrolledAndJammed;
570 Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop);
572 if (!CompletelyUnroll)
574 assert(SubLoop->isLoopSimplifyForm());
579 if (CompletelyUnroll)
592 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
596 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
600 }
else if (
I.mayReadOrWriteMemory()) {
610 unsigned LoopDepth,
bool InnerLoop,
614 for (
Value *
I : Earlier) {
615 for (
Value *J : Later) {
621 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
629 if (
auto D = DI.
depends(Src, Dst,
true)) {
630 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
632 if (
D->isConfused()) {
634 <<
" " << *Src <<
"\n" 635 <<
" " << *Dst <<
"\n");
641 <<
" " << *Src <<
"\n" 642 <<
" " << *Dst <<
"\n");
646 assert(LoopDepth + 1 <=
D->getLevels());
650 <<
" " << *Src <<
"\n" 651 <<
" " << *Dst <<
"\n");
738 if (SubLoopLatch != SubLoopExit)
752 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Incompatible loop layout\n");
759 if (AftBlocks.
size() != 1) {
760 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Can't currently handle " 761 "multiple blocks after the loop\n");
768 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Inner loop iteration count is " 769 "not consistent on each iteration\n");
777 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Something may throw\n");
789 Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](
Instruction *
I) {
806 "instructions after subloop to before it\n");
814 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; failed dependency check\n");
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
static bool getLoadsAndStores(BasicBlockSet &Blocks, SmallVector< Value *, 4 > &MemInstr)
bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const
Return true if this Loop and all inner subloops are in LCSSA form.
unsigned getLoopDepth() const
Return the nesting level of this loop.
void push_back(const T &Elt)
The main scalar evolution driver.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
A cache of @llvm.assume calls within a function.
BasicBlock * foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT)
Folds a basic block into its predecessor if it only has one predecessor, and that predecessor only ha...
static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, BasicBlockSet &AftBlocks, T Visit)
BasicBlock * getSuccessor(unsigned i) const
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop...
STATISTIC(NumFunctions, "Total number of functions")
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
DependenceInfo - This class is the main dependence-analysis driver.
static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, BasicBlock *Latch, Instruction *InsertLoc, BasicBlockSet &AftBlocks)
static bool checkDependencies(SmallVector< Value *, 4 > &Earlier, SmallVector< Value *, 4 > &Later, unsigned LoopDepth, bool InnerLoop, DependenceInfo &DI)
iterator begin()
Instruction iterator methods.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
BlockT * getHeader() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI)
This header provides classes for managing per-loop analyses.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
void remapInstruction(Instruction *I, ValueToValueMapTy &VMap)
Convert the instruction operands from referencing the current values into those specified by VMap...
The loop was fully unrolled into straight-line code.
void applyUpdates(ArrayRef< UpdateType > Updates)
Inform the dominator tree about a sequence of CFG edge insertions and deletions and perform a batch u...
SmallPtrSet< BasicBlock *, 4 > BasicBlockSet
bool isDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
DomTreeNodeBase * getIDom() const
LLVM_NODISCARD bool empty() const
static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop, BasicBlockSet &ForeBlocks, BasicBlockSet &SubLoopBlocks, BasicBlockSet &AftBlocks, DominatorTree *DT)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
const Instruction & back() const
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Store the result of a depth first search within basic blocks contained by a single loop...
void push_back(pointer val)
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LoopT * getParentLoop() const
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
void registerAssumption(CallInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void emplace_back(ArgTypes &&... Args)
LLVM_NODISCARD bool empty() const
Represents a single loop in the control flow graph.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
block_iterator block_end() const
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
bool isUnconditional() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
The loop was not modified.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
iterator_range< block_iterator > blocks() const
block_iterator block_begin() const
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
const BasicBlock * getParent() const
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC)
Perform some cleanup and simplifications on loops after unrolling.