74 #define DEBUG_TYPE "block-placement" 76 STATISTIC(NumCondBranches,
"Number of conditional branches");
77 STATISTIC(NumUncondBranches,
"Number of unconditional branches");
79 "Potential frequency of taking conditional branches");
81 "Potential frequency of taking unconditional branches");
84 cl::desc(
"Force the alignment of all " 85 "blocks in the function."),
89 "align-all-nofallthru-blocks",
90 cl::desc(
"Force the alignment of all " 91 "blocks that have no fall-through predecessors (i.e. don't add " 92 "nops that are executed)."),
97 "block-placement-exit-block-bias",
98 cl::desc(
"Block frequency percentage a loop exit block needs " 99 "over the original exit to be considered the new exit."),
106 "loop-to-cold-block-ratio",
107 cl::desc(
"Outline loop blocks from loop chain if (frequency of loop) / " 108 "(frequency of block) is greater than this ratio"),
112 "force-loop-cold-block",
113 cl::desc(
"Force outlining cold blocks from loops."),
118 cl::desc(
"Model the cost of loop rotation more " 119 "precisely by using profile data."),
124 cl::desc(
"Force the use of precise cost " 125 "loop rotation strategy."),
130 cl::desc(
"Cost that models the probabilistic risk of an instruction " 131 "misfetch due to a jump comparing to falling through, whose cost " 136 cl::desc(
"Cost of jump instructions."),
140 cl::desc(
"Perform tail duplication during placement. " 141 "Creates more fallthrough opportunites in " 142 "outline branches."),
147 cl::desc(
"Perform branch folding during placement. " 148 "Reduces code size."),
153 "tail-dup-placement-threshold",
154 cl::desc(
"Instruction cutoff for tail duplication during layout. " 155 "Tail merging during layout is forced to have a threshold " 156 "that won't conflict."),
cl::init(2),
161 "tail-dup-placement-aggressive-threshold",
162 cl::desc(
"Instruction cutoff for aggressive tail duplication during " 163 "layout. Used at -O3. Tail merging during layout is forced to " 164 "have a threshold that won't conflict."),
cl::init(4),
169 "tail-dup-placement-penalty",
170 cl::desc(
"Cost penalty for blocks that can avoid breaking CFG by copying. " 171 "Copying can increase fallthrough, but it also increases icache " 172 "pressure. This parameter controls the penalty to account for that. " 173 "Percent as integer."),
179 "triangle-chain-count",
180 cl::desc(
"Number of triangle-shaped-CFG's that need to be in a row for the " 181 "triangle tail duplication heuristic to kick in. 0 to disable."),
228 BlockToChainMapType &BlockToChain;
237 : Blocks(1, BB), BlockToChain(BlockToChain) {
238 assert(BB &&
"Cannot create a chain with a null basic block");
239 BlockToChain[BB] =
this;
248 const_iterator
begin()
const {
return Blocks.
begin(); }
251 iterator
end() {
return Blocks.
end(); }
252 const_iterator
end()
const {
return Blocks.
end(); }
255 for(iterator i =
begin(); i !=
end(); ++i) {
271 assert(BB &&
"Can't merge a null block.");
272 assert(!Blocks.
empty() &&
"Can't merge into an empty chain.");
276 assert(!BlockToChain[BB] &&
277 "Passed chain is null, but BB has entry in BlockToChain.");
279 BlockToChain[BB] =
this;
283 assert(BB == *Chain->begin() &&
"Passed BB is not head of Chain.");
284 assert(Chain->begin() != Chain->end());
290 assert(BlockToChain[ChainBB] == Chain &&
"Incoming blocks not in chain.");
291 BlockToChain[ChainBB] =
this;
312 unsigned UnscheduledPredecessors = 0;
320 struct BlockAndTailDupResult {
326 struct WeightedEdge {
346 std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
399 void markChainSuccessors(
401 const BlockFilterSet *BlockFilter =
nullptr);
405 void markBlockSuccessors(
408 const BlockFilterSet *BlockFilter =
nullptr);
411 collectViableSuccessors(
413 const BlockFilterSet *BlockFilter,
415 bool shouldPredBlockBeOutlined(
417 const BlockChain &Chain,
const BlockFilterSet *BlockFilter,
419 bool repeatedlyTailDuplicateBlock(
422 BlockChain &Chain, BlockFilterSet *BlockFilter,
424 bool maybeTailDuplicateBlock(
426 BlockChain &Chain, BlockFilterSet *BlockFilter,
428 bool &DuplicatedToLPred);
429 bool hasBetterLayoutPredecessor(
433 const BlockFilterSet *BlockFilter);
434 BlockAndTailDupResult selectBestSuccessor(
436 const BlockFilterSet *BlockFilter);
440 const BlockChain &PlacedChain,
442 const BlockFilterSet *BlockFilter);
451 const BlockFilterSet *BlockFilter);
454 BlockFilterSet *BlockFilter =
nullptr);
456 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
458 const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
459 BlockFilterSet collectLoopBlockSet(
const MachineLoop &L);
463 const BlockFilterSet &LoopBlockSet);
464 void rotateLoopWithProfile(
466 const BlockFilterSet &LoopBlockSet);
467 void buildCFGChains();
468 void optimizeBranches();
475 bool isProfitableToTailDup(
478 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
483 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
486 BlockAndTailDupResult getBestTrellisSuccessor(
490 const BlockFilterSet *BlockFilter);
493 static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
499 bool canTailDuplicateUnplacedPreds(
501 const BlockChain &Chain,
const BlockFilterSet *BlockFilter);
505 void precomputeTriangleChains();
516 bool allowTailDupPlacement()
const {
539 "Branch Probability Basic Block Placement",
false,
false)
555 OS <<
" ('" << BB->
getName() <<
"')";
567 void MachineBlockPlacement::markChainSuccessors(
569 const BlockFilterSet *BlockFilter) {
573 markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
583 void MachineBlockPlacement::markBlockSuccessors(
591 if (BlockFilter && !BlockFilter->count(Succ))
593 BlockChain &SuccChain = *BlockToChain[Succ];
595 if (&Chain == &SuccChain || Succ == LoopHeaderBB)
600 if (SuccChain.UnscheduledPredecessors == 0 ||
601 --SuccChain.UnscheduledPredecessors > 0)
604 auto *NewBB = *SuccChain.begin();
605 if (NewBB->isEHPad())
618 const BlockFilterSet *BlockFilter,
638 bool SkipSucc =
false;
639 if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
642 BlockChain *SuccChain = BlockToChain[Succ];
643 if (SuccChain == &Chain) {
645 }
else if (Succ != *SuccChain->begin()) {
647 <<
" -> Mid chain!\n");
657 return AdjustedSumProb;
668 if (SuccProbN >= SuccProbD)
683 if (Successors.
count(&BB))
686 if (!Successors.
count(Succ))
712 uint64_t EntryFreq) {
715 return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
723 bool MachineBlockPlacement::isProfitableToTailDup(
726 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
753 auto AdjustedSuccSumProb =
754 collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
756 auto BBFreq = MBFI->getBlockFreq(BB);
757 auto SuccFreq = MBFI->getBlockFreq(Succ);
760 uint64_t EntryFreq = MBFI->getEntryFreq();
763 if (SuccSuccs.
size() == 0)
770 if (Prob > BestSuccSucc)
782 if (SuccPred == Succ || SuccPred == BB
783 || BlockToChain[SuccPred] == &Chain
784 || (BlockFilter && !BlockFilter->count(SuccPred)))
786 auto Freq = MBFI->getBlockFreq(SuccPred)
788 if (Freq > SuccBestPred)
856 if (UProb > AdjustedSuccSumProb / 2 &&
857 !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
861 (P + V), (Qout +
std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
865 (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
877 bool MachineBlockPlacement::isTrellis(
880 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
895 if (Successors.count(SuccPred)) {
898 if (!Successors.count(CheckSucc))
902 const BlockChain *PredChain = BlockToChain[SuccPred];
903 if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
904 PredChain == &Chain || PredChain == BlockToChain[Succ])
908 if (!SeenPreds.
insert(SuccPred).second)
926 std::pair<MachineBlockPlacement::WeightedEdge,
927 MachineBlockPlacement::WeightedEdge>
928 MachineBlockPlacement::getBestNonConflictingEdges(
939 auto Cmp = [](WeightedEdge A, WeightedEdge
B) {
return A.Weight >
B.Weight; };
941 std::stable_sort(Edges[0].
begin(), Edges[0].
end(), Cmp);
942 std::stable_sort(Edges[1].
begin(), Edges[1].
end(), Cmp);
943 auto BestA = Edges[0].begin();
944 auto BestB = Edges[1].begin();
947 if (BestA->Src == BestB->Src) {
949 auto SecondBestA = std::next(BestA);
950 auto SecondBestB = std::next(BestB);
953 if (BestAScore < BestBScore)
959 if (BestB->Src == BB)
961 return std::make_pair(*BestA, *BestB);
971 MachineBlockPlacement::BlockAndTailDupResult
972 MachineBlockPlacement::getBestTrellisSuccessor(
976 const BlockFilterSet *BlockFilter) {
978 BlockAndTailDupResult Result = {
nullptr,
false};
985 if (Successors.
size() != 2 || ViableSuccs.
size() != 2)
991 for (
auto Succ : ViableSuccs) {
995 if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
996 BlockToChain[SuccPred] == &Chain ||
997 BlockToChain[SuccPred] == BlockToChain[Succ])
1001 Edges[SuccIndex].
push_back({EdgeFreq, SuccPred, Succ});
1007 WeightedEdge BestA, BestB;
1008 std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
1010 if (BestA.Src != BB) {
1014 LLVM_DEBUG(
dbgs() <<
"Trellis, but not one of the chosen edges.\n");
1021 if (BestA.Dest == BestB.Src) {
1027 if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
1028 canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
1030 Chain, BlockFilter)) {
1034 <<
", probability: " << Succ2Prob
1035 <<
" (Tail Duplicate)\n");
1037 Result.ShouldTailDup =
true;
1043 ComputedEdges[BestB.Src] = { BestB.Dest,
false };
1045 auto TrellisSucc = BestA.Dest;
1049 <<
", probability: " << SuccProb <<
" (Trellis)\n");
1050 Result.BB = TrellisSucc;
1057 bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
1059 const BlockChain &Chain,
const BlockFilterSet *BlockFilter) {
1060 if (!shouldTailDuplicate(Succ))
1070 if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
1071 || BlockToChain[Pred] == &Chain)
1132 void MachineBlockPlacement::precomputeTriangleChains() {
1133 struct TriangleChain {
1134 std::vector<MachineBasicBlock *> Edges;
1137 : Edges({src, dst}) {}
1140 assert(getKey()->isSuccessor(dst) &&
1141 "Attempting to append a block that is not a successor.");
1142 Edges.push_back(dst);
1145 unsigned count()
const {
return Edges.size() - 1; }
1148 return Edges.
back();
1172 if (PDom ==
nullptr)
1179 if (!shouldTailDuplicate(PDom))
1181 bool CanTailDuplicate =
true;
1188 CanTailDuplicate =
false;
1194 if (!CanTailDuplicate)
1201 auto Found = TriangleChainMap.
find(&BB);
1204 if (Found != TriangleChainMap.
end()) {
1205 TriangleChain Chain = std::move(Found->second);
1206 TriangleChainMap.
erase(Found);
1208 TriangleChainMap.
insert(std::make_pair(Chain.getKey(), std::move(Chain)));
1210 auto InsertResult = TriangleChainMap.
try_emplace(PDom, &BB, PDom);
1211 assert(InsertResult.second &&
"Block seen twice.");
1219 for (
auto &ChainPair : TriangleChainMap) {
1220 TriangleChain &Chain = ChainPair.second;
1227 Chain.Edges.pop_back();
1231 <<
" as pre-computed based on triangles.\n");
1233 auto InsertResult = ComputedEdges.
insert({src, {dst,
true}});
1234 assert(InsertResult.second &&
"Block seen twice.");
1276 bool MachineBlockPlacement::hasBetterLayoutPredecessor(
1280 const BlockFilterSet *BlockFilter) {
1283 if (SuccChain.UnscheduledPredecessors == 0)
1403 BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
1404 bool BadCFGConflict =
false;
1407 if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
1408 (BlockFilter && !BlockFilter->count(Pred)) ||
1409 BlockToChain[Pred] == &Chain ||
1430 if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.
getCompl()) {
1431 BadCFGConflict =
true;
1436 if (BadCFGConflict) {
1438 << SuccProb <<
" (prob) (non-cold CFG conflict)\n");
1455 MachineBlockPlacement::BlockAndTailDupResult
1456 MachineBlockPlacement::selectBestSuccessor(
1458 const BlockFilterSet *BlockFilter) {
1461 BlockAndTailDupResult BestSucc = {
nullptr,
false };
1465 auto AdjustedSumProb =
1466 collectViableSuccessors(BB, Chain, BlockFilter, Successors);
1473 auto FoundEdge = ComputedEdges.
find(BB);
1474 if (FoundEdge != ComputedEdges.
end()) {
1476 ComputedEdges.
erase(FoundEdge);
1477 BlockChain *SuccChain = BlockToChain[Succ];
1478 if (BB->
isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
1479 SuccChain != &Chain && Succ == *SuccChain->begin())
1480 return FoundEdge->second;
1485 if (isTrellis(BB, Successors, Chain, BlockFilter))
1486 return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
1499 BlockChain &SuccChain = *BlockToChain[Succ];
1502 if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
1503 Chain, BlockFilter)) {
1505 if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
1506 DupCandidates.
push_back(std::make_tuple(SuccProb, Succ));
1512 <<
", probability: " << SuccProb
1513 << (SuccChain.UnscheduledPredecessors != 0 ?
" (CFG break)" :
"")
1516 if (BestSucc.BB && BestProb >= SuccProb) {
1523 BestProb = SuccProb;
1530 if (DupCandidates.
size() != 0) {
1532 [](
const std::tuple<BranchProbability, MachineBasicBlock *> &a,
1533 const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
1534 return std::get<0>(a) > std::get<0>(b);
1536 std::stable_sort(DupCandidates.
begin(), DupCandidates.
end(), cmp);
1538 for(
auto &Tup : DupCandidates) {
1541 std::tie(DupProb, Succ) = Tup;
1542 if (DupProb < BestProb)
1544 if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)
1545 && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) {
1547 <<
", probability: " << DupProb
1548 <<
" (Tail Duplicate)\n");
1550 BestSucc.ShouldTailDup =
true;
1579 return BlockToChain.
lookup(BB) == &Chain;
1583 if (WorkList.
empty())
1586 bool IsEHPad = WorkList[0]->isEHPad();
1592 "EHPad mismatch between block and work list.");
1594 BlockChain &SuccChain = *BlockToChain[MBB];
1595 if (&SuccChain == &Chain)
1598 assert(SuccChain.UnscheduledPredecessors == 0 &&
1599 "Found CFG-violating block");
1603 MBFI->printBlockFreq(
dbgs(), CandidateFreq) <<
" (freq)\n");
1623 if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
1627 BestFreq = CandidateFreq;
1641 const BlockChain &PlacedChain,
1643 const BlockFilterSet *BlockFilter) {
1646 if (BlockFilter && !BlockFilter->count(&*
I))
1648 if (BlockToChain[&*
I] != &PlacedChain) {
1649 PrevUnplacedBlockIt =
I;
1653 return *BlockToChain[&*
I]->
begin();
1659 void MachineBlockPlacement::fillWorkLists(
1662 const BlockFilterSet *BlockFilter =
nullptr) {
1663 BlockChain &Chain = *BlockToChain[MBB];
1664 if (!UpdatedPreds.
insert(&Chain).second)
1668 Chain.UnscheduledPredecessors == 0 &&
1669 "Attempting to place block with unscheduled predecessors in worklist.");
1671 assert(BlockToChain[ChainBB] == &Chain &&
1672 "Block in chain doesn't match BlockToChain map.");
1674 if (BlockFilter && !BlockFilter->count(Pred))
1676 if (BlockToChain[Pred] == &Chain)
1678 ++Chain.UnscheduledPredecessors;
1682 if (Chain.UnscheduledPredecessors != 0)
1692 void MachineBlockPlacement::buildChain(
1694 BlockFilterSet *BlockFilter) {
1695 assert(HeadBB &&
"BB must not be null.\n");
1696 assert(BlockToChain[HeadBB] == &Chain &&
"BlockToChainMap mis-match.\n");
1700 markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
1703 assert(BB &&
"null block found at end of chain in loop.");
1704 assert(BlockToChain[BB] == &Chain &&
"BlockToChainMap mis-match in loop.");
1705 assert(*std::prev(Chain.end()) == BB &&
"BB Not found at end of chain.");
1710 auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
1712 bool ShouldTailDup = Result.ShouldTailDup;
1713 if (allowTailDupPlacement())
1714 ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
1720 BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
1722 BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
1725 BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
1729 LLVM_DEBUG(
dbgs() <<
"Unnatural loop CFG detected, forcibly merging the " 1730 "layout successor until the CFG reduces\n");
1735 if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
1739 if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
1740 BlockFilter, PrevUnplacedBlockIt))
1745 BlockChain &SuccChain = *BlockToChain[BestSucc];
1748 SuccChain.UnscheduledPredecessors = 0;
1751 markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
1752 Chain.merge(BestSucc, &SuccChain);
1753 BB = *std::prev(Chain.end());
1771 MachineBlockPlacement::findBestLoopTop(
const MachineLoop &L,
1772 const BlockFilterSet &LoopBlockSet) {
1786 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
1787 if (!LoopBlockSet.count(*HeaderChain.begin()))
1796 if (!LoopBlockSet.count(Pred))
1799 << Pred->succ_size() <<
" successors, ";
1800 MBFI->printBlockFreq(
dbgs(), Pred) <<
" freq\n");
1801 if (Pred->succ_size() > 1)
1805 if (!BestPred || PredFreq > BestPredFreq ||
1806 (!(PredFreq < BestPredFreq) &&
1807 Pred->isLayoutSuccessor(L.
getHeader()))) {
1809 BestPredFreq = PredFreq;
1835 MachineBlockPlacement::findBestLoopExit(
const MachineLoop &L,
1836 const BlockFilterSet &LoopBlockSet) {
1845 BlockChain &HeaderChain = *BlockToChain[L.
getHeader()];
1846 if (!LoopBlockSet.count(*HeaderChain.begin()))
1850 unsigned BestExitLoopDepth = 0;
1860 BlockChain &Chain = *BlockToChain[MBB];
1863 if (MBB != *std::prev(Chain.end()))
1872 bool HasLoopingSucc =
false;
1878 BlockChain &SuccChain = *BlockToChain[Succ];
1880 if (&Chain == &SuccChain) {
1887 if (LoopBlockSet.count(Succ)) {
1890 HasLoopingSucc =
true;
1894 unsigned SuccLoopDepth = 0;
1896 SuccLoopDepth = ExitLoop->getLoopDepth();
1897 if (ExitLoop->contains(&L))
1898 BlocksExitingToOuterLoop.
insert(MBB);
1901 BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
1905 MBFI->printBlockFreq(
dbgs(), ExitEdgeFreq) <<
")\n");
1911 if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
1912 ExitEdgeFreq > BestExitEdgeFreq ||
1914 !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
1915 BestExitEdgeFreq = ExitEdgeFreq;
1920 if (!HasLoopingSucc) {
1922 ExitingBB = OldExitingBB;
1923 BestExitEdgeFreq = OldBestExitEdgeFreq;
1930 dbgs() <<
" No other candidate exit blocks, using loop header\n");
1934 LLVM_DEBUG(
dbgs() <<
" Loop has 1 block, using loop header as exit\n");
1941 if (!BlocksExitingToOuterLoop.
empty() &&
1942 !BlocksExitingToOuterLoop.
count(ExitingBB))
1956 void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
1958 const BlockFilterSet &LoopBlockSet) {
1966 if (Bottom == ExitingBB)
1969 bool ViableTopFallthrough =
false;
1971 BlockChain *PredChain = BlockToChain[Pred];
1972 if (!LoopBlockSet.count(Pred) &&
1973 (!PredChain || Pred == *std::prev(PredChain->end()))) {
1974 ViableTopFallthrough =
true;
1982 if (ViableTopFallthrough) {
1984 BlockChain *SuccChain = BlockToChain[Succ];
1985 if (!LoopBlockSet.count(Succ) &&
1986 (!SuccChain || Succ == *SuccChain->begin()))
1991 BlockChain::iterator ExitIt =
llvm::find(LoopChain, ExitingBB);
1992 if (ExitIt == LoopChain.end())
2014 if (ViableTopFallthrough) {
2015 assert(std::next(ExitIt) != LoopChain.end() &&
2016 "Exit should not be last BB");
2025 std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
2041 void MachineBlockPlacement::rotateLoopWithProfile(
2043 const BlockFilterSet &LoopBlockSet) {
2045 auto HeaderIter =
llvm::find(LoopChain, HeaderBB);
2046 auto RotationPos = LoopChain.end();
2065 for (
auto *Pred : HeaderBB->predecessors()) {
2066 BlockChain *PredChain = BlockToChain[Pred];
2067 if (!LoopBlockSet.count(Pred) &&
2068 (!PredChain || Pred == *std::prev(PredChain->end()))) {
2071 auto FallThruCost = ScaleBlockFrequency(EdgeFreq,
MisfetchCost);
2074 if (Pred->succ_size() == 1)
2075 FallThruCost += ScaleBlockFrequency(EdgeFreq,
JumpInstCost);
2076 HeaderFallThroughCost =
std::max(HeaderFallThroughCost, FallThruCost);
2085 for (
auto BB : LoopChain) {
2088 BlockChain *SuccChain = BlockToChain[Succ];
2089 if (!LoopBlockSet.count(Succ) &&
2090 (!SuccChain || Succ == *SuccChain->begin())) {
2092 LargestExitEdgeProb =
std::max(LargestExitEdgeProb, SuccProb);
2096 auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
2104 for (
auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
2105 EndIter = LoopChain.end();
2106 Iter != EndIter; Iter++, TailIter++) {
2109 if (TailIter == LoopChain.end())
2110 TailIter = LoopChain.begin();
2112 auto TailBB = *TailIter;
2120 if (Iter != HeaderIter)
2121 Cost += HeaderFallThroughCost;
2125 for (
auto &ExitWithFreq : ExitsWithFreq)
2126 if (TailBB != ExitWithFreq.first)
2127 Cost += ExitWithFreq.second;
2143 if (TailBB->isSuccessor(*Iter)) {
2144 auto TailBBFreq = MBFI->getBlockFreq(TailBB);
2145 if (TailBB->succ_size() == 1)
2146 Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
2148 else if (TailBB->succ_size() == 2) {
2150 auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
2152 ? TailBBFreq * TailToHeadProb.
getCompl()
2154 Cost += ScaleBlockFrequency(TailToHeadFreq,
MisfetchCost) +
2163 if (Cost < SmallestRotationCost) {
2164 SmallestRotationCost = Cost;
2169 if (RotationPos != LoopChain.end()) {
2171 <<
" to the top\n");
2172 std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
2181 MachineBlockPlacement::collectLoopBlockSet(
const MachineLoop &L) {
2182 BlockFilterSet LoopBlockSet;
2197 LoopFreq += MBFI->getBlockFreq(LoopPred) *
2201 auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
2204 LoopBlockSet.insert(LoopBB);
2209 return LoopBlockSet;
2218 void MachineBlockPlacement::buildLoopChains(
const MachineLoop &L) {
2222 buildLoopChains(*InnerLoop);
2225 "BlockWorkList not empty when starting to build loop chains.");
2227 "EHPadWorkList not empty when starting to build loop chains.");
2228 BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
2233 bool RotateLoopWithProfile =
2243 RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
2251 PreferredLoopExit =
nullptr;
2252 if (!RotateLoopWithProfile && LoopTop == L.getHeader())
2253 PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
2255 BlockChain &LoopChain = *BlockToChain[LoopTop];
2261 assert(LoopChain.UnscheduledPredecessors == 0 &&
2262 "LoopChain should not have unscheduled predecessors.");
2263 UpdatedPreds.
insert(&LoopChain);
2266 fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
2268 buildChain(LoopTop, LoopChain, &LoopBlockSet);
2270 if (RotateLoopWithProfile)
2271 rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2273 rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
2277 bool BadLoop =
false;
2278 if (LoopChain.UnscheduledPredecessors) {
2280 dbgs() <<
"Loop chain contains a block without its preds placed!\n" 2281 <<
" Loop header: " << getBlockName(*L.block_begin()) <<
"\n" 2282 <<
" Chain header: " << getBlockName(*LoopChain.begin()) <<
"\n";
2286 if (!LoopBlockSet.remove(ChainBB)) {
2290 dbgs() <<
"Loop chain contains a block not contained by the loop!\n" 2291 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n" 2292 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n" 2297 if (!LoopBlockSet.empty()) {
2300 dbgs() <<
"Loop contains blocks never placed into a chain!\n" 2301 <<
" Loop header: " <<
getBlockName(*L.block_begin()) <<
"\n" 2302 <<
" Chain header: " <<
getBlockName(*LoopChain.begin()) <<
"\n" 2305 assert(!BadLoop &&
"Detected problems with the placement of this loop.");
2308 BlockWorkList.
clear();
2309 EHPadWorkList.
clear();
2312 void MachineBlockPlacement::buildCFGChains() {
2320 new (ChainAllocator.
Allocate()) BlockChain(BlockToChain, BB);
2326 if (!TII->
analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
2333 assert(NextFI != FE &&
"Can't fallthrough past the last block.");
2334 LLVM_DEBUG(
dbgs() <<
"Pre-merging due to unanalyzable fallthrough: " 2337 Chain->merge(NextBB,
nullptr);
2339 BlocksWithUnanalyzableExits.
insert(&*BB);
2347 PreferredLoopExit =
nullptr;
2349 buildLoopChains(*L);
2352 "BlockWorkList should be empty before building final chain.");
2354 "EHPadWorkList should be empty before building final chain.");
2358 fillWorkLists(&MBB, UpdatedPreds);
2360 BlockChain &FunctionChain = *BlockToChain[&F->front()];
2361 buildChain(&F->front(), FunctionChain);
2368 bool BadFunc =
false;
2369 FunctionBlockSetType FunctionBlockSet;
2371 FunctionBlockSet.insert(&MBB);
2374 if (!FunctionBlockSet.erase(ChainBB)) {
2376 dbgs() <<
"Function chain contains a block not in the function!\n" 2380 if (!FunctionBlockSet.empty()) {
2383 dbgs() <<
"Function contains blocks never placed into a chain!\n" 2384 <<
" Bad block: " <<
getBlockName(RemainingBB) <<
"\n";
2386 assert(!BadFunc &&
"Detected problems with the block placement.");
2391 LLVM_DEBUG(
dbgs() <<
"[MBP] Function: " << F->getName() <<
"\n");
2393 LLVM_DEBUG(
dbgs() << (ChainBB == *FunctionChain.begin() ?
"Placing chain " 2397 F->splice(InsertPos, ChainBB);
2402 if (ChainBB == *FunctionChain.begin())
2413 if (!BlocksWithUnanalyzableExits.
count(PrevBB)) {
2419 "Unexpected block with un-analyzable fallthrough!");
2421 TBB = FBB =
nullptr;
2454 F->
back().updateTerminator();
2456 BlockWorkList.
clear();
2457 EHPadWorkList.
clear();
2460 void MachineBlockPlacement::optimizeBranches() {
2461 BlockChain &FunctionChain = *BlockToChain[&F->
front()];
2476 if (TBB && !Cond.
empty() && FBB &&
2488 ChainBB->updateTerminator();
2494 void MachineBlockPlacement::alignBlocks() {
2503 BlockChain &FunctionChain = *BlockToChain[&F->
front()];
2504 if (FunctionChain.begin() == FunctionChain.end())
2511 if (ChainBB == *FunctionChain.begin())
2529 if (Freq < WeightedEntryFreq)
2536 if (Freq < (LoopHeaderFreq * ColdProb))
2547 ChainBB->setAlignment(Align);
2557 BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
2558 if (LayoutEdgeFreq <= (Freq * ColdProb))
2559 ChainBB->setAlignment(Align);
2578 bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
2581 BlockChain &Chain, BlockFilterSet *BlockFilter,
2583 bool Removed, DuplicatedToLPred;
2584 bool DuplicatedToOriginalLPred;
2585 Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
2586 PrevUnplacedBlockIt,
2590 DuplicatedToOriginalLPred = DuplicatedToLPred;
2596 while (DuplicatedToLPred) {
2597 assert(Removed &&
"Block must have been removed to be duplicated into its " 2598 "layout predecessor.");
2605 BlockChain::iterator ChainEnd = Chain.end();
2606 DupBB = *(--ChainEnd);
2608 if (ChainEnd == Chain.begin())
2610 DupPred = *std::prev(ChainEnd);
2611 Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
2612 PrevUnplacedBlockIt,
2620 LPred = *std::prev(Chain.end());
2621 if (DuplicatedToOriginalLPred)
2622 markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
2640 bool MachineBlockPlacement::maybeTailDuplicateBlock(
2642 BlockChain &Chain, BlockFilterSet *BlockFilter,
2644 bool &DuplicatedToLPred) {
2645 DuplicatedToLPred =
false;
2646 if (!shouldTailDuplicate(BB))
2654 bool Removed =
false;
2655 auto RemovalCallback =
2661 bool InWorkList =
true;
2663 if (BlockToChain.
count(RemBB)) {
2664 BlockChain *Chain = BlockToChain[RemBB];
2665 InWorkList = Chain->UnscheduledPredecessors == 0;
2666 Chain->remove(RemBB);
2667 BlockToChain.
erase(RemBB);
2671 if (&(*PrevUnplacedBlockIt) == RemBB) {
2672 PrevUnplacedBlockIt++;
2678 if (RemBB->isEHPad())
2679 RemoveList = EHPadWorkList;
2690 BlockFilter->remove(RemBB);
2695 if (RemBB == PreferredLoopExit)
2696 PreferredLoopExit =
nullptr;
2701 auto RemovalCallbackRef =
2707 &DuplicatedPreds, &RemovalCallbackRef);
2710 DuplicatedToLPred =
false;
2713 BlockChain* PredChain = BlockToChain[Pred];
2715 DuplicatedToLPred =
true;
2716 if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
2717 || PredChain == &Chain)
2720 if (BlockFilter && !BlockFilter->count(NewSucc))
2722 BlockChain *NewChain = BlockToChain[NewSucc];
2723 if (NewChain != &Chain && NewChain != PredChain)
2724 NewChain->UnscheduledPredecessors++;
2730 bool MachineBlockPlacement::runOnMachineFunction(
MachineFunction &MF) {
2735 if (std::next(MF.
begin()) == MF.
end())
2739 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
2740 MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
2741 getAnalysis<MachineBlockFrequencyInfo>());
2742 MLI = &getAnalysis<MachineLoopInfo>();
2749 PreferredLoopExit =
nullptr;
2752 "BlockToChain map should be empty before starting placement.");
2754 "Computed Edge map should be empty before starting placement.");
2774 if (allowTailDupPlacement()) {
2775 MPDT = &getAnalysis<MachinePostDominatorTree>();
2778 bool PreRegAlloc =
false;
2779 TailDup.
initMF(MF, PreRegAlloc, MBPI,
true, TailDupSize);
2780 precomputeTriangleChains();
2792 if (MF.
size() > 3 && EnableTailMerge) {
2795 *MBPI, TailMergeSize);
2798 getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
2801 BlockToChain.
clear();
2802 ComputedEdges.
clear();
2814 BlockToChain.
clear();
2815 ComputedEdges.
clear();
2825 for (
auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
2826 auto LayoutPred = std::prev(MBI);
2827 if (!LayoutPred->isSuccessor(&*MBI))
2831 if (ViewBlockLayoutWithBFI !=
GVDT_None &&
2832 (ViewBlockFreqFuncName.empty() ||
2834 MBFI->view(
"MBP." + MF.getName(),
false);
2882 "Basic Block Placement Stats",
false,
false)
2886 "Basic Block Placement
Stats", false, false)
2888 bool MachineBlockPlacementStats::runOnMachineFunction(
MachineFunction &F) {
2890 if (std::next(F.begin()) == F.end())
2893 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
2894 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2899 (MBB.
succ_size() > 1) ? NumCondBranches : NumUncondBranches;
2901 (MBB.
succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
static cl::opt< unsigned > TailDupPlacementThreshold("tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " "that won't conflict."), cl::init(2), cl::Hidden)
const_iterator end(StringRef path)
Get end iterator over path.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
BranchProbability getCompl() const
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
typename SuperClass::const_iterator const_iterator
This class represents lattice values for constants.
static cl::opt< unsigned > MisfetchCost("misfetch-cost", cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden)
static BranchProbability getAdjustedProbability(BranchProbability OrigProb, BranchProbability AdjustedSumProb)
The helper function returns the branch probability that is adjusted or normalized over the new total ...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void push_back(const T &Elt)
bool requiresStructuredCFG() const
virtual const TargetLowering * getTargetLowering() const
void initializeMachineBlockPlacementStatsPass(PassRegistry &)
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
An efficient, type-erasing, non-owning reference to a callable.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
STATISTIC(NumFunctions, "Total number of functions")
static BranchProbability getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB)
static BranchProbability getOne()
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
char & MachineBlockPlacementStatsID
MachineBlockPlacementStats - This pass collects statistics about the basic block placement using bran...
void setAlignment(unsigned Align)
Set alignment of the basic block.
CodeGenOpt::Level getOptLevel() const
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator_range< succ_iterator > successors()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
static cl::opt< bool > TailDupPlacement("tail-dup-placement", cl::desc("Perform tail duplication during placement. " "Creates more fallthrough opportunites in " "outline branches."), cl::init(true), cl::Hidden)
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, MachineModuleInfo *mmi, MachineLoopInfo *mli=nullptr, bool AfterPlacement=false)
Perhaps branch folding, tail merging and other CFG optimizations on the given function.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void removeBlock(MachineBasicBlock *BB)
This method completely removes BB from all data structures, including all of the Loop objects it is n...
block placement Basic Block Placement Stats
Target-Independent Code Generator Pass Configuration Options.
BlockT * getHeader() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void DestroyAll()
Call the destructor of each allocated object and deallocate all but the current slab and reset the cu...
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
virtual const TargetInstrInfo * getInstrInfo() const
cl::opt< std::string > ViewBlockFreqFuncName
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
TargetInstrInfo - Interface to description of machine instruction set.
iterator find(const_arg_type_t< KeyT > Val)
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
cl::opt< unsigned > ProfileLikelyProb
bool getEnableTailMerge() const
virtual bool alignLoopsWithOptSize() const
Should loops be aligned even when the function is marked OptSize (but not MinSize).
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock *> *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
initializer< Ty > init(const Ty &Val)
bool erase(const KeyT &Val)
cl::opt< GVDAGType > ViewBlockLayoutWithBFI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static cl::opt< bool > BranchFoldPlacement("branch-fold-placement", cl::desc("Perform branch folding during placement. " "Reduces code size."), cl::init(true), cl::Hidden)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
virtual unsigned getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
static cl::opt< unsigned > TriangleChainCount("triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 0 to disable."), cl::init(2), cl::Hidden)
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
static bool greaterWithBias(BlockFrequency A, BlockFrequency B, uint64_t EntryFreq)
Compare 2 BlockFrequency's with a small penalty for A.
iterator_range< pred_iterator > predecessors()
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
succ_iterator succ_begin()
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
iterator erase(const_iterator CI)
const MachineBasicBlock & front() const
pred_iterator pred_begin()
void initializeMachineBlockPlacementPass(PassRegistry &)
static cl::opt< bool > ForcePreciseRotationCost("force-precise-rotation-cost", cl::desc("Force the use of precise cost " "loop rotation strategy."), cl::init(false), cl::Hidden)
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
static cl::opt< unsigned > TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Branch Probability Basic Block Placement
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
A SetVector that performs no allocations if smaller than a certain size.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
static cl::opt< bool > ForceLoopColdBlock("force-loop-cold-block", cl::desc("Force outlining cold blocks from loops."), cl::init(false), cl::Hidden)
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static cl::opt< unsigned > JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > TailDupPlacementAggressiveThreshold("tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " "have a threshold that won't conflict."), cl::init(4), cl::Hidden)
INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_END(MachineBlockPlacement
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
unsigned pred_size() const
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static cl::opt< unsigned > TailDupPlacementPenalty("tail-dup-placement-penalty", cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " "Copying can increase fallthrough, but it also increases icache " "pressure. This parameter controls the penalty to account for that. " "Percent as integer."), cl::init(2), cl::Hidden)
void setPreservesAll()
Set by analyses that do not transform their input at all.
typename SuperClass::iterator iterator
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
unsigned succ_size() const
Branch Probability Basic Block static false std::string getBlockName(const MachineBasicBlock *BB)
Helper to print the name of a MBB.
cl::opt< unsigned > StaticLikelyProb
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
unsigned succ_size(const Instruction *I)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
static cl::opt< unsigned > AlignAllBlock("align-all-blocks", cl::desc("Force the alignment of all " "blocks in the function."), cl::init(0), cl::Hidden)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
static uint64_t getMaxFrequency()
Returns the maximum possible frequency, the saturation value.
void emplace_back(ArgTypes &&... Args)
bool isEHPad() const
Returns true if the block is a landing pad.
LLVM_NODISCARD bool empty() const
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
StringRef getName() const
Return a constant reference to the value's name.
static bool hasSameSuccessors(MachineBasicBlock &BB, SmallPtrSetImpl< const MachineBasicBlock *> &Successors)
Check if BB has exactly the successors in Successors.
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
char & MachineBlockPlacementID
MachineBlockPlacement - This pass places basic blocks based on branch probabilities.
block_iterator block_end() const
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
static cl::opt< unsigned > LoopToColdBlockRatio("loop-to-cold-block-ratio", cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden)
LLVM_NODISCARD bool empty() const
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const
Reverses the branch condition of the specified condition list, returning false on success and true if...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
static BranchProbability getZero()
Utility class to perform tail duplication.
static cl::opt< bool > PreciseRotationCost("precise-rotation-cost", cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden)
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
block_iterator block_begin() const
static cl::opt< unsigned > AlignAllNonFallThruBlocks("align-all-nofallthru-blocks", cl::desc("Force the alignment of all " "blocks that have no fall-through predecessors (i.e. don't add " "nops that are executed)."), cl::init(0), cl::Hidden)
bool hasProfileData() const
Return true if the function is annotated with profile data.
uint32_t getNumerator() const
static cl::opt< unsigned > ExitBlockBias("block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " "over the original exit to be considered the new exit."), cl::init(0), cl::Hidden)
This file describes how to lower LLVM code to machine code.