67 #define DEBUG_TYPE "partial-inlining" 70 "Number of callsites functions partially inlined into.");
71 STATISTIC(NumColdOutlinePartialInlined,
"Number of times functions with " 72 "cold outlined regions were partially " 73 "inlined into its caller(s).");
75 "Number of cold single entry/exit regions found.");
77 "Number of cold single entry/exit regions outlined.");
87 cl::desc(
"Disable multi-region partial inlining"));
93 cl::desc(
"Force outline regions with live exits"));
99 cl::desc(
"Mark outline function calls with ColdCC"));
105 cl::desc(
"Trace partial inlining."));
119 cl::desc(
"Minimum ratio comparing relative sizes of each " 120 "outline candidate and original function"));
125 cl::desc(
"Minimum block executions to consider " 126 "its BranchProbabilityInfo valid"));
131 cl::desc(
"Minimum BranchProbability to consider a region cold."));
135 cl::desc(
"Max number of blocks to be partially inlined"));
141 cl::desc(
"Max number of partial inlining. The default is unlimited"));
149 cl::desc(
"Relative frequency of outline region to " 154 cl::desc(
"A debug option to add additional penalty to the computed one."));
158 struct FunctionOutliningInfo {
159 FunctionOutliningInfo() =
default;
163 unsigned GetNumInlinedBlocks()
const {
return Entries.size() + 1; }
179 struct FunctionOutliningMultiRegionInfo {
180 FunctionOutliningMultiRegionInfo()
184 struct OutlineRegionInfo {
188 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
189 ReturnBlock(ReturnBlock) {}
199 struct PartialInlinerImpl {
206 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
216 std::pair<bool, Function *> unswitchFunction(
Function *
F);
222 struct FunctionCloner {
225 FunctionCloner(
Function *
F, FunctionOutliningInfo *OI,
227 FunctionCloner(
Function *
F, FunctionOutliningMultiRegionInfo *OMRI,
234 void NormalizeReturnBlock();
237 bool doMultiRegionFunctionOutlining();
244 Function *doSingleRegionFunctionOutlining();
249 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
255 bool IsFunctionInlined =
false;
257 int OutlinedRegionCost = 0;
259 std::unique_ptr<FunctionOutliningInfo> ClonedOI =
nullptr;
261 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI =
nullptr;
262 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI =
nullptr;
267 int NumPartialInlining = 0;
268 std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
269 std::function<TargetTransformInfo &(Function &)> *GetTTI;
281 bool shouldPartialInline(
CallSite CS, FunctionCloner &Cloner,
288 bool tryPartialInline(FunctionCloner &Cloner);
292 void computeCallsiteToProfCountMap(
Function *DuplicateFunction,
295 bool IsLimitReached() {
302 if (
CallInst *CI = dyn_cast<CallInst>(U))
304 else if (
InvokeInst *II = dyn_cast<InvokeInst>(U))
313 return getCallSite(User);
316 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(
Function *F) {
320 return std::make_tuple(DLoc, Block);
329 std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
334 static int computeBBInlineCost(
BasicBlock *BB);
336 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(
Function *F);
337 std::unique_ptr<FunctionOutliningMultiRegionInfo>
341 struct PartialInlinerLegacyPass :
public ModulePass {
354 bool runOnModule(
Module &M)
override {
360 &getAnalysis<TargetTransformInfoWrapperPass>();
362 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
364 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
369 std::function<TargetTransformInfo &(Function &)> GetTTI =
371 return TTIWP->getTTI(F);
374 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI,
NoneType::None, PSI)
381 std::unique_ptr<FunctionOutliningMultiRegionInfo>
382 PartialInlinerImpl::computeOutliningColdRegionsInfo(
Function *F,
389 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
393 BFI = ScopedBFI.get();
395 BFI = &(*GetBFI)(*F);
398 if (!PSI->hasInstrumentationProfile())
399 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
401 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
402 llvm::make_unique<FunctionOutliningMultiRegionInfo>();
412 for (
auto *Block : BlockList) {
419 <<
"Region dominated by " 420 <<
ore::NV(
"Block", BlockList.front()->getName())
421 <<
" has more than one region exit edge.";
440 int OverallFunctionCost = 0;
442 OverallFunctionCost += computeBBInlineCost(&BB);
446 dbgs() <<
"OverallFunctionCost = " << OverallFunctionCost <<
"\n";
448 int MinOutlineRegionCost =
452 MinBlockCounterExecution);
453 bool ColdCandidateFound =
false;
455 std::vector<BasicBlock *>
DFS;
457 DFS.push_back(CurrEntry);
458 VisitedMap[CurrEntry] =
true;
465 while (!DFS.empty()) {
466 auto *thisBB = DFS.back();
471 if (PSI->isColdBlock(thisBB, BFI) ||
477 VisitedMap[*
SI] =
true;
481 if (SuccProb > MinBranchProbability)
485 dbgs() <<
"Found cold edge: " << thisBB->getName() <<
"->" 486 << (*SI)->getName() <<
"\nBranch Probability = " << SuccProb
491 DT.getDescendants(*SI, DominateVector);
493 if (!IsSingleEntry(DominateVector))
497 if (!(ExitBlock = IsSingleExit(DominateVector)))
499 int OutlineRegionCost = 0;
500 for (
auto *BB : DominateVector)
501 OutlineRegionCost += computeBBInlineCost(BB);
505 dbgs() <<
"OutlineRegionCost = " << OutlineRegionCost <<
"\n";
508 if (OutlineRegionCost < MinOutlineRegionCost) {
512 <<
ore::NV(
"Callee", F) <<
" inline cost-savings smaller than " 513 <<
ore::NV(
"Cost", MinOutlineRegionCost);
521 for (
auto *BB : DominateVector)
522 VisitedMap[BB] =
true;
526 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
527 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
528 RegInfo.Region = DominateVector;
529 OutliningInfo->ORI.push_back(RegInfo);
532 dbgs() <<
"Found Cold Candidate starting at block: " 533 << DominateVector.front()->getName() <<
"\n";
536 ColdCandidateFound =
true;
537 NumColdRegionsFound++;
540 if (ColdCandidateFound)
541 return OutliningInfo;
543 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
546 std::unique_ptr<FunctionOutliningInfo>
547 PartialInlinerImpl::computeOutliningInfo(
Function *F) {
551 return std::unique_ptr<FunctionOutliningInfo>();
560 return isa<ReturnInst>(TI);
564 if (IsReturnBlock(Succ1))
565 return std::make_tuple(Succ1, Succ2);
566 if (IsReturnBlock(Succ2))
567 return std::make_tuple(Succ2, Succ1);
569 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
574 if (IsSuccessor(Succ1, Succ2))
575 return std::make_tuple(Succ1, Succ2);
576 if (IsSuccessor(Succ2, Succ1))
577 return std::make_tuple(Succ2, Succ1);
579 return std::make_tuple<BasicBlock *, BasicBlock *>(
nullptr,
nullptr);
582 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
583 llvm::make_unique<FunctionOutliningInfo>();
586 bool CandidateFound =
false;
601 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
604 OutliningInfo->Entries.push_back(CurrEntry);
605 OutliningInfo->ReturnBlock = ReturnBlock;
606 OutliningInfo->NonReturnBlock = NonReturnBlock;
607 CandidateFound =
true;
613 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
618 OutliningInfo->Entries.push_back(CurrEntry);
623 return std::unique_ptr<FunctionOutliningInfo>();
629 "Function Entry must be the first in Entries vector");
636 auto HasNonEntryPred = [Entries](
BasicBlock *BB) {
638 if (!Entries.count(Pred))
643 auto CheckAndNormalizeCandidate =
644 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
645 for (
BasicBlock *E : OutliningInfo->Entries) {
647 if (Entries.count(Succ))
649 if (Succ == OutliningInfo->ReturnBlock)
650 OutliningInfo->ReturnBlockPreds.push_back(E);
651 else if (Succ != OutliningInfo->NonReturnBlock)
655 if (HasNonEntryPred(E))
661 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
662 return std::unique_ptr<FunctionOutliningInfo>();
667 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
671 if (HasNonEntryPred(Cand))
678 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
679 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
686 OutliningInfo->Entries.push_back(Cand);
687 OutliningInfo->NonReturnBlock = NonReturnBlock;
688 OutliningInfo->ReturnBlockPreds.push_back(Cand);
689 Entries.insert(Cand);
692 return OutliningInfo;
700 for (
auto *E : OI->Entries) {
712 PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
713 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
715 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
716 auto OutliningCallFreq =
717 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
721 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
722 OutliningCallFreq = EntryFreq;
725 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
728 return OutlineRegionRelFreq;
743 return OutlineRegionRelFreq;
748 return OutlineRegionRelFreq;
751 bool PartialInlinerImpl::shouldPartialInline(
752 CallSite CS, FunctionCloner &Cloner,
759 assert(Callee == Cloner.ClonedFunc);
765 auto &CalleeTTI = (*GetTTI)(*Callee);
767 *GetAssumptionCache, GetBFI, PSI, &ORE);
772 <<
NV(
"Callee", Cloner.OrigFunc)
773 <<
" should always be fully inlined, not partially";
781 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into " 782 <<
NV(
"Caller", Caller)
783 <<
" because it should never be inlined (cost=never)";
791 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into " 792 <<
NV(
"Caller", Caller) <<
" because too costly to inline (cost=" 793 <<
NV(
"Cost", IC.
getCost()) <<
", threshold=" 805 if (NormWeightedSavings < WeightedOutliningRcost) {
809 <<
NV(
"Callee", Cloner.OrigFunc) <<
" not partially inlined into " 810 <<
NV(
"Caller", Caller) <<
" runtime overhead (overhead=" 811 <<
NV(
"Overhead", (
unsigned)WeightedOutliningRcost.
getFrequency())
815 <<
" of making the outlined call is too high";
823 <<
NV(
"Callee", Cloner.OrigFunc) <<
" can be partially inlined into " 824 <<
NV(
"Caller", Caller) <<
" with cost=" <<
NV(
"Cost", IC.
getCost())
834 int PartialInlinerImpl::computeBBInlineCost(
BasicBlock *BB) {
839 switch (
I.getOpcode()) {
840 case Instruction::BitCast:
841 case Instruction::PtrToInt:
842 case Instruction::IntToPtr:
843 case Instruction::Alloca:
844 case Instruction::PHI:
846 case Instruction::GetElementPtr:
847 if (cast<GetElementPtrInst>(&
I)->hasAllZeroIndices())
854 if (
I.isLifetimeStartOrEnd())
857 if (
CallInst *CI = dyn_cast<CallInst>(&
I)) {
877 PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
878 int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
879 for (
auto FuncBBPair : Cloner.OutlinedFunctions) {
880 Function *OutlinedFunc = FuncBBPair.first;
881 BasicBlock* OutliningCallBB = FuncBBPair.second;
884 OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
888 OutlinedFunctionCost += computeBBInlineCost(&BB);
890 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
891 "Outlined function cost should be no less than the outlined region");
896 OutlinedFunctionCost -=
899 int OutliningRuntimeOverhead =
900 OutliningFuncCallCost +
901 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
904 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
910 void PartialInlinerImpl::computeCallsiteToProfCountMap(
916 std::unique_ptr<BlockFrequencyInfo> TempBFI;
919 auto ComputeCurrBFI = [&,
this](
Function *Caller) {
926 CurrentCallerBFI = TempBFI.get();
929 CurrentCallerBFI = &(*GetBFI)(*Caller);
933 for (User *User :
Users) {
936 if (CurrentCaller != Caller) {
937 CurrentCaller = Caller;
938 ComputeCurrBFI(Caller);
940 assert(CurrentCallerBFI &&
"CallerBFI is not set");
945 CallSiteToProfCountMap[User] = *Count;
947 CallSiteToProfCountMap[User] = 0;
951 PartialInlinerImpl::FunctionCloner::FunctionCloner(
953 : OrigFunc(F), ORE(ORE) {
954 ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
960 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
961 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
963 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
967 ClonedOI->ReturnBlockPreds.push_back(NewE);
974 PartialInlinerImpl::FunctionCloner::FunctionCloner(
975 Function *F, FunctionOutliningMultiRegionInfo *OI,
977 : OrigFunc(F), ORE(ORE) {
978 ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
986 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
990 Region.
push_back(cast<BasicBlock>(VMap[BB]));
992 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
993 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
995 if (RegionInfo.ReturnBlock)
996 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
997 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
998 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
999 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1006 void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
1010 while (I != BB->
end()) {
1031 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1033 PHINode *FirstPhi = getFirstPHI(PreReturn);
1034 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1040 Value *CommonValue = PN->getIncomingValue(0);
1041 if (
all_of(PN->incoming_values(),
1042 [&](
Value *V) {
return V == CommonValue; }))
1047 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1048 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1052 while (I != PreReturn->
end()) {
1060 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1063 for (
BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1072 if (
auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1078 for (
auto *DP : DeadPhis)
1079 DP->eraseFromParent();
1081 for (
auto E : ClonedOI->ReturnBlockPreds) {
1086 bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1091 Cost += computeBBInlineCost(BB);
1095 assert(ClonedOMRI &&
"Expecting OutlineInfo for multi region outline");
1097 if (ClonedOMRI->ORI.empty())
1110 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo
RegionInfo :
1112 int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1115 ClonedFuncBFI.get(), &BPI,
false);
1117 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1121 dbgs() <<
"inputs: " << Inputs.
size() <<
"\n";
1122 dbgs() <<
"outputs: " << Outputs.
size() <<
"\n";
1123 for (
Value *value : Inputs)
1124 dbgs() <<
"value used in func: " << *value <<
"\n";
1125 for (
Value *output : Outputs)
1126 dbgs() <<
"instr used in func: " << *output <<
"\n";
1133 Function *OutlinedFunc = CE.extractCodeRegion();
1136 CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1139 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1140 NumColdRegionsOutlined++;
1141 OutlinedRegionCost += CurrentOutlinedRegionCost;
1150 &RegionInfo.Region.front()->front())
1151 <<
"Failed to extract region at block " 1152 <<
ore::NV(
"Block", RegionInfo.Region.front());
1156 return !OutlinedFunctions.empty();
1160 PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1163 auto ToBeInlined = [&,
this](
BasicBlock *BB) {
1164 return BB == ClonedOI->ReturnBlock ||
1165 (
std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1166 ClonedOI->Entries.end());
1169 assert(ClonedOI &&
"Expecting OutlineInfo for single region outline");
1180 std::vector<BasicBlock *> ToExtract;
1181 ToExtract.push_back(ClonedOI->NonReturnBlock);
1182 OutlinedRegionCost +=
1183 PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1185 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1186 ToExtract.push_back(&BB);
1191 OutlinedRegionCost += computeBBInlineCost(&BB);
1197 ClonedFuncBFI.get(), &BPI,
1199 .extractCodeRegion();
1203 PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1207 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1211 &ToExtract.front()->front())
1212 <<
"Failed to extract region at block " 1213 <<
ore::NV(
"Block", ToExtract.front());
1216 return OutlinedFunc;
1219 PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1223 ClonedFunc->eraseFromParent();
1224 if (!IsFunctionInlined) {
1227 for (
auto FuncBBPair : OutlinedFunctions) {
1234 std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(
Function *F) {
1237 return {
false,
nullptr};
1241 return {
false,
nullptr};
1244 return {
false,
nullptr};
1246 if (PSI->isFunctionEntryCold(F))
1247 return {
false,
nullptr};
1250 return {
false,
nullptr};
1258 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1259 computeOutliningColdRegionsInfo(F, ORE);
1261 FunctionCloner Cloner(F, OMRI.get(), ORE);
1265 dbgs() <<
"HotCountThreshold = " << PSI->getHotCountThreshold() <<
"\n";
1266 dbgs() <<
"ColdCountThreshold = " << PSI->getColdCountThreshold()
1270 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1275 dbgs() <<
">>>>>> Outlined (Cloned) Function >>>>>>\n";
1276 Cloner.ClonedFunc->print(
dbgs());
1277 dbgs() <<
"<<<<<< Outlined (Cloned) Function <<<<<<\n";
1281 if (tryPartialInline(Cloner))
1282 return {
true,
nullptr};
1290 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1292 return {
false,
nullptr};
1294 FunctionCloner Cloner(F, OI.get(), ORE);
1295 Cloner.NormalizeReturnBlock();
1297 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1299 if (!OutlinedFunction)
1300 return {
false,
nullptr};
1302 bool AnyInline = tryPartialInline(Cloner);
1305 return {
true, OutlinedFunction};
1307 return {
false,
nullptr};
1310 bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1311 if (Cloner.OutlinedFunctions.empty())
1316 int NonWeightedRcost;
1317 std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1322 if (Cloner.ClonedOI) {
1323 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1332 WeightedRcost =
BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1342 std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
1343 OrigFuncORE.
emit([&]() {
1346 <<
ore::NV(
"Function", Cloner.OrigFunc)
1347 <<
" not partially inlined into callers (Original Size = " 1348 <<
ore::NV(
"OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1349 <<
", Size of call sequence to outlined function = " 1350 <<
ore::NV(
"NewSize", SizeCost) <<
")";
1356 "F's users should all be replaced!");
1358 std::vector<User *>
Users(Cloner.ClonedFunc->user_begin(),
1359 Cloner.ClonedFunc->user_end());
1362 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1363 if (CalleeEntryCount)
1364 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
1366 uint64_t CalleeEntryCountV =
1367 (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1369 bool AnyInline =
false;
1370 for (User *User :
Users) {
1373 if (IsLimitReached())
1377 if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
1383 OR <<
ore::NV(
"Callee", Cloner.OrigFunc) <<
" partially inlined into " 1390 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1397 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1398 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1399 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1403 NumPartialInlining++;
1405 if (Cloner.ClonedOI)
1406 NumPartialInlined++;
1408 NumColdOutlinePartialInlined++;
1413 Cloner.IsFunctionInlined =
true;
1414 if (CalleeEntryCount)
1415 Cloner.OrigFunc->setEntryCount(
1416 CalleeEntryCount.setCount(CalleeEntryCountV));
1418 OrigFuncORE.
emit([&]() {
1420 <<
"Partially inlined into at least one caller";
1428 bool PartialInlinerImpl::run(
Module &M) {
1432 std::vector<Function *> Worklist;
1433 Worklist.reserve(M.
size());
1436 Worklist.push_back(&F);
1438 bool Changed =
false;
1439 while (!Worklist.empty()) {
1441 Worklist.pop_back();
1446 bool Recursive =
false;
1447 for (User *U : CurrFunc->
users())
1449 if (
I->getParent()->getParent() == CurrFunc) {
1456 std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1458 Worklist.push_back(Result.second);
1459 Changed |= Result.first;
1468 "Partial Inliner",
false,
false)
1476 return new PartialInlinerLegacyPass();
1483 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1488 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1493 std::function<TargetTransformInfo &(Function &)> GetTTI =
1500 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
A parsed version of the target data layout string in and methods for querying it. ...
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
size_type size() const
Determine the number of elements in the SetVector.
void initializePartialInlinerLegacyPassPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
static cl::opt< int > MaxNumPartialInlining("max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, cl::desc("Max number of partial inlining. The default is unlimited"))
Implements a dense probed hash-table based set.
void push_back(const T &Elt)
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
Function * CloneFunction(Function *F, ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified function and add it to that function's module.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
const BasicBlock & back() const
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
STATISTIC(NumFunctions, "Total number of functions")
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
iv Induction Variable Users
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Represents the cost of inlining a function.
static cl::opt< bool > ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, cl::desc("Force outline regions with live exits"))
iterator begin()
Instruction iterator methods.
AnalysisUsage & addRequired()
ModulePass * createPartialInliningPass()
createPartialInliningPass - This pass inlines parts of functions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Value * removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty=true)
Remove an incoming value.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
static cl::opt< int > OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), cl::Hidden, cl::ZeroOrMore, cl::desc("Relative frequency of outline region to " "the entry block"))
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Type * getType() const
All values are typed, get the type of this value.
static cl::opt< bool > SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis"))
const T & getValue() const LLVM_LVALUE_FUNCTION
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
static cl::opt< unsigned > MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, cl::desc("Minimum block executions to consider " "its BranchProbabilityInfo valid"))
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
amdgpu Simplify well known AMD library false Value * Callee
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Interval::succ_iterator succ_end(Interval *I)
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static cl::opt< unsigned > ExtraOutliningPenalty("partial-inlining-extra-penalty", cl::init(0), cl::Hidden, cl::desc("A debug option to add additional penalty to the computed one."))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
void setCallingConv(CallingConv::ID CC)
static cl::opt< bool > MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, cl::desc("Mark outline function calls with ColdCC"))
initializer< Ty > init(const Ty &Val)
Control flow instructions. These all have token chains.
static cl::opt< bool > DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining"))
A set of analyses that are preserved following a run of a transformation pass.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM Basic Block Representation.
Conditional or Unconditional Branch instruction.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static cl::opt< float > ColdBranchRatio("cold-branch-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum BranchProbability to consider a region cold."))
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug() const
Return a const iterator range over the instructions in the block, skipping any debug instructions...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
const Instruction & front() const
std::pair< iterator, bool > insert(const ValueT &V)
static ManagedStatic< OptionRegistry > OR
Represent the analysis usage information of a pass.
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Used in the streaming interface as the general argument type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
A function analysis which provides an AssumptionCache.
Analysis pass which computes BlockFrequencyInfo.
Iterator for intrusive lists based on ilist_node.
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
unsigned getNumIncomingValues() const
Return the number of incoming edges.
BBTy * getParent() const
Get the basic block containing the call site.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< unsigned > MaxNumInlineBlocks("max-num-inline-blocks", cl::init(5), cl::Hidden, cl::desc("Max number of blocks to be partially inlined"))
iterator_range< user_iterator > users()
int getCost() const
Get the inline cost estimate.
int getCostDelta() const
Get the cost delta from the threshold for inlining.
static cl::opt< bool > DisableMultiRegionPartialInline("disable-mr-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable multi-region partial inlining"))
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_END(PartialInlinerLegacyPass
unsigned succ_size(const Instruction *I)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
FunTy * getCaller() const
Return the caller function for this call site.
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
int getCallsiteCost(CallSite CS, const DataLayout &DL)
Return the cost associated with a callsite, including parameter passing and the call/return instructi...
static cl::opt< bool > TracePartialInlining("trace-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Trace partial inlining."))
const Function * getParent() const
Return the enclosing method, or null if none.
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
bool isUnconditional() const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
static cl::opt< float > MinRegionSizeRatio("min-region-size-ratio", cl::init(0.1), cl::Hidden, cl::desc("Minimum ratio comparing relative sizes of each " "outline candidate and original function"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
const BasicBlock & front() const
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
succ_range successors(Instruction *I)
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
bool hasProfileData() const
Return true if the function is annotated with profile data.
Optional< uint64_t > getBlockProfileCount(const BasicBlock *BB) const
Returns the estimated profile count of BB.
const BasicBlock * getParent() const
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.