61 #define DEBUG_TYPE "hotcoldsplit" 63 STATISTIC(NumColdRegionsFound,
"Number of cold regions found.");
64 STATISTIC(NumColdRegionsOutlined,
"Number of cold regions outlined.");
73 cl::desc(
"Code size threshold for outlining within a " 74 "single BB (as a multiple of TCC_Basic)"));
79 PostDomTree(
Function &
F) { recalculate(F); }
99 return !(isa<ReturnInst>(
I) || isa<IndirectBrInst>(I));
117 dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
127 static bool mayExtractBlock(
const BasicBlock &BB) {
132 static bool isProfitableToOutline(
const BlockSequence &
Region,
134 if (Region.size() > 1)
140 if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
152 static bool markEntireFunctionCold(
Function &
F) {
154 bool Changed =
false;
163 class HotColdSplitting {
169 : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
173 bool shouldOutlineFrom(
const Function &F)
const;
185 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
188 class HotColdSplittingLegacyPass :
public ModulePass {
191 HotColdSplittingLegacyPass() :
ModulePass(ID) {
202 bool runOnModule(
Module &M)
override;
209 bool HotColdSplitting::shouldOutlineFrom(
const Function &F)
const {
211 if (OutlinedFunctions.count(&F))
231 if (PSI->isFunctionEntryCold(&F))
236 Function *HotColdSplitting::extractColdRegion(
const BlockSequence &Region,
251 CE.findInputsOutputs(Inputs, Outputs, Sinks);
254 if (Outputs.
size() > 0) {
261 if (
Function *OutF = CE.extractCodeRegion()) {
265 NumColdRegionsOutlined++;
274 markEntireFunctionCold(*OutF);
279 &*Region[0]->
begin())
280 <<
ore::NV(
"Original", OrigF) <<
" split cold code into " 288 &*Region[0]->
begin())
289 <<
"Failed to extract region at block " 290 <<
ore::NV(
"Block", Region.front());
296 using BlockTy = std::pair<BasicBlock *, unsigned>;
301 class OutliningRegion {
313 bool EntireFunctionCold =
false;
319 static unsigned getEntryPointScore(
BasicBlock &BB,
unsigned Score) {
320 return isViableEntryPoint(BB) ? Score : 0;
325 static constexpr
unsigned ScoreForSuccBlock = 1;
326 static constexpr
unsigned ScoreForSinkBlock = 1;
328 OutliningRegion(
const OutliningRegion &) =
delete;
329 OutliningRegion &operator=(
const OutliningRegion &) =
delete;
332 OutliningRegion() =
default;
333 OutliningRegion(OutliningRegion &&) =
default;
334 OutliningRegion &operator=(OutliningRegion &&) =
default;
337 const PostDomTree &PDT) {
338 OutliningRegion ColdRegion;
342 auto addBlockToRegion = [&](
BasicBlock *BB,
unsigned Score) {
344 ColdRegion.Blocks.emplace_back(BB, Score);
345 assert(RegionBlocks.
size() == ColdRegion.Blocks.size() &&
"Duplicate BB");
349 unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
350 ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB :
nullptr;
351 unsigned BestScore = SinkScore;
355 auto PredEnd =
idf_end(&SinkBB);
356 while (PredIt != PredEnd) {
358 bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
363 ColdRegion.EntireFunctionCold =
true;
369 if (!SinkPostDom || !mayExtractBlock(PredBB)) {
370 PredIt.skipChildren();
377 unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
378 if (PredScore > BestScore) {
379 ColdRegion.SuggestedEntryPoint = &PredBB;
380 BestScore = PredScore;
383 addBlockToRegion(&PredBB, PredScore);
389 addBlockToRegion(&SinkBB, SinkScore);
393 auto SuccEnd =
df_end(&SinkBB);
394 while (SuccIt != SuccEnd) {
396 bool SinkDom = DT.
dominates(&SinkBB, &SuccBB);
399 bool DuplicateBlock = RegionBlocks.
count(&SuccBB);
403 if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
404 SuccIt.skipChildren();
408 unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
409 if (SuccScore > BestScore) {
410 ColdRegion.SuggestedEntryPoint = &SuccBB;
411 BestScore = SuccScore;
414 addBlockToRegion(&SuccBB, SuccScore);
422 bool empty()
const {
return !SuggestedEntryPoint; }
428 bool isEntireFunctionCold()
const {
return EntireFunctionCold; }
432 assert(!
empty() && !isEntireFunctionCold() &&
"Nothing to extract");
437 BlockSequence SubRegion = {SuggestedEntryPoint};
439 unsigned NextScore = 0;
440 auto RegionEndIt = Blocks.
end();
443 unsigned Score = Block.second;
445 BB == SuggestedEntryPoint || DT.
dominates(SuggestedEntryPoint, BB);
446 if (!InSubRegion && Score > NextScore) {
450 if (InSubRegion && BB != SuggestedEntryPoint)
451 SubRegion.push_back(BB);
454 Blocks.
erase(RegionStartIt, RegionEndIt);
457 SuggestedEntryPoint = NextEntryPoint;
469 bool Changed =
false;
485 if (!mayExtractBlock(*BB))
489 if (ColdBlocks.
count(BB))
498 dbgs() <<
"Found a cold block:\n";
502 auto Region = OutliningRegion::create(*BB, DT, PDT);
506 if (Region.isEntireFunctionCold()) {
508 return markEntireFunctionCold(F);
516 bool RegionsOverlap =
any_of(Region.blocks(), [&](
const BlockTy &Block) {
517 return !ColdBlocks.
insert(Block.first).second;
523 ++NumColdRegionsFound;
527 unsigned OutlinedFunctionID = 1;
528 while (!OutliningWorklist.
empty()) {
529 OutliningRegion Region = OutliningWorklist.
pop_back_val();
530 assert(!Region.empty() &&
"Empty outlining region in worklist");
532 BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
533 if (!isProfitableToOutline(SubRegion, TTI)) {
535 dbgs() <<
"Skipping outlining; not profitable to outline\n";
536 SubRegion[0]->dump();
542 dbgs() <<
"Hot/cold splitting attempting to outline these blocks:\n";
548 extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
550 ++OutlinedFunctionID;
551 OutlinedFunctions.insert(Outlined);
554 }
while (!Region.empty());
560 bool HotColdSplitting::run(
Module &M) {
561 bool Changed =
false;
562 OutlinedFunctions.clear();
564 if (!shouldOutlineFrom(F)) {
575 Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
580 bool HotColdSplittingLegacyPass::runOnModule(
Module &M) {
584 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
586 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
589 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(
F).getBFI();
591 std::unique_ptr<OptimizationRemarkEmitter> ORE;
592 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
598 return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
605 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
614 std::function<TargetTransformInfo &(Function &)> GTTI =
619 std::unique_ptr<OptimizationRemarkEmitter> ORE;
620 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
628 if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
635 "Hot Cold Splitting",
false,
false)
642 return new HotColdSplittingLegacyPass();
INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit", "Hot Cold Splitting", false, false) INITIALIZE_PASS_END(HotColdSplittingLegacyPass
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
size_type size() const
Determine the number of elements in the SetVector.
A Module instance is used to store all the information related to an LLVM module. ...
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
static cl::opt< bool > EnableStaticAnalyis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This defines the Use class.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI)
Returns true if BasicBlock BB is considered cold.
STATISTIC(NumColdRegionsFound, "Number of cold regions found.")
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
idf_iterator< T > idf_begin(const T &G)
Core dominator tree base class.
idf_iterator< T > idf_end(const T &G)
static cl::opt< int > MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden, cl::desc("Code size threshold for outlining within a " "single BB (as a multiple of TCC_Basic)"))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
A set of analyses that are preserved following a run of a transformation pass.
LLVM Basic Block Representation.
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
df_iterator< T > df_end(const T &G)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool pred_empty(const BasicBlock *BB)
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
bool succ_empty(const Instruction *I)
iterator erase(const_iterator CI)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
A function analysis which provides an AssumptionCache.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
void initializeHotColdSplittingLegacyPassPass(PassRegistry &)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
df_iterator< T > df_begin(const T &G)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
ModulePass * createHotColdSplittingPass()
createHotColdSplittingPass - This pass outlines cold blocks into a separate function(s).
void emplace_back(ArgTypes &&... Args)
LLVM_NODISCARD bool empty() const
StringRef getName() const
Return a constant reference to the value's name.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
const std::string to_string(const T &Value)
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
bool isEHPad() const
Return true if this basic block is an exception handling block.
Module * getParent()
Get the module that this global value is contained inside of...
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
This header defines various interfaces for pass management in LLVM.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...