LLVM  8.0.1
HotColdSplitting.cpp
Go to the documentation of this file.
1 //===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Outline cold regions to a separate function.
11 // TODO: Update BFI and BPI
12 // TODO: Add all the outlined functions to a separate section.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/CFG.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/CallSite.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DiagnosticInfo.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/Type.h"
41 #include "llvm/IR/Use.h"
42 #include "llvm/IR/User.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/Pass.h"
47 #include "llvm/Support/Debug.h"
49 #include "llvm/Transforms/IPO.h"
51 #include "llvm/Transforms/Scalar.h"
58 #include <algorithm>
59 #include <cassert>
60 
61 #define DEBUG_TYPE "hotcoldsplit"
62 
63 STATISTIC(NumColdRegionsFound, "Number of cold regions found.");
64 STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
65 
66 using namespace llvm;
67 
68 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
69  cl::init(true), cl::Hidden);
70 
71 static cl::opt<int>
72  MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
73  cl::desc("Code size threshold for outlining within a "
74  "single BB (as a multiple of TCC_Basic)"));
75 
76 namespace {
77 
78 struct PostDomTree : PostDomTreeBase<BasicBlock> {
79  PostDomTree(Function &F) { recalculate(F); }
80 };
81 
82 /// A sequence of basic blocks.
83 ///
84 /// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
85 using BlockSequence = SmallVector<BasicBlock *, 0>;
86 
87 // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
88 // this function unless you modify the MBB version as well.
89 //
90 /// A no successor, non-return block probably ends in unreachable and is cold.
91 /// Also consider a block that ends in an indirect branch to be a return block,
92 /// since many targets use plain indirect branches to return.
93 bool blockEndsInUnreachable(const BasicBlock &BB) {
94  if (!succ_empty(&BB))
95  return false;
96  if (BB.empty())
97  return true;
98  const Instruction *I = BB.getTerminator();
99  return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
100 }
101 
102 bool unlikelyExecuted(BasicBlock &BB) {
103  // Exception handling blocks are unlikely executed.
104  if (BB.isEHPad())
105  return true;
106 
107  // The block is cold if it calls/invokes a cold function.
108  for (Instruction &I : BB)
109  if (auto CS = CallSite(&I))
110  if (CS.hasFnAttr(Attribute::Cold))
111  return true;
112 
113  // The block is cold if it has an unreachable terminator, unless it's
114  // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp).
115  if (blockEndsInUnreachable(BB)) {
116  if (auto *CI =
117  dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
118  if (CI->hasFnAttr(Attribute::NoReturn))
119  return false;
120  return true;
121  }
122 
123  return false;
124 }
125 
126 /// Check whether it's safe to outline \p BB.
127 static bool mayExtractBlock(const BasicBlock &BB) {
128  return !BB.hasAddressTaken() && !BB.isEHPad();
129 }
130 
131 /// Check whether \p Region is profitable to outline.
132 static bool isProfitableToOutline(const BlockSequence &Region,
133  TargetTransformInfo &TTI) {
134  if (Region.size() > 1)
135  return true;
136 
137  int Cost = 0;
138  const BasicBlock &BB = *Region[0];
139  for (const Instruction &I : BB) {
140  if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
141  continue;
142 
144 
146  return true;
147  }
148  return false;
149 }
150 
151 /// Mark \p F cold. Return true if it's changed.
152 static bool markEntireFunctionCold(Function &F) {
153  assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
154  bool Changed = false;
157  Changed = true;
158  }
159  // TODO: Move this function into a cold section.
160  return Changed;
161 }
162 
163 class HotColdSplitting {
164 public:
165  HotColdSplitting(ProfileSummaryInfo *ProfSI,
169  : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
170  bool run(Module &M);
171 
172 private:
173  bool shouldOutlineFrom(const Function &F) const;
174  bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
176  DominatorTree &DT, PostDomTree &PDT,
178  Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
180  OptimizationRemarkEmitter &ORE, unsigned Count);
181  SmallPtrSet<const Function *, 2> OutlinedFunctions;
182  ProfileSummaryInfo *PSI;
185  std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
186 };
187 
188 class HotColdSplittingLegacyPass : public ModulePass {
189 public:
190  static char ID;
191  HotColdSplittingLegacyPass() : ModulePass(ID) {
193  }
194 
195  void getAnalysisUsage(AnalysisUsage &AU) const override {
200  }
201 
202  bool runOnModule(Module &M) override;
203 };
204 
205 } // end anonymous namespace
206 
207 // Returns false if the function should not be considered for hot-cold split
208 // optimization.
209 bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
210  // Do not try to outline again from an already outlined cold function.
211  if (OutlinedFunctions.count(&F))
212  return false;
213 
214  if (F.size() <= 2)
215  return false;
216 
217  // TODO: Consider only skipping functions marked `optnone` or `cold`.
218 
219  if (F.hasAddressTaken())
220  return false;
221 
223  return false;
224 
226  return false;
227 
229  return false;
230 
231  if (PSI->isFunctionEntryCold(&F))
232  return false;
233  return true;
234 }
235 
236 Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
237  DominatorTree &DT,
239  TargetTransformInfo &TTI,
241  unsigned Count) {
242  assert(!Region.empty());
243 
244  // TODO: Pass BFI and BPI to update profile information.
245  CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
246  /* BPI */ nullptr, /* AllowVarArgs */ false,
247  /* AllowAlloca */ false,
248  /* Suffix */ "cold." + std::to_string(Count));
249 
250  SetVector<Value *> Inputs, Outputs, Sinks;
251  CE.findInputsOutputs(Inputs, Outputs, Sinks);
252 
253  // Do not extract regions that have live exit variables.
254  if (Outputs.size() > 0) {
255  LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
256  return nullptr;
257  }
258 
259  // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
260  Function *OrigF = Region[0]->getParent();
261  if (Function *OutF = CE.extractCodeRegion()) {
262  User *U = *OutF->user_begin();
263  CallInst *CI = cast<CallInst>(U);
264  CallSite CS(CI);
265  NumColdRegionsOutlined++;
266  if (TTI.useColdCCForColdCall(*OutF)) {
267  OutF->setCallingConv(CallingConv::Cold);
268  CS.setCallingConv(CallingConv::Cold);
269  }
270  CI->setIsNoInline();
271 
272  // Try to make the outlined code as small as possible on the assumption
273  // that it's cold.
274  markEntireFunctionCold(*OutF);
275 
276  LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
277  ORE.emit([&]() {
278  return OptimizationRemark(DEBUG_TYPE, "HotColdSplit",
279  &*Region[0]->begin())
280  << ore::NV("Original", OrigF) << " split cold code into "
281  << ore::NV("Split", OutF);
282  });
283  return OutF;
284  }
285 
286  ORE.emit([&]() {
287  return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
288  &*Region[0]->begin())
289  << "Failed to extract region at block "
290  << ore::NV("Block", Region.front());
291  });
292  return nullptr;
293 }
294 
295 /// A pair of (basic block, score).
296 using BlockTy = std::pair<BasicBlock *, unsigned>;
297 
298 namespace {
299 /// A maximal outlining region. This contains all blocks post-dominated by a
300 /// sink block, the sink block itself, and all blocks dominated by the sink.
301 class OutliningRegion {
302  /// A list of (block, score) pairs. A block's score is non-zero iff it's a
303  /// viable sub-region entry point. Blocks with higher scores are better entry
304  /// points (i.e. they are more distant ancestors of the sink block).
305  SmallVector<BlockTy, 0> Blocks = {};
306 
307  /// The suggested entry point into the region. If the region has multiple
308  /// entry points, all blocks within the region may not be reachable from this
309  /// entry point.
310  BasicBlock *SuggestedEntryPoint = nullptr;
311 
312  /// Whether the entire function is cold.
313  bool EntireFunctionCold = false;
314 
315  /// Whether or not \p BB could be the entry point of an extracted region.
316  static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
317 
318  /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
319  static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
320  return isViableEntryPoint(BB) ? Score : 0;
321  }
322 
323  /// These scores should be lower than the score for predecessor blocks,
324  /// because regions starting at predecessor blocks are typically larger.
325  static constexpr unsigned ScoreForSuccBlock = 1;
326  static constexpr unsigned ScoreForSinkBlock = 1;
327 
328  OutliningRegion(const OutliningRegion &) = delete;
329  OutliningRegion &operator=(const OutliningRegion &) = delete;
330 
331 public:
332  OutliningRegion() = default;
333  OutliningRegion(OutliningRegion &&) = default;
334  OutliningRegion &operator=(OutliningRegion &&) = default;
335 
336  static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
337  const PostDomTree &PDT) {
338  OutliningRegion ColdRegion;
339 
340  SmallPtrSet<BasicBlock *, 4> RegionBlocks;
341 
342  auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
343  RegionBlocks.insert(BB);
344  ColdRegion.Blocks.emplace_back(BB, Score);
345  assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
346  };
347 
348  // The ancestor farthest-away from SinkBB, and also post-dominated by it.
349  unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
350  ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
351  unsigned BestScore = SinkScore;
352 
353  // Visit SinkBB's ancestors using inverse DFS.
354  auto PredIt = ++idf_begin(&SinkBB);
355  auto PredEnd = idf_end(&SinkBB);
356  while (PredIt != PredEnd) {
357  BasicBlock &PredBB = **PredIt;
358  bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
359 
360  // If the predecessor is cold and has no predecessors, the entire
361  // function must be cold.
362  if (SinkPostDom && pred_empty(&PredBB)) {
363  ColdRegion.EntireFunctionCold = true;
364  return ColdRegion;
365  }
366 
367  // If SinkBB does not post-dominate a predecessor, do not mark the
368  // predecessor (or any of its predecessors) cold.
369  if (!SinkPostDom || !mayExtractBlock(PredBB)) {
370  PredIt.skipChildren();
371  continue;
372  }
373 
374  // Keep track of the post-dominated ancestor farthest away from the sink.
375  // The path length is always >= 2, ensuring that predecessor blocks are
376  // considered as entry points before the sink block.
377  unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
378  if (PredScore > BestScore) {
379  ColdRegion.SuggestedEntryPoint = &PredBB;
380  BestScore = PredScore;
381  }
382 
383  addBlockToRegion(&PredBB, PredScore);
384  ++PredIt;
385  }
386 
387  // Add SinkBB to the cold region. It's considered as an entry point before
388  // any sink-successor blocks.
389  addBlockToRegion(&SinkBB, SinkScore);
390 
391  // Find all successors of SinkBB dominated by SinkBB using DFS.
392  auto SuccIt = ++df_begin(&SinkBB);
393  auto SuccEnd = df_end(&SinkBB);
394  while (SuccIt != SuccEnd) {
395  BasicBlock &SuccBB = **SuccIt;
396  bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
397 
398  // Don't allow the backwards & forwards DFSes to mark the same block.
399  bool DuplicateBlock = RegionBlocks.count(&SuccBB);
400 
401  // If SinkBB does not dominate a successor, do not mark the successor (or
402  // any of its successors) cold.
403  if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
404  SuccIt.skipChildren();
405  continue;
406  }
407 
408  unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
409  if (SuccScore > BestScore) {
410  ColdRegion.SuggestedEntryPoint = &SuccBB;
411  BestScore = SuccScore;
412  }
413 
414  addBlockToRegion(&SuccBB, SuccScore);
415  ++SuccIt;
416  }
417 
418  return ColdRegion;
419  }
420 
421  /// Whether this region has nothing to extract.
422  bool empty() const { return !SuggestedEntryPoint; }
423 
424  /// The blocks in this region.
425  ArrayRef<std::pair<BasicBlock *, unsigned>> blocks() const { return Blocks; }
426 
427  /// Whether the entire function containing this region is cold.
428  bool isEntireFunctionCold() const { return EntireFunctionCold; }
429 
430  /// Remove a sub-region from this region and return it as a block sequence.
431  BlockSequence takeSingleEntrySubRegion(DominatorTree &DT) {
432  assert(!empty() && !isEntireFunctionCold() && "Nothing to extract");
433 
434  // Remove blocks dominated by the suggested entry point from this region.
435  // During the removal, identify the next best entry point into the region.
436  // Ensure that the first extracted block is the suggested entry point.
437  BlockSequence SubRegion = {SuggestedEntryPoint};
438  BasicBlock *NextEntryPoint = nullptr;
439  unsigned NextScore = 0;
440  auto RegionEndIt = Blocks.end();
441  auto RegionStartIt = remove_if(Blocks, [&](const BlockTy &Block) {
442  BasicBlock *BB = Block.first;
443  unsigned Score = Block.second;
444  bool InSubRegion =
445  BB == SuggestedEntryPoint || DT.dominates(SuggestedEntryPoint, BB);
446  if (!InSubRegion && Score > NextScore) {
447  NextEntryPoint = BB;
448  NextScore = Score;
449  }
450  if (InSubRegion && BB != SuggestedEntryPoint)
451  SubRegion.push_back(BB);
452  return InSubRegion;
453  });
454  Blocks.erase(RegionStartIt, RegionEndIt);
455 
456  // Update the suggested entry point.
457  SuggestedEntryPoint = NextEntryPoint;
458 
459  return SubRegion;
460  }
461 };
462 } // namespace
463 
464 bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
465  BlockFrequencyInfo *BFI,
466  TargetTransformInfo &TTI,
467  DominatorTree &DT, PostDomTree &PDT,
469  bool Changed = false;
470 
471  // The set of cold blocks.
472  SmallPtrSet<BasicBlock *, 4> ColdBlocks;
473 
474  // The worklist of non-intersecting regions left to outline.
475  SmallVector<OutliningRegion, 2> OutliningWorklist;
476 
477  // Set up an RPO traversal. Experimentally, this performs better (outlines
478  // more) than a PO traversal, because we prevent region overlap by keeping
479  // the first region to contain a block.
481 
482  // Find all cold regions.
483  for (BasicBlock *BB : RPOT) {
484  // Skip blocks which can't be outlined.
485  if (!mayExtractBlock(*BB))
486  continue;
487 
488  // This block is already part of some outlining region.
489  if (ColdBlocks.count(BB))
490  continue;
491 
492  bool Cold = PSI.isColdBlock(BB, BFI) ||
493  (EnableStaticAnalyis && unlikelyExecuted(*BB));
494  if (!Cold)
495  continue;
496 
497  LLVM_DEBUG({
498  dbgs() << "Found a cold block:\n";
499  BB->dump();
500  });
501 
502  auto Region = OutliningRegion::create(*BB, DT, PDT);
503  if (Region.empty())
504  continue;
505 
506  if (Region.isEntireFunctionCold()) {
507  LLVM_DEBUG(dbgs() << "Entire function is cold\n");
508  return markEntireFunctionCold(F);
509  }
510 
511  // If this outlining region intersects with another, drop the new region.
512  //
513  // TODO: It's theoretically possible to outline more by only keeping the
514  // largest region which contains a block, but the extra bookkeeping to do
515  // this is tricky/expensive.
516  bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
517  return !ColdBlocks.insert(Block.first).second;
518  });
519  if (RegionsOverlap)
520  continue;
521 
522  OutliningWorklist.emplace_back(std::move(Region));
523  ++NumColdRegionsFound;
524  }
525 
526  // Outline single-entry cold regions, splitting up larger regions as needed.
527  unsigned OutlinedFunctionID = 1;
528  while (!OutliningWorklist.empty()) {
529  OutliningRegion Region = OutliningWorklist.pop_back_val();
530  assert(!Region.empty() && "Empty outlining region in worklist");
531  do {
532  BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
533  if (!isProfitableToOutline(SubRegion, TTI)) {
534  LLVM_DEBUG({
535  dbgs() << "Skipping outlining; not profitable to outline\n";
536  SubRegion[0]->dump();
537  });
538  continue;
539  }
540 
541  LLVM_DEBUG({
542  dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
543  for (BasicBlock *BB : SubRegion)
544  BB->dump();
545  });
546 
547  Function *Outlined =
548  extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
549  if (Outlined) {
550  ++OutlinedFunctionID;
551  OutlinedFunctions.insert(Outlined);
552  Changed = true;
553  }
554  } while (!Region.empty());
555  }
556 
557  return Changed;
558 }
559 
560 bool HotColdSplitting::run(Module &M) {
561  bool Changed = false;
562  OutlinedFunctions.clear();
563  for (auto &F : M) {
564  if (!shouldOutlineFrom(F)) {
565  LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
566  continue;
567  }
568  LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
569  DominatorTree DT(F);
570  PostDomTree PDT(F);
571  PDT.recalculate(F);
572  BlockFrequencyInfo *BFI = GetBFI(F);
573  TargetTransformInfo &TTI = GetTTI(F);
574  OptimizationRemarkEmitter &ORE = (*GetORE)(F);
575  Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
576  }
577  return Changed;
578 }
579 
580 bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
581  if (skipModule(M))
582  return false;
583  ProfileSummaryInfo *PSI =
584  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
585  auto GTTI = [this](Function &F) -> TargetTransformInfo & {
586  return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
587  };
588  auto GBFI = [this](Function &F) {
589  return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
590  };
591  std::unique_ptr<OptimizationRemarkEmitter> ORE;
592  std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
593  [&ORE](Function &F) -> OptimizationRemarkEmitter & {
594  ORE.reset(new OptimizationRemarkEmitter(&F));
595  return *ORE.get();
596  };
597 
598  return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
599 }
600 
603  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
604 
605  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
606  [&FAM](Function &F) -> AssumptionCache & {
607  return FAM.getResult<AssumptionAnalysis>(F);
608  };
609 
610  auto GBFI = [&FAM](Function &F) {
611  return &FAM.getResult<BlockFrequencyAnalysis>(F);
612  };
613 
614  std::function<TargetTransformInfo &(Function &)> GTTI =
615  [&FAM](Function &F) -> TargetTransformInfo & {
616  return FAM.getResult<TargetIRAnalysis>(F);
617  };
618 
619  std::unique_ptr<OptimizationRemarkEmitter> ORE;
620  std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
621  [&ORE](Function &F) -> OptimizationRemarkEmitter & {
622  ORE.reset(new OptimizationRemarkEmitter(&F));
623  return *ORE.get();
624  };
625 
627 
628  if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
629  return PreservedAnalyses::none();
630  return PreservedAnalyses::all();
631 }
632 
634 INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
635  "Hot Cold Splitting", false, false)
638 INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
639  "Hot Cold Splitting", false, false)
640 
642  return new HotColdSplittingLegacyPass();
643 }
size_t size() const
Definition: Function.h:661
INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit", "Hot Cold Splitting", false, false) INITIALIZE_PASS_END(HotColdSplittingLegacyPass
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:52
Diagnostic information for missed-optimization remarks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
DiagnosticInfoOptimizationBase::Argument NV
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
This class represents lattice values for constants.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Hot Cold Splitting
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
Analysis providing profile information.
This class represents a function call, abstracting a target machine's calling convention.
static cl::opt< bool > EnableStaticAnalyis("hot-cold-static-analysis", cl::init(true), cl::Hidden)
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
This defines the Use class.
int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
bool empty() const
Definition: BasicBlock.h:280
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI)
Returns true if BasicBlock BB is considered cold.
STATISTIC(NumColdRegionsFound, "Number of cold regions found.")
static bool blockEndsInUnreachable(const MachineBasicBlock *MBB)
A no successor, non-return block probably ends in unreachable and is cold.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:157
idf_iterator< T > idf_begin(const T &G)
Core dominator tree base class.
Definition: LoopInfo.h:61
idf_iterator< T > idf_end(const T &G)
static cl::opt< int > MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden, cl::desc("Code size threshold for outlining within a " "single BB (as a multiple of TCC_Basic)"))
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
df_iterator< T > df_end(const T &G)
void setIsNoInline()
Definition: InstrTypes.h:1493
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:117
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1226
bool succ_empty(const Instruction *I)
Definition: CFG.h:258
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:210
size_type size() const
Definition: SmallPtrSet.h:93
A function analysis which provides an AssumptionCache.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:392
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
void initializeHotColdSplittingLegacyPassPass(PassRegistry &)
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
df_iterator< T > df_begin(const T &G)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
ModulePass * createHotColdSplittingPass()
createHotColdSplittingPass - This pass outlines cold blocks into a separate function(s).
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:652
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
const std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
bool hasAddressTaken(const User **=nullptr) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition: Function.cpp:1254
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
The cost of a typical 'add' instruction.
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:399
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
#define DEBUG_TYPE
hotcoldsplit
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:230
print Print MemDeps of function
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
The optimization diagnostic interface.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038