LLVM  8.0.1
SyntheticCountsPropagation.cpp
Go to the documentation of this file.
1 //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a transformation that synthesizes entry counts for
11 // functions and attaches !prof metadata to functions with the synthesized
12 // counts. The presence of !prof metadata with counter name set to
13 // 'synthesized_function_entry_count' indicate that the value of the counter is
14 // an estimation of the likely execution count of the function. This transform
15 // is applied only in non PGO mode as functions get 'real' profile-based
16 // function entry counts in the PGO mode.
17 //
18 // The transformation works by first assigning some initial values to the entry
19 // counts of all functions and then doing a top-down traversal of the
20 // callgraph-scc to propagate the counts. For each function the set of callsites
21 // and their relative block frequency is gathered. The relative block frequency
22 // multiplied by the entry count of the caller and added to the callee's entry
23 // count. For non-trivial SCCs, the new counts are computed from the previous
24 // counts and updated in one shot.
25 //
26 //===----------------------------------------------------------------------===//
27 
29 #include "llvm/ADT/DenseSet.h"
30 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/IR/CallSite.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Module.h"
40 #include "llvm/Support/Debug.h"
42 
43 using namespace llvm;
46 
47 #define DEBUG_TYPE "synthetic-counts-propagation"
48 
49 /// Initial synthetic count assigned to functions.
51  InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10),
53  cl::desc("Initial value of synthetic entry count."));
54 
55 /// Initial synthetic count assigned to inline functions.
57  "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore,
58  cl::desc("Initial synthetic entry count for inline functions."));
59 
60 /// Initial synthetic count assigned to cold functions.
62  "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore,
63  cl::desc("Initial synthetic entry count for cold functions."));
64 
65 // Assign initial synthetic entry counts to functions.
66 static void
67 initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) {
68  auto MayHaveIndirectCalls = [](Function &F) {
69  for (auto *U : F.users()) {
70  if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
71  return true;
72  }
73  return false;
74  };
75 
76  for (Function &F : M) {
77  uint64_t InitialCount = InitialSyntheticCount;
78  if (F.isDeclaration())
79  continue;
80  if (F.hasFnAttribute(Attribute::AlwaysInline) ||
81  F.hasFnAttribute(Attribute::InlineHint)) {
82  // Use a higher value for inline functions to account for the fact that
83  // these are usually beneficial to inline.
84  InitialCount = InlineSyntheticCount;
85  } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) {
86  // Local functions without inline hints get counts only through
87  // propagation.
88  InitialCount = 0;
89  } else if (F.hasFnAttribute(Attribute::Cold) ||
90  F.hasFnAttribute(Attribute::NoInline)) {
91  // Use a lower value for noinline and cold functions.
92  InitialCount = ColdSyntheticCount;
93  }
94  SetCount(&F, InitialCount);
95  }
96 }
97 
99  ModuleAnalysisManager &MAM) {
101  MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
103  // Set initial entry counts.
105  M, [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); });
106 
107  // Edge includes information about the source. Hence ignore the first
108  // parameter.
109  auto GetCallSiteProfCount = [&](const CallGraphNode *,
110  const CallGraphNode::CallRecord &Edge) {
111  Optional<Scaled64> Res = None;
112  if (!Edge.first)
113  return Res;
114  assert(isa<Instruction>(Edge.first));
115  CallSite CS(cast<Instruction>(Edge.first));
116  Function *Caller = CS.getCaller();
117  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
118 
119  // Now compute the callsite count from relative frequency and
120  // entry count:
121  BasicBlock *CSBB = CS.getInstruction()->getParent();
122  Scaled64 EntryFreq(BFI.getEntryFreq(), 0);
123  Scaled64 BBCount(BFI.getBlockFreq(CSBB).getFrequency(), 0);
124  BBCount /= EntryFreq;
125  BBCount *= Counts[Caller];
126  return Optional<Scaled64>(BBCount);
127  };
128 
129  CallGraph CG(M);
130  // Propgate the entry counts on the callgraph.
132  &CG, GetCallSiteProfCount, [&](const CallGraphNode *N, Scaled64 New) {
133  auto F = N->getFunction();
134  if (!F || F->isDeclaration())
135  return;
136 
137  Counts[F] += New;
138  });
139 
140  // Set the counts as metadata.
141  for (auto Entry : Counts) {
142  Entry.first->setEntryCount(ProfileCount(
143  Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic));
144  }
145 
146  return PreservedAnalyses::all();
147 }
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
cl::opt< int > InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), cl::ZeroOrMore, cl::desc("Initial value of synthetic entry count."))
Initial synthetic count assigned to functions.
F(f)
A node in the call graph for a module.
Definition: CallGraph.h:165
static cl::opt< int > ColdSyntheticCount("cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for cold functions."))
Initial synthetic count assigned to cold functions.
InstrTy * getInstruction() const
Definition: CallSite.h:92
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
static void propagate(const CallGraphType &CG, GetProfCountTy GetProfCount, AddCountTy AddCount)
Propgate synthetic entry counts on a callgraph CG.
Function::ProfileCount ProfileCount
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
Class to represent profile counts.
Definition: Function.h:261
Function * getFunction() const
Returns the function that this call graph node represents.
Definition: CallGraph.h:188
Analysis pass which computes BlockFrequencyInfo.
Module.h This file contains the declarations for the Module class.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:74
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:267
std::pair< WeakTrackingVH, CallGraphNode * > CallRecord
A pair of the calling instruction (a call or invoke) and the call graph node being called...
Definition: CallGraph.h:169
#define N
static void initializeCounts(Module &M, function_ref< void(Function *, uint64_t)> SetCount)
static cl::opt< int > InlineSyntheticCount("inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, cl::desc("Initial synthetic entry count for inline functions."))
Initial synthetic count assigned to inline functions.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ScaledNumber< uint64_t > Scaled64
A container for analyses that lazily runs them and caches their results.
const BasicBlock * getParent() const
Definition: Instruction.h:67
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:1038