LLVM  8.0.1
Inliner.cpp
//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls and updating the call graph. The decisions of which calls
// are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//

15 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
39 #include "llvm/IR/Attributes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/CallSite.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/DiagnosticInfo.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstIterator.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/IntrinsicInst.h"
51 #include "llvm/IR/Metadata.h"
52 #include "llvm/IR/Module.h"
53 #include "llvm/IR/PassManager.h"
54 #include "llvm/IR/User.h"
55 #include "llvm/IR/Value.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/Debug.h"
64 #include <algorithm>
65 #include <cassert>
66 #include <functional>
67 #include <sstream>
68 #include <tuple>
69 #include <utility>
70 #include <vector>
71 
using namespace llvm;

#define DEBUG_TYPE "inline"

STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// This weirdly named statistic tracks the number of times that, when
// attempting to inline a function A into B, we analyze B's callers to see
// whether inlining B into them first would have been more profitable, which
// would block this inline step.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
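
// These counters appear in `opt -stats` output under the "inline" debug type;
// a hypothetical run might report, e.g.:
//   42 inline - Number of functions inlined
//    3 inline - Number of call sites deleted, not inlined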

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);
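// For example, one might experiment with relying solely on lifetime-based
// stack coloring via a hypothetical invocation such as:
//   opt -O2 -disable-inlined-alloca-merging input.ll -S -o output.ll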

namespace {

enum class InlinerFunctionImportStatsOpts {
  No = 0,
  Basic = 1,
  Verbose = 2,
};

} // end anonymous namespace

static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
    "inliner-function-import-stats",
    cl::init(InlinerFunctionImportStatsOpts::No),
    cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
                          "basic statistics"),
               clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
                          "printing of statistics for each inlined function")),
    cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
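
// For example, per-function import statistics could be requested with a
// hypothetical invocation such as:
//   opt -passes='cgscc(inline)' -inliner-function-import-stats=verbose in.ll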

/// Flag to add inline messages as callsite attributes 'inline-remark'.
static cl::opt<bool>
    InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
                          cl::Hidden,
                          cl::desc("Enable adding inline-remark attribute to"
                                   " callsites processed by inliner but not"
                                   " inlined"));
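
// When enabled, a rejected call site ends up carrying a string attribute,
// rendered in the IR roughly as (illustrative value):
//   "inline-remark"="(cost=never): noinline function attribute"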

LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}

LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}

/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  getAAResultsAnalysisUsage(AU);
  CallGraphSCCPass::getAnalysisUsage(AU);
}

using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;

/// Look at all of the allocas that we inlined through this call site. If we
/// have already inlined other allocas through other calls into this function,
/// then we know that they have disjoint lifetimes and that we can merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs. One thing that we *really*
/// don't want to hurt is SRoA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is an
/// array type. These are usually not promoted because someone is using a
/// variable index into them. These are also often the most important ones to
/// merge.
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all. This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
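///
/// For illustration (hypothetical IR), after inlining two callees into the
/// same caller we might see:
///   %x = alloca [4 x i32]   ; from the first inlined callee
///   %y = alloca [4 x i32]   ; from the second inlined callee
/// Since the inlined bodies have disjoint lifetimes, %y can be replaced with
/// %x, shrinking the caller's stack frame.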
static void mergeInlinedArrayAllocas(
    Function *Caller, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site. For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A. Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining. When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint. We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca. If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function. Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {
      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      LLVM_DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                        << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}

/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static InlineResult InlineCallIfPossible(
    CallSite CS, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function. Get the list of static allocas that were
  // inlined.
  InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
  if (!IR)
    return IR;

  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return IR; // success
}

/// Return true if inlining of CS can block the caller from being
/// inlined, when inlining the caller would be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
/// caller if \p CS is suppressed for inlining.
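///
/// For example (hypothetical numbers): suppose static function B calls C and
/// C's cost at this site is 100. If inlining C into B would push B past the
/// threshold at B's own call sites, those foregone inlines are summed into
/// \p TotalSecondaryCost; if the total stays below 100, deferring C and
/// inlining B upward instead is the cheaper choice, so we return true.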
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
                 int &TotalSecondaryCost,
                 function_ref<InlineCost(CallSite CS)> GetInlineCost) {
  // For now we only handle local or inline functions.
  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
    return false;
  // If the cost of inlining CS is non-positive, it is not going to prevent the
  // caller from being inlined into its callers and hence we don't need to
  // defer.
  if (IC.getCost() <= 0)
    return false;
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
  TotalSecondaryCost = 0;
  // The candidate cost to be imposed upon the current function.
  int CandidateCost = IC.getCost() - 1;
  // If the caller has local linkage and can be inlined to all its callers, we
  // can apply a huge negative bonus to TotalSecondaryCost.
  bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
  // This bool tracks what happens if we DO inline C into B.
  bool inliningPreventsSomeOuterInline = false;
  for (User *U : Caller->users()) {
    // If the caller will not be removed (either because it does not have a
    // local linkage or because the LastCallToStaticBonus has been already
    // applied), then we can exit the loop early.
    if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
      return false;
    CallSite CS2(U);

    // If this isn't a call to Caller (it could be some other sort
    // of reference) skip it. Such references will prevent the caller
    // from being removed.
    if (!CS2 || CS2.getCalledFunction() != Caller) {
      ApplyLastCallBonus = false;
      continue;
    }

    InlineCost IC2 = GetInlineCost(CS2);
    ++NumCallerCallersAnalyzed;
    if (!IC2) {
      ApplyLastCallBonus = false;
      continue;
    }
    if (IC2.isAlways())
      continue;

    // See if inlining of the original callsite would erase the cost delta of
    // this callsite. We subtract off the penalty for the call instruction,
    // which we would be deleting.
    if (IC2.getCostDelta() <= CandidateCost) {
      inliningPreventsSomeOuterInline = true;
      TotalSecondaryCost += IC2.getCost();
    }
  }
  // If all outer calls to Caller would get inlined, the cost for the last
  // one is set very low by getInlineCost, in anticipation that Caller will
  // be removed entirely. We did not account for this above unless there
  // is only one caller of Caller.
  if (ApplyLastCallBonus)
    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;

  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
    return true;

  return false;
}

static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
                                            const ore::NV &Arg) {
  return R << Arg.Val;
}

template <class RemarkT>
RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
  using namespace ore;
  if (IC.isAlways()) {
    R << "(cost=always)";
  } else if (IC.isNever()) {
    R << "(cost=never)";
  } else {
    R << "(cost=" << ore::NV("Cost", IC.getCost())
      << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
  }
  if (const char *Reason = IC.getReason())
    R << ": " << ore::NV("Reason", Reason);
  return R;
}

static std::string inlineCostStr(const InlineCost &IC) {
  std::stringstream Remark;
  Remark << IC;
  return Remark.str();
}
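
// For a regular candidate, inlineCostStr() renders as, e.g. (illustrative
// numbers), "(cost=25, threshold=225)"; always/never candidates render as
// "(cost=always)" or "(cost=never)", with ": <reason>" appended when a
// reason is available.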

/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimisation remark later
/// using that cost, so we won't do so from this function.
static Optional<InlineCost>
shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
             OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  InlineCost IC = GetInlineCost(CS);
  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  if (IC.isAlways()) {
    LLVM_DEBUG(dbgs() << "    Inlining " << inlineCostStr(IC)
                      << ", Call: " << *CS.getInstruction() << "\n");
    return IC;
  }

  if (IC.isNever()) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining " << inlineCostStr(IC)
                      << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller) << " because it should never be inlined "
             << IC;
    });
    return IC;
  }

  if (!IC) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining " << inlineCostStr(IC)
                      << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller) << " because too costly to inline " << IC;
    });
    return IC;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                      << " Cost = " << IC.getCost()
                      << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
                                      Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts";
    });

    // IC does not bool() to false, so get an InlineCost that will.
    // This will not be inspected to make an error message.
    return None;
  }

  LLVM_DEBUG(dbgs() << "    Inlining " << inlineCostStr(IC)
                    << ", Call: " << *CS.getInstruction() << '\n');
  return IC;
}

/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
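///
/// InlineHistory is a chain of (function, parent-index) pairs; for instance,
/// an entry {C, 0} whose parent entry 0 is {B, -1} records a call site created
/// by inlining C, which itself was exposed by inlining B. A parent index of
/// -1 terminates the walk.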
static bool InlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true;
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}

bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}

bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return inlineCalls(SCC);
}

static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
                              const BasicBlock *Block, const Function &Callee,
                              const Function &Caller, const InlineCost &IC) {
  ORE.emit([&]() {
    bool AlwaysInline = IC.isAlways();
    StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
    return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
           << ore::NV("Callee", &Callee) << " inlined into "
           << ore::NV("Caller", &Caller) << " with " << IC;
  });
}

static void setInlineRemark(CallSite &CS, StringRef message) {
  if (!InlineRemarkAttribute)
    return;

  Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
  CS.addAttribute(AttributeList::FunctionIndex, attr);
}

static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallSite CS)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        CallSite CS(cast<Value>(&I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never
        // inline it. If it is an indirect call, inlining may resolve it to
        // be a direct call, so we keep it.
        if (Function *Callee = CS.getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;

            setInlineRemark(CS, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
            continue;
          }

        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
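  // E.g., for call sites [a, b, c, d] where only b and d target functions in
  // this SCC, the loop leaves [a, c, d, b] with FirstCallInSCC == 2
  // (illustrative labels; relative order within each half is not preserved).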

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause
    // indirect calls to become direct calls.
    // CallSites may be modified inside, so a ranged for loop cannot be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // We can only inline direct calls to non-declarations.
      if (!Callee || Callee->isDeclaration())
        continue;

      Instruction *Instr = CS.getInstruction();

      bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);

      int InlineHistoryID;
      if (!IsTriviallyDead) {
        // If this call site was obtained by inlining another function, verify
        // that the inline path for the function did not include the callee
        // itself. If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
          setInlineRemark(CS, "recursive");
          continue;
        }
      }

      // FIXME for new PM: because of the old PM we currently generate ORE and
      // in turn BFI on demand. With the new PM, the ORE dependency should
      // just become a regular analysis dependency.
      OptimizationRemarkEmitter ORE(Caller);

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // If the policy determines that we should inline this function,
      // delete the call instead.
      if (!OIC.hasValue()) {
        setInlineRemark(CS, "deferred");
        continue;
      }

      if (!OIC.getValue()) {
        // shouldInline() call returned a negative inline cost that explains
        // why this callsite should not be inlined.
        setInlineRemark(CS, inlineCostStr(*OIC));
        continue;
      }

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size. This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (IsTriviallyDead) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead call: " << *Instr << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        setInlineRemark(CS, "trivially dead");
        CG[Caller]->removeCallEdgeFor(CS);
        Instr->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS->getDebugLoc();
        BasicBlock *Block = CS.getParent();

        // Attempt to inline the function.
        using namespace ore;

        InlineResult IR = InlineCallIfPossible(
            CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
            InsertLifetime, AARGetter, ImportedFunctionsStats);
        if (!IR) {
          setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                            Block)
                   << NV("Callee", Callee) << " will not be inlined into "
                   << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
          });
          continue;
        }
        ++NumInlined;

        emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process. They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (Value *Ptr : InlineInfo.InlinedCalls)
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead function: "
                          << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list. If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}

bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
                         [this](CallSite CS) { return getInlineCost(CS); },
                         LegacyAARGetter(*this), ImportedFunctionsStats);
}

/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                InlinerFunctionImportStatsOpts::Verbose);
  return removeDeadFunctions(CG);
}

/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
                                            bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<Function *, 16> DeadFunctionsInComdats;

  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node. These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Record the node so it can be removed from the call graph and deleted.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program. Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (F->hasComdat()) {
        DeadFunctionsInComdats.push_back(F);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Filter out the functions whose comdats remain alive.
    filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
    // Remove the rest.
    for (Function *F : DeadFunctionsInComdats)
      RemoveCGN(CG[F]);
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so. We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}

InlinerPass::~InlinerPass() {
  if (ImportedFunctionsStats) {
    assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
    ImportedFunctionsStats->dump(InlinerFunctionImportStats ==
                                 InlinerFunctionImportStatsOpts::Verbose);
  }
}

PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const ModuleAnalysisManager &MAM =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);

  if (!ImportedFunctionsStats &&
      InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
    ImportedFunctionsStats =
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
    ImportedFunctionsStats->setModuleInfo(M);
  }

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a super linear fashion.
  SmallVector<std::pair<CallSite, int>, 16> Calls;

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto CS = CallSite(&I))
        if (Function *Callee = CS.getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls.push_back({CS, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            setInlineRemark(CS, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls.empty())
    return PreservedAnalyses::all();

  // Capture updatable variables for the current SCC and RefSCC.
  auto *C = &InitialC;
  auto *RC = &C->getOuterRefSCC();

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Loop forward over all of the calls. Note that we cannot cache the size as
  // inlining can introduce new calls that need to be processed.
  for (int i = 0; i < (int)Calls.size(); ++i) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls[i].first.getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C)
      continue;
    if (F.hasFnAttribute(Attribute::OptimizeNone)) {
      setInlineRemark(Calls[i].first, "optnone attribute");
      continue;
    }

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");

    // Get a FunctionAnalysisManager via a proxy for this particular node. We
    // do this each time we visit a node as the SCC may have changed and as
    // we're going to mutate this particular function we want to make sure the
    // proxy is in place to forward any invalidation events. We can use the
    // manager we get here for looking up results for functions other than this
    // node however because those functions aren't going to be mutated by this
    // pass.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
            .getManager();

    // Get the remarks emission analysis for the caller.
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };

    auto GetInlineCost = [&](CallSite CS) {
      Function &Callee = *CS.getCalledFunction();
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
                           PSI, &ORE);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
      int InlineHistoryID;
      CallSite CS;
      std::tie(CS, InlineHistoryID) = Calls[i];
      Function &Callee = *CS.getCalledFunction();

      if (InlineHistoryID != -1 &&
          InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
        setInlineRemark(CS, "recursive");
        continue;
      }

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
          UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        setInlineRemark(CS, "recursive SCC split");
        continue;
      }

      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      // Check whether we want to inline this callsite.
      if (!OIC.hasValue()) {
        setInlineRemark(CS, "deferred");
        continue;
      }

      if (!OIC.getValue()) {
        // shouldInline() call returned a negative inline cost that explains
        // why this callsite should not be inlined.
        setInlineRemark(CS, inlineCostStr(*OIC));
        continue;
      }

      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      // Get DebugLoc to report. CS will be invalid after Inliner.
      DebugLoc DLoc = CS->getDebugLoc();
      BasicBlock *Block = CS.getParent();

      using namespace ore;

      InlineResult IR = InlineFunction(CS, IFI);
      if (!IR) {
        setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
        ORE.emit([&]() {
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
                 << NV("Callee", &Callee) << " will not be inlined into "
                 << NV("Caller", &F) << ": " << NV("Reason", IR.message);
        });
        continue;
      }
      DidInline = true;
      InlinedCallees.insert(&Callee);

      ++NumInlined;

      emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
          if (Function *NewCallee = CS.getCalledFunction())
            if (!NewCallee->isDeclaration())
              Calls.push_back({CS, NewHistoryID});
      }

      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
        ImportedFunctionsStats->recordInline(F, Callee);

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions, check whether this makes the callee trivially
      // dead. In that case, we can drop the body of the function eagerly
      // which may reduce the number of callers of other functions to one,
      // changing inline cost thresholds.
      if (Callee.hasLocalLinkage()) {
        // To check this we also need to nuke any dead constant uses (perhaps
        // made dead by this operation on other functions).
        Callee.removeDeadConstantUsers();
        if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
          Calls.erase(
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
                             [&Callee](const std::pair<CallSite, int> &Call) {
                               return Call.first.getCaller() == &Callee;
                             }),
              Calls.end());
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
                 "Cannot cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
        }
      }
    }

    // Back the call index up by one to put us in a good position to go around
    // the outer loop.
    --i;

    if (!DidInline)
      continue;
    Changed = true;

    // Add all the inlined callees' edges as ref edges to the caller. These are
    // by definition trivial edges as we always have *some* transitive ref edge
    // chain. While in some cases these edges are direct calls inside the
    // callee, they have to be modeled in the inliner as reference edges as
    // there may be a reference edge anywhere along the chain from the current
    // caller to the callee that causes the whole thing to appear like
    // a (transitive) reference edge that will require promotion to a call edge
    // below.
    for (Function *InlinedCallee : InlinedCallees) {
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
      for (LazyCallGraph::Edge &E : *CalleeN)
        RC->insertTrivialRefEdge(N, E.getNode());
    }

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
    RC = &C->getOuterRefSCC();

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If we
    // do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight over
    // approximation of the possible inlining decisions that must be avoided,
    // but is relatively efficient to store. We use C != OldC to know when
    // a new SCC is generated and the original SCC may be generated via merge
    // in later iterations.
    //
    // It is also possible that even if no new SCC is generated
    // (i.e., C == OldC), the original SCC could be split and then merged
    // into the same one as itself, and the original SCC will be added into
    // UR.CWorklist again; we want to catch such cases too.
    //
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if ((C != OldC || UR.CWorklist.count(OldC)) &&
        llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
            .getManager();
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
    ++NumDeleted;
  }

  if (!Changed)
    return PreservedAnalyses::all();

  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PreservedAnalyses PA;
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  return PA;
}
InlinerFunctionImportStatsOpts
Definition: Inliner.cpp:99
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool doFinalization(CallGraph &CG) override
Remove now-dead linkonce functions at the end of processing to avoid breaking the SCC traversal...
Definition: Inliner.cpp:772
void setModuleInfo(const Module &M)
Set information like AllFunctions, ImportedFunctions, ModuleName.
bool hasLocalLinkage() const
Definition: GlobalValue.h:436
Diagnostic information for missed-optimization remarks.
DiagnosticInfoOptimizationBase::Argument NV
bool isNever() const
Definition: InlineCost.h:105
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl< std::pair< Function *, int >> &InlineHistory)
Return true if the specified inline history ID indicates an inline history that includes the specifie...
Definition: Inliner.cpp:483
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LazyCallGraph::SCC & updateCGAndAnalysisManagerForFunctionPass(LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR)
Helper to update the call graph after running a function pass.
void recordInline(const Function &Caller, const Function &Callee)
Record inline of.
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
Definition: PassManager.h:740
static InlineResult InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref< AAResults &(Function &)> &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
If it is possible to inline the specified call site, do so and update the CallGraph for this operatio...
Definition: Inliner.cpp:275
virtual InlineCost getInlineCost(CallSite CS)=0
This method must be implemented by the subclass to determine the cost of inlining the specified call ...
SCC * lookupSCC(Node &N) const
Lookup a function&#39;s SCC in the graph.
Analysis providing profile information.
Implements a lazy call graph analysis and related passes for the new pass manager.
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
F(f)
InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true)
This function inlines the called function into the basic block of the caller.
Calculate and dump ThinLTO specific inliner stats.
static std::string inlineCostStr(const InlineCost &IC)
Definition: Inliner.cpp:410
This class captures the data input to the InlineFunction call, and records the auxiliary results prod...
Definition: Cloning.h:174
static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory)
Look at all of the allocas that we inlined through this call site.
Definition: Inliner.cpp:162
A proxy from a FunctionAnalysisManager to an SCC.
A node in the call graph for a module.
Definition: CallGraph.h:165
bool skipSCC(CallGraphSCC &SCC) const
Optional passes call this function to check whether the pass should be skipped.
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
Support structure for SCC passes to communicate updates the call graph back to the CGSCC pass manager...
Represents the cost of inlining a function.
Definition: InlineCost.h:64
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:114
AnalysisUsage & addRequired()
bool runOnSCC(CallGraphSCC &SCC) override
Main run interface method, this implements the interface required by the Pass class.
Definition: Inliner.cpp:502
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:113
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This class is a functor to be used in legacy module or SCC passes for computing AA results for a func...
bool isLibFunction(Function &F) const
Test whether a function is a known and defined library function tracked by the call graph...
This file contains the simple types necessary to represent the attributes associated with functions a...
Function * removeFunctionFromModule(CallGraphNode *CGN)
Unlink the function from this module, returning it.
Definition: CallGraph.cpp:122
InstrTy * getInstruction() const
Definition: CallSite.h:92
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR)
Definition: Inliner.cpp:869
static Optional< InlineCost > shouldInline(CallSite CS, function_ref< InlineCost(CallSite CS)> GetInlineCost, OptimizationRemarkEmitter &ORE)
Return the cost only if the inliner should attempt to inline at the given CallSite.
Definition: Inliner.cpp:420
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:161
Class to represent array types.
Definition: DerivedTypes.h:369
A lazily constructed view of the call graph of a module.
const int LastCallToStaticBonus
Definition: InlineCost.h:47
DiagnosticInfoOptimizationBase::setIsVerbose setIsVerbose
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
InlineResult is basically true or false.
Definition: InlineCost.h:136
#define DEBUG_TYPE
Definition: Inliner.cpp:74
amdgpu Simplify well known AMD library false Value * Callee
void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
Definition: Constants.cpp:537
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:427
SmallVector< CallSite, 8 > InlinedCallSites
All of the new call sites inlined into the caller.
Definition: Cloning.h:205
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isAlways() const
Definition: InlineCost.h:104
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1083
void getAnalysisUsage(AnalysisUsage &Info) const override
For this class, we declare that we require and preserve the call graph.
Definition: Inliner.cpp:132
void removeAnyCallEdgeTo(CallGraphNode *Callee)
Removes all call edges from this node to the specified callee function.
Definition: CallGraph.cpp:203
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
static MetadataAsValue * getIfExists(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:114
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
bool inlineCalls(CallGraphSCC &SCC)
This function performs the main work of the pass.
Definition: Inliner.cpp:757
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:643
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
SmallPtrSetImpl< LazyCallGraph::RefSCC * > & InvalidatedRefSCCs
The set of invalidated RefSCCs which should be skipped if they are found in RCWorklist.
SmallPtrSetImpl< LazyCallGraph::SCC * > & InvalidatedSCCs
The set of invalidated SCCs which should be skipped if they are found in CWorklist.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Diagnostic information for applied optimization remarks.
Represent the analysis usage information of a pass.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1193
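A usage sketch (the container of call sites is an assumed input):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/CallSite.h"

  // True if at least one recorded call site is indirect.
  bool hasIndirectCall(llvm::SmallVectorImpl<llvm::CallSite> &Calls) {
    return llvm::any_of(Calls, [](llvm::CallSite CS) {
      return !CS.getCalledFunction();
    });
  }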
A node in the call graph.
static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC)
Definition: Inliner.cpp:508
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
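A sketch of the usual visited-set idiom built on insert() and count() (names illustrative):

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/IR/Function.h"

  // insert().second is false for duplicates; count(F) would give the
  // same 0-or-1 membership answer without modifying the set.
  bool markVisited(llvm::SmallPtrSetImpl<llvm::Function *> &Visited,
                   llvm::Function *F) {
    return Visited.insert(F).second;
  }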
A class used to represent edges in the call graph.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1226
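The range-based erase/remove_if pairing, as a sketch (the predicate is illustrative):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Function.h"

  // Drops declarations; remove_if only reorders, erase() actually shrinks.
  void dropDeclarations(llvm::SmallVectorImpl<llvm::Function *> &Fns) {
    Fns.erase(llvm::remove_if(Fns, [](llvm::Function *F) {
                return F->isDeclaration();
              }),
              Fns.end());
  }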
Used in the streaming interface as the general argument type.
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
bool doInitialization(CallGraph &CG) override
doInitialization - This method is called before the SCCs of the program have been processed, allowing the pass to do initialization as necessary.
Definition: Inliner.cpp:496
size_t size() const
Definition: SmallVector.h:53
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1207
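A usage sketch; the wrapper compares with == and returns the end iterator when the value is absent:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"

  bool contains(llvm::SmallVectorImpl<unsigned> &V, unsigned Val) {
    return llvm::find(V, Val) != V.end();
  }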
int getThreshold() const
Get the threshold against which the cost was computed.
Definition: InlineCost.h:116
const char * getReason() const
Get the reason of Always or Never.
Definition: InlineCost.h:122
void getAAResultsAnalysisUsage(AnalysisUsage &AU)
A helper for the legacy pass manager to populate AU to add uses to make sure the analyses required by createLegacyPMAAResults are available.
SmallPriorityWorklist< LazyCallGraph::SCC *, 1 > & CWorklist
Worklist of the SCCs queued for processing.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:106
unsigned first
Function * getFunction() const
Returns the function that this call graph node represents.
Definition: CallGraph.h:188
A function analysis which provides an AssumptionCache.
static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function< AssumptionCache &(Function &)> GetAssumptionCache, ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, bool InsertLifetime, function_ref< InlineCost(CallSite CS)> GetInlineCost, function_ref< AAResults &(Function &)> AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats)
Definition: Inliner.cpp:529
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
Analysis pass which computes BlockFrequencyInfo.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:418
void removeDeadFunction(Function &F)
Remove a dead function from the call graph (typically to delete it).
ImportedFunctionsInliningStatistics ImportedFunctionsStats
Definition: Inliner.h:78
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
An analysis over an "inner" IR unit that provides access to an analysis manager over an "outer" IR unit.
Definition: PassManager.h:1154
static cl::opt< bool > DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden)
Flag to disable manual alloca merging.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static LocalAsMetadata * getIfExists(Value *Local)
Definition: Metadata.h:440
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
void dropAllReferences()
dropAllReferences() - This method causes all the subinstructions to "let go" of all references that they are maintaining.
Definition: Function.cpp:346
AssumptionCacheTracker * ACT
Definition: Inliner.h:76
bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly=false)
Remove dead functions.
Definition: Inliner.cpp:780
Node * lookup(const Function &F) const
Lookup a function in the graph which has already been scanned and added.
BBTy * getParent() const
Get the basic block containing the call site.
Definition: CallSite.h:97
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void mergeAttributesForInlining(Function &Caller, const Function &Callee)
Merge caller's and callee's attributes.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
void dump(bool Verbose)
Dump stats computed with InlinerStatistics class.
iterator_range< user_iterator > users()
Definition: Value.h:400
int getCost() const
Get the inline cost estimate.
Definition: InlineCost.h:110
int getCostDelta() const
Get the cost delta from the threshold for inlining.
Definition: InlineCost.h:131
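The cost accessors fit together as in this hedged sketch (GetInlineCost stands for the callback the inliner is configured with); by definition the delta is getThreshold() - getCost():

  // Sketch only: an InlineCost converts to true when cost < threshold.
  llvm::InlineCost IC = GetInlineCost(CS);
  if (IC.isAlways()) {
    // e.g. alwaysinline; there is no cost/threshold pair to inspect.
  } else if (IC) {
    int Slack = IC.getCostDelta(); // == IC.getThreshold() - IC.getCost()
    (void)Slack; // larger slack means a more clearly profitable inline
  }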
bool hasComdat() const
Definition: GlobalObject.h:100
void addAttribute(unsigned i, Attribute::AttrKind Kind)
Definition: CallSite.h:337
static void setInlineRemark(CallSite &CS, StringRef message)
Definition: Inliner.cpp:520
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:618
void filterDeadComdatFunctions(Module &M, SmallVectorImpl< Function *> &DeadComdatFunctions)
Filter out potentially dead comdat functions where other entries keep the entire comdat group alive.
SmallVector< AllocaInst *, 4 > StaticAllocas
InlineFunction fills this in with all static allocas that get copied into the caller.
Definition: Cloning.h:194
bool hasValue() const
Definition: Optional.h:165
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:74
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
FunTy * getCaller() const
Return the caller function for this call site.
Definition: CallSite.h:267
LegacyInlinerBase(char &ID)
Definition: Inliner.cpp:124
const char * message
Definition: InlineCost.h:137
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
SmallVector< WeakTrackingVH, 8 > InlinedCalls
InlineFunction fills this in with callsites that were inlined from the callee.
Definition: Cloning.h:198
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:789
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
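A minimal sketch of the checked-downcast idiom:

  #include "llvm/IR/Instructions.h"
  #include "llvm/Support/Casting.h"

  // dyn_cast returns null on a type mismatch, so the if guards both the
  // type test and every use of the downcast pointer.
  void visit(llvm::Instruction &I) {
    if (auto *AI = llvm::dyn_cast<llvm::AllocaInst>(&I))
      (void)AI->getAllocatedType();
  }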
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:175
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
ProfileSummaryInfo * PSI
Definition: Inliner.h:77
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2039
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:137
This header provides classes for managing passes over SCCs of the call graph.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit.
Definition: Globals.cpp:206
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect call).
Definition: CallSite.h:107
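A hedged sketch of the basic direct-call test the inliner builds on:

  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/Function.h"

  // getCalledFunction() is null for indirect calls; a direct call to a
  // function with a body is the elementary inline candidate.
  bool isDirectCallToDefinition(llvm::CallSite CS) {
    llvm::Function *Callee = CS.getCalledFunction();
    return Callee && !Callee->isDeclaration();
  }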
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
Module * getParent()
Get the module that contains this global value.
Definition: GlobalValue.h:566
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side effects.
Definition: Local.cpp:349
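A sketch of the usual cleanup loop (TLI may be null; the iterator must advance before the erase):

  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Transforms/Utils/Local.h"

  void deleteTriviallyDead(llvm::BasicBlock &BB,
                           const llvm::TargetLibraryInfo *TLI) {
    for (auto It = BB.begin(); It != BB.end();) {
      llvm::Instruction *I = &*It++; // advance first: erase invalidates I
      if (llvm::isInstructionTriviallyDead(I, TLI))
        I->eraseFromParent();
    }
  }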
LLVM Value Representation.
Definition: Value.h:73
An SCC of the call graph.
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
AssumptionCache & getAssumptionCache(Function &F)
Get the cached assumptions for a function.
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:49
This is the interface for LLVM's primary stateless and local alias analysis.
inst_range instructions(Function *F)
Definition: InstIterator.h:134
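A sketch of flat instruction iteration, the way call sites are typically gathered without walking basic blocks explicitly:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/InstIterator.h"

  void collectCalls(llvm::Function &F,
                    llvm::SmallVectorImpl<llvm::CallSite> &Calls) {
    for (llvm::Instruction &I : llvm::instructions(F))
      if (auto CS = llvm::CallSite(&I)) // non-null only for calls/invokes
        Calls.push_back(CS);
  }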
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
static bool shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, int &TotalSecondaryCost, function_ref< InlineCost(CallSite CS)> GetInlineCost)
Return true if inlining of CS would block the caller itself from being inlined, when inlining the caller is proved to be more beneficial.
Definition: Inliner.cpp:308
static cl::opt< InlinerFunctionImportStatsOpts > InlinerFunctionImportStats("inliner-function-import-stats", cl::init(InlinerFunctionImportStatsOpts::No), cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", "basic statistics"), clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions"))
This header defines various interfaces for pass management in LLVM.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
SmallDenseSet< std::pair< LazyCallGraph::Node *, LazyCallGraph::SCC * >, 4 > & InlinedInternalEdges
A hacky area where the inliner can retain history about inlining decisions that mutated the call graph.
This represents the llvm.dbg.declare instruction.
The optimization diagnostic interface.
bool use_empty() const
Definition: Value.h:323
bool isDefTriviallyDead() const
isDefTriviallyDead - Return true if it is trivially safe to remove this function definition from the module (because it isn't externally visible, does not have its address taken, and has no callers).
Definition: Function.cpp:1274
const BasicBlock * getParent() const
Definition: Instruction.h:67
an instruction to allocate memory on the stack
Definition: Instructions.h:60
static cl::opt< bool > InlineRemarkAttribute("inline-remark-attribute", cl::init(false), cl::Hidden, cl::desc("Enable adding inline-remark attribute to" " callsites processed by inliner but decided" " to be not inlined"))
Flag to add inline messages as callsite attributes 'inline-remark'.
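Note that cl::Hidden only suppresses the flag in -help output; the option still parses from the command line, e.g. (invocation illustrative) opt -inline -inline-remark-attribute -S in.ll -o out.ll.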