40 #define DEBUG_TYPE "inline" 44 cl::desc(
"Cost of alloca argument"));
51 cl::desc(
"Maximum alloca size to use for inline cost"));
65 unsigned getInlineThreshold(
CallSite CS)
const;
83 "AMDGPU Function Integration/Inlining",
false,
false)
95 TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
99 void AMDGPUInliner::getAnalysisUsage(
AnalysisUsage &AU)
const {
104 unsigned AMDGPUInliner::getInlineThreshold(
CallSite CS)
const {
105 int Thres = Params.DefaultThreshold;
113 if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
115 Thres = Params.HintThreshold.getValue();
119 return (
unsigned)Thres;
124 uint64_t AllocaSize = 0;
127 Type *Ty = PtrArg->getType();
132 if (
const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
133 if (!AI->isStaticAlloca() || !AIVisited.
insert(AI).second)
147 return (
unsigned)Thres;
154 if (!Callee || Callee->
size() != 1)
158 if (!isa<CallInst>(
I)) {
161 if (isa<ReturnInst>(*std::next(
I->getIterator()))) {
195 bool RemarksEnabled =
false;
200 RemarksEnabled =
true;
204 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
206 return ACT->getAssumptionCache(
F);
210 None, PSI, RemarksEnabled ? &ORE :
nullptr);
Pass interface - Implemented by all 'passes'.
A parsed version of the target data layout string, and methods for querying it. ...
Thresholds to tune inline cost analysis.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch...
This class represents lattice values for constants.
An immutable pass that tracks lazily created AssumptionCache objects.
iterator_range< IterTy > args() const
A cache of @llvm.assume calls within a function.
Address space for private memory.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static bool isWrapperOnlyCall(CallSite CS)
Represents the cost of inlining a function.
bool isNoInline() const
Return true if the call should not be inlined.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
bool runOnSCC(CallGraphSCC &SCC) override
Main run interface method, this implements the interface required by the Pass class.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Pass * createAMDGPUFunctionInliningPass()
This class contains all of the helper code which is used to perform the inlining operations that do n...
amdgpu AMDGPU Function Integration Inlining
bool isInlineViable(Function &Callee)
Minimal filter to detect invalid constructs for inlining.
void initializeAMDGPUInlinerPass(PassRegistry &)
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
static cl::opt< unsigned > ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost"))
static InlineCost getAlways(const char *Reason)
amdgpu Simplify well known AMD library false Value * Callee
const BasicBlock & getEntryBlock() const
initializer< Ty > init(const Ty &Val)
void getAnalysisUsage(AnalysisUsage &Info) const override
For this class, we declare that we require and preserve the call graph.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
The ModulePass which wraps up a CallGraph and the logic to build it.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if this function has the given attribute.
bool isPointerTy() const
True if this is an instance of PointerType.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function< AssumptionCache &(Function &)> &GetAssumptionCache, Optional< function_ref< BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options...
Module.h This file contains the declarations for the Module class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
FunTy * getCaller() const
Return the caller function for this call site.
static InlineCost getNever(const char *Reason)
StringRef getName() const
Return a constant reference to the value's name.
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static cl::opt< int > ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200), cl::desc("Cost of alloca argument"))
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Module * getParent()
Get the module that this global value is contained inside of...
INITIALIZE_PASS_BEGIN(AMDGPUInliner, "amdgpu-inline", "AMDGPU Function Integration/Inlining", false, false) INITIALIZE_PASS_END(AMDGPUInliner
LLVM Value Representation.
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
int DefaultThreshold
The default threshold to start with for a callee.
An instruction to allocate memory on the stack.