76 #define DEBUG_TYPE "amdgpu-rewrite-out-arguments" 81 "amdgpu-any-address-space-out-arguments",
82 cl::desc(
"Replace pointer out arguments with " 83 "struct returns for non-private address space"),
88 "amdgpu-max-return-arg-num-regs",
89 cl::desc(
"Approximately limit number of return registers for replacing out arguments"),
94 "Number out arguments moved to struct return values");
95 STATISTIC(NumOutArgumentFunctionsReplaced,
96 "Number of functions with out arguments moved to struct return values");
105 bool checkArgumentUses(
Value &
Arg)
const;
109 bool isVec3ToVec4Shuffle(
Type *Ty0,
Type* Ty1)
const;
122 bool doInitialization(
Module &M)
override;
129 "AMDGPU Rewrite Out Arguments",
false,
false)
134 char AMDGPURewriteOutArguments::
ID = 0;
136 bool AMDGPURewriteOutArguments::checkArgumentUses(
Value &
Arg)
const {
137 const int MaxUses = 10;
140 for (
Use &U : Arg.uses()) {
142 if (UseCount > MaxUses)
147 if (!BCI || !BCI->hasOneUse())
172 if (DL->getTypeAllocSize(SrcEltTy) != DL->getTypeAllocSize(DestEltTy))
175 return checkArgumentUses(*BCI);
189 bool AMDGPURewriteOutArguments::isOutArgumentCandidate(
Argument &
Arg)
const {
201 return checkArgumentUses(Arg);
204 bool AMDGPURewriteOutArguments::doInitialization(
Module &M) {
210 bool AMDGPURewriteOutArguments::isVec3ToVec4Shuffle(
Type *Ty0,
Type* Ty1)
const {
217 VT1->getNumElements() != 4)
221 DL->getTypeSizeInBits(VT1->getElementType());
234 MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
236 unsigned ReturnNumRegs = 0;
241 ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;
251 if (isOutArgumentCandidate(Arg)) {
253 <<
" in function " << F.
getName() <<
'\n');
267 if (
ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
287 bool ThisReplaceable =
true;
297 unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
307 true, BB->
end(), BB, RI);
310 SI = dyn_cast<StoreInst>(Q.
getInst());
313 LLVM_DEBUG(
dbgs() <<
"Found out argument store: " << *SI <<
'\n');
316 ThisReplaceable =
false;
321 if (!ThisReplaceable)
324 for (std::pair<ReturnInst *, StoreInst *>
Store : ReplaceableStores) {
325 Value *ReplVal =
Store.second->getValueOperand();
327 auto &ValVec = Replacements[
Store.first];
329 [OutArg](
const std::pair<Argument *, Value *> &Entry) {
330 return Entry.first == OutArg;}) != ValVec.end()) {
332 <<
"Saw multiple out arg stores" << *OutArg <<
'\n');
335 ThisReplaceable =
false;
339 ValVec.emplace_back(OutArg, ReplVal);
340 Store.second->eraseFromParent();
343 if (ThisReplaceable) {
345 OutArgIndexes.
insert(OutArg->getArgNo());
346 ++NumOutArgumentsReplaced;
352 if (Replacements.
empty())
359 F.getFunctionType()->params(),
362 LLVM_DEBUG(
dbgs() <<
"Computed new return type: " << *NewRetTy <<
'\n');
365 F.getName() +
".body");
385 for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
397 for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
399 Value *Val = ReturnPoint.second;
402 Type *EffectiveEltTy = EltTy;
403 if (
StructType *CT = dyn_cast<StructType>(EltTy)) {
404 assert(CT->getNumElements() == 1);
405 EffectiveEltTy = CT->getElementType(0);
408 if (DL->getTypeSizeInBits(EffectiveEltTy) !=
409 DL->getTypeSizeInBits(Val->
getType())) {
418 if (EltTy != EffectiveEltTy)
448 int RetIdx = RetTy->
isVoidTy() ? 0 : 1;
458 Align = DL->getABITypeAlignment(EltTy);
478 ++NumOutArgumentFunctionsReplaced;
483 return new AMDGPURewriteOutArguments();
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Return a value (possibly void), from a function.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string, and methods for querying it. ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Provides a lazy, caching interface for making common memory aliasing information queries, backed by LLVM's alias analysis passes.
This class represents an incoming formal argument to a Function.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
AMDGPU Rewrite Out Arguments
void push_back(const T &Elt)
This class represents a function call, abstracting a target machine's calling convention.
Like Internal, but omit from symbol table.
STATISTIC(NumFunctions, "Total number of functions")
AttrBuilder & addAttribute(Attribute::AttrKind Val)
Add an attribute to the builder.
This defines the Use class.
bool hasByValAttr() const
Return true if this argument has the byval attribute.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
Type * getPointerElementType() const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Class to represent struct types.
A Use represents the edge between a Value definition and its users.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
ReturnInst * CreateRet(Value *V)
Create a 'ret <val>' instruction.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This file contains the simple types necessary to represent the attributes associated with functions a...
uint64_t getNumElements() const
INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE, "AMDGPU Rewrite Out Arguments", false, false) INITIALIZE_PASS_END(AMDGPURewriteOutArguments
Class to represent function types.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Type * getType() const
All values are typed, get the type of this value.
void setComdat(Comdat *C)
This class represents a no-op cast from one type to another.
An instruction for storing to memory.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Class to represent pointers.
FunctionPass * createAMDGPURewriteOutArgumentsPass()
bool isVoidTy() const
Return true if this is 'void'.
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Type * getReturnType() const
Returns the type of the ret val.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
void stealArgumentListFrom(Function &Src)
Steal arguments from another function.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
LLVM Basic Block Representation.
static cl::opt< bool > AnyAddressSpace("amdgpu-any-address-space-out-arguments", cl::desc("Replace pointer out arguments with " "struct returns for non-private address space"), cl::Hidden, cl::init(false))
The instances of the Type class are immutable: once they are created, they are never changed...
static unsigned getPointerOperandIndex()
This is an important class for using LLVM in a threaded context.
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isEntryFunctionCC(CallingConv::ID CC)
Represent the analysis usage information of a pass.
void splice(iterator where, iplist_impl &L2)
FunctionPass class - This class is used to implement most global optimizations.
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
unsigned getStructNumElements() const
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
A memory dependence query can return one of three different answers.
Representation for a specific memory location.
static cl::opt< unsigned > MaxNumRetRegs("amdgpu-max-return-arg-num-regs", cl::desc("Approximately limit number of return registers for replacing out arguments"), cl::Hidden, cl::init(16))
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
unsigned getParamAlignment() const
If this is a byval or inalloca argument, return its alignment.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
bool isAggregateType() const
Return true if the type is an aggregate type.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Class to represent vector types.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
amdgpu Simplify well known AMD library false Value Value * Arg
void removeAttributes(unsigned i, const AttrBuilder &Attrs)
Removes the attributes from the list of attributes.
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
iterator insert(iterator where, pointer New)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
void emplace_back(ArgTypes &&... Args)
LLVM_NODISCARD bool empty() const
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
LLVM_NODISCARD bool empty() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType() const
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Type * getElementType() const
iterator_range< arg_iterator > args()
bool isStructTy() const
True if this is an instance of StructType.
bool isArrayTy() const
True if this is an instance of ArrayType.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.