62 #define DEBUG_TYPE "amdgpu-promote-alloca" 69 "disable-promote-alloca-to-vector",
70 cl::desc(
"Disable promote alloca to vector"),
74 "disable-promote-alloca-to-lds",
75 cl::desc(
"Disable promote alloca to LDS"),
89 bool IsAMDGCN =
false;
90 bool IsAMDHSA =
false;
92 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
97 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
99 std::vector<Value*> &WorkList)
const;
105 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
107 int OpIdx0,
int OpIdx1)
const;
110 bool hasSufficientLocalMem(
const Function &
F);
117 bool doInitialization(
Module &M)
override;
120 StringRef getPassName()
const override {
return "AMDGPU Promote Alloca"; }
122 bool handleAlloca(
AllocaInst &
I,
bool SufficientLDS);
135 "AMDGPU promote alloca to vector or LDS",
false,
false)
139 bool AMDGPUPromoteAlloca::doInitialization(
Module &M) {
141 DL = &
Mod->getDataLayout();
150 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
155 const Triple &TT =
TM->getTargetTriple();
163 bool SufficientLDS = hasSufficientLocalMem(F);
164 bool Changed =
false;
166 for (
auto I = EntryBB.
begin(),
E = EntryBB.
end();
I !=
E; ) {
171 Changed |= handleAlloca(*AI, SufficientLDS);
177 std::pair<Value *, Value *>
178 AMDGPUPromoteAlloca::getLocalSizeYZ(
IRBuilder<> &Builder) {
194 return std::make_pair(LocalSizeY, LocalSizeZ);
262 return std::make_pair(Y, LoadZU);
302 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
305 auto I = GEPIdx.find(GEP);
306 return I == GEPIdx.end() ? nullptr : I->second;
330 LoadInst *LI = cast<LoadInst>(Inst);
331 if (isa<AllocaInst>(User) &&
333 isa<VectorType>(LI->
getType()))
337 case Instruction::BitCast:
344 if (isa<AllocaInst>(User) &&
357 if (DisablePromoteAllocaToVector) {
358 LLVM_DEBUG(
dbgs() <<
" Promotion alloca to vector is disabled\n");
379 std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
380 std::vector<Value*> WorkList;
381 for (
User *AllocaUser : Alloca->
users()) {
387 WorkList.push_back(AllocaUser);
396 LLVM_DEBUG(
dbgs() <<
" Cannot compute vector index for GEP " << *GEP
402 for (
User *GEPUser : AllocaUser->
users()) {
406 WorkList.push_back(GEPUser);
414 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> " 415 << *VectorTy <<
'\n');
417 for (
Value *V : WorkList) {
429 Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
430 Value *VecValue = Builder.CreateLoad(BitCast);
431 Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
444 Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
445 Value *VecValue = Builder.CreateLoad(BitCast);
446 Value *NewVecValue = Builder.CreateInsertElement(VecValue,
449 Builder.CreateStore(NewVecValue, BitCast);
453 case Instruction::BitCast:
454 case Instruction::AddrSpaceCast:
486 bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(
Value *BaseAlloca,
496 if (isa<ConstantPointerNull>(OtherOp))
500 if (!isa<AllocaInst>(OtherObj))
509 if (OtherObj != BaseAlloca) {
511 dbgs() <<
"Found a binary instruction with another alloca object\n");
518 bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
521 std::vector<Value*> &WorkList)
const {
531 WorkList.push_back(
User);
536 if (UseInst->
getOpcode() == Instruction::PtrToInt)
539 if (
LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
540 if (LI->isVolatile())
546 if (
StoreInst *
SI = dyn_cast<StoreInst>(UseInst)) {
547 if (
SI->isVolatile())
551 if (
SI->getPointerOperand() != Val)
553 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
554 if (RMW->isVolatile())
557 if (CAS->isVolatile())
563 if (
ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
564 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
568 WorkList.push_back(ICmp);
571 if (UseInst->
getOpcode() == Instruction::AddrSpaceCast) {
576 WorkList.push_back(
User);
586 if (!
GEP->isInBounds())
593 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val,
SI, 1, 2))
598 if (
PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
601 switch (Phi->getNumIncomingValues()) {
605 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
613 WorkList.push_back(
User);
614 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
621 bool AMDGPUPromoteAlloca::hasSufficientLocalMem(
const Function &F) {
633 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to " 634 "local memory disabled.\n");
640 if (LocalMemLimit == 0)
646 CurrentLocalMemUsage = 0;
657 unsigned Align = GV.getAlignment();
666 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage, Align);
667 CurrentLocalMemUsage += AllocSize;
682 if (OccupancyHint == 0)
690 MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
694 unsigned MaxSizeWithWaveCount
698 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
701 LocalMemLimit = MaxSizeWithWaveCount;
705 <<
" Rounding size to " << MaxSizeWithWaveCount
706 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n' 707 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
708 <<
" available for promotion\n");
714 bool AMDGPUPromoteAlloca::handleAlloca(
AllocaInst &
I,
bool SufficientLDS) {
730 if (DisablePromoteAllocaToLDS)
746 <<
" promote alloca to LDS not supported with calling convention.\n");
771 NewSize += AllocSize;
773 if (NewSize > LocalMemLimit) {
775 <<
" bytes of local memory not available to promote\n");
779 CurrentLocalMemUsage = NewSize;
781 std::vector<Value*> WorkList;
783 if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
803 Value *TCntY, *TCntZ;
805 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
806 Value *TIdX = getWorkitemID(Builder, 0);
807 Value *TIdY = getWorkitemID(Builder, 1);
808 Value *TIdZ = getWorkitemID(Builder, 2);
826 for (
Value *V : WorkList) {
829 if (
ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
830 Value *Src0 = CI->getOperand(0);
834 if (isa<ConstantPointerNull>(CI->getOperand(0)))
837 if (isa<ConstantPointerNull>(CI->getOperand(1)))
845 if (isa<AddrSpaceCastInst>(V))
853 V->mutateType(NewTy);
857 if (isa<ConstantPointerNull>(
SI->getOperand(1)))
860 if (isa<ConstantPointerNull>(
SI->getOperand(2)))
862 }
else if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
863 for (
unsigned I = 0,
E = Phi->getNumIncomingValues(); I !=
E; ++
I) {
864 if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
936 return new AMDGPUPromoteAlloca();
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Value * getValueOperand()
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string, and methods for querying it. ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly, instead of converting the strin...
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
An instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
bool isPromoteAllocaEnabled() const
This class represents a function call, abstracting a target machine's calling convention.
unsigned getSourceAlignment() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Address space for private memory.
This class wraps the llvm.memset intrinsic.
FunctionPass * createAMDGPUPromoteAlloca()
Type * getPointerOperandType() const
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
An instruction for reading from memory.
An instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
Value * getLength() const
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
iterator begin()
Instruction iterator methods.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Address space for constant memory (VTX2)
This class represents the LLVM 'select' instruction.
Type * getPointerElementType() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
This class wraps the llvm.memmove intrinsic.
int getLocalMemorySize() const
SPIR_KERNEL - Calling convention for SPIR kernel functions.
A Use represents the edge between a Value definition and its users.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This file contains the simple types necessary to represent the attributes associated with functions a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memmove between the specified pointers.
unsigned getDestAlignment() const
uint64_t getNumElements() const
This file implements a class to represent arbitrary precision integral constant values and operations...
Class to represent function types.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Type * getType() const
All values are typed, get the type of this value.
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
BasicBlock * GetInsertBlock() const
Class to represent array types.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
An instruction for storing to memory.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Value * getOperand(unsigned i) const
Class to represent pointers.
An instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
ArrayRef< Type * > params() const
Represent the analysis usage information of a pass.
Address space for local memory.
This instruction compares its operands according to the predicate given to the constructor.
void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
FunctionPass class - This class is used to implement most global optimizations.
Value * getPointerOperand()
unsigned getAddressSpace() const
Return the address space of the Pointer type.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Triple - Helper class for working with autoconf configuration names.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is the superclass of the array and vector type classes.
unsigned getNumOperands() const
This is the shared class of boolean and integer constants.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Module.h This file contains the declarations for the Module class.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
CallInst * CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
This class wraps the llvm.memcpy intrinsic.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Value * getRawSource() const
Return the arguments to the instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
The access may modify the value stored in memory.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Class to represent vector types.
iterator_range< user_iterator > users()
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
void addDereferenceableAttr(unsigned i, uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void setUnnamedAddr(UnnamedAddr Val)
Type * getPointerOperandType() const
static IntegerType * getInt32Ty(LLVMContext &C)
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Rename collisions when linking (static functions).
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Primary interface to the complete machine description for the target machine.
Type * getElementType() const
char & AMDGPUPromoteAllocaID
StringRef - Represent a constant reference to a string, i.e.
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size...
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const =0
Value * getPointerOperand()
Value * getRawDest() const
Calling convention for AMDGPU code object kernels.
A wrapper class for inspecting calls to intrinsic functions.
const BasicBlock * getParent() const
An instruction to allocate memory on the stack.
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.