40 #define DEBUG_TYPE "amdgpu-lower-kernel-arguments" 67 auto &TPC = getAnalysis<TargetPassConfig>();
76 const unsigned KernArgBaseAlign = 16;
77 const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
81 const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
82 if (TotalKernArgSize == 0)
87 nullptr, F.
getName() +
".kernarg.segment");
94 uint64_t ExplicitArgOffset = 0;
102 uint64_t EltOffset =
alignTo(ExplicitArgOffset, Align) + BaseOffset;
103 ExplicitArgOffset =
alignTo(ExplicitArgOffset, Align) + AllocSize;
108 if (
PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
119 if (
Arg.hasNoAliasAttr())
129 int64_t AlignDownOffset =
alignDown(EltOffset, 4);
130 int64_t OffsetDiff = EltOffset - AlignDownOffset;
131 unsigned AdjustedAlign =
MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
143 ArgPtr = Builder.CreateConstInBoundsGEP1_64(
146 Arg.
getName() +
".kernarg.offset.align.down");
147 ArgPtr = Builder.CreateBitCast(ArgPtr,
148 Builder.getInt32Ty()->getPointerTo(AS),
151 ArgPtr = Builder.CreateConstInBoundsGEP1_64(
155 ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->
getPointerTo(AS),
159 if (IsV3 && Size >= 32) {
162 ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->
getPointerTo(AS));
165 LoadInst *
Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign);
170 if (isa<PointerType>(ArgTy)) {
171 if (
Arg.hasNonNullAttr())
174 uint64_t DerefBytes =
Arg.getDereferenceableBytes();
175 if (DerefBytes != 0) {
183 uint64_t DerefOrNullBytes =
Arg.getDereferenceableOrNullBytes();
184 if (DerefOrNullBytes != 0) {
189 DerefOrNullBytes))));
192 unsigned ParamAlign =
Arg.getParamAlignment();
193 if (ParamAlign != 0) {
205 Value *ExtractBits = OffsetDiff == 0 ?
206 Load : Builder.CreateLShr(Load, OffsetDiff * 8);
209 Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
210 Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
232 "AMDGPU Lower Kernel Arguments",
false,
false)
236 char AMDGPULowerKernelArguments::
ID = 0;
239 return new AMDGPULowerKernelArguments();
Type * getVectorElementType() const
A parsed version of the target data layout string in and methods for querying it. ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents an incoming formal argument to a Function.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
ConstantAsMetadata * createConstant(Constant *C)
Return the given constant as metadata.
static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align)
Return a uniquified Attribute object that has the specific alignment set.
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
This class represents a function call, abstracting a target machine's calling convention.
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Align.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
An instruction for reading from memory.
void addAttribute(unsigned i, Attribute::AttrKind Kind)
Adds the attribute to the list of attributes.
iterator begin()
Instruction iterator methods.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and is congruent to Skew mod Align.
AnalysisUsage & addRequired()
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
LLVMContext & getContext() const
Get the global data context.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This file contains the simple types necessary to represent the attributes associated with functions a...
void setName(const Twine &Name)
Change the name of the value.
uint64_t getNumElements() const
Target-Independent Code Generator Pass Configuration Options.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Class to represent pointers.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
static bool runOnFunction(Function &F, bool PostInlining)
FunctionPass * createAMDGPULowerKernelArgumentsPass()
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
AMDGPU Lower Kernel Arguments
Represent the analysis usage information of a pass.
Address space for local memory.
FunctionPass class - This class is used to implement most global optimizations.
Class to represent integer types.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE, "AMDGPU Lower Kernel Arguments", false, false) INITIALIZE_PASS_END(AMDGPULowerKernelArguments
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
bool isAggregateType() const
Return true if the type is an aggregate type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Class to represent vector types.
void setPreservesAll()
Set by analyses that do not transform their input at all.
amdgpu Simplify well known AMD library false Value Value * Arg
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
StringRef getName() const
Return a constant reference to the value's name.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Primary interface to the complete machine description for the target machine.
Calling convention for AMDGPU code object kernels.
iterator_range< arg_iterator > args()