LLVM8Doxygen/AMDGPULowerKernelArguments_8cpp_source.html

 //===-- AMDGPULowerKernelArguments.cpp ------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file This pass replaces accesses to kernel arguments with loads from
 /// offsets from the kernarg base pointer.
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"

 #define DEBUG_TYPE "amdgpu-lower-kernel-arguments"

 using namespace llvm;

 namespace {

 class AMDGPULowerKernelArguments : public FunctionPass{
 public:
   static char ID;

   AMDGPULowerKernelArguments() : FunctionPass(ID) {}

   bool runOnFunction(Function &F) override;

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetPassConfig>();
     AU.setPreservesAll();
  }
 };

 } // end anonymous namespace

 /// Replace each used argument of an AMDGPU kernel with an invariant load from
 /// the kernarg segment.
 ///
 /// A call to the amdgcn.kernarg.segment.ptr intrinsic is inserted at the top of
 /// the entry block; every lowered argument becomes a GEP + bitcast + load off
 /// that pointer, and all uses of the argument are redirected to the loaded
 /// value.
 ///
 /// \returns false (leaving \p F untouched) when \p F is not an AMDGPU_KERNEL
 /// function, has no arguments, or has a zero-sized kernarg segment; true
 /// otherwise.
 bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
   if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
     return false;
   if (F.arg_empty())
     return false;

   const TargetMachine &Machine =
       getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
   const GCNSubtarget &Subtarget = Machine.getSubtarget<GCNSubtarget>(F);

   auto *ParentModule = F.getParent();
   LLVMContext &Ctx = ParentModule->getContext();
   const DataLayout &Layout = ParentModule->getDataLayout();

   // Everything created below is inserted ahead of the first instruction of
   // the entry block.
   BasicBlock &Entry = *F.begin();
   IRBuilder<> Builder(&*Entry.begin());

   // FIXME: Increase if necessary
   const unsigned KernArgBaseAlign = 16;
   const uint64_t BaseOffset = Subtarget.getExplicitKernelArgOffset(F);

   // FIXME: Alignment is broken with explicit arg offset.
   unsigned MaxAlign;
   const uint64_t TotalKernArgSize =
       Subtarget.getKernArgSegmentSize(F, MaxAlign);
   if (TotalKernArgSize == 0)
     return false;

   CallInst *SegmentPtr = Builder.CreateIntrinsic(
       Intrinsic::amdgcn_kernarg_segment_ptr, {}, {}, nullptr,
       F.getName() + ".kernarg.segment");

   // The segment pointer is never null and the whole segment may be read.
   SegmentPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
   SegmentPtr->addAttribute(
       AttributeList::ReturnIndex,
       Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

   const unsigned SegmentAS = SegmentPtr->getType()->getPointerAddressSpace();

   // Builds a metadata node holding a single i64 constant; used for the
   // dereferenceable / dereferenceable_or_null / align load metadata.
   MDBuilder MDB(Ctx);
   auto MakeI64Node = [&](uint64_t Val) -> MDNode * {
     return MDNode::get(
         Ctx, MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(), Val)));
   };

   uint64_t ExplicitArgOffset = 0;
   for (Argument &Arg : F.args()) {
     Type *ArgTy = Arg.getType();
     const unsigned ABIAlign = Layout.getABITypeAlignment(ArgTy);
     const unsigned SizeInBits = Layout.getTypeSizeInBits(ArgTy);
     const unsigned AllocSize = Layout.getTypeAllocSize(ArgTy);

     // The running offset advances for every argument, including the ones
     // skipped below, so later arguments keep their position in the segment.
     const uint64_t AlignedStart = alignTo(ExplicitArgOffset, ABIAlign);
     const uint64_t EltOffset = AlignedStart + BaseOffset;
     ExplicitArgOffset = AlignedStart + AllocSize;

     if (Arg.use_empty())
       continue;

     if (const auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
       // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
       // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
       // can't represent this with range metadata because it's only allowed for
       // integer types.
       const bool IsLocalPtrOnSI =
           PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
           Subtarget.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS;

       // FIXME: We can replace this with equivalent alias.scope/noalias
       // metadata, but this appears to be a lot of work.
       if (IsLocalPtrOnSI || Arg.hasNoAliasAttr())
         continue;
     }

     auto *VecTy = dyn_cast<VectorType>(ArgTy);
     const bool IsV3 = VecTy && VecTy->getNumElements() == 3;
     // FIXME: Handle aggregate types
     const bool DoShiftOpt = SizeInBits < 32 && !ArgTy->isAggregateType();

     const int64_t AlignDownOffset = alignDown(EltOffset, 4);
     const int64_t OffsetDiff = EltOffset - AlignDownOffset;
     const uint64_t LoadOffset = DoShiftOpt ? AlignDownOffset : EltOffset;
     const unsigned AdjustedAlign = MinAlign(LoadOffset, KernArgBaseAlign);

     // Since we don't have sub-dword scalar loads, avoid doing an extload by
     // loading earlier than the argument address, and extracting the relevant
     // bits.
     //
     // Additionally widen any sub-dword load to i32 even if suitably aligned,
     // so that CSE between different argument loads works easily.
     Type *LoadTy = ArgTy;
     const char *GEPSuffix = ".kernarg.offset";
     if (DoShiftOpt) {
       LoadTy = Builder.getInt32Ty();
       GEPSuffix = ".kernarg.offset.align.down";
     }

     Value *ArgPtr = Builder.CreateConstInBoundsGEP1_64(
         SegmentPtr, LoadOffset, Arg.getName() + GEPSuffix);
     ArgPtr = Builder.CreateBitCast(ArgPtr, LoadTy->getPointerTo(SegmentAS),
                                    ArgPtr->getName() + ".cast");

     VectorType *V4Ty = nullptr;
     if (IsV3 && SizeInBits >= 32) {
       V4Ty = VectorType::get(VecTy->getVectorElementType(), 4);
       // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
       ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->getPointerTo(SegmentAS));
     }

     LoadInst *Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign);
     Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

     // Carry pointer-argument attributes over to the load as metadata.
     if (isa<PointerType>(ArgTy)) {
       if (Arg.hasNonNullAttr())
         Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

       const uint64_t DerefBytes = Arg.getDereferenceableBytes();
       if (DerefBytes != 0)
         Load->setMetadata(LLVMContext::MD_dereferenceable,
                           MakeI64Node(DerefBytes));

       const uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
       if (DerefOrNullBytes != 0)
         Load->setMetadata(LLVMContext::MD_dereferenceable_or_null,
                           MakeI64Node(DerefOrNullBytes));

       const unsigned ParamAlign = Arg.getParamAlignment();
       if (ParamAlign != 0)
         Load->setMetadata(LLVMContext::MD_align, MakeI64Node(ParamAlign));
     }

     // TODO: Convert noalias arg to !noalias

     Value *Replacement;
     if (DoShiftOpt) {
       // Shift the argument's bytes down to bit 0 of the loaded dword, then
       // narrow to the argument's width and reinterpret as its real type.
       Value *Bits = Load;
       if (OffsetDiff != 0)
         Bits = Builder.CreateLShr(Load, OffsetDiff * 8);

       IntegerType *ArgIntTy = Builder.getIntNTy(SizeInBits);
       Value *Narrowed = Builder.CreateTrunc(Bits, ArgIntTy);
       Replacement =
           Builder.CreateBitCast(Narrowed, ArgTy, Arg.getName() + ".load");
     } else if (IsV3) {
       // Drop the fourth lane of the widened vector load.
       Replacement = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
                                                 {0, 1, 2},
                                                 Arg.getName() + ".load");
     } else {
       Load->setName(Arg.getName() + ".load");
       Replacement = Load;
     }
     Arg.replaceAllUsesWith(Replacement);
   }

   SegmentPtr->addAttribute(
       AttributeList::ReturnIndex,
       Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

   return true;
 }

 // Register the pass under the DEBUG_TYPE command-line name. No
 // INITIALIZE_PASS_DEPENDENCY entries sit between BEGIN and END.
 INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE,
                       "AMDGPU Lower Kernel Arguments", false, false)
 INITIALIZE_PASS_END(AMDGPULowerKernelArguments, DEBUG_TYPE,
                     "AMDGPU Lower Kernel Arguments", false, false)

 // Definition of the static pass identifier declared in the class.
 char AMDGPULowerKernelArguments::ID = 0;

 /// Factory for the kernel-argument lowering pass.
 ///
 /// \returns a newly heap-allocated AMDGPULowerKernelArguments instance.
 /// NOTE(review): presumably ownership passes to whichever pass manager the
 /// result is added to -- confirm at the call sites.
 FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
   FunctionPass *LoweringPass = new AMDGPULowerKernelArguments();
   return LoweringPass;
 }
llvm::Type::getVectorElementType
Type * getVectorElementType() const
Definition: Type.h:371

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111

Instruction.h

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition: AMDGPUMetadata.h:161

Loads.h

TargetPassConfig.h

llvm::LLVMContext::MD_dereferenceable_or_null
Definition: LLVMContext.h:93

llvm::max
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
Definition: GCNRegPressure.h:89

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30

AMDGPUSubtarget.h
AMDGPU specific subclass of TargetSubtarget.

Instructions.h

llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:24

llvm::MDBuilder::createConstant
ConstantAsMetadata * createConstant(Constant *C)
Return the given constant as metadata.
Definition: MDBuilder.cpp:25

DerivedTypes.h

llvm::Attribute::getWithAlignment
static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:125

Type.h

llvm::Attribute::getWithDereferenceableBytes
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
Definition: Attributes.cpp:138

llvm::ARM_MB::ST
Definition: ARMBaseInfo.h:74

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1438

Metadata.h
This file contains the declarations for metadata subclasses.

AMDGPU.h

BasicBlock.h

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerKernelArguments.cpp:40

F
F(f)

llvm::alignTo
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685

llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503

llvm::Function
Definition: Function.h:60

llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:168

llvm::CallBase::addAttribute
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
Definition: InstrTypes.h:1261

llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269

llvm::alignDown
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:718

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:66

llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371

llvm::LLVMContext::MD_dereferenceable
Definition: LLVMContext.h:92

llvm::Module::getContext
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244

llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:652

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743

llvm::Intrinsic::amdgcn_kernarg_segment_ptr
Definition: Intrinsics.h:1014

Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...

unsigned

false
Definition: StackSlotColoring.cpp:142

llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285

llvm::SequentialType::getNumElements
uint64_t getNumElements() const
Definition: DerivedTypes.h:359

llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:86

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245

llvm::AttributeList::ReturnIndex
Definition: Attributes.h:330

llvm::Intrinsic::ID
ID
Definition: Intrinsics.h:37

Operator.h

llvm::Function::arg_empty
bool arg_empty() const
Definition: Function.h:699

llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429

llvm::SystemZISD::TM
Definition: SystemZISelLowering.h:68

llvm::Function::begin
iterator begin()
Definition: Function.h:656

llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:467

llvm::LLVMContext::MD_invariant_load
Definition: LLVMContext.h:86

llvm::SPII::Load
Definition: SparcInstrInfo.h:33

llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610

llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:66

llvm::createAMDGPULowerKernelArgumentsPass
FunctionPass * createAMDGPULowerKernelArgumentsPass()
Definition: AMDGPULowerKernelArguments.cpp:238

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

Arguments
AMDGPU Lower Kernel Arguments
Definition: AMDGPULowerKernelArguments.cpp:233

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:43

AMDGPUAS::LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:260

llvm::CodeModel::Kernel
Definition: CodeGen.h:28

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285

llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1415

INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69

llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226

llvm::GCNSubtarget
Definition: AMDGPUSubtarget.h:246

llvm::Attribute::NonNull
Definition: Attributes.h:108

StringRef.h

INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE, "AMDGPU Lower Kernel Arguments", false, false) INITIALIZE_PASS_END(AMDGPULowerKernelArguments

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213

MDBuilder.h

llvm::DataLayout::getABITypeAlignment
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730

llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:258

llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:180

llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622

llvm::VectorType
Class to represent vector types.
Definition: DerivedTypes.h:393

llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
Definition: AMDGPUSubtarget.h:56

llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:121

Pass.h

Function.h

Arg
amdgpu Simplify well known AMD library false Value Value * Arg
Definition: AMDGPULibCalls.cpp:220

llvm::HexPrintStyle::Lower

llvm::DataLayout::getTypeSizeInBits
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:568

llvm::DataLayout::getTypeAllocSize
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436

Value.h

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214

llvm::LLVMContext::MD_nonnull
Definition: LLVMContext.h:91

llvm::dyn_cast
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323

Size
uint32_t Size
Definition: Profile.cpp:47

IRBuilder.h

LLVMContext.h

llvm::LLVMContext::MD_align
Definition: LLVMContext.h:97

llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566

llvm::Value
LLVM Value Representation.
Definition: Value.h:73

llvm::VectorType::get
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:606

InstrTypes.h

Casting.h

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59

Passes.h

llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:323

llvm::CallingConv::AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201

AMDGPU
Definition: AMDGPUPTNote.h:20

llvm::MDBuilder
Definition: MDBuilder.h:35

llvm::Function::args
iterator_range< arg_iterator > args()
Definition: Function.h:689