LLVM  8.0.1
AMDGPUCallLowering.cpp
Go to the documentation of this file.
1 //===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements the lowering of LLVM calls to machine code calls for
12 /// GlobalISel.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUCallLowering.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUISelLowering.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIISelLowering.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "SIRegisterInfo.h"
27 
28 using namespace llvm;
29 
31  : CallLowering(&TLI) {
32 }
33 
35  const Value *Val,
36  ArrayRef<unsigned> VRegs) const {
37  // FIXME: Add support for non-void returns.
38  if (Val)
39  return false;
40 
41  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
42  return true;
43 }
44 
/// Compute the address of a kernel argument and return the generic virtual
/// register that holds it.
///
/// The address is built as a G_GEP of the live-in virtual register associated
/// with KernArgSegmentPtr plus a 64-bit scalar constant holding \p Offset.
///
/// \param MIRBuilder builder used to emit the G_CONSTANT and G_GEP.
/// \param ParamTy    IR type of the parameter. NOTE(review): the line that
///                   uses it is not visible in this listing; presumably it is
///                   the pointee type of PtrTy -- confirm upstream.
/// \param Offset     value materialized as the 64-bit offset operand.
/// \returns the virtual register defined by the G_GEP.
///
/// NOTE(review): this listing skips original lines 50-51, 54 and 58, so the
/// declarations of MRI and PtrTy and the initializer of KernArgSegmentPtr are
/// not shown here. Confirm them against the upstream file before editing.
45 unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
46  Type *ParamTy,
47  uint64_t Offset) const {
48 
49  MachineFunction &MF = MIRBuilder.getMF();
52  const Function &F = MF.getFunction();
53  const DataLayout &DL = F.getParent()->getDataLayout();
// Low-level type of the result pointer, derived from PtrTy and the data layout.
55  LLT PtrType = getLLTForType(*PtrTy, DL);
56  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
57  unsigned KernArgSegmentPtr =
// Look up the virtual register registered as the live-in for that register.
59  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
60 
// Materialize Offset as a 64-bit scalar constant.
61  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
62  MIRBuilder.buildConstant(OffsetReg, Offset);
63 
// DstReg = G_GEP KernArgSegmentVReg, OffsetReg
64  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
65 
66  return DstReg;
67 }
68 
/// Emit a G_LOAD of one kernel argument into \p DstReg.
///
/// The load address is obtained from lowerParameterPtr(MIRBuilder, ParamTy,
/// Offset), and the load is described by the memory operand MMO.
///
/// \param MIRBuilder builder used to emit the load.
/// \param ParamTy    IR type of the parameter; its store size is queried from
///                   the module's data layout as TypeSize.
/// \param Offset     forwarded unchanged to lowerParameterPtr.
/// \param Align      alignment value; it appears, together with TypeSize, as a
///                   trailing argument of the memory-operand construction.
/// \param DstReg     register that receives the loaded value.
///
/// NOTE(review): this listing skips original lines 76 and 82-84, so the
/// declaration of PtrTy and the start of the expression that initializes MMO
/// (including any memory-operand flags) are not shown here. Confirm them
/// against the upstream file before editing.
69 void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
70  Type *ParamTy, uint64_t Offset,
71  unsigned Align,
72  unsigned DstReg) const {
73  MachineFunction &MF = MIRBuilder.getMF();
74  const Function &F = MF.getFunction();
75  const DataLayout &DL = F.getParent()->getDataLayout();
// The pointer info is built from an undef value of type PtrTy.
77  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
78  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
79  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
80 
81  MachineMemOperand *MMO =
85  TypeSize, Align);
86 
// DstReg = G_LOAD PtrReg, described by MMO.
87  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
88 }
89 
91  const Function &F,
92  ArrayRef<unsigned> VRegs) const {
93  // AMDGPU_GS and AMDGPU_HS are not supported yet.
96  return false;
97 
98  MachineFunction &MF = MIRBuilder.getMF();
99  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
102  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
103  const DataLayout &DL = F.getParent()->getDataLayout();
104 
106  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
107 
108  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
109  if (Info->hasPrivateSegmentBuffer()) {
110  unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
111  MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
112  CCInfo.AllocateReg(PrivateSegmentBufferReg);
113  }
114 
115  if (Info->hasDispatchPtr()) {
116  unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
117  // FIXME: Need to add reg as live-in
118  CCInfo.AllocateReg(DispatchPtrReg);
119  }
120 
121  if (Info->hasQueuePtr()) {
122  unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
123  // FIXME: Need to add reg as live-in
124  CCInfo.AllocateReg(QueuePtrReg);
125  }
126 
127  if (Info->hasKernargSegmentPtr()) {
128  unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
130  unsigned VReg = MRI.createGenericVirtualRegister(P2);
131  MRI.addLiveIn(InputPtrReg, VReg);
132  MIRBuilder.getMBB().addLiveIn(InputPtrReg);
133  MIRBuilder.buildCopy(VReg, InputPtrReg);
134  CCInfo.AllocateReg(InputPtrReg);
135  }
136 
137  if (Info->hasDispatchID()) {
138  unsigned DispatchIDReg = Info->addDispatchID(*TRI);
139  // FIXME: Need to add reg as live-in
140  CCInfo.AllocateReg(DispatchIDReg);
141  }
142 
143  if (Info->hasFlatScratchInit()) {
144  unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
145  // FIXME: Need to add reg as live-in
146  CCInfo.AllocateReg(FlatScratchInitReg);
147  }
148 
149  // The infrastructure for normal calling convention lowering is essentially
150  // useless for kernels. We want to avoid any kind of legalization or argument
151  // splitting.
153  unsigned i = 0;
154  const unsigned KernArgBaseAlign = 16;
155  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
156  uint64_t ExplicitArgOffset = 0;
157 
158  // TODO: Align down to dword alignment and extract bits for extending loads.
159  for (auto &Arg : F.args()) {
160  Type *ArgTy = Arg.getType();
161  unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
162  if (AllocSize == 0)
163  continue;
164 
165  unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
166 
167  uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
168  ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
169 
170  unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
171  ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
172  lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
173  ++i;
174  }
175 
176  return true;
177  }
178 
179  unsigned NumArgs = F.arg_size();
180  Function::const_arg_iterator CurOrigArg = F.arg_begin();
181  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
182  unsigned PSInputNum = 0;
183  BitVector Skipped(NumArgs);
184  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
185  EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
186 
187  // We can only handle simple value types at the moment.
188  ISD::ArgFlagsTy Flags;
189  ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
190  setArgFlags(OrigArg, i + 1, DL, F);
191  Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
192 
194  !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
195  PSInputNum <= 15) {
196  if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
197  Skipped.set(i);
198  ++PSInputNum;
199  continue;
200  }
201 
202  Info->markPSInputAllocated(PSInputNum);
203  if (!CurOrigArg->use_empty())
204  Info->markPSInputEnabled(PSInputNum);
205 
206  ++PSInputNum;
207  }
208 
210  /*IsVarArg=*/false);
211 
212  if (ValEVT.isVector()) {
213  EVT ElemVT = ValEVT.getVectorElementType();
214  if (!ValEVT.isSimple())
215  return false;
216  MVT ValVT = ElemVT.getSimpleVT();
217  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
218  OrigArg.Flags, CCInfo);
219  if (!Res)
220  return false;
221  } else {
222  MVT ValVT = ValEVT.getSimpleVT();
223  if (!ValEVT.isSimple())
224  return false;
225  bool Res =
226  AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
227 
228  // Fail if we don't know how to handle this type.
229  if (Res)
230  return false;
231  }
232  }
233 
235 
238  for (unsigned i = 0, OrigArgIdx = 0;
239  OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
240  if (Skipped.test(OrigArgIdx))
241  continue;
242  CCValAssign &VA = ArgLocs[i++];
243  MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
244  MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
245  MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
246  }
247  return true;
248  }
249 
250  return false;
251 }
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:177
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BitVector & set()
Definition: BitVector.h:398
Interface definition for SIRegisterInfo.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1)
Build and insert Res = G_GEP Op0, Op1.
AMDGPU specific subclass of TargetSubtarget.
bool isPSInputAllocated(unsigned Index) const
This class represents lattice values for constants.
Definition: AllocatorList.h:24
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
This file describes how to lower LLVM calls to machine code calls.
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:630
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
bool test(unsigned Idx) const
Definition: BitVector.h:502
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
unsigned const TargetRegisterInfo * TRI
F(f)
void markPSInputEnabled(unsigned Index)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
A description of a memory reference used in the backend.
Address space for constant memory (VTX2)
Definition: AMDGPU.h:259
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371
unsigned addDispatchID(const SIRegisterInfo &TRI)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
MachineFunction & getMF()
Getter for the function we currently build.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
void setOrigAlign(unsigned A)
void markPSInputAllocated(unsigned Index)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
Class to represent pointers.
Definition: DerivedTypes.h:467
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
unsigned const MachineRegisterInfo * MRI
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
Helper class to build MachineInstr.
SI DAG Lowering interface definition.
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
size_t arg_size() const
Definition: Function.h:698
arg_iterator arg_begin()
Definition: Function.h:671
The memory access is non-temporal.
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
Extended Value Type.
Definition: ValueTypes.h:34
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1415
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
This class contains a discriminated union of information about pointers in memory operands...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
LLT getLLTForType(Type &Ty, const DataLayout &DL)
Construct a low-level type based on an LLVM type.
CCState - This class holds information needed while lowering arguments and return values...
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
Interface definition of the TargetLowering class that is common to all AMD GPUs.
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
CCValAssign - Represent assignment of one arg/retval to a location.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef< unsigned > VRegs) const override
This hook must be implemented to lower the incoming (formal) arguments, described by Args...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
This file declares the MachineIRBuilder class.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
amdgpu Simplify well known AMD library false Value Value * Arg
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
Provides AMDGPU specific target descriptions.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
The memory access always returns the same value (or traps).
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef< unsigned > VRegs) const override
This hook must be implemented to lower outgoing return values, described by Val, into the specified v...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space (defaulting to 0).
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:419
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
unsigned getLocReg() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
bool use_empty() const
Definition: Value.h:323
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
iterator_range< arg_iterator > args()
Definition: Function.h:689