LLVM  8.0.1
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "SIMachineFunctionInfo.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIRegisterInfo.h"
15 #include "Utils/AMDGPUBaseInfo.h"
16 #include "llvm/ADT/Optional.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Function.h"
23 #include <cassert>
24 #include <vector>
25 
26 #define MAX_LANES 64
27 
28 using namespace llvm;
29 
32  PrivateSegmentBuffer(false),
33  DispatchPtr(false),
34  QueuePtr(false),
35  KernargSegmentPtr(false),
36  DispatchID(false),
37  FlatScratchInit(false),
38  WorkGroupIDX(false),
39  WorkGroupIDY(false),
40  WorkGroupIDZ(false),
41  WorkGroupInfo(false),
42  PrivateSegmentWaveByteOffset(false),
43  WorkItemIDX(false),
44  WorkItemIDY(false),
45  WorkItemIDZ(false),
46  ImplicitBufferPtr(false),
47  ImplicitArgPtr(false),
48  GITPtrHigh(0xffffffff),
49  HighBitsOf32BitAddress(0) {
51  const Function &F = MF.getFunction();
52  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
53  WavesPerEU = ST.getWavesPerEU(F);
54 
55  Occupancy = getMaxWavesPerEU();
56  limitOccupancy(MF);
57  CallingConv::ID CC = F.getCallingConv();
58 
60  if (!F.arg_empty())
61  KernargSegmentPtr = true;
62  WorkGroupIDX = true;
63  WorkItemIDX = true;
64  } else if (CC == CallingConv::AMDGPU_PS) {
65  PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
66  }
67 
68  if (!isEntryFunction()) {
69  // Non-entry functions have no special inputs for now, other than the
70  // registers required for scratch access.
71  ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
72  ScratchWaveOffsetReg = AMDGPU::SGPR4;
73  FrameOffsetReg = AMDGPU::SGPR5;
74  StackPtrOffsetReg = AMDGPU::SGPR32;
75 
76  ArgInfo.PrivateSegmentBuffer =
77  ArgDescriptor::createRegister(ScratchRSrcReg);
79  ArgDescriptor::createRegister(ScratchWaveOffsetReg);
80 
81  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
82  ImplicitArgPtr = true;
83  } else {
84  if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
85  KernargSegmentPtr = true;
88  }
89  }
90 
91  if (ST.debuggerEmitPrologue()) {
92  // Enable everything.
93  WorkGroupIDX = true;
94  WorkGroupIDY = true;
95  WorkGroupIDZ = true;
96  WorkItemIDX = true;
97  WorkItemIDY = true;
98  WorkItemIDZ = true;
99  } else {
100  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
101  WorkGroupIDX = true;
102 
103  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
104  WorkGroupIDY = true;
105 
106  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
107  WorkGroupIDZ = true;
108 
109  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
110  WorkItemIDX = true;
111 
112  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
113  WorkItemIDY = true;
114 
115  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
116  WorkItemIDZ = true;
117  }
118 
119  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
120  bool HasStackObjects = FrameInfo.hasStackObjects();
121 
122  if (isEntryFunction()) {
123  // X, XY, and XYZ are the only supported combinations, so make sure Y is
124  // enabled if Z is.
125  if (WorkItemIDZ)
126  WorkItemIDY = true;
127 
128  PrivateSegmentWaveByteOffset = true;
129 
130  // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
131  if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
134  ArgDescriptor::createRegister(AMDGPU::SGPR5);
135  }
136 
137  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
138  if (isAmdHsaOrMesa) {
139  PrivateSegmentBuffer = true;
140 
141  if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
142  DispatchPtr = true;
143 
144  if (F.hasFnAttribute("amdgpu-queue-ptr"))
145  QueuePtr = true;
146 
147  if (F.hasFnAttribute("amdgpu-dispatch-id"))
148  DispatchID = true;
149  } else if (ST.isMesaGfxShader(F)) {
150  ImplicitBufferPtr = true;
151  }
152 
153  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
154  KernargSegmentPtr = true;
155 
156  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
157  // TODO: This could be refined a lot. The attribute is a poor way of
158  // detecting calls that may require it before argument lowering.
159  if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
160  FlatScratchInit = true;
161  }
162 
163  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
164  StringRef S = A.getValueAsString();
165  if (!S.empty())
166  S.consumeInteger(0, GITPtrHigh);
167 
168  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
169  S = A.getValueAsString();
170  if (!S.empty())
171  S.consumeInteger(0, HighBitsOf32BitAddress);
172 }
173 
176  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
178  MF.getFunction()));
179 }
180 
182  const SIRegisterInfo &TRI) {
183  ArgInfo.PrivateSegmentBuffer =
184  ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
185  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
186  NumUserSGPRs += 4;
187  return ArgInfo.PrivateSegmentBuffer.getRegister();
188 }
189 
191  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
192  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
193  NumUserSGPRs += 2;
194  return ArgInfo.DispatchPtr.getRegister();
195 }
196 
198  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
199  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
200  NumUserSGPRs += 2;
201  return ArgInfo.QueuePtr.getRegister();
202 }
203 
205  ArgInfo.KernargSegmentPtr
206  = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
207  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
208  NumUserSGPRs += 2;
209  return ArgInfo.KernargSegmentPtr.getRegister();
210 }
211 
213  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
214  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
215  NumUserSGPRs += 2;
216  return ArgInfo.DispatchID.getRegister();
217 }
218 
220  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
221  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
222  NumUserSGPRs += 2;
223  return ArgInfo.FlatScratchInit.getRegister();
224 }
225 
227  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
228  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
229  NumUserSGPRs += 2;
230  return ArgInfo.ImplicitBufferPtr.getRegister();
231 }
232 
233 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
234  for (unsigned I = 0; CSRegs[I]; ++I) {
235  if (CSRegs[I] == Reg)
236  return true;
237  }
238 
239  return false;
240 }
241 
242 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
244  int FI) {
245  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
246 
247  // This has already been allocated.
248  if (!SpillLanes.empty())
249  return true;
250 
251  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
252  const SIRegisterInfo *TRI = ST.getRegisterInfo();
253  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
255  unsigned WaveSize = ST.getWavefrontSize();
256 
257  unsigned Size = FrameInfo.getObjectSize(FI);
258  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
259  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
260 
261  int NumLanes = Size / 4;
262 
263  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
264 
265  // Make sure to handle the case where a wide SGPR spill may span between two
266  // VGPRs.
267  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
268  unsigned LaneVGPR;
269  unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
270 
271  if (VGPRIndex == 0) {
272  LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
273  if (LaneVGPR == AMDGPU::NoRegister) {
274  // We have no VGPRs left for spilling SGPRs. Reset because we will not
275  // partially spill the SGPR to VGPRs.
276  SGPRToVGPRSpills.erase(FI);
277  NumVGPRSpillLanes -= I;
278  return false;
279  }
280 
281  Optional<int> CSRSpillFI;
282  if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
283  isCalleeSavedReg(CSRegs, LaneVGPR)) {
284  CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
285  }
286 
287  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
288 
289  // Add this register as live-in to all blocks to avoid machine verifier
290  // complaining about use of an undefined physical register.
291  for (MachineBasicBlock &BB : MF)
292  BB.addLiveIn(LaneVGPR);
293  } else {
294  LaneVGPR = SpillVGPRs.back().VGPR;
295  }
296 
297  SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
298  }
299 
300  return true;
301 }
302 
304  for (auto &R : SGPRToVGPRSpills)
305  MFI.RemoveStackObject(R.first);
306 }
307 
308 
309 /// \returns VGPR used for the work item ID in dimension \p Dim.
310 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
311  switch (Dim) {
312  case 0:
314  return AMDGPU::VGPR0;
315  case 1:
317  return AMDGPU::VGPR1;
318  case 2:
320  return AMDGPU::VGPR2;
321  }
322  llvm_unreachable("unexpected dimension");
323 }
324 
325 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
326  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
327  return AMDGPU::SGPR0 + NumUserSGPRs;
328 }
329 
330 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
331  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
332 }
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
unsigned Reg
unsigned const TargetRegisterInfo * TRI
F(f)
SIMachineFunctionInfo(const MachineFunction &MF)
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
bool isMesaGfxShader(const Function &F) const
unsigned addDispatchID(const SIRegisterInfo &TRI)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void limitOccupancy(const MachineFunction &MF)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:531
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool debuggerEmitPrologue() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static ArgDescriptor createRegister(unsigned Reg)
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Generation getGeneration() const
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
int CreateSpillStackObject(uint64_t Size, unsigned Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInitialPSInputAddr(const Function &F)
Provides AMDGPU specific target descriptions.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
#define I(x, y, z)
Definition: MD5.cpp:58
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:47
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
unsigned getRegister() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getWorkItemIDVGPR(unsigned Dim) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
bool hasCalls() const
Return true if the current function has any function calls.
const SIRegisterInfo * getRegisterInfo() const override