LLVM 8.0.1
SIRegisterInfo.cpp
1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "SIInstrInfo.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/LiveIntervals.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/CodeGen/SlotIndexes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
33  for (unsigned i = 0; PSets[i] != -1; ++i) {
34  if (PSets[i] == (int)PSetID)
35  return true;
36  }
37  return false;
38 }
39 
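// Mark PSetID in PressureSets if any register unit of Reg belongs to that
// pressure set.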
40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
41  BitVector &PressureSets) const {
42  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
43  const int *PSets = getRegUnitPressureSets(*U);
44  if (hasPressureSet(PSets, PSetID)) {
45  PressureSets.set(PSetID);
46  break;
47  }
48  }
49 }
50 
52  "amdgpu-spill-sgpr-to-smem",
53  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
54  cl::init(false));
55 
57  "amdgpu-spill-sgpr-to-vgpr",
58  cl::desc("Enable spilling VGPRs to SGPRs"),
59  cl::ReallyHidden,
60  cl::init(true));
61 
62 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
63  AMDGPURegisterInfo(),
64  SGPRPressureSets(getNumRegPressureSets()),
65  VGPRPressureSets(getNumRegPressureSets()),
66  SpillSGPRToVGPR(false),
67  SpillSGPRToSMEM(false) {
68  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
69  SpillSGPRToSMEM = true;
70  else if (EnableSpillSGPRToVGPR)
71  SpillSGPRToVGPR = true;
72 
73  unsigned NumRegPressureSets = getNumRegPressureSets();
74 
75  SGPRSetID = NumRegPressureSets;
76  VGPRSetID = NumRegPressureSets;
77 
78  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
79  classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
80  classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
81  }
82 
83  // Determine the number of reg units for each pressure set.
84  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
85  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
86  const int *PSets = getRegUnitPressureSets(i);
87  for (unsigned j = 0; PSets[j] != -1; ++j) {
88  ++PressureSetRegUnits[PSets[j]];
89  }
90  }
91 
92  unsigned VGPRMax = 0, SGPRMax = 0;
93  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
94  if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
95  VGPRSetID = i;
96  VGPRMax = PressureSetRegUnits[i];
97  continue;
98  }
99  if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
100  SGPRSetID = i;
101  SGPRMax = PressureSetRegUnits[i];
102  }
103  }
104 
105  assert(SGPRSetID < NumRegPressureSets &&
106  VGPRSetID < NumRegPressureSets);
107 }
108 
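// The private segment buffer descriptor occupies an aligned SGPR quadruple;
// reserve the highest 4-aligned SGPR tuple below the wave's SGPR budget.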
109 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
110  const MachineFunction &MF) const {
111 
112  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
113  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
114  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
115  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
116 }
117 
118 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
119  unsigned Reg;
120 
121  // Try to place it in a hole after PrivateSegmentBufferReg.
122  if (RegCount & 3) {
123  // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
124  // alignment constraints, so we have a hole where we can put the wave offset.
125  Reg = RegCount - 1;
126  } else {
127  // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
128  // wave offset before it.
129  Reg = RegCount - 5;
130  }
131 
132  return Reg;
133 }
134 
135 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
136  const MachineFunction &MF) const {
137  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
138  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
139  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
140 }
141 
142 unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
143  const MachineFunction &MF) const {
144  return AMDGPU::SGPR32;
145 }
146 
147 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
148  BitVector Reserved(getNumRegs());
149 
150  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
151  // this seems likely to result in bugs, so I'm marking them as reserved.
152  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
153  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
154 
155  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
156  reserveRegisterTuples(Reserved, AMDGPU::M0);
157 
158  // Reserve the memory aperture registers.
159  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
160  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
161  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
162  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
163 
164  // Reserve xnack_mask registers - support is not implemented in Codegen.
165  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
166 
167  // Reserve Trap Handler registers - support is not implemented in Codegen.
168  reserveRegisterTuples(Reserved, AMDGPU::TBA);
169  reserveRegisterTuples(Reserved, AMDGPU::TMA);
170  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
171  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
172  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
173  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
174  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
175  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
176  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
177  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
178 
179  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
180 
181  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
182  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
183  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
184  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
185  reserveRegisterTuples(Reserved, Reg);
186  }
187 
188  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
189  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
190  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
191  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
192  reserveRegisterTuples(Reserved, Reg);
193  }
194 
195  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
196 
197  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
198  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
199  // Reserve 1 SGPR for scratch wave offset in case we need to spill.
200  reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
201  }
202 
203  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
204  if (ScratchRSrcReg != AMDGPU::NoRegister) {
205  // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
206  // to spill.
207  // TODO: May need to reserve a VGPR if doing LDS spilling.
208  reserveRegisterTuples(Reserved, ScratchRSrcReg);
209  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
210  }
211 
212  // We have to assume the SP is needed in case there are calls in the function,
213  // which is detected after the function is lowered. If we aren't really going
214  // to need SP, don't bother reserving it.
215  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
216 
217  if (StackPtrReg != AMDGPU::NoRegister) {
218  reserveRegisterTuples(Reserved, StackPtrReg);
219  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
220  }
221 
222  unsigned FrameReg = MFI->getFrameOffsetReg();
223  if (FrameReg != AMDGPU::NoRegister) {
224  reserveRegisterTuples(Reserved, FrameReg);
225  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
226  }
227 
228  return Reserved;
229 }
230 
231 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
232  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
233  if (Info->isEntryFunction()) {
234  const MachineFrameInfo &MFI = Fn.getFrameInfo();
235  return MFI.hasStackObjects() || MFI.hasCalls();
236  }
237 
238  // May need scavenger for dealing with callee saved registers.
239  return true;
240 }
241 
242 bool SIRegisterInfo::requiresFrameIndexScavenging(
243  const MachineFunction &MF) const {
244  const MachineFrameInfo &MFI = MF.getFrameInfo();
245  if (MFI.hasStackObjects())
246  return true;
247 
248  // May need to deal with callee saved registers.
249  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
250  return !Info->isEntryFunction();
251 }
252 
253 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
254  const MachineFunction &MF) const {
255  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
256  // create a virtual register for it during frame index elimination, so the
257  // scavenger is directly needed.
258  return MF.getFrameInfo().hasStackObjects() &&
259  MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
260  MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
261 }
262 
263 bool SIRegisterInfo::requiresVirtualBaseRegisters(
264  const MachineFunction &) const {
265  // There are no special dedicated stack or frame pointers.
266  return true;
267 }
268 
269 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
270  // This helps catch bugs as verifier errors.
271  return true;
272 }
273 
274 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
275  assert(SIInstrInfo::isMUBUF(*MI));
276 
277  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
278  AMDGPU::OpName::offset);
279  return MI->getOperand(OffIdx).getImm();
280 }
281 
282 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
283  int Idx) const {
284  if (!SIInstrInfo::isMUBUF(*MI))
285  return 0;
286 
287  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
288  AMDGPU::OpName::vaddr) &&
289  "Should never see frame index on non-address operand");
290 
291  return getMUBUFInstrOffset(MI);
292 }
293 
294 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
295  if (!MI->mayLoadOrStore())
296  return false;
297 
298  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
299 
300  return !isUInt<12>(FullOffset);
301 }
302 
303 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
304  unsigned BaseReg,
305  int FrameIdx,
306  int64_t Offset) const {
307  MachineBasicBlock::iterator Ins = MBB->begin();
308  DebugLoc DL; // Defaults to "unknown"
309 
310  if (Ins != MBB->end())
311  DL = Ins->getDebugLoc();
312 
313  MachineFunction *MF = MBB->getParent();
314  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
315  const SIInstrInfo *TII = Subtarget.getInstrInfo();
316 
317  if (Offset == 0) {
318  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
319  .addFrameIndex(FrameIdx);
320  return;
321  }
322 
323  MachineRegisterInfo &MRI = MF->getRegInfo();
324  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
325 
326  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
327 
328  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
329  .addImm(Offset);
330  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
331  .addFrameIndex(FrameIdx);
332 
333  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
334  .addReg(OffsetReg, RegState::Kill)
335  .addReg(FIReg);
336 }
337 
338 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
339  int64_t Offset) const {
340 
341  MachineBasicBlock *MBB = MI.getParent();
342  MachineFunction *MF = MBB->getParent();
343  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
344  const SIInstrInfo *TII = Subtarget.getInstrInfo();
345 
346 #ifndef NDEBUG
347  // FIXME: Is it possible to be storing a frame index to itself?
348  bool SeenFI = false;
349  for (const MachineOperand &MO: MI.operands()) {
350  if (MO.isFI()) {
351  if (SeenFI)
352  llvm_unreachable("should not see multiple frame indices");
353 
354  SeenFI = true;
355  }
356  }
357 #endif
358 
359  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
360  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
361  assert(TII->isMUBUF(MI));
362  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
363  MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
364  "should only be seeing frame offset relative FrameIndex");
365 
366 
367  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
368  int64_t NewOffset = OffsetOp->getImm() + Offset;
369  assert(isUInt<12>(NewOffset) && "offset should be legal");
370 
371  FIOp->ChangeToRegister(BaseReg, false);
372  OffsetOp->setImm(NewOffset);
373 }
374 
375 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
376  unsigned BaseReg,
377  int64_t Offset) const {
378  if (!SIInstrInfo::isMUBUF(*MI))
379  return false;
380 
381  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
382 
383  return isUInt<12>(NewOffset);
384 }
385 
386 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
387  const MachineFunction &MF, unsigned Kind) const {
388  // This is inaccurate. It depends on the instruction and address space. The
389  // only place where we should hit this is for dealing with frame indexes /
390  // private accesses, so this is correct in that case.
391  return &AMDGPU::VGPR_32RegClass;
392 }
393 
394 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
395 
396  switch (Op) {
397  case AMDGPU::SI_SPILL_S512_SAVE:
398  case AMDGPU::SI_SPILL_S512_RESTORE:
399  case AMDGPU::SI_SPILL_V512_SAVE:
400  case AMDGPU::SI_SPILL_V512_RESTORE:
401  return 16;
402  case AMDGPU::SI_SPILL_S256_SAVE:
403  case AMDGPU::SI_SPILL_S256_RESTORE:
404  case AMDGPU::SI_SPILL_V256_SAVE:
405  case AMDGPU::SI_SPILL_V256_RESTORE:
406  return 8;
407  case AMDGPU::SI_SPILL_S128_SAVE:
408  case AMDGPU::SI_SPILL_S128_RESTORE:
409  case AMDGPU::SI_SPILL_V128_SAVE:
410  case AMDGPU::SI_SPILL_V128_RESTORE:
411  return 4;
412  case AMDGPU::SI_SPILL_V96_SAVE:
413  case AMDGPU::SI_SPILL_V96_RESTORE:
414  return 3;
415  case AMDGPU::SI_SPILL_S64_SAVE:
416  case AMDGPU::SI_SPILL_S64_RESTORE:
417  case AMDGPU::SI_SPILL_V64_SAVE:
418  case AMDGPU::SI_SPILL_V64_RESTORE:
419  return 2;
420  case AMDGPU::SI_SPILL_S32_SAVE:
421  case AMDGPU::SI_SPILL_S32_RESTORE:
422  case AMDGPU::SI_SPILL_V32_SAVE:
423  case AMDGPU::SI_SPILL_V32_RESTORE:
424  return 1;
425  default: llvm_unreachable("Invalid spill opcode");
426  }
427 }
428 
429 static int getOffsetMUBUFStore(unsigned Opc) {
430  switch (Opc) {
431  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
432  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
433  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
434  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
435  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
436  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
437  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
438  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
439  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
440  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
441  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
442  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
443  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
444  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
445  default:
446  return -1;
447  }
448 }
449 
450 static int getOffsetMUBUFLoad(unsigned Opc) {
451  switch (Opc) {
452  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
453  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
454  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
455  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
456  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
457  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
458  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
459  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
460  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
461  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
462  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
463  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
464  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
465  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
466  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
467  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
468  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
469  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
470  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
471  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
472  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
473  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
474  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
475  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
476  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
477  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
478  default:
479  return -1;
480  }
481 }
482 
483 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
484 // need to handle the case where an SGPR may need to be spilled while spilling.
485 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
486  MachineFrameInfo &MFI,
487  MachineBasicBlock::iterator MI,
488  int Index,
489  int64_t Offset) {
490  MachineBasicBlock *MBB = MI->getParent();
491  const DebugLoc &DL = MI->getDebugLoc();
492  bool IsStore = MI->mayStore();
493 
494  unsigned Opc = MI->getOpcode();
495  int LoadStoreOp = IsStore ?
496  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
497  if (LoadStoreOp == -1)
498  return false;
499 
500  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
501  MachineInstrBuilder NewMI =
502  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
503  .add(*Reg)
504  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
505  .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
506  .addImm(Offset)
507  .addImm(0) // glc
508  .addImm(0) // slc
509  .addImm(0) // tfe
510  .cloneMemRefs(*MI);
511 
512  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
513  AMDGPU::OpName::vdata_in);
514  if (VDataIn)
515  NewMI.add(*VDataIn);
516  return true;
517 }
518 
519 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
520  unsigned LoadStoreOp,
521  int Index,
522  unsigned ValueReg,
523  bool IsKill,
524  unsigned ScratchRsrcReg,
525  unsigned ScratchOffsetReg,
526  int64_t InstOffset,
527  MachineMemOperand *MMO,
528  RegScavenger *RS) const {
529  MachineBasicBlock *MBB = MI->getParent();
530  MachineFunction *MF = MI->getParent()->getParent();
531  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
532  const SIInstrInfo *TII = ST.getInstrInfo();
533  const MachineFrameInfo &MFI = MF->getFrameInfo();
534 
535  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
536  const DebugLoc &DL = MI->getDebugLoc();
537  bool IsStore = Desc.mayStore();
538 
539  bool Scavenged = false;
540  unsigned SOffset = ScratchOffsetReg;
541 
542  const unsigned EltSize = 4;
543  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
544  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
545  unsigned Size = NumSubRegs * EltSize;
546  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
547  int64_t ScratchOffsetRegDelta = 0;
548 
549  unsigned Align = MFI.getObjectAlignment(Index);
550  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
551 
552  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
553 
554  if (!isUInt<12>(Offset + Size - EltSize)) {
555  SOffset = AMDGPU::NoRegister;
556 
557  // We currently only support spilling VGPRs to EltSize boundaries, meaning
558  // we can simplify the adjustment of Offset here to just scale with
559  // WavefrontSize.
560  Offset *= ST.getWavefrontSize();
561 
562  // We don't have access to the register scavenger if this function is called
563  // during PEI::scavengeFrameVirtualRegs().
564  if (RS)
565  SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
566 
567  if (SOffset == AMDGPU::NoRegister) {
568  // There are no free SGPRs, and we are already in the process of spilling
569  // VGPRs. Since we need a VGPR in order to spill SGPRs (this is true on
570  // SI/CI, and on VI it is true until we implement spilling using scalar
571  // stores), we have no way to free up an SGPR. Our solution here is to
572  // add the offset directly to the ScratchOffset register, and then
573  // subtract the offset after the spill to return ScratchOffset to its
574  // original value.
575  SOffset = ScratchOffsetReg;
576  ScratchOffsetRegDelta = Offset;
577  } else {
578  Scavenged = true;
579  }
580 
581  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
582  .addReg(ScratchOffsetReg)
583  .addImm(Offset);
584 
585  Offset = 0;
586  }
587 
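  // Emit one EltSize (4-byte) buffer load/store per 32-bit subregister of
  // ValueReg, stepping the immediate offset by EltSize each iteration.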
588  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
589  unsigned SubReg = NumSubRegs == 1 ?
590  ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
591 
592  unsigned SOffsetRegState = 0;
593  unsigned SrcDstRegState = getDefRegState(!IsStore);
594  if (i + 1 == e) {
595  SOffsetRegState |= getKillRegState(Scavenged);
596  // The last implicit use carries the "Kill" flag.
597  SrcDstRegState |= getKillRegState(IsKill);
598  }
599 
600  MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
601  MachineMemOperand *NewMMO
602  = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
603  EltSize, MinAlign(Align, EltSize * i));
604 
605  auto MIB = BuildMI(*MBB, MI, DL, Desc)
606  .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
607  .addReg(ScratchRsrcReg)
608  .addReg(SOffset, SOffsetRegState)
609  .addImm(Offset)
610  .addImm(0) // glc
611  .addImm(0) // slc
612  .addImm(0) // tfe
613  .addMemOperand(NewMMO);
614 
615  if (NumSubRegs > 1)
616  MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
617  }
618 
619  if (ScratchOffsetRegDelta != 0) {
620  // Subtract the offset we added to the ScratchOffset register.
621  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
622  .addReg(ScratchOffsetReg)
623  .addImm(ScratchOffsetRegDelta);
624  }
625 }
626 
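// For SGPR spills to scalar memory, choose the widest S_BUFFER load/store
// that evenly divides the spilled register's size in bytes.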
627 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
628  bool Store) {
629  if (SuperRegSize % 16 == 0) {
630  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
631  AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
632  }
633 
634  if (SuperRegSize % 8 == 0) {
635  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
636  AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
637  }
638 
639  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
640  AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
641 }
642 
643 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
644  int Index,
645  RegScavenger *RS,
646  bool OnlyToVGPR) const {
647  MachineBasicBlock *MBB = MI->getParent();
648  MachineFunction *MF = MBB->getParent();
649  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
650  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
651 
652  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
653  = MFI->getSGPRToVGPRSpills(Index);
654  bool SpillToVGPR = !VGPRSpills.empty();
655  if (OnlyToVGPR && !SpillToVGPR)
656  return false;
657 
658  MachineRegisterInfo &MRI = MF->getRegInfo();
659  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
660  const SIInstrInfo *TII = ST.getInstrInfo();
661 
662  unsigned SuperReg = MI->getOperand(0).getReg();
663  bool IsKill = MI->getOperand(0).isKill();
664  const DebugLoc &DL = MI->getDebugLoc();
665 
666  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
667 
668  bool SpillToSMEM = spillSGPRToSMEM();
669  if (SpillToSMEM && OnlyToVGPR)
670  return false;
671 
672  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
673  SuperReg != MFI->getFrameOffsetReg() &&
674  SuperReg != MFI->getScratchWaveOffsetReg()));
675 
676  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
677 
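 // Scalar buffer spills address scratch through m0, so if m0 is live it is
 // copied aside here and restored once the spill sequence is done.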
678  unsigned OffsetReg = AMDGPU::M0;
679  unsigned M0CopyReg = AMDGPU::NoRegister;
680 
681  if (SpillToSMEM) {
682  if (RS->isRegUsed(AMDGPU::M0)) {
683  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
684  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
685  .addReg(AMDGPU::M0);
686  }
687  }
688 
689  unsigned ScalarStoreOp;
690  unsigned EltSize = 4;
691  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
692  if (SpillToSMEM && isSGPRClass(RC)) {
693  // XXX - if private_element_size is larger than 4 it might be useful to be
694  // able to spill wider vmem spills.
695  std::tie(EltSize, ScalarStoreOp) =
696  getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
697  }
698 
699  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
700  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
701 
702  // SubReg carries the "Kill" flag when SubReg == SuperReg.
703  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
704  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
705  unsigned SubReg = NumSubRegs == 1 ?
706  SuperReg : getSubReg(SuperReg, SplitParts[i]);
707 
708  if (SpillToSMEM) {
709  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
710 
711  // The allocated memory size is really the wavefront size * the frame
712  // index size. The widest register class is 64 bytes, so a 4-byte scratch
713  // allocation is enough to spill this in a single stack object.
714  //
715  // FIXME: Frame size/offsets are computed earlier than this, so the extra
716  // space is still unnecessarily allocated.
717 
718  unsigned Align = FrameInfo.getObjectAlignment(Index);
719  MachinePointerInfo PtrInfo
720  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
721  MachineMemOperand *MMO
722  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
723  EltSize, MinAlign(Align, EltSize * i));
724 
725  // SMEM instructions only support a single offset, so increment the wave
726  // offset.
727 
728  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
729  if (Offset != 0) {
730  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
731  .addReg(MFI->getFrameOffsetReg())
732  .addImm(Offset);
733  } else {
734  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
735  .addReg(MFI->getFrameOffsetReg());
736  }
737 
738  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
739  .addReg(SubReg, getKillRegState(IsKill)) // sdata
740  .addReg(MFI->getScratchRSrcReg()) // sbase
741  .addReg(OffsetReg, RegState::Kill) // soff
742  .addImm(0) // glc
743  .addMemOperand(MMO);
744 
745  continue;
746  }
747 
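 // Spill to a VGPR lane: V_WRITELANE_B32 stores each 32-bit piece of the
 // SGPR into one lane of the reserved spill VGPR.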
748  if (SpillToVGPR) {
749  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
750 
751  // During SGPR spilling to VGPR, determine if the VGPR is defined. The
752  // only circumstance in which we say it is undefined is when it is the
753  // first spill to this VGPR in the first basic block.
754  bool VGPRDefined = true;
755  if (MBB == &MF->front())
756  VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
757 
758  // Mark the "old value of vgpr" input undef only if this is the first sgpr
759  // spill to this specific vgpr in the first basic block.
760  BuildMI(*MBB, MI, DL,
761  TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
762  Spill.VGPR)
763  .addReg(SubReg, getKillRegState(IsKill))
764  .addImm(Spill.Lane)
765  .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
766 
767  // FIXME: Since this spills to another register instead of an actual
768  // frame index, we should delete the frame index when all references to
769  // it are fixed.
770  } else {
771  // XXX - Can the spill to VGPR fail for some subregisters but not others?
772  if (OnlyToVGPR)
773  return false;
774 
775  // Spill SGPR to a frame index.
776  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
777  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
779 
780  MachineInstrBuilder Mov
781  = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
782  .addReg(SubReg, SubKillState);
783 
784 
785  // There could be undef components of a spilled super register.
786  // TODO: Can we detect this and skip the spill?
787  if (NumSubRegs > 1) {
788  // The last implicit use of the SuperReg carries the "Kill" flag.
789  unsigned SuperKillState = 0;
790  if (i + 1 == e)
791  SuperKillState |= getKillRegState(IsKill);
792  Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
793  }
794 
795  unsigned Align = FrameInfo.getObjectAlignment(Index);
796  MachinePointerInfo PtrInfo
797  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
798  MachineMemOperand *MMO
799  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
800  EltSize, MinAlign(Align, EltSize * i));
801  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
802  .addReg(TmpReg, RegState::Kill) // src
803  .addFrameIndex(Index) // vaddr
804  .addReg(MFI->getScratchRSrcReg()) // srsrc
805  .addReg(MFI->getFrameOffsetReg()) // soffset
806  .addImm(i * 4) // offset
807  .addMemOperand(MMO);
808  }
809  }
810 
811  if (M0CopyReg != AMDGPU::NoRegister) {
812  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
813  .addReg(M0CopyReg, RegState::Kill);
814  }
815 
816  MI->eraseFromParent();
817  MFI->addToSpilledSGPRs(NumSubRegs);
818  return true;
819 }
820 
821 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
822  int Index,
823  RegScavenger *RS,
824  bool OnlyToVGPR) const {
825  MachineFunction *MF = MI->getParent()->getParent();
826  MachineRegisterInfo &MRI = MF->getRegInfo();
827  MachineBasicBlock *MBB = MI->getParent();
828  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
829 
830  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
831  = MFI->getSGPRToVGPRSpills(Index);
832  bool SpillToVGPR = !VGPRSpills.empty();
833  if (OnlyToVGPR && !SpillToVGPR)
834  return false;
835 
836  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
837  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
838  const SIInstrInfo *TII = ST.getInstrInfo();
839  const DebugLoc &DL = MI->getDebugLoc();
840 
841  unsigned SuperReg = MI->getOperand(0).getReg();
842  bool SpillToSMEM = spillSGPRToSMEM();
843  if (SpillToSMEM && OnlyToVGPR)
844  return false;
845 
846  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
847 
848  unsigned OffsetReg = AMDGPU::M0;
849  unsigned M0CopyReg = AMDGPU::NoRegister;
850 
851  if (SpillToSMEM) {
852  if (RS->isRegUsed(AMDGPU::M0)) {
853  M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
854  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
855  .addReg(AMDGPU::M0);
856  }
857  }
858 
859  unsigned EltSize = 4;
860  unsigned ScalarLoadOp;
861 
862  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
863  if (SpillToSMEM && isSGPRClass(RC)) {
864  // XXX - if private_element_size is larger than 4 it might be useful to be
865  // able to spill wider vmem spills.
866  std::tie(EltSize, ScalarLoadOp) =
867  getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
868  }
869 
870  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
871  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
872 
873  // SubReg carries the "Kill" flag when SubReg == SuperReg.
874  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
875 
876  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
877  unsigned SubReg = NumSubRegs == 1 ?
878  SuperReg : getSubReg(SuperReg, SplitParts[i]);
879 
880  if (SpillToSMEM) {
881  // FIXME: Size may be > 4 but extra bytes wasted.
882  unsigned Align = FrameInfo.getObjectAlignment(Index);
883  MachinePointerInfo PtrInfo
884  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
885  MachineMemOperand *MMO
886  = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
887  EltSize, MinAlign(Align, EltSize * i));
888 
889  // Add i * 4 offset
890  int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
891  if (Offset != 0) {
892  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
893  .addReg(MFI->getFrameOffsetReg())
894  .addImm(Offset);
895  } else {
896  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
897  .addReg(MFI->getFrameOffsetReg());
898  }
899 
900  auto MIB =
901  BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
902  .addReg(MFI->getScratchRSrcReg()) // sbase
903  .addReg(OffsetReg, RegState::Kill) // soff
904  .addImm(0) // glc
905  .addMemOperand(MMO);
906 
907  if (NumSubRegs > 1 && i == 0)
908  MIB.addReg(SuperReg, RegState::ImplicitDefine);
909 
910  continue;
911  }
912 
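 // Restore from a VGPR lane: V_READLANE_B32 reads each 32-bit piece back
 // from the lane it was spilled to.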
913  if (SpillToVGPR) {
914  SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
915  auto MIB =
916  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
917  SubReg)
918  .addReg(Spill.VGPR)
919  .addImm(Spill.Lane);
920 
921  if (NumSubRegs > 1 && i == 0)
922  MIB.addReg(SuperReg, RegState::ImplicitDefine);
923  } else {
924  if (OnlyToVGPR)
925  return false;
926 
927  // Restore SGPR from a stack slot.
928  // FIXME: We should use S_LOAD_DWORD here for VI.
929  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
930  unsigned Align = FrameInfo.getObjectAlignment(Index);
931 
932  MachinePointerInfo PtrInfo
933  = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
934 
935  MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
936  MachineMemOperand::MOLoad, EltSize,
937  MinAlign(Align, EltSize * i));
938 
939  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
940  .addFrameIndex(Index) // vaddr
941  .addReg(MFI->getScratchRSrcReg()) // srsrc
942  .addReg(MFI->getFrameOffsetReg()) // soffset
943  .addImm(i * 4) // offset
944  .addMemOperand(MMO);
945 
946  auto MIB =
947  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
948  .addReg(TmpReg, RegState::Kill);
949 
950  if (NumSubRegs > 1)
951  MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
952  }
953  }
954 
955  if (M0CopyReg != AMDGPU::NoRegister) {
956  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
957  .addReg(M0CopyReg, RegState::Kill);
958  }
959 
960  MI->eraseFromParent();
961  return true;
962 }
963 
964 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
965 /// a VGPR and the stack slot can be safely eliminated when all other users are
966 /// handled.
967 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
968  MachineBasicBlock::iterator MI,
969  int FI,
970  RegScavenger *RS) const {
971  switch (MI->getOpcode()) {
972  case AMDGPU::SI_SPILL_S512_SAVE:
973  case AMDGPU::SI_SPILL_S256_SAVE:
974  case AMDGPU::SI_SPILL_S128_SAVE:
975  case AMDGPU::SI_SPILL_S64_SAVE:
976  case AMDGPU::SI_SPILL_S32_SAVE:
977  return spillSGPR(MI, FI, RS, true);
978  case AMDGPU::SI_SPILL_S512_RESTORE:
979  case AMDGPU::SI_SPILL_S256_RESTORE:
980  case AMDGPU::SI_SPILL_S128_RESTORE:
981  case AMDGPU::SI_SPILL_S64_RESTORE:
982  case AMDGPU::SI_SPILL_S32_RESTORE:
983  return restoreSGPR(MI, FI, RS, true);
984  default:
985  llvm_unreachable("not an SGPR spill instruction");
986  }
987 }
988 
989 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
990  int SPAdj, unsigned FIOperandNum,
991  RegScavenger *RS) const {
992  MachineFunction *MF = MI->getParent()->getParent();
993  MachineRegisterInfo &MRI = MF->getRegInfo();
994  MachineBasicBlock *MBB = MI->getParent();
995  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
996  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
997  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
998  const SIInstrInfo *TII = ST.getInstrInfo();
999  DebugLoc DL = MI->getDebugLoc();
1000 
1001  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1002  int Index = MI->getOperand(FIOperandNum).getIndex();
1003 
1004  switch (MI->getOpcode()) {
1005  // SGPR register spill
1006  case AMDGPU::SI_SPILL_S512_SAVE:
1007  case AMDGPU::SI_SPILL_S256_SAVE:
1008  case AMDGPU::SI_SPILL_S128_SAVE:
1009  case AMDGPU::SI_SPILL_S64_SAVE:
1010  case AMDGPU::SI_SPILL_S32_SAVE: {
1011  spillSGPR(MI, Index, RS);
1012  break;
1013  }
1014 
1015  // SGPR register restore
1016  case AMDGPU::SI_SPILL_S512_RESTORE:
1017  case AMDGPU::SI_SPILL_S256_RESTORE:
1018  case AMDGPU::SI_SPILL_S128_RESTORE:
1019  case AMDGPU::SI_SPILL_S64_RESTORE:
1020  case AMDGPU::SI_SPILL_S32_RESTORE: {
1021  restoreSGPR(MI, Index, RS);
1022  break;
1023  }
1024 
1025  // VGPR register spill
1026  case AMDGPU::SI_SPILL_V512_SAVE:
1027  case AMDGPU::SI_SPILL_V256_SAVE:
1028  case AMDGPU::SI_SPILL_V128_SAVE:
1029  case AMDGPU::SI_SPILL_V96_SAVE:
1030  case AMDGPU::SI_SPILL_V64_SAVE:
1031  case AMDGPU::SI_SPILL_V32_SAVE: {
1032  const MachineOperand *VData = TII->getNamedOperand(*MI,
1033  AMDGPU::OpName::vdata);
1034  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1035  Index,
1036  VData->getReg(), VData->isKill(),
1037  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1038  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1039  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1040  *MI->memoperands_begin(),
1041  RS);
1042  MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1043  MI->eraseFromParent();
1044  break;
1045  }
1046  case AMDGPU::SI_SPILL_V32_RESTORE:
1047  case AMDGPU::SI_SPILL_V64_RESTORE:
1048  case AMDGPU::SI_SPILL_V96_RESTORE:
1049  case AMDGPU::SI_SPILL_V128_RESTORE:
1050  case AMDGPU::SI_SPILL_V256_RESTORE:
1051  case AMDGPU::SI_SPILL_V512_RESTORE: {
1052  const MachineOperand *VData = TII->getNamedOperand(*MI,
1053  AMDGPU::OpName::vdata);
1054 
1055  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1056  Index,
1057  VData->getReg(), VData->isKill(),
1058  TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1059  TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
1060  TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1061  *MI->memoperands_begin(),
1062  RS);
1063  MI->eraseFromParent();
1064  break;
1065  }
1066 
1067  default: {
1068  const DebugLoc &DL = MI->getDebugLoc();
1069  bool IsMUBUF = TII->isMUBUF(*MI);
1070 
1071  if (!IsMUBUF &&
1072  MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
1073  // Convert to an absolute stack address by finding the offset from the
1074  // scratch wave base and scaling by the wave size.
1075  //
1076  // In an entry function/kernel the stack address is already the
1077  // absolute address relative to the scratch wave offset.
1078 
1079  unsigned DiffReg
1080  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1081 
1082  bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1083  unsigned ResultReg = IsCopy ?
1084  MI->getOperand(0).getReg() :
1085  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1086 
1087  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1088  .addReg(MFI->getFrameOffsetReg())
1089  .addReg(MFI->getScratchWaveOffsetReg());
1090 
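 // DiffReg holds the wave-relative scratch offset in bytes; shifting it
 // right by log2(wavefront size) yields the per-lane offset, to which the
 // object's frame offset is then added.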
1091  int64_t Offset = FrameInfo.getObjectOffset(Index);
1092  if (Offset == 0) {
1093  // XXX - This never happens because of emergency scavenging slot at 0?
1094  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1095  .addImm(Log2_32(ST.getWavefrontSize()))
1096  .addReg(DiffReg);
1097  } else {
1098  unsigned ScaledReg
1099  = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1100 
1101  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1102  .addImm(Log2_32(ST.getWavefrontSize()))
1103  .addReg(DiffReg, RegState::Kill);
1104 
1105  // TODO: Fold if use instruction is another add of a constant.
1106  if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1107  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1108  .addImm(Offset)
1109  .addReg(ScaledReg, RegState::Kill);
1110  } else {
1111  unsigned ConstOffsetReg
1112  = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1113 
1114  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1115  .addImm(Offset);
1116  TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1117  .addReg(ConstOffsetReg, RegState::Kill)
1118  .addReg(ScaledReg, RegState::Kill);
1119  }
1120  }
1121 
1122  // Don't introduce an extra copy if we're just materializing in a mov.
1123  if (IsCopy)
1124  MI->eraseFromParent();
1125  else
1126  FIOp.ChangeToRegister(ResultReg, false, false, true);
1127  return;
1128  }
1129 
1130  if (IsMUBUF) {
1131  // Disable offen so we don't need a 0 vgpr base.
1132  assert(static_cast<int>(FIOperandNum) ==
1133  AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1134  AMDGPU::OpName::vaddr));
1135 
1136  assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
1137  == MFI->getFrameOffsetReg());
1138 
1139  int64_t Offset = FrameInfo.getObjectOffset(Index);
1140  int64_t OldImm
1141  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1142  int64_t NewOffset = OldImm + Offset;
1143 
1144  if (isUInt<12>(NewOffset) &&
1145  buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1146  MI->eraseFromParent();
1147  return;
1148  }
1149  }
1150 
1151  // If the offset is simply too big, don't convert to a scratch wave offset
1152  // relative index.
1153 
1154  int64_t Offset = FrameInfo.getObjectOffset(Index);
1155  FIOp.ChangeToImmediate(Offset);
1156  if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1157  unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1158  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1159  .addImm(Offset);
1160  FIOp.ChangeToRegister(TmpReg, false, false, true);
1161  }
1162  }
1163  }
1164 }
1165 
1166 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1167  #define AMDGPU_REG_ASM_NAMES
1168  #include "AMDGPURegAsmNames.inc.cpp"
1169 
1170  #define REG_RANGE(BeginReg, EndReg, RegTable) \
1171  if (Reg >= BeginReg && Reg <= EndReg) { \
1172  unsigned Index = Reg - BeginReg; \
1173  assert(Index < array_lengthof(RegTable)); \
1174  return RegTable[Index]; \
1175  }
1176 
1177  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
1178  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
1179  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
1180  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
1181  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
1182  VGPR96RegNames);
1183 
1184  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
1185  AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
1186  VGPR128RegNames);
1187  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
1188  AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
1189  SGPR128RegNames);
1190 
1191  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
1192  AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1193  VGPR256RegNames);
1194 
1195  REG_RANGE(
1196  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
1197  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
1198  VGPR512RegNames);
1199 
1200  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
1201  AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1202  SGPR256RegNames);
1203 
1204  REG_RANGE(
1205  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
1206  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
1207  SGPR512RegNames
1208  );
1209 
1210 #undef REG_RANGE
1211 
1212  // FIXME: Rename flat_scr so we don't need to special case this.
1213  switch (Reg) {
1214  case AMDGPU::FLAT_SCR:
1215  return "flat_scratch";
1216  case AMDGPU::FLAT_SCR_LO:
1217  return "flat_scratch_lo";
1218  case AMDGPU::FLAT_SCR_HI:
1219  return "flat_scratch_hi";
1220  default:
1221  // For the special named registers the default is fine.
1222  return AMDGPURegisterInfo::getRegAsmName(Reg);
1223  }
1224 }
1225 
1226 // FIXME: This is very slow. It might be worth creating a map from physreg to
1227 // register class.
1228 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1229  assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1230 
1231  static const TargetRegisterClass *const BaseClasses[] = {
1232  &AMDGPU::VGPR_32RegClass,
1233  &AMDGPU::SReg_32RegClass,
1234  &AMDGPU::VReg_64RegClass,
1235  &AMDGPU::SReg_64RegClass,
1236  &AMDGPU::VReg_96RegClass,
1237  &AMDGPU::VReg_128RegClass,
1238  &AMDGPU::SReg_128RegClass,
1239  &AMDGPU::VReg_256RegClass,
1240  &AMDGPU::SReg_256RegClass,
1241  &AMDGPU::VReg_512RegClass,
1242  &AMDGPU::SReg_512RegClass,
1243  &AMDGPU::SCC_CLASSRegClass,
1244  &AMDGPU::Pseudo_SReg_32RegClass,
1245  &AMDGPU::Pseudo_SReg_128RegClass,
1246  };
1247 
1248  for (const TargetRegisterClass *BaseClass : BaseClasses) {
1249  if (BaseClass->contains(Reg)) {
1250  return BaseClass;
1251  }
1252  }
1253  return nullptr;
1254 }
1255 
1256 // TODO: It might be helpful to have some target specific flags in
1257 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1258 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1259  unsigned Size = getRegSizeInBits(*RC);
1260  if (Size < 32)
1261  return false;
1262  switch (Size) {
1263  case 32:
1264  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1265  case 64:
1266  return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1267  case 96:
1268  return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1269  case 128:
1270  return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1271  case 256:
1272  return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1273  case 512:
1274  return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1275  default:
1276  llvm_unreachable("Invalid register class size");
1277  }
1278 }
1279 
1280 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1281  const TargetRegisterClass *SRC) const {
1282  switch (getRegSizeInBits(*SRC)) {
1283  case 32:
1284  return &AMDGPU::VGPR_32RegClass;
1285  case 64:
1286  return &AMDGPU::VReg_64RegClass;
1287  case 96:
1288  return &AMDGPU::VReg_96RegClass;
1289  case 128:
1290  return &AMDGPU::VReg_128RegClass;
1291  case 256:
1292  return &AMDGPU::VReg_256RegClass;
1293  case 512:
1294  return &AMDGPU::VReg_512RegClass;
1295  default:
1296  llvm_unreachable("Invalid register class size");
1297  }
1298 }
1299 
1300 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1301  const TargetRegisterClass *VRC) const {
1302  switch (getRegSizeInBits(*VRC)) {
1303  case 32:
1304  return &AMDGPU::SGPR_32RegClass;
1305  case 64:
1306  return &AMDGPU::SReg_64RegClass;
1307  case 128:
1308  return &AMDGPU::SReg_128RegClass;
1309  case 256:
1310  return &AMDGPU::SReg_256RegClass;
1311  case 512:
1312  return &AMDGPU::SReg_512RegClass;
1313  default:
1314  llvm_unreachable("Invalid register class size");
1315  }
1316 }
1317 
1318 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1319  const TargetRegisterClass *RC, unsigned SubIdx) const {
1320  if (SubIdx == AMDGPU::NoSubRegister)
1321  return RC;
1322 
1323  // We can assume that each lane corresponds to one 32-bit register.
1324  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1325  if (isSGPRClass(RC)) {
1326  switch (Count) {
1327  case 1:
1328  return &AMDGPU::SGPR_32RegClass;
1329  case 2:
1330  return &AMDGPU::SReg_64RegClass;
1331  case 4:
1332  return &AMDGPU::SReg_128RegClass;
1333  case 8:
1334  return &AMDGPU::SReg_256RegClass;
1335  case 16: /* fall-through */
1336  default:
1337  llvm_unreachable("Invalid sub-register class size");
1338  }
1339  } else {
1340  switch (Count) {
1341  case 1:
1342  return &AMDGPU::VGPR_32RegClass;
1343  case 2:
1344  return &AMDGPU::VReg_64RegClass;
1345  case 3:
1346  return &AMDGPU::VReg_96RegClass;
1347  case 4:
1348  return &AMDGPU::VReg_128RegClass;
1349  case 8:
1350  return &AMDGPU::VReg_256RegClass;
1351  case 16: /* fall-through */
1352  default:
1353  llvm_unreachable("Invalid sub-register class size");
1354  }
1355  }
1356 }
1357 
1358 bool SIRegisterInfo::shouldRewriteCopySrc(
1359  const TargetRegisterClass *DefRC,
1360  unsigned DefSubReg,
1361  const TargetRegisterClass *SrcRC,
1362  unsigned SrcSubReg) const {
1363  // We want to prefer the smallest register class possible, so we don't want to
1364  // stop and rewrite on anything that looks like a subregister
1365  // extract. Operations mostly don't care about the super register class, so we
1366  // only want to stop on the most basic of copies between the same register
1367  // class.
1368  //
1369  // e.g. if we have something like
1370  // %0 = ...
1371  // %1 = ...
1372  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1373  // %3 = COPY %2, sub0
1374  //
1375  // We want to look through the COPY to find:
1376  // => %3 = COPY %0
1377 
1378  // Plain copy.
1379  return getCommonSubClass(DefRC, SrcRC) != nullptr;
1380 }
1381 
1382 /// Returns a register that is not used at any point in the function.
1383 /// If all registers are used, then this function will return
1384 /// AMDGPU::NoRegister.
1385 unsigned
1386 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1387  const TargetRegisterClass *RC,
1388  const MachineFunction &MF) const {
1389 
1390  for (unsigned Reg : *RC)
1391  if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1392  return Reg;
1393  return AMDGPU::NoRegister;
1394 }
1395 
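// Returns the subregister indices used to split a register of class RC into
// EltSize-byte pieces; an empty list means the register is a single element.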
1396 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1397  unsigned EltSize) const {
1398  if (EltSize == 4) {
1399  static const int16_t Sub0_15[] = {
1400  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1401  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1402  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1403  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1404  };
1405 
1406  static const int16_t Sub0_7[] = {
1407  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1408  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1409  };
1410 
1411  static const int16_t Sub0_3[] = {
1412  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1413  };
1414 
1415  static const int16_t Sub0_2[] = {
1416  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1417  };
1418 
1419  static const int16_t Sub0_1[] = {
1420  AMDGPU::sub0, AMDGPU::sub1,
1421  };
1422 
1423  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1424  case 32:
1425  return {};
1426  case 64:
1427  return makeArrayRef(Sub0_1);
1428  case 96:
1429  return makeArrayRef(Sub0_2);
1430  case 128:
1431  return makeArrayRef(Sub0_3);
1432  case 256:
1433  return makeArrayRef(Sub0_7);
1434  case 512:
1435  return makeArrayRef(Sub0_15);
1436  default:
1437  llvm_unreachable("unhandled register size");
1438  }
1439  }
1440 
1441  if (EltSize == 8) {
1442  static const int16_t Sub0_15_64[] = {
1443  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1444  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1445  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1446  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1447  };
1448 
1449  static const int16_t Sub0_7_64[] = {
1450  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1451  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1452  };
1453 
1454 
1455  static const int16_t Sub0_3_64[] = {
1456  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1457  };
1458 
1459  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1460  case 64:
1461  return {};
1462  case 128:
1463  return makeArrayRef(Sub0_3_64);
1464  case 256:
1465  return makeArrayRef(Sub0_7_64);
1466  case 512:
1467  return makeArrayRef(Sub0_15_64);
1468  default:
1469  llvm_unreachable("unhandled register size");
1470  }
1471  }
1472 
1473  assert(EltSize == 16 && "unhandled register spill split size");
1474 
1475  static const int16_t Sub0_15_128[] = {
1476  AMDGPU::sub0_sub1_sub2_sub3,
1477  AMDGPU::sub4_sub5_sub6_sub7,
1478  AMDGPU::sub8_sub9_sub10_sub11,
1479  AMDGPU::sub12_sub13_sub14_sub15
1480  };
1481 
1482  static const int16_t Sub0_7_128[] = {
1483  AMDGPU::sub0_sub1_sub2_sub3,
1484  AMDGPU::sub4_sub5_sub6_sub7
1485  };
1486 
1487  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1488  case 128:
1489  return {};
1490  case 256:
1491  return makeArrayRef(Sub0_7_128);
1492  case 512:
1493  return makeArrayRef(Sub0_15_128);
1494  default:
1495  llvm_unreachable("unhandled register size");
1496  }
1497 }
1498 
1499 const TargetRegisterClass*
1500 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1501  unsigned Reg) const {
1502  if (TargetRegisterInfo::isVirtualRegister(Reg))
1503  return MRI.getRegClass(Reg);
1504 
1505  return getPhysRegClass(Reg);
1506 }
1507 
1508 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1509  unsigned Reg) const {
1510  const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1511  assert(RC && "Register class for the reg not found");
1512  return hasVGPRs(RC);
1513 }
1514 
1515 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1516  const TargetRegisterClass *SrcRC,
1517  unsigned SubReg,
1518  const TargetRegisterClass *DstRC,
1519  unsigned DstSubReg,
1520  const TargetRegisterClass *NewRC,
1521  LiveIntervals &LIS) const {
1522  unsigned SrcSize = getRegSizeInBits(*SrcRC);
1523  unsigned DstSize = getRegSizeInBits(*DstRC);
1524  unsigned NewSize = getRegSizeInBits(*NewRC);
1525 
1526  // Do not increase the size of registers beyond a dword; we would need to
1527  // allocate adjacent registers and constrain regalloc more than needed.
1528 
1529  // Always allow dword coalescing.
1530  if (SrcSize <= 32 || DstSize <= 32)
1531  return true;
1532 
1533  return NewSize <= DstSize || NewSize <= SrcSize;
1534 }
1535 
1536 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1537  MachineFunction &MF) const {
1538 
1539  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1540  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1541 
1542  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1543  MF.getFunction());
1544  switch (RC->getID()) {
1545  default:
1546  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1547  case AMDGPU::VGPR_32RegClassID:
1548  return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1549  case AMDGPU::SGPR_32RegClassID:
1550  return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1551  }
1552 }
1553 
1554 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1555  unsigned Idx) const {
1556  if (Idx == getVGPRPressureSet())
1557  return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1558  const_cast<MachineFunction &>(MF));
1559 
1560  if (Idx == getSGPRPressureSet())
1561  return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1562  const_cast<MachineFunction &>(MF));
1563 
1564  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1565 }
1566 
1567 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1568  static const int Empty[] = { -1 };
1569 
1570  if (hasRegUnit(AMDGPU::M0, RegUnit))
1571  return Empty;
1572  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1573 }
1574 
1575 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1576  // Not a callee saved register.
1577  return AMDGPU::SGPR30_SGPR31;
1578 }
1579 
1580 const TargetRegisterClass *
1581 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1582  const MachineRegisterInfo &MRI) const {
1583  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
1584  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
1585  if (!RB)
1586  return nullptr;
1587 
1588  switch (Size) {
1589  case 32:
1590  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1591  &AMDGPU::SReg_32_XM0RegClass;
1592  case 64:
1593  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1594  &AMDGPU::SReg_64_XEXECRegClass;
1595  case 96:
1596  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1597  nullptr;
1598  case 128:
1599  return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1600  &AMDGPU::SReg_128RegClass;
1601  default:
1602  llvm_unreachable("not implemented");
1603  }
1604 }
1605 
1606 // Find reaching register definition
1607 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1608  MachineInstr &Use,
1609  MachineRegisterInfo &MRI,
1610  LiveIntervals *LIS) const {
1611  auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1612  SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1613  SlotIndex DefIdx;
1614 
1615  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1616  if (!LIS->hasInterval(Reg))
1617  return nullptr;
1618  LiveInterval &LI = LIS->getInterval(Reg);
1619  LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1620  : MRI.getMaxLaneMaskForVReg(Reg);
1621  VNInfo *V = nullptr;
1622  if (LI.hasSubRanges()) {
1623  for (auto &S : LI.subranges()) {
1624  if ((S.LaneMask & SubLanes) == SubLanes) {
1625  V = S.getVNInfoAt(UseIdx);
1626  break;
1627  }
1628  }
1629  } else {
1630  V = LI.getVNInfoAt(UseIdx);
1631  }
1632  if (!V)
1633  return nullptr;
1634  DefIdx = V->def;
1635  } else {
1636  // Find last def.
1637  for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1638  LiveRange &LR = LIS->getRegUnit(*Units);
1639  if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1640  if (!DefIdx.isValid() ||
1641  MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1642  LIS->getInstructionFromIndex(V->def)))
1643  DefIdx = V->def;
1644  } else {
1645  return nullptr;
1646  }
1647  }
1648  }
1649 
1650  MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1651 
1652  if (!Def || !MDT.dominates(Def, &Use))
1653  return nullptr;
1654 
1655  assert(Def->modifiesRegister(Reg, this));
1656 
1657  return Def;
1658 }
unsigned getID() const
Return the register class ID number.
static bool hasPressureSet(const int *PSets, unsigned PSetID)
bool isSGPRClass(const TargetRegisterClass *RC) const
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:419
unsigned FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isSGPRPressureSet(unsigned SetID) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if this.
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
virtual StringRef getRegAsmName(unsigned Reg) const
Return the assembly name for Reg.
unsigned getKillRegState(bool B)
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const int * getRegUnitPressureSets(unsigned RegUnit) const override
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
#define REG_RANGE(BeginReg, EndReg, RegTable)
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:409
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
This file declares the machine register scavenger class.
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
unsigned reservedPrivateSegmentWaveByteOffsetReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch wave offset in case spilling is needed...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
unsigned getReturnAddressReg(const MachineFunction &MF) const
static cl::opt< bool > EnableSpillSGPRToSMEM("amdgpu-spill-sgpr-to-smem", cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), cl::init(false))
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
bool hasVGPRs(const TargetRegisterClass *RC) const
int64_t getMUBUFInstrOffset(const MachineInstr *MI) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getSGPRPressureSet() const
const RegisterBank * getRegBankOrNull(unsigned Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
const MachineBasicBlock & front() const
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, bool OnlyToVGPR=false) const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRPressureSet() const
The memory access writes data.
unsigned getWavefrontSize() const
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
LiveInterval & getInterval(unsigned Reg)
This class implements the register bank concept.
Definition: RegisterBank.h:29
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
const MCRegisterClass * MC
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const
Special case of eliminateFrameIndex.
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:405
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool hasInv2PiInlineImm() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachinePointerInfo & getPointerInfo() const
void reserveRegisterTuples(BitVector &, unsigned Reg) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Flags getFlags() const
Return the raw flags of the source value,.
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
uint32_t Size
Definition: Profile.cpp:47
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
static int getOffsetMUBUFStore(unsigned Opc)
bool spillSGPRToSMEM() const
const unsigned Kind
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool isVGPRPressureSet(unsigned SetID) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the &#39;base&#39; register class for this register.
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:48
bool hasCalls() const
Return true if the current function has any function calls.