for (unsigned i = 0; PSets[i] != -1; ++i) {
  if (PSets[i] == (int)PSetID)
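The loop above scans a -1-terminated pressure-set list of the kind getRegUnitPressureSets() returns for a register unit. A minimal standalone sketch of the same membership test, with a hand-written list standing in for the real TableGen-generated data:

#include <cstdio>

// Hypothetical -1-terminated pressure-set list for one register unit.
static const int ExamplePSets[] = {3, 7, 12, -1};

// Same scan as hasPressureSet above: walk entries until the -1 sentinel.
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

int main() {
  std::printf("%d %d\n", hasPressureSet(ExamplePSets, 7),
              hasPressureSet(ExamplePSets, 9)); // prints "1 0"
  return 0;
}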
void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  PressureSets.set(PSetID);
static cl::opt<bool> EnableSpillSGPRToSMEM(
  "amdgpu-spill-sgpr-to-smem",
  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  cl::init(false));

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling VGPRs to SGPRs"),
  cl::ReallyHidden,
  cl::init(true));
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
  SGPRPressureSets(getNumRegPressureSets()),
  VGPRPressureSets(getNumRegPressureSets()),
  SpillSGPRToVGPR(false),
  SpillSGPRToSMEM(false) {
  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
    SpillSGPRToSMEM = true;
  else if (EnableSpillSGPRToVGPR)
    SpillSGPRToVGPR = true;

  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    VGPRMax = PressureSetRegUnits[i];
    SGPRMax = PressureSetRegUnits[i];

  assert(SGPRSetID < NumRegPressureSets && VGPRSetID < NumRegPressureSets);
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);

return AMDGPU::SGPR_32RegClass.getRegister(Reg);

return AMDGPU::SGPR32;
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
  unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);

unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
  unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {

if (ScratchRSrcReg != AMDGPU::NoRegister) {
  assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));

if (StackPtrReg != AMDGPU::NoRegister) {
  assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));

if (FrameReg != AMDGPU::NoRegister) {
  assert(!isSubRegister(ScratchRSrcReg, FrameReg));
                                        AMDGPU::OpName::offset);

       AMDGPU::OpName::vaddr) &&
       "Should never see frame index on non-address operand");

return !isUInt<12>(FullOffset);
if (Ins != MBB->end())
  DL = Ins->getDebugLoc();

BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
  .addFrameIndex(FrameIdx);

BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
  .addFrameIndex(FrameIdx);

TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
assert(FIOp && FIOp->isFI() && "frame index must be address operand");
assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
       "should only be seeing frame offset relative FrameIndex");

MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
assert(isUInt<12>(NewOffset) && "offset should be legal");
OffsetOp->setImm(NewOffset);

return isUInt<12>(NewOffset);
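The isUInt<12> checks here and earlier in this listing encode the MUBUF addressing constraint: the instruction's immediate offset field is a 12-bit unsigned value, so only offsets in the range 0..4095 can be folded into the instruction. A self-contained sketch of the same test without the LLVM MathExtras helper (the helper name below is local to the sketch):

#include <cstdint>
#include <cstdio>

// Equivalent of isUInt<12>(Offset): true iff Offset fits in 12 unsigned bits.
static bool fitsInMUBUFImmOffset(int64_t Offset) {
  return Offset >= 0 && Offset < (1 << 12); // 0..4095
}

int main() {
  std::printf("%d %d %d\n",
              fitsInMUBUFImmOffset(0),     // 1
              fitsInMUBUFImmOffset(4095),  // 1
              fitsInMUBUFImmOffset(4096)); // 0
  return 0;
}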
return &AMDGPU::VGPR_32RegClass;

case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V512_RESTORE:

case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V256_RESTORE:

case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V128_RESTORE:

case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V96_RESTORE:

case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_V64_SAVE:
case AMDGPU::SI_SPILL_V64_RESTORE:

case AMDGPU::SI_SPILL_S32_SAVE:
case AMDGPU::SI_SPILL_S32_RESTORE:
case AMDGPU::SI_SPILL_V32_SAVE:
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
  return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
  return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
  return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
  return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
  return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
  return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
  return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
  return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
  return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
  return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
  return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
  return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
  return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
  return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
  return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
  return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
  return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
  return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
  return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
  return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
const DebugLoc &DL = MI->getDebugLoc();
bool IsStore = MI->mayStore();

unsigned Opc = MI->getOpcode();
int LoadStoreOp = IsStore ?
  getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
if (LoadStoreOp == -1)

BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))

                                        AMDGPU::OpName::vdata_in);
                                         unsigned LoadStoreOp,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,

const DebugLoc &DL = MI->getDebugLoc();
bool Scavenged = false;
unsigned SOffset = ScratchOffsetReg;

const unsigned EltSize = 4;
unsigned Size = NumSubRegs * EltSize;
int64_t ScratchOffsetRegDelta = 0;

assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");

if (!isUInt<12>(Offset + Size - EltSize)) {
  SOffset = AMDGPU::NoRegister;
if (SOffset == AMDGPU::NoRegister) {
  SOffset = ScratchOffsetReg;
  ScratchOffsetRegDelta = Offset;

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
    .addReg(ScratchOffsetReg)

for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
  unsigned SubReg = NumSubRegs == 1 ?

  unsigned SOffsetRegState = 0;

    EltSize, MinAlign(Align, EltSize * i));

  auto MIB = BuildMI(*MBB, MI, DL, Desc)
    .addReg(SOffset, SOffsetRegState)

if (ScratchOffsetRegDelta != 0) {
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
    .addReg(ScratchOffsetReg)
    .addImm(ScratchOffsetRegDelta);
if (SuperRegSize % 16 == 0) {
  return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                       AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
}

if (SuperRegSize % 8 == 0) {
  return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
}

return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                    AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
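getSpillEltSize picks the widest scalar buffer access that evenly divides the super-register being spilled: 16-byte DWORDX4 accesses, then 8-byte DWORDX2, then the 4-byte DWORD fallback. A standalone sketch of just that width-selection logic, with the opcode pair reduced to a mnemonic string (all names here are local to the sketch):

#include <cstdio>
#include <utility>

// Mirrors the divisibility checks above: returns {element size in bytes,
// mnemonic} for a super-register of SuperRegSize bytes.
static std::pair<unsigned, const char *> pickSpillWidth(unsigned SuperRegSize) {
  if (SuperRegSize % 16 == 0)
    return {16, "DWORDX4"};
  if (SuperRegSize % 8 == 0)
    return {8, "DWORDX2"};
  return {4, "DWORD"};
}

int main() {
  for (unsigned Size : {4u, 8u, 12u, 32u, 64u}) {
    std::pair<unsigned, const char *> W = pickSpillWidth(Size);
    std::printf("%u-byte register -> %u-byte %s accesses\n",
                Size, W.first, W.second);
  }
  return 0;
}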
                            bool OnlyToVGPR) const {

bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)

unsigned SuperReg = MI->getOperand(0).getReg();
bool IsKill = MI->getOperand(0).isKill();
const DebugLoc &DL = MI->getDebugLoc();

if (SpillToSMEM && OnlyToVGPR)

assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

unsigned OffsetReg = AMDGPU::M0;
unsigned M0CopyReg = AMDGPU::NoRegister;

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)

unsigned ScalarStoreOp;
unsigned EltSize = 4;

std::tie(EltSize, ScalarStoreOp) =

unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
  unsigned SubReg = NumSubRegs == 1 ?
    SuperReg : getSubReg(SuperReg, SplitParts[i]);

    EltSize, MinAlign(Align, EltSize * i));

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
  BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))

  bool VGPRDefined = true;
  if (MBB == &MF->front())
    VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;

    TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),

    = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);

  if (NumSubRegs > 1) {
    unsigned SuperKillState = 0;

    EltSize, MinAlign(Align, EltSize * i));
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))

if (M0CopyReg != AMDGPU::NoRegister) {
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)

MI->eraseFromParent();
                              bool OnlyToVGPR) const {

bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)

const DebugLoc &DL = MI->getDebugLoc();

unsigned SuperReg = MI->getOperand(0).getReg();

if (SpillToSMEM && OnlyToVGPR)

assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

unsigned OffsetReg = AMDGPU::M0;
unsigned M0CopyReg = AMDGPU::NoRegister;

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)

unsigned EltSize = 4;
unsigned ScalarLoadOp;

std::tie(EltSize, ScalarLoadOp) =

unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
  unsigned SubReg = NumSubRegs == 1 ?
    SuperReg : getSubReg(SuperReg, SplitParts[i]);

    EltSize, MinAlign(Align, EltSize * i));

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)

  if (NumSubRegs > 1 && i == 0)

  BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),

  if (NumSubRegs > 1 && i == 0)

  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
    .addFrameIndex(Index)

if (M0CopyReg != AMDGPU::NoRegister) {
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)

MI->eraseFromParent();
switch (MI->getOpcode()) {
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE:

case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE:
                                        int SPAdj, unsigned FIOperandNum,

int Index = MI->getOperand(FIOperandNum).getIndex();

switch (MI->getOpcode()) {
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE: {

case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE: {

case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
case AMDGPU::SI_SPILL_V32_SAVE: {
                               AMDGPU::OpName::vdata);
  buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
        TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
        TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(),
  MI->eraseFromParent();

case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: {
                               AMDGPU::OpName::vdata);

  buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
        TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
        TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
        TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
        *MI->memoperands_begin(),
  MI->eraseFromParent();
const DebugLoc &DL = MI->getDebugLoc();
bool IsMUBUF = TII->isMUBUF(*MI);

bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
unsigned ResultReg = IsCopy ?
  MI->getOperand(0).getReg() :

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)

int64_t Offset = FrameInfo.getObjectOffset(Index);

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)

BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)

TII->getAddNoCarry(*MBB, MI, DL, ResultReg)

unsigned ConstOffsetReg
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
TII->getAddNoCarry(*MBB, MI, DL, ResultReg)

MI->eraseFromParent();

assert(static_cast<int>(FIOperandNum) ==
       AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                  AMDGPU::OpName::vaddr));

assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()

int64_t Offset = FrameInfo.getObjectOffset(Index);
int64_t OldImm
  = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
int64_t NewOffset = OldImm + Offset;

if (isUInt<12>(NewOffset) &&
  MI->eraseFromParent();

int64_t Offset = FrameInfo.getObjectOffset(Index);

if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
#define AMDGPU_REG_ASM_NAMES

#define REG_RANGE(BeginReg, EndReg, RegTable) \
  if (Reg >= BeginReg && Reg <= EndReg) { \
    unsigned Index = Reg - BeginReg; \
    assert(Index < array_lengthof(RegTable)); \
    return RegTable[Index]; \
  }

REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,

REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
          AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
          AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,

REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
          AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,

REG_RANGE(
  AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
  AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,

REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
          AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,

REG_RANGE(
  AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
  AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
case AMDGPU::FLAT_SCR:
  return "flat_scratch";
case AMDGPU::FLAT_SCR_LO:
  return "flat_scratch_lo";
case AMDGPU::FLAT_SCR_HI:
  return "flat_scratch_hi";
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::VReg_64RegClass,
&AMDGPU::SReg_64RegClass,
&AMDGPU::VReg_96RegClass,
&AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,
&AMDGPU::SCC_CLASSRegClass,
&AMDGPU::Pseudo_SReg_32RegClass,
&AMDGPU::Pseudo_SReg_128RegClass,

if (BaseClass->contains(Reg)) {
unsigned Size = getRegSizeInBits(*RC);

return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
switch (getRegSizeInBits(*SRC)) {
  return &AMDGPU::VGPR_32RegClass;
  return &AMDGPU::VReg_64RegClass;
  return &AMDGPU::VReg_96RegClass;
  return &AMDGPU::VReg_128RegClass;
  return &AMDGPU::VReg_256RegClass;
  return &AMDGPU::VReg_512RegClass;

switch (getRegSizeInBits(*VRC)) {
  return &AMDGPU::SGPR_32RegClass;
  return &AMDGPU::SReg_64RegClass;
  return &AMDGPU::SReg_128RegClass;
  return &AMDGPU::SReg_256RegClass;
  return &AMDGPU::SReg_512RegClass;
if (SubIdx == AMDGPU::NoSubRegister)

unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();

  return &AMDGPU::SGPR_32RegClass;
  return &AMDGPU::SReg_64RegClass;
  return &AMDGPU::SReg_128RegClass;
  return &AMDGPU::SReg_256RegClass;

  return &AMDGPU::VGPR_32RegClass;
  return &AMDGPU::VReg_64RegClass;
  return &AMDGPU::VReg_96RegClass;
  return &AMDGPU::VReg_128RegClass;
  return &AMDGPU::VReg_256RegClass;
                                        unsigned SrcSubReg) const {

return getCommonSubClass(DefRC, SrcRC) != nullptr;

for (unsigned Reg : *RC)

return AMDGPU::NoRegister;
                                     unsigned EltSize) const {

static const int16_t Sub0_15[] = {
  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
  AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
  AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
};

static const int16_t Sub0_7[] = {
  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
  AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
};

static const int16_t Sub0_3[] = {
  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
};

static const int16_t Sub0_2[] = {
  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
};

static const int16_t Sub0_1[] = {
  AMDGPU::sub0, AMDGPU::sub1,
};

static const int16_t Sub0_15_64[] = {
  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
  AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
  AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
};

static const int16_t Sub0_7_64[] = {
  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
  AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
};

static const int16_t Sub0_3_64[] = {
  AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
};

assert(EltSize == 16 && "unhandled register spill split size");

static const int16_t Sub0_15_128[] = {
  AMDGPU::sub0_sub1_sub2_sub3,
  AMDGPU::sub4_sub5_sub6_sub7,
  AMDGPU::sub8_sub9_sub10_sub11,
  AMDGPU::sub12_sub13_sub14_sub15
};

static const int16_t Sub0_7_128[] = {
  AMDGPU::sub0_sub1_sub2_sub3,
  AMDGPU::sub4_sub5_sub6_sub7
};
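The Sub0_* tables above enumerate the subregister indices used to slice a wide register into EltSize-byte pieces, and the spill loops pair each piece with MinAlign(Align, EltSize * i) as its alignment. A small sketch of that arithmetic, with local stand-ins for the LLVM helpers and numbers chosen only for illustration:

#include <cstdio>

// Same bit trick as llvm::MinAlign: the largest power of two that divides
// both A and B (returns A when B is 0).
static unsigned minAlign(unsigned A, unsigned B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  const unsigned RegSizeBytes = 32; // e.g. a 256-bit register spill
  const unsigned EltSize = 8;       // e.g. 8-byte (64-bit) split parts
  const unsigned NumParts = RegSizeBytes / EltSize; // 4 pieces
  const unsigned FrameAlign = 16;   // alignment of the frame object

  for (unsigned i = 0; i != NumParts; ++i)
    std::printf("part %u: byte offset %u, alignment %u\n",
                i, EltSize * i, minAlign(FrameAlign, EltSize * i));
  return 0;
}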
                                   unsigned Reg) const {

                                   unsigned Reg) const {
  assert(RC && "Register class for the reg not found");

unsigned SrcSize = getRegSizeInBits(*SrcRC);
unsigned DstSize = getRegSizeInBits(*DstRC);
unsigned NewSize = getRegSizeInBits(*NewRC);

if (SrcSize <= 32 || DstSize <= 32)

return NewSize <= DstSize || NewSize <= SrcSize;

switch (RC->getID()) {
  return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
case AMDGPU::VGPR_32RegClassID:
case AMDGPU::SGPR_32RegClassID:

                                 unsigned Idx) const {
  const_cast<MachineFunction &>(MF));
  const_cast<MachineFunction &>(MF));
return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);

static const int Empty[] = { -1 };
if (hasRegUnit(AMDGPU::M0, RegUnit))
return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);

return AMDGPU::SGPR30_SGPR31;

return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
                                              &AMDGPU::SReg_32_XM0RegClass;
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
                                              &AMDGPU::SReg_64_XEXECRegClass;
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
                                              &AMDGPU::SReg_128RegClass;

LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)

if ((S.LaneMask & SubLanes) == SubLanes) {
  V = S.getVNInfoAt(UseIdx);

if (!Def || !MDT.dominates(Def, &Use))