68 #define GET_INSTRINFO_CTOR_DTOR
69 #include "AMDGPUGenInstrInfo.inc"
73 #define GET_D16ImageDimIntrinsics_IMPL
74 #define GET_ImageDimIntrinsicTable_IMPL
75 #define GET_RsrcIntrinsics_IMPL
76 #include "AMDGPUGenSearchableTables.inc"
86 cl::desc("Restrict range of branch instructions (DEBUG)"));
118 if (Op0Idx == -1 && Op1Idx == -1)
122 if ((Op0Idx == -1 && Op1Idx != -1) ||
123 (Op1Idx == -1 && Op0Idx != -1))
142 case AMDGPU::V_MOV_B32_e32:
143 case AMDGPU::V_MOV_B32_e64:
144 case AMDGPU::V_MOV_B64_PSEUDO:
153 int64_t &Offset1) const {
161 if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
185 Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
186 Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
207 if (!Load0Offset || !Load1Offset)
215 Offset1 = Load1Offset->getZExtValue();
232 if (OffIdx0 == -1 || OffIdx1 == -1)
245 if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))
248 Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
249 Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
258 case AMDGPU::DS_READ2ST64_B32:
259 case AMDGPU::DS_READ2ST64_B64:
260 case AMDGPU::DS_WRITE2ST64_B32:
261 case AMDGPU::DS_WRITE2ST64_B64:
280 Offset = OffsetImm->getImm();
281 assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
282        "operands of type register.");
294 uint8_t Offset0 = Offset0Imm->getImm();
295 uint8_t Offset1 = Offset1Imm->getImm();
297 if (Offset1 > Offset0 && Offset1 - Offset0 == 1) {
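// The two accesses form a read2/write2 pair covering adjacent elements, so the
// byte offset reported for the pair is EltSize * Offset0 (computed below).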
314 Offset = EltSize * Offset0;
315 assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
316        "operands of type register.");
325 if (SOffset && SOffset->isReg())
335 Offset = OffsetImm->getImm();
338 Offset += SOffset->getImm();
340 assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
341        "operands of type register.");
353 Offset = OffsetImm->getImm();
354 assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
355        "operands of type register.");
373 assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
374        "operands of type register.");
398 if (MO1->getAddrSpace() != MO2->getAddrSpace())
401 auto Base1 = MO1->getValue();
402 auto Base2 = MO2->getValue();
403 if (!Base1 || !Base2)
410 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
413 return Base1 == Base2;
418 unsigned NumLoads) const {
431 const unsigned MaxGlobalLoadCluster = 6;
432 if (NumLoads > MaxGlobalLoadCluster)
444 } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
449 if (!FirstDst || !SecondDst)
460 unsigned LoadClusterThreshold = 16;
466 return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
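// The threshold above caps clustering at 16 bytes of loaded data, e.g. four
// 32-bit or two 64-bit results.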
480 int64_t Offset0, int64_t Offset1,
481 unsigned NumLoads) const {
482 assert(Offset1 > Offset0 &&
483 "Second offset should be larger than first offset!");
488 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
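// Heuristic: at most 16 loads whose offsets lie within 64 of each other are
// considered near enough to schedule together.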
493 const DebugLoc &DL, unsigned DestReg,
494 unsigned SrcReg, bool KillSrc) {
497 "illegal SGPR to VGPR copy",
502 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
508 const DebugLoc &DL, unsigned DestReg,
509 unsigned SrcReg, bool KillSrc) const {
512 if (RC == &AMDGPU::VGPR_32RegClass) {
514 AMDGPU::SReg_32RegClass.contains(SrcReg));
515 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
520 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
521 RC == &AMDGPU::SReg_32RegClass) {
522 if (SrcReg == AMDGPU::SCC) {
523 BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
529 if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
534 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
539 if (RC == &AMDGPU::SReg_64RegClass) {
540 if (DestReg == AMDGPU::VCC) {
541 if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
542 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
547 BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
555 if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
560 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
565 if (DestReg == AMDGPU::SCC) {
567 BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
573 unsigned EltSize = 4;
574 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
576 if (RI.getRegSizeInBits(*RC) > 32) {
577 Opcode = AMDGPU::S_MOV_B64;
580 Opcode = AMDGPU::S_MOV_B32;
593 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
596 SubIdx = SubIndices[Idx];
598 SubIdx = SubIndices[SubIndices.size() - Idx - 1];
601 get(Opcode), RI.getSubReg(DestReg, SubIdx));
603 Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
608 bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
633 const DebugLoc &DL, unsigned DestReg,
634 int64_t Value) const {
637 if (RegClass == &AMDGPU::SReg_32RegClass ||
638 RegClass == &AMDGPU::SGPR_32RegClass ||
639 RegClass == &AMDGPU::SReg_32_XM0RegClass ||
640 RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
641 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
646 if (RegClass == &AMDGPU::SReg_64RegClass ||
647 RegClass == &AMDGPU::SGPR_64RegClass ||
648 RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
649 BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
654 if (RegClass == &AMDGPU::VGPR_32RegClass) {
655 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
659 if (RegClass == &AMDGPU::VReg_64RegClass) {
660 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
665 unsigned EltSize = 4;
666 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
668 if (RI.getRegSizeInBits(*RegClass) > 32) {
669 Opcode = AMDGPU::S_MOV_B64;
672 Opcode = AMDGPU::S_MOV_B32;
678 for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
679 int64_t IdxValue = Idx == 0 ? Value : 0;
682 get(Opcode), RI.getSubReg(DestReg, Idx));
689 return &AMDGPU::VGPR_32RegClass;
694 const DebugLoc &DL, unsigned DstReg,
697 unsigned FalseReg) const {
702 if (Cond.size() == 1) {
704 BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
706 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
710 } else if (Cond.size() == 2) {
711 assert(Cond[0].isImm() && "Cond[0] is not an immediate");
712 switch (Cond[0].getImm()) {
713 case SIInstrInfo::SCC_TRUE: {
715 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
718 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
724 case SIInstrInfo::SCC_FALSE: {
726 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
729 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
735 case SIInstrInfo::VCCNZ: {
739 BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
741 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
747 case SIInstrInfo::VCCZ: {
751 BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
753 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
759 case SIInstrInfo::EXECNZ: {
762 BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
764 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
767 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
773 case SIInstrInfo::EXECZ: {
776 BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
778 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
781 BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
799 unsigned SrcReg, int Value) const {
802 BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
812 unsigned SrcReg, int Value) const {
815 BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
824 if (RI.getRegSizeInBits(*DstRC) == 32) {
825 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
826 } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
827 return AMDGPU::S_MOV_B64;
828 } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
829 return AMDGPU::V_MOV_B64_PSEUDO;
837 return AMDGPU::SI_SPILL_S32_SAVE;
839 return AMDGPU::SI_SPILL_S64_SAVE;
841 return AMDGPU::SI_SPILL_S128_SAVE;
843 return AMDGPU::SI_SPILL_S256_SAVE;
845 return AMDGPU::SI_SPILL_S512_SAVE;
854 return AMDGPU::SI_SPILL_V32_SAVE;
856 return AMDGPU::SI_SPILL_V64_SAVE;
858 return AMDGPU::SI_SPILL_V96_SAVE;
860 return AMDGPU::SI_SPILL_V128_SAVE;
862 return AMDGPU::SI_SPILL_V256_SAVE;
864 return AMDGPU::SI_SPILL_V512_SAVE;
872 unsigned SrcReg, bool isKill,
881 unsigned Size = FrameInfo.getObjectSize(FrameIndex);
882 unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
927 BuildMI(MBB, MI, DL, get(Opcode))
939 return AMDGPU::SI_SPILL_S32_RESTORE;
941 return AMDGPU::SI_SPILL_S64_RESTORE;
943 return AMDGPU::SI_SPILL_S128_RESTORE;
945 return AMDGPU::SI_SPILL_S256_RESTORE;
947 return AMDGPU::SI_SPILL_S512_RESTORE;
956 return AMDGPU::SI_SPILL_V32_RESTORE;
958 return AMDGPU::SI_SPILL_V64_RESTORE;
960 return AMDGPU::SI_SPILL_V96_RESTORE;
962 return AMDGPU::SI_SPILL_V128_RESTORE;
964 return AMDGPU::SI_SPILL_V256_RESTORE;
966 return AMDGPU::SI_SPILL_V512_RESTORE;
981 unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
982 unsigned Size = FrameInfo.getObjectSize(FrameIndex);
1020 BuildMI(MBB, MI, DL, get(Opcode), DestReg)
1031 unsigned FrameOffset, unsigned Size) const {
1043 const DebugLoc &DL = Insert->getDebugLoc();
1047 if (TIDReg == AMDGPU::NoRegister)
1051 WorkGroupSize > WavefrontSize) {
1058 unsigned InputPtrReg =
1060 for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
1069 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
1072 BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1)
1077 BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1)
1081 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg)
1085 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg)
1095 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
1100 BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64),
1106 BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32),
1114 unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
1133 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
1151 if (HasNoTerminator)
1153 get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
1171 case AMDGPU::S_MOV_B64_term:
1174 MI.setDesc(get(AMDGPU::S_MOV_B64));
1177 case AMDGPU::S_XOR_B64_term:
1180 MI.setDesc(get(AMDGPU::S_XOR_B64));
1183 case AMDGPU::S_ANDN2_B64_term:
1186 MI.setDesc(get(AMDGPU::S_ANDN2_B64));
1189 case AMDGPU::V_MOV_B64_PSEUDO: {
1191 unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
1192 unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
1197 if (SrcOp.isImm()) {
1199 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1200     .addImm(Imm.getLoBits(32).getZExtValue())
1202 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1203     .addImm(Imm.getHiBits(32).getZExtValue())
1207 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
1210 BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
1217 case AMDGPU::V_SET_INACTIVE_B32: {
1218 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1222 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1227 case AMDGPU::V_SET_INACTIVE_B64: {
1228 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1234 BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
1239 case AMDGPU::V_MOVRELD_B32_V1:
1240 case AMDGPU::V_MOVRELD_B32_V2:
1241 case AMDGPU::V_MOVRELD_B32_V4:
1242 case AMDGPU::V_MOVRELD_B32_V8:
1243 case AMDGPU::V_MOVRELD_B32_V16: {
1244 const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
1251 BuildMI(MBB, MI, DL, MovRelDesc)
1258 const int ImpDefIdx =
1260 const int ImpUseIdx = ImpDefIdx + 1;
1261 MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
1266 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
1269 unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
1270 unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
1275 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
1279 Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
1296 case AMDGPU::EXIT_WWM: {
1299 MI.setDesc(get(AMDGPU::S_MOV_B64));
1302 case TargetOpcode::BUNDLE: {
1308 I->isBundledWithSucc(); ++I) {
1309 I->unbundleFromSucc();
1312 MO.setIsInternalRead(false);
1324 unsigned Src0OpName,
1326 unsigned Src1OpName) const {
1333 "All commutable instructions have both src0 and src1 modifiers");
1335 int Src0ModsVal = Src0Mods->getImm();
1336 int Src1ModsVal = Src1Mods->getImm();
1338 Src1Mods->setImm(Src0ModsVal);
1339 Src0Mods->setImm(Src1ModsVal);
1348 bool IsKill = RegOp.isKill();
1350 bool IsUndef = RegOp.isUndef();
1351 bool IsDebug = RegOp.isDebug();
1353 if (NonRegOp.isImm())
1355 else if (NonRegOp.isFI())
1360 NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
1368 unsigned Src1Idx) const {
1369 assert(!NewMI && "this should never be used");
1373 if (CommutedOpcode == -1)
1377 static_cast<int>(Src0Idx) &&
1379 static_cast<int>(Src1Idx) &&
1380 "inconsistency with findCommutedOpIndices");
1407 Src1, AMDGPU::OpName::src1_modifiers);
1409 CommutedMI->setDesc(get(CommutedOpcode));
1419 unsigned &SrcOpIdx1) const {
1424 unsigned &SrcOpIdx1) const {
1437 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
1441 int64_t BrOffset) const {
1444 assert(BranchOp != AMDGPU::S_SETPC_B64);
1458 if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
1472 assert(RS && "RegScavenger required for long branching");
1474 "new block should be inserted for expanding unconditional branch");
1491 if (BrOffset >= 0) {
1492 BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
1494     .addReg(PCReg, 0, AMDGPU::sub0)
1496 BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
1498     .addReg(PCReg, 0, AMDGPU::sub1)
1502 BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
1504     .addReg(PCReg, 0, AMDGPU::sub0)
1506 BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
1508     .addReg(PCReg, 0, AMDGPU::sub1)
1513 BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
1554 AMDGPU::SReg_64RegClass,
1560 return 4 + 8 + 4 + 4;
1563 unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
1565 case SIInstrInfo::SCC_TRUE:
1566 return AMDGPU::S_CBRANCH_SCC1;
1567 case SIInstrInfo::SCC_FALSE:
1568 return AMDGPU::S_CBRANCH_SCC0;
1569 case SIInstrInfo::VCCNZ:
1570 return AMDGPU::S_CBRANCH_VCCNZ;
1571 case SIInstrInfo::VCCZ:
1572 return AMDGPU::S_CBRANCH_VCCZ;
1573 case SIInstrInfo::EXECNZ:
1574 return AMDGPU::S_CBRANCH_EXECNZ;
1575 case SIInstrInfo::EXECZ:
1576 return AMDGPU::S_CBRANCH_EXECZ;
1582 SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
1584 case AMDGPU::S_CBRANCH_SCC0:
1586 case AMDGPU::S_CBRANCH_SCC1:
1588 case AMDGPU::S_CBRANCH_VCCNZ:
1590 case AMDGPU::S_CBRANCH_VCCZ:
1592 case AMDGPU::S_CBRANCH_EXECNZ:
1594 case AMDGPU::S_CBRANCH_EXECZ:
1606 bool AllowModify) const {
1607 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1609 TBB = I->getOperand(0).getMBB();
1615 if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
1616 CondBB = I->getOperand(1).getMBB();
1619 BranchPredicate Pred = getBranchPredicate(I->getOpcode());
1620 if (Pred == INVALID_BR)
1623 CondBB = I->getOperand(0).getMBB();
1629 if (I == MBB.end()) {
1635 if (I->getOpcode() == AMDGPU::S_BRANCH) {
1637 FBB = I->getOperand(0).getMBB();
1647 bool AllowModify) const {
1655 while (I != E && !I->isBranch() && !I->isReturn() &&
1656        I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
1657 switch (I->getOpcode()) {
1658 case AMDGPU::SI_MASK_BRANCH:
1659 case AMDGPU::S_MOV_B64_term:
1660 case AMDGPU::S_XOR_B64_term:
1661 case AMDGPU::S_ANDN2_B64_term:
1664 case AMDGPU::SI_ELSE:
1665 case AMDGPU::SI_KILL_I1_TERMINATOR:
1666 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
1679 if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
1702 if (TBB != MaskBrDest || Cond.empty())
1705 auto Pred = Cond[0].getImm();
1706 return (Pred != EXECZ && Pred != EXECNZ);
1710 int *BytesRemoved) const {
1714 unsigned RemovedSize = 0;
1715 while (I != MBB.end()) {
1717 if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
1723 I->eraseFromParent();
1729 *BytesRemoved = RemovedSize;
1746 int *BytesAdded) const {
1747 if (!FBB && Cond.empty()) {
1748 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1755 if (Cond.size() == 1 && Cond[0].isReg()) {
1756 BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
1762 assert(TBB && Cond[0].isImm());
1765 = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
1770 BuildMI(&MBB, DL, get(Opcode))
1784 BuildMI(&MBB, DL, get(Opcode))
1786 BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
1801 if (Cond.size() != 2) {
1805 if (Cond[0].isImm()) {
1806 Cond[0].setImm(-Cond[0].getImm());
1815 unsigned TrueReg, unsigned FalseReg,
1817 int &TrueCycles, int &FalseCycles) const {
1818 switch (Cond[0].getImm()) {
1826 CondCycles = TrueCycles = FalseCycles = NumInsts;
1842 if (NumInsts % 2 == 0)
1845 CondCycles = TrueCycles = FalseCycles = NumInsts;
1856 unsigned TrueReg, unsigned FalseReg) const {
1857 BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
1858 if (Pred == VCCZ || Pred == SCC_FALSE) {
1859 Pred = static_cast<BranchPredicate>(-Pred);
1865 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
1867 if (DstSize == 32) {
1868 unsigned SelOp = Pred == SCC_TRUE ?
1869     AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
1873 BuildMI(MBB, I, DL, get(SelOp), DstReg)
1881 if (DstSize == 64 && Pred == SCC_TRUE) {
1883 BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
1891 static const int16_t Sub0_15[] = {
1892 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1893 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1894 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1895 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1898 static const int16_t Sub0_15_64[] = {
1899 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1900 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1901 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1902 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1905 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
1907 const int16_t *SubIndices = Sub0_15;
1908 int NElts = DstSize / 32;
1911 if (Pred == SCC_TRUE) {
1912 SelOp = AMDGPU::S_CSELECT_B64;
1913 EltRC = &AMDGPU::SGPR_64RegClass;
1914 SubIndices = Sub0_15_64;
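// For wider results the select is emitted per 32-bit (or, for SCC-based
// selects, per 64-bit) element and the pieces are reassembled with a
// REG_SEQUENCE below.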
1921 MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
1926 for (int Idx = 0; Idx != NElts; ++Idx) {
1930 unsigned SubIdx = SubIndices[Idx];
1933 BuildMI(MBB, I, DL, get(SelOp), DstElt)
1934     .addReg(FalseReg, 0, SubIdx)
1935     .addReg(TrueReg, 0, SubIdx);
1945 case AMDGPU::V_MOV_B32_e32:
1946 case AMDGPU::V_MOV_B32_e64:
1947 case AMDGPU::V_MOV_B64_PSEUDO: {
1955 case AMDGPU::S_MOV_B32:
1956 case AMDGPU::S_MOV_B64:
1965 unsigned Kind) const {
1984 AMDGPU::OpName::src0_modifiers);
1986 AMDGPU::OpName::src1_modifiers);
1988 AMDGPU::OpName::src2_modifiers);
2003 case AMDGPU::S_MOV_B64:
2008 case AMDGPU::V_MOV_B32_e32:
2009 case AMDGPU::S_MOV_B32:
2016 if (!ImmOp->isImm())
2020 if (Opc == AMDGPU::COPY) {
2022 unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2029 if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
2030 Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
2045 bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
2060 const int64_t Imm = ImmOp->getImm();
2071 unsigned Src1Reg = Src1->getReg();
2072 unsigned Src1SubReg = Src1->getSubReg();
2077 if (Opc == AMDGPU::V_MAC_F32_e64 ||
2078 Opc == AMDGPU::V_MAC_F16_e64)
2085 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
2098 bool Src0Inlined = false;
2099 if (Src0->isReg()) {
2109 } else if ((RI.isPhysicalRegister(Src0->getReg()) &&
2111            (RI.isVirtualRegister(Src0->getReg()) &&
2117 if (Src1->isReg() && !Src0Inlined) {
2123 commuteInstruction(UseMI)) {
2125 } else if ((RI.isPhysicalRegister(Src1->getReg()) &&
2127            (RI.isVirtualRegister(Src1->getReg()) &&
2133 const int64_t Imm = ImmOp->getImm();
2144 if (Opc == AMDGPU::V_MAC_F32_e64 ||
2145 Opc == AMDGPU::V_MAC_F16_e64)
2154 UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
2168 int WidthB, int OffsetB) {
2169 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
2170 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
2171 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2172 return LowOffset + LowWidth <= HighOffset;
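// Example: accesses at [8, 12) and [16, 20) do not overlap because 8 + 4 <= 16.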
2175 bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
2178 int64_t Offset0, Offset1;
2203 "MIa must load from or modify a memory location");
2205 "MIb must load from or modify a memory location");
2220 if (!AA->alias(LocA, LocB))
2232 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2239 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2246 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2253 return checkInstOffsetsDoNotOverlap(MIa, MIb);
2267 if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
2268     Def->getOperand(1).isImm())
2269 return Def->getOperand(1).getImm();
2270 return AMDGPU::NoRegister;
2278 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
2283 case AMDGPU::V_MAC_F16_e64:
2286 case AMDGPU::V_MAC_F32_e64:
2287 case AMDGPU::V_FMAC_F32_e64:
2289 case AMDGPU::V_MAC_F16_e32:
2292 case AMDGPU::V_MAC_F32_e32:
2293 case AMDGPU::V_FMAC_F32_e32: {
2295 AMDGPU::OpName::src0);
2297 if (!Src0->isReg() && !Src0->isImm())
2318 if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
2323 get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
2331 get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2339 AMDGPU::OpName::src0), Src1))
2341 get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
2349 assert((!IsFMA || !IsF16) && "fmac only expected with f32");
2350 unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
2351 (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
2356 .addImm(Src1Mods ? Src1Mods->getImm() : 0)
2360 .addImm(Clamp ? Clamp->getImm() : 0)
2361 .addImm(Omod ? Omod->getImm() : 0);
2369 case AMDGPU::S_SET_GPR_IDX_ON:
2370 case AMDGPU::S_SET_GPR_IDX_MODE:
2371 case AMDGPU::S_SET_GPR_IDX_OFF:
2388 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
2389 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
2395 Opcode == AMDGPU::DS_GWS_INIT ||
2396 Opcode == AMDGPU::DS_GWS_SEMA_V ||
2397 Opcode == AMDGPU::DS_GWS_SEMA_BR ||
2398 Opcode == AMDGPU::DS_GWS_SEMA_P ||
2399 Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
2400 Opcode == AMDGPU::DS_GWS_BARRIER;
2415 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
2416     Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
2428 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
2463 int64_t Imm = MO.getImm();
2464 switch (OperandType) {
2469 int32_t Trunc = static_cast<int32_t>(Imm);
2487 int16_t Trunc = static_cast<int16_t>(Imm);
2497 int16_t Trunc = static_cast<int16_t>(Imm);
2501 if (!(Imm & 0xffff)) {
2577 AMDGPU::OpName::src0_modifiers) != -1;
2583 return Mods && Mods->getImm();
2605 default: return false;
2607 case AMDGPU::V_ADDC_U32_e64:
2608 case AMDGPU::V_SUBB_U32_e64:
2609 case AMDGPU::V_SUBBREV_U32_e64: {
2617 case AMDGPU::V_MAC_F32_e64:
2618 case AMDGPU::V_MAC_F16_e64:
2619 case AMDGPU::V_FMAC_F32_e64:
2625 case AMDGPU::V_CNDMASK_B32_e64:
2655 if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
2664 unsigned Op32) const {
2672 if (Op32DstIdx != -1) {
2690 if (Op32Src2Idx != -1) {
2730 return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
2732        (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
2742 switch (MO.getReg()) {
2745 case AMDGPU::FLAT_SCR:
2753 return AMDGPU::NoRegister;
2759 case AMDGPU::V_READLANE_B32:
2760 case AMDGPU::V_READLANE_B32_si:
2761 case AMDGPU::V_READLANE_B32_vi:
2762 case AMDGPU::V_WRITELANE_B32:
2763 case AMDGPU::V_WRITELANE_B32_si:
2764 case AMDGPU::V_WRITELANE_B32_vi:
2771 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
2783 return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
2785 return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
2792 if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
2806 ErrInfo = "Instruction has wrong number of operands.";
2824 ErrInfo = "inlineasm operand has incorrect register class.";
2835 ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
2836           "all fp values to integers.";
2845 ErrInfo = "Illegal immediate value for operand.";
2860 ErrInfo = "Illegal immediate value for operand.";
2871 ErrInfo = "Expected immediate, but got non-immediate";
2882 if (RegClass != -1) {
2884 if (Reg == AMDGPU::NoRegister ||
2890 ErrInfo = "Operand has incorrect register class.";
2899 ErrInfo = "SDWA is not supported on this target";
2905 const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
2907 for (int OpIdx : OpIndicies) {
2915 ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
2921 ErrInfo = "Only reg allowed as operands in SDWA instructions on GFX9";
2930 if (OMod != nullptr &&
2932 ErrInfo = "OMod not allowed in SDWA instructions on VI";
2938 if (isVOPC(BasicOpcode)) {
2942 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
2943 ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
2949 if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
2950 ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
2956 if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
2957 ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
2964 if (DstUnused && DstUnused->isImm() &&
2968 ErrInfo = "Dst register should have tied register";
2976 "Dst register should be tied to implicit use of preserved register";
2980 ErrInfo = "Dst register should use same physical register as preserved";
2992 uint64_t DMaskImm = DMask->getImm();
3012 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
3013 if (RegCount > DstSize) {
3014 ErrInfo = "MIMG instruction returns too many registers for dst "
3023 if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32
3028 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3030 unsigned ConstantBusCount = 0;
3031 unsigned LiteralCount = 0;
3037 if (SGPRUsed != AMDGPU::NoRegister)
3040 for (int OpIdx : OpIndices) {
3046 if (MO.getReg() != SGPRUsed)
3055 if (ConstantBusCount > 1) {
3056 ErrInfo = "VOP* instruction uses the constant bus more than once";
3060 if (isVOP3(MI) && LiteralCount) {
3061 ErrInfo = "VOP3 instruction uses literal";
3067 if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
3068     Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
3075 ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
3085 ErrInfo = "invalid immediate for SOPK instruction";
3090 ErrInfo = "invalid immediate for SOPK instruction";
3096 if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
3097     Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
3098     Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
3099     Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
3100 const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
3101                    Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
3105 const unsigned NumImplicitOps = IsDst ? 2 : 1;
3111 ErrInfo = "missing implicit register operands";
3117 if (!Dst->isUse()) {
3118 ErrInfo = "v_movreld_b32 vdst should be a use operand";
3124 UseOpIdx != StaticNumOps + 1) {
3125 ErrInfo = "movrel implicit operands should be tied";
3132 = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
3134 !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
3135 ErrInfo = "src0 should be subreg of implicit vector use";
3144 ErrInfo = "VALU instruction does not implicitly read exec mask";
3154 if (Soff && Soff->getReg() != AMDGPU::M0) {
3155 ErrInfo = "scalar stores must use m0 as offset register";
3163 if (Offset->getImm() != 0) {
3164 ErrInfo = "subtarget does not support offsets in flat instructions";
3180 ErrInfo = "Invalid dpp_ctrl value";
3190 default: return AMDGPU::INSTRUCTION_LIST_END;
3191 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
3192 case AMDGPU::COPY: return AMDGPU::COPY;
3193 case AMDGPU::PHI: return AMDGPU::PHI;
3194 case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
3196 case AMDGPU::WWM: return AMDGPU::WWM;
3197 case AMDGPU::S_MOV_B32:
3199 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
3200 case AMDGPU::S_ADD_I32:
3201 return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
3202 case AMDGPU::S_ADDC_U32:
3203 return AMDGPU::V_ADDC_U32_e32;
3204 case AMDGPU::S_SUB_I32:
3205 return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
3208 case AMDGPU::S_ADD_U32:
3209 return AMDGPU::V_ADD_I32_e32;
3210 case AMDGPU::S_SUB_U32:
3211 return AMDGPU::V_SUB_I32_e32;
3212 case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
3213 case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
3214 case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
3215 case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
3216 case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
3217 case AMDGPU::S_XNOR_B32:
3218   return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
3219 case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
3220 case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
3221 case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
3222 case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
3223 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
3224 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
3225 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
3226 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
3227 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
3228 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
3229 case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
3230 case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
3231 case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
3232 case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
3233 case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
3234 case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
3235 case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
3236 case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
3237 case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
3238 case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
3239 case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
3240 case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
3241 case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
3242 case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
3243 case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
3244 case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
3245 case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
3246 case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
3247 case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
3248 case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
3249 case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
3250 case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
3251 case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
3252 case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
3253 case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
3254 case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
3255 case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
3256 case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
3261 unsigned OpNo) const {
3270 return RI.getPhysRegClass(Reg);
3274 return RI.getRegClass(RCID);
3280 case AMDGPU::REG_SEQUENCE:
3282 case AMDGPU::INSERT_SUBREG:
3294 unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
3296 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
3298 Opcode = AMDGPU::COPY;
3299 else if (RI.isSGPRClass(RC))
3300 Opcode = AMDGPU::S_MOV_B32;
3303 if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
3304 VRC = &AMDGPU::VReg_64RegClass;
3306 VRC = &AMDGPU::VGPR_32RegClass;
3325 if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
3326 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3337 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
3340 BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
3341     .addReg(NewSuperReg, 0, SubIdx);
3354 if (SubIdx == AMDGPU::sub0)
3356 if (SubIdx == AMDGPU::sub1)
3368 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
3385 RI.getPhysRegClass(Reg);
3400 return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
3476 if (HasImplicitSGPR) {
3480 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
3487 if (Opc == AMDGPU::V_WRITELANE_B32) {
3491 if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
3497 if (Src1.isReg() && RI.isVGPR(MRI, Src1.getReg())) {
3515 if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
3516     RI.isVGPR(MRI, Src1.getReg())) {
3549 if (CommutedOpc == -1) {
3556 unsigned Src0Reg = Src0.getReg();
3558 bool Src0Kill = Src0.isKill();
3562 else if (Src1.isReg()) {
3587 unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
3589 for (unsigned i = 0; i < 3; ++i) {
3590 int Idx = VOP3Idx[i];
3602 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
3619 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
3623 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
3629 for (unsigned i = 0; i < SubRegs; ++i) {
3632 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
3633     .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
3639 get(AMDGPU::REG_SEQUENCE), DstReg);
3640 for (unsigned i = 0; i < SubRegs; ++i) {
3642 MIB.addImm(RI.getSubRegFromChannel(i));
3672 unsigned OpReg = Op.getReg();
3676 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
3684 BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
3707 unsigned VRsrc = Rsrc.getReg();
3721 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub0)
3722 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub0);
3723 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub1)
3724 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub1);
3725 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub2)
3726 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub2);
3727 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), SRsrcSub3)
3728 .addReg(VRsrc, VRsrcUndef, AMDGPU::sub3);
3730 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc)
3745 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg0)
3746     .addReg(SRsrc, 0, AMDGPU::sub0_sub1)
3747     .addReg(VRsrc, 0, AMDGPU::sub0_sub1);
3748 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
3749     .addReg(SRsrc, 0, AMDGPU::sub2_sub3)
3750     .addReg(VRsrc, 0, AMDGPU::sub2_sub3);
3751 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
3758 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
3765 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
3766 .addReg(AMDGPU::EXEC)
3768 BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
3784 BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
3785 .addReg(AMDGPU::EXEC);
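// The exec mask saved here is restored in the remainder block once the
// waterfall loop has processed every lane's resource descriptor.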
3789 for (auto &MO : MI.uses()) {
3790 if (MO.isReg() && MO.isUse()) {
3803 MF.insert(MBBI, RemainderBB);
3823 for (auto &Succ : RemainderBB->successors()) {
3834 BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
3839 static std::tuple<unsigned, unsigned>
3848 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
3863 .addImm(RsrcDataFormat & 0xFFFFFFFF);
3867 .addImm(RsrcDataFormat >> 32);
3872 .addImm(AMDGPU::sub0_sub1)
3878 return std::make_tuple(RsrcPtr, NewSRsrc);
3915 if (RI.hasVGPRs(OpRC)) {
3928 VRC = RI.getEquivalentVGPRClass(SRC);
3954 if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
3957 if (RI.hasVGPRs(DstRC)) {
3981 if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
3986 if (DstRC != Src0RC) {
3995 if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
4027 if (RsrcIdx != -1) {
4030 unsigned RsrcRC = get(MI.getOpcode()).OpInfo[RsrcIdx].RegClass;
4032 RI.getRegClass(RsrcRC))) {
4061 unsigned RsrcPtr, NewSRsrc;
4066 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
4067     .addReg(RsrcPtr, 0, AMDGPU::sub0)
4071 BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
4072     .addReg(RsrcPtr, 0, AMDGPU::sub1)
4084 } else if (!VAddr && ST.hasAddr64()) {
4089 "FIXME: Need to emit flat atomics here");
4091 unsigned RsrcPtr, NewSRsrc;
4118 MIB.addImm(GLC->getImm());
4125 MIB.addImm(TFE->getImm());
4148     .addReg(RsrcPtr, 0, AMDGPU::sub0)
4150     .addReg(RsrcPtr, 0, AMDGPU::sub1)
4163 Worklist.insert(&TopInst);
4165 while (!Worklist.empty()) {
4177 case AMDGPU::S_ADD_U64_PSEUDO:
4178 case AMDGPU::S_SUB_U64_PSEUDO:
4179 splitScalar64BitAddSub(Worklist, Inst, MDT);
4182 case AMDGPU::S_ADD_I32:
4183 case AMDGPU::S_SUB_I32:
4185 if (moveScalarAddSub(Worklist, Inst, MDT))
4190 case AMDGPU::S_AND_B64:
4191 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
4195 case AMDGPU::S_OR_B64:
4196 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
4200 case AMDGPU::S_XOR_B64:
4201 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
4205 case AMDGPU::S_NAND_B64:
4206 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
4210 case AMDGPU::S_NOR_B64:
4211 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
4215 case AMDGPU::S_XNOR_B64:
4216 if (ST.hasDLInsts())
4217 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
4219 splitScalar64BitXnor(Worklist, Inst, MDT);
4223 case AMDGPU::S_ANDN2_B64:
4224 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
4228 case AMDGPU::S_ORN2_B64:
4229 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
4233 case AMDGPU::S_NOT_B64:
4234 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
4238 case AMDGPU::S_BCNT1_I32_B64:
4239 splitScalar64BitBCNT(Worklist, Inst);
4243 case AMDGPU::S_BFE_I64:
4244 splitScalar64BitBFE(Worklist, Inst);
4248 case AMDGPU::S_LSHL_B32:
4250 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
4254 case AMDGPU::S_ASHR_I32:
4256 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
4260 case AMDGPU::S_LSHR_B32:
4262 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
4266 case AMDGPU::S_LSHL_B64:
4268 NewOpcode = AMDGPU::V_LSHLREV_B64;
4272 case AMDGPU::S_ASHR_I64:
4274 NewOpcode = AMDGPU::V_ASHRREV_I64;
4278 case AMDGPU::S_LSHR_B64:
4280 NewOpcode = AMDGPU::V_LSHRREV_B64;
4285 case AMDGPU::S_ABS_I32:
4286 lowerScalarAbs(Worklist, Inst);
4290 case AMDGPU::S_CBRANCH_SCC0:
4291 case AMDGPU::S_CBRANCH_SCC1:
4299 case AMDGPU::S_BFE_U64:
4300 case AMDGPU::S_BFM_B64:
4303 case AMDGPU::S_PACK_LL_B32_B16:
4304 case AMDGPU::S_PACK_LH_B32_B16:
4305 case AMDGPU::S_PACK_HH_B32_B16:
4306 movePackToVALU(Worklist, MRI, Inst);
4310 case AMDGPU::S_XNOR_B32:
4311 lowerScalarXnor(Worklist, Inst);
4315 case AMDGPU::S_NAND_B32:
4316 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
4320 case AMDGPU::S_NOR_B32:
4321 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
4325 case AMDGPU::S_ANDN2_B32:
4326 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
4330 case AMDGPU::S_ORN2_B32:
4331 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
4336 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
4354 addSCCDefUsersToVALUWorklist(Inst, Worklist);
4358 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
4361 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
4365 } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
4373 if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
4378 "Scalar BFE is only implemented for constant width and offset");
4382 uint32_t BitWidth = (Imm & 0x7f0000) >> 16;
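// S_BFE encodes the field width in bits [22:16] of its immediate (hence the
// 0x7f0000 mask); the field offset lives in the low bits.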
4389 unsigned NewDstReg = AMDGPU::NoRegister;
4409 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
4419 Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
4431 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
4438 if (ST.hasAddNoCarry()) {
4450 assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
4452 unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
4453 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
4463 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4482 unsigned SubOp = ST.hasAddNoCarry() ?
4483 AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32;
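// abs(x) is lowered on the VALU as max(x, 0 - x); the subtract opcode depends
// on whether the target has carry-less VALU adds.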
4485 BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
4489 BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
4494 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4508 if (ST.hasDLInsts()) {
4513 BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
4518 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4524 bool Src0IsSGPR = Src0.isReg() &&
4526 bool Src1IsSGPR = Src1.isReg() &&
4537 Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
4539 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
4542 } else if (Src1IsSGPR) {
4543 Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
4545 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
4549 Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
4552 Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
4561 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4565 void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
4567 unsigned Opcode) const {
4591 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4594 void SIInstrInfo::splitScalarBinOpN2(SetVectorType &Worklist,
4596 unsigned Opcode) const {
4620 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
4623 void SIInstrInfo::splitScalar64BitUnaryOp(
4625 unsigned Opcode) const {
4638 &AMDGPU::SGPR_32RegClass;
4643 AMDGPU::sub0, Src0SubRC);
4653 AMDGPU::sub1, Src0SubRC);
4659 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4667 Worklist.insert(&LoHalf);
4668 Worklist.insert(&HiHalf);
4674 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4677 void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
4680 bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
4704 AMDGPU::sub0, Src0SubRC);
4706 AMDGPU::sub0, Src1SubRC);
4710 AMDGPU::sub1, Src0SubRC);
4712 AMDGPU::sub1, Src1SubRC);
4714 unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
4716 BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
4721 unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
4723 BuildMI(MBB, MII, DL, get(HiOpc), DestSub1)
4729 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4743 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4746 void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
4762 &AMDGPU::SGPR_32RegClass;
4767 &AMDGPU::SGPR_32RegClass;
4772 AMDGPU::sub0, Src0SubRC);
4774 AMDGPU::sub0, Src1SubRC);
4776 AMDGPU::sub1, Src0SubRC);
4778 AMDGPU::sub1, Src1SubRC);
4795 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
4803 Worklist.insert(&LoHalf);
4804 Worklist.insert(&HiHalf);
4807 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
4810 void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
4830 if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
4838 BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
4852 void SIInstrInfo::splitScalar64BitBCNT(
4863 const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
4866 &AMDGPU::SGPR_32RegClass;
4874 AMDGPU::sub0, SrcSubRC);
4876 AMDGPU::sub1, SrcSubRC);
4886 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4889 void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
4899 uint32_t BitWidth = (Imm & 0x7f0000) >> 16;
4905 Offset == 0 && "Not implemented");
4907 if (BitWidth < 32) {
4912 BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo)
4917 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
4921 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4928 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4936 BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
4940 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
4942 .addImm(AMDGPU::sub0)
4947 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
4950 void SIInstrInfo::addUsersToMoveToVALUWorklist(
4962 } while (I != E && I->getParent() == &UseMI);
4979 case AMDGPU::S_PACK_LL_B32_B16: {
4985 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
4988 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
4992 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
4998 case AMDGPU::S_PACK_LH_B32_B16: {
5000 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
5002 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
5008 case AMDGPU::S_PACK_HH_B32_B16: {
5011 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
5014 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
5016 BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
5028 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
5031 void SIInstrInfo::addSCCDefUsersToVALUWorklist(
5039 if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
5042 if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
5057 case AMDGPU::REG_SEQUENCE:
5058 case AMDGPU::INSERT_SUBREG:
5061 if (RI.hasVGPRs(NewDstRC))
5064 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
5075 int OpIndices[3]) const {
5088 if (SGPRReg != AMDGPU::NoRegister)
5091 unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
5094 for (unsigned i = 0; i < 3; ++i) {
5095 int Idx = OpIndices[i];
5106 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
5113 if (RI.isSGPRClass(RegRC))
5130 if (UsedSGPRs[0] != AMDGPU::NoRegister) {
5131 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
5132 SGPRReg = UsedSGPRs[0];
5135 if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
5136 if (UsedSGPRs[1] == UsedSGPRs[2])
5137 SGPRReg = UsedSGPRs[1];
5144 unsigned OperandName) const {
5154 if (ST.isAmdHsaOS()) {
5157 RsrcDataFormat |= (1ULL << 56);
5162 RsrcDataFormat |= (2ULL << 59);
5165 return RsrcDataFormat;
5175 uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
5205 if (!Addr || !Addr->isFI())
5206 return AMDGPU::NoRegister;
5226 return AMDGPU::NoRegister;
5234 return AMDGPU::NoRegister;
5240 return AMDGPU::NoRegister;
5248 return AMDGPU::NoRegister;
5255 while (++I != E && I->isInsideBundle()) {
5256 assert(!I->isBundle() && "No nested bundle!");
5266 unsigned DescSize = Desc.getSize();
5281 return DescSize + 4;
5288 return DescSize + 4;
5295 return DescSize + 4;
5301 case TargetOpcode::IMPLICIT_DEF:
5303 case TargetOpcode::DBG_VALUE:
5306 case TargetOpcode::BUNDLE:
5333 return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
5345 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
5348 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
5349     .add(Branch->getOperand(0))
5350     .add(Branch->getOperand(1));
5352 BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
5371 if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
5376 BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
5380 if (*PI == LoopEnd) {
5381 HeaderPHIBuilder.addReg(BackEdgeReg);
5387 HeaderPHIBuilder.addReg(ZeroReg);
5389 HeaderPHIBuilder.addMBB(*PI);
5393 get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
5395     .add(Branch->getOperand(0));
5397 BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
5403 LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
5404 LoopEnd->insert(LoopEnd->end(), SILOOP);
5410 static const std::pair<int, const char *> TargetIndices[] = {
5434 std::pair<unsigned, unsigned>
5436 return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
5441 static const std::pair<unsigned, const char *> TargetFlags[] = {
5461 unsigned DestReg) const {
5462 if (ST.hasAddNoCarry())
5463 return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
5469 return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
5475 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
5476 case AMDGPU::SI_KILL_I1_TERMINATOR:
5485 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
5486 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
5487 case AMDGPU::SI_KILL_I1_PSEUDO:
5488 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
5504 return RCID == AMDGPU::SReg_128RegClassID;
5555 if (MCOp == (uint16_t)-1)
5587 case AMDGPU::REG_SEQUENCE:
5591 case AMDGPU::INSERT_SUBREG:
5614 while (auto *MI = DefInst) {
5618 case AMDGPU::V_MOV_B32_e32: {
unsigned getTargetFlags() const
unsigned getFrameOffsetReg() const
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description.
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
unsigned getVALUOp(const MachineInstr &MI) const
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
EVT getValueType() const
Return the ValueType of the referenced return value.
static bool isSGPRSpill(const MachineInstr &MI)
Interface definition for SIRegisterInfo.
bool hasRegisterImplicitUseOperand(unsigned Reg) const
Returns true if the MachineInstr has an implicit-use operand of exactly the given register (not consi...
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const override
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const
bool contains(unsigned Reg) const
Return true if the specified register is included in this register class.
Diagnostic information for unsupported feature in backend.
AMDGPU specific subclass of TargetSubtarget.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
instr_iterator instr_end()
bool isVGPRCopy(const MachineInstr &MI) const
MachineBasicBlock * getMBB() const
bool hasScalarStores() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
LLVM_NODISCARD T pop_back_val()
This class represents lattice values for constants.
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
static bool sopkIsZext(const MachineInstr &MI)
uint64_t getDefaultRsrcDataFormat() const
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
unsigned getMaxFlatWorkGroupSize() const
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static bool isStride64(unsigned Opc)
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, unsigned Offset, unsigned Size) const
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
void setIsUndef(bool Val=true)
unsigned insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
unsigned getSubReg() const
static bool isSOPK(const MachineInstr &MI)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
bool isRegSequence() const
Address space for private memory.
uint64_t getSize() const
Return the size in bytes of the memory reference.
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const
Get required immediate operand.
MachineBasicBlock reference.
constexpr bool isInt< 16 >(int64_t x)
unsigned const TargetRegisterInfo * TRI
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc)
static SDValue findChainOperand(SDNode *Load)
bool isInlineConstant(const APInt &Imm) const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
static bool isSMRD(const MachineInstr &MI)
void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
void legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
unsigned getBitWidth() const
Return the number of bits in the APInt.
return AArch64::GPR64RegClass contains(Reg)
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
iterator_range< succ_iterator > successors()
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static unsigned getAddrSpace(StringRef R)
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi)
static bool isFixedSize(const MachineInstr &MI)
MachineFunction & MF
Machine function.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
static bool isDS(const MachineInstr &MI)
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj)
Make a register of the specific register class available from the current position backwards to the place before To.
A description of a memory reference used in the backend.
static use_iterator use_end()
Address space for constant memory (VTX2)
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
static bool isFLAT(const MachineInstr &MI)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
static ManagedStatic< DebugCounter > DC
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
uint64_t getScratchRsrcWords23() const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
bool isAlwaysGDS(uint16_t Opcode) const
const TargetRegisterClass * getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
Compute the static register class constraint for operand OpIdx.
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Returns the total number of operands.
static bool isGather4(const MachineInstr &MI)
bool isEntryFunction() const
static bool isMIMG(const MachineInstr &MI)
A Use represents the edge between a Value definition and its users.
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST)
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void setImplicit(bool Val=true)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool opCanUseInlineConstant(unsigned OpType) const
void setHasSpilledVGPRs(bool Spill=true)
void setTIDReg(unsigned Reg)
Name of external global symbol.
bool hasSDWAOutModsVOPC() const
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
void insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Count) const
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool opCanUseLiteralConstant(unsigned OpType) const
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified pseudo or native opcode.
const char * getSymbolName() const
static bool isVALU(const MachineInstr &MI)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
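A hedged sketch of allocating a memory operand for a 4-byte stack-slot store and attaching it to an instruction under construction; MF, FI and MIB are assumed to already exist inside a backend pass:

  // Describe a 4-byte, 4-aligned store to frame index FI.
  llvm::MachineMemOperand *MMO = MF.getMachineMemOperand(
      llvm::MachinePointerInfo::getFixedStack(MF, FI),
      llvm::MachineMemOperand::MOStore, /*s=*/4, /*base_alignment=*/4);
  MIB.addMemOperand(MMO);  // attach it to the MachineInstrBuilder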
unsigned getID() const
Return the register class ID number.
bool isSGPRClass(const TargetRegisterClass *RC) const
This file implements a class to represent arbitrary precision integral constant values and operations on them.
INLINEASM - Represents an inline asm block.
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
uint8_t OperandType
Information about the type of the operand.
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
int64_t getSExtValue() const
Get sign extended value.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool insert(const value_type &X)
Insert a new element into the SetVector.
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isMUBUF(const MachineInstr &MI)
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the MachineOperand.
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
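A small sketch of the named-operand lookup idiom; the vaddr operand name is only an illustration:

  // True when this opcode declares a "vaddr" operand.
  static bool hasVAddr(const llvm::MachineInstr &MI) {
    return llvm::AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            llvm::AMDGPU::OpName::vaddr) != -1;
  }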
static bool shouldReadExec(const MachineInstr &MI)
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
const uint64_t RSRC_DATA_FORMAT
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
Itinerary data supplied by a subtarget to be used by a target.
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
static unsigned findImplicitSGPRRead(const MachineInstr &MI)
bool isBasicBlockPrologue(const MachineInstr &MI) const override
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
unsigned getUndefRegState(bool B)
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class.
Analysis containing CSE Info
BasicBlockListType::iterator iterator
unsigned getKillRegState(bool B)
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
const Value * getValue() const
Return the base address of the memory access.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).
TargetInstrInfo::RegSubRegPair RegSubRegPair
static int64_t getFoldableImm(const MachineOperand *MO)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have this operand.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
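A minimal sketch of the builder pattern, assuming MBB, I, DL, TII and DstReg are in scope inside an AMDGPU machine-function pass:

  // Emit "V_MOV_B32_e32 DstReg, 42" immediately before the iterator I.
  llvm::BuildMI(MBB, I, DL, TII->get(llvm::AMDGPU::V_MOV_B32_e32), DstReg)
      .addImm(42);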
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Address of a global value.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
initializer< Ty > init(const Ty &Val)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
This file declares the machine register scavenger class.
const TargetRegisterInfo * getTargetRegisterInfo() const
unsigned const MachineRegisterInfo * MRI
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
bool isFoldableCopy(const MachineInstr &MI) const
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
HazardRecognizer - This determines whether or not an instruction can be issued this cycle...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Address space for flat memory.
MachineInstrBuilder & UseMI
size_t size() const
size - Get the array size.
bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const override
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
constexpr char WavefrontSize[]
Key for Kernel::CodeProps::Metadata::mWavefrontSize.
const SDValue & getOperand(unsigned Num) const
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool has16BitInsts() const
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool hasVGPRs(const TargetRegisterClass *RC) const
static unsigned getNumOperandsNoGlue(SDNode *Node)
static void removeModOperands(MachineInstr &MI)
void insertReturn(MachineBasicBlock &MBB) const
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY int commuteOpcode(unsigned Opc) const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
unsigned insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
void setImm(int64_t immVal)
Generation getGeneration() const
self_iterator getIterator()
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, int64_t Value) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
unsigned insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS=nullptr) const override
const TargetRegisterClass * getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const
std::vector< MachineBasicBlock * >::iterator pred_iterator
const MachineInstrBuilder & addFrameIndex(int Idx) const
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
static bool isVOP2(const MachineInstr &MI)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const
void setHasSpilledSGPRs(bool Spill=true)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
const MachineBasicBlock & front() const
pred_iterator pred_begin()
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, unsigned Src0OpName, MachineOperand &Src1, unsigned Src1OpName) const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value, returning the original object being addressed.
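A short sketch, assuming llvm/Analysis/ValueTracking.h is available:

  #include "llvm/Analysis/ValueTracking.h"

  // Walk through GEPs, bitcasts and similar wrappers to the base object
  // (the default MaxLookup depth is used).
  static llvm::Value *baseObject(llvm::Value *Ptr, const llvm::DataLayout &DL) {
    return llvm::GetUnderlyingObject(Ptr, DL);
  }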
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
Operands with register or inline constant.
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and exception handling tables.
void setIsKill(bool Val=true)
const uint64_t RSRC_TID_ENABLE
bool hasUnpackedD16VMem() const
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const final
The memory access writes data.
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
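For example (a sketch; First and Last are any matching pair of instruction iterators):

  // Visit the half-open instruction range [First, Last).
  for (llvm::MachineInstr &MI : llvm::make_range(First, Last))
    (void)MI;  // ... inspect or rewrite MI here ...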
unsigned getNumWaitStates(const MachineInstr &MI) const
Return the number of wait states that result from executing this instruction.
static bool isUndef(ArrayRef< int > Mask)
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
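A hedged usage sketch; treating -1 as "no MC counterpart" is an assumption based on how callers typically test the result:

  int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
  if (MCOp == -1)
    return;  // no real encoding for this pseudo on the current subtarget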
A SetVector that performs no allocations if smaller than a certain size.
unsigned findTiedOperandIdx(unsigned OpIdx) const
Given the index of a tied register operand, find the operand it is tied to.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
unsigned countPopulation(T Value)
Count the number of set bits in a value.
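For example, assuming llvm/Support/MathExtras.h:

  unsigned SetBits = llvm::countPopulation(0xF0u);  // 4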
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
unsigned getLDSSize() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
static bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB)
static bool isSALU(const MachineInstr &MI)
MachineOperand class - Representation of each machine instruction operand.
unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const
Returns a register that is not used at any point in the function.
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register...
bool isLiteralConstantLike(const MachineOperand &MO, const MCOperandInfo &OpInfo) const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
A pair composed of a register and a sub-register index.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
MachineInstrBuilder & DefMI
bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const override
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg) const
Return a partially built integer add instruction without carry.
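A hedged sketch of finishing the partially built add; MBB, I, DL and the registers are assumed to be in scope, and the exact opcode chosen depends on the subtarget:

  // DestReg = Src0 + Imm, ignoring any carry-out the selected opcode defines.
  TII->getAddNoCarry(MBB, I, DL, DestReg)
      .addReg(Src0)
      .addImm(Imm);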
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const uint64_t RSRC_INDEX_STRIDE_SHIFT
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is otherwise unavailable.
Represents one node in the SelectionDAG.
bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2, unsigned NumLoads) const override
bool isVariadic(QueryType Type=IgnoreBundle) const
Return true if this instruction can have a variable number of operands.
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
unsigned pred_size() const
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, unsigned Reg) const
MCSymbol reference (for debug/eh info)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
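For example:

  unsigned Shift = llvm::Log2_32(64);  // 6 (per the note above, an input of 0 yields -1)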
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Class for arbitrary precision integers.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
bool isShader(CallingConv::ID cc)
Interface for the AMDGPU Implementation of the Intrinsic Info class.
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, const MachineOperand &BaseOp1, const MachineInstr &MI2, const MachineOperand &BaseOp2)
iterator_range< mop_iterator > implicit_operands()
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
static bool isVOP3(const MachineInstr &MI)
Value * Arg
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Provides AMDGPU specific target descriptions.
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
Representation of each machine instruction.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool hasInv2PiInlineImm() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
Interface definition for SIInstrInfo.
OperandType
Operands are tagged with one of the values of this enum.
unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the begin of basic block MBB.
static bool isMTBUF(const MachineInstr &MI)
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Replace this instruction's opcode with the equivalent VALU opcode.
bool hasOneUse(unsigned RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
static bool isVOPC(const MachineInstr &MI)
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
This file provides utility analysis objects describing memory locations.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
static MachineOperand CreateImm(int64_t Val)
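A small sketch of appending a freshly created immediate operand; MF and MI are assumed to be in scope:

  // Append a literal 0 as a new explicit operand of MI.
  MI.addOperand(MF, llvm::MachineOperand::CreateImm(0));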
bool empty() const
Determine if the SetVector is empty or not.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
void setSubReg(unsigned subReg)
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Whether we must prevent this instruction from executing with EXEC = 0.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
unsigned scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj)
Make a register of the specific register class available and do the appropriate bookkeeping.
bool isHighLatencyInstruction(const MachineInstr &MI) const
bool hasCalculatedTID() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
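A minimal sketch of the checked-cast idiom on SelectionDAG nodes (assuming llvm/CodeGen/SelectionDAGNodes.h):

  // Returns true and sets Out when Op is a ConstantSDNode; false otherwise.
  static bool getConstant(llvm::SDValue Op, int64_t &Out) {
    if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(Op.getNode())) {
      Out = C->getSExtValue();
      return true;
    }
    return false;
  }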
bool isLowLatencyInstruction(const MachineInstr &MI) const
static void emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, const DebugLoc &DL, MachineOperand &Rsrc)
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Abstract Stack Frame Index.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug instruction using the specified register.
constexpr bool isUInt< 16 >(uint64_t x)
void setSimpleHint(unsigned VReg, unsigned PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual register.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand, due to either two-address elimination or inline assembly constraints.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
MachineInstr * removeFromParent()
Unlink 'this' from the containing basic block, and return it without deleting it. ...
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's immediate dominator changes.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Operands with register or 32-bit immediate.
SIInstrInfo(const GCNSubtarget &ST)
bool hasSDWAScalar() const
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isNonUniformBranchInstr(MachineInstr &Instr) const
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool memoperands_empty() const
Return true if we don't have any memory operands which describe the memory access done by this instruction.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
void setRegUsed(unsigned Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI) const
Copy a value from a VGPR (SrcReg) to an SGPR.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
static bool isSDWA(const MachineInstr &MI)
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
MachineInstr * convertToThreeAddress(MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const override
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
static bool isVGPRSpill(const MachineInstr &MI)
const TargetRegisterClass * getPhysRegClass(unsigned Reg) const
Return the 'base' register class for this register.
Operand with 32-bit immediate that uses the constant bus.
const MCOperandInfo * OpInfo
unsigned getHWRegIndex(unsigned Reg) const
void ChangeToFrameIndex(int Idx)
Replace this operand with a frame index.
unsigned getOpcode() const
Return the opcode number for this descriptor.
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
unsigned getTIDReg() const
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc, MachineDominatorTree *MDT)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool isKillTerminator(unsigned Opcode)
bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const final
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
uint64_t getZExtValue() const
This holds information about one operand of a machine instruction, indicating the register class for register operands, etc.
const MachineOperand & getOperand(unsigned i) const
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register within the MachineFunction.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting from FirstMI to LastMI (exclusive).
static bool isVOP1(const MachineInstr &MI)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isBufferSMRD(const MachineInstr &MI) const
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
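A hedged sketch of creating a fresh 32-bit VGPR and copying into it; MRI, MBB, I, DL, TII and SrcReg are assumed to be in scope:

  unsigned NewVR = MRI.createVirtualRegister(&llvm::AMDGPU::VGPR_32RegClass);
  llvm::BuildMI(MBB, I, DL, TII->get(llvm::TargetOpcode::COPY), NewVR)
      .addReg(SrcReg);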
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
bool empty() const
empty - Check if the array is empty.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
Helper class for constructing bundles of MachineInstrs.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot be known from the opcode.