26 #define DEBUG_TYPE "si-fold-operands" 31 struct FoldCandidate {
39 unsigned char UseOpNo;
44 bool Commuted_ =
false,
46 UseMI(MI), OpToFold(
nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
49 if (FoldOp->
isImm()) {
50 ImmToFold = FoldOp->
getImm();
51 }
else if (FoldOp->
isFI()) {
52 FrameIndexToFold = FoldOp->
getIndex();
71 bool isCommuted()
const {
75 bool needsShrink()
const {
76 return ShrinkOpcode != -1;
79 int getShrinkOpcode()
const {
103 std::pair<const MachineOperand *, int> isOMod(
const MachineInstr &MI)
const;
113 StringRef getPassName()
const override {
return "SI Fold Operands"; }
124 "SI Fold Operands",
false,
false)
141 case AMDGPU::V_MAC_F32_e64:
142 case AMDGPU::V_MAC_F16_e64:
143 case AMDGPU::V_FMAC_F32_e64: {
147 if (static_cast<int>(OpNo) == Src2Idx) {
148 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
149 bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
151 unsigned Opc = IsFMA ?
152 AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
164 return new SIFoldOperands();
182 ModIdx = AMDGPU::OpName::src0_modifiers;
184 ModIdx = AMDGPU::OpName::src1_modifiers;
186 ModIdx = AMDGPU::OpName::src2_modifiers;
190 unsigned Val = Mod.
getImm();
195 if (!(Fold.ImmToFold & 0xffff)) {
206 if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
213 int Op32 = Fold.getShrinkOpcode();
225 if (HaveNonDbgCarryUse) {
239 MI->
setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
241 if (Fold.isCommuted())
242 TII.commuteInstruction(*Inst32,
false);
246 assert(!Fold.needsShrink() &&
"not handled");
274 for (
auto Candidate : FoldList) {
275 if (Candidate.UseMI == MI)
289 if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
290 Opc == AMDGPU::V_FMAC_F32_e64) &&
292 bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
293 bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
294 unsigned NewOpc = IsFMA ?
295 AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
309 if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->
isImm()) {
310 MI->
setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
311 FoldList.
push_back(FoldCandidate(MI, OpNo, OpToFold));
321 unsigned CommuteOpNo = OpNo;
330 if (CommuteIdx0 == OpNo)
331 CommuteOpNo = CommuteIdx1;
332 else if (CommuteIdx1 == OpNo)
333 CommuteOpNo = CommuteIdx0;
346 !TII->commuteInstruction(*MI,
false, CommuteIdx0, CommuteIdx1))
350 if ((Opc == AMDGPU::V_ADD_I32_e64 ||
351 Opc == AMDGPU::V_SUB_I32_e64 ||
352 Opc == AMDGPU::V_SUBREV_I32_e64) &&
353 (OpToFold->
isImm() || OpToFold->
isFI())) {
358 unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
360 if (!OtherOp.
isReg() ||
367 unsigned MaybeCommutedOpc = MI->
getOpcode();
370 FoldList.
push_back(FoldCandidate(MI, CommuteOpNo, OpToFold,
true,
375 TII->commuteInstruction(*MI,
false, CommuteIdx0, CommuteIdx1);
379 FoldList.
push_back(FoldCandidate(MI, CommuteOpNo, OpToFold,
true));
383 FoldList.
push_back(FoldCandidate(MI, OpNo, OpToFold));
396 void SIFoldOperands::foldOperand(
433 RSUse =
MRI->use_begin(RegSeqDstReg), RSE =
MRI->use_end();
434 RSUse != RSE; ++RSUse) {
437 if (RSUse->getSubReg() != RegSeqDstSubReg)
440 foldOperand(OpToFold, RSUseMI, RSUse.
getOperandNo(), FoldList,
448 bool FoldingImm = OpToFold.
isImm();
450 if (FoldingImm && UseMI->
isCopy()) {
454 MRI->getRegClass(DestReg) :
455 TRI->getPhysRegClass(DestReg);
461 if (
TRI->isSGPRClass(SrcRC) &&
TRI->hasVGPRs(DestRC)) {
465 Use =
MRI->use_begin(DestReg),
E =
MRI->use_end();
467 NextUse = std::next(
Use);
468 FoldCandidate
FC = FoldCandidate(
Use->getParent(),
472 for (
auto &
F : CopyUses) {
473 foldOperand(*
F.OpToFold,
F.UseMI,
F.UseOpNo,
474 FoldList, CopiesToReplace);
482 unsigned MovOp =
TII->getMovOpcode(DestRC);
483 if (MovOp == AMDGPU::COPY)
533 MRI->getRegClass(UseReg) :
534 TRI->getPhysRegClass(UseReg);
544 Imm = Imm.getHiBits(32);
560 case AMDGPU::V_AND_B32_e64:
561 case AMDGPU::V_AND_B32_e32:
562 case AMDGPU::S_AND_B32:
565 case AMDGPU::V_OR_B32_e64:
566 case AMDGPU::V_OR_B32_e32:
567 case AMDGPU::S_OR_B32:
570 case AMDGPU::V_XOR_B32_e64:
571 case AMDGPU::V_XOR_B32_e32:
572 case AMDGPU::S_XOR_B32:
575 case AMDGPU::V_LSHL_B32_e64:
576 case AMDGPU::V_LSHL_B32_e32:
577 case AMDGPU::S_LSHL_B32:
579 Result = LHS << (RHS & 31);
581 case AMDGPU::V_LSHLREV_B32_e64:
582 case AMDGPU::V_LSHLREV_B32_e32:
583 Result = RHS << (LHS & 31);
585 case AMDGPU::V_LSHR_B32_e64:
586 case AMDGPU::V_LSHR_B32_e32:
587 case AMDGPU::S_LSHR_B32:
588 Result = LHS >> (RHS & 31);
590 case AMDGPU::V_LSHRREV_B32_e64:
591 case AMDGPU::V_LSHRREV_B32_e32:
592 Result = RHS >> (LHS & 31);
594 case AMDGPU::V_ASHR_I32_e64:
595 case AMDGPU::V_ASHR_I32_e32:
596 case AMDGPU::S_ASHR_I32:
597 Result =
static_cast<int32_t
>(LHS) >> (RHS & 31);
599 case AMDGPU::V_ASHRREV_I32_e64:
600 case AMDGPU::V_ASHRREV_I32_e32:
601 Result =
static_cast<int32_t
>(RHS) >> (LHS & 31);
609 return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
634 if (Op.
getSubReg() != AMDGPU::NoSubRegister ||
657 if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
658 Opc == AMDGPU::S_NOT_B32) {
672 if (!Src0->
isImm() && !Src1->isImm())
675 if (MI->
getOpcode() == AMDGPU::V_LSHL_OR_B32) {
683 MI->
setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
691 if (Src0->
isImm() && Src1->isImm()) {
710 if (Src0->
isImm() && !Src1->isImm()) {
715 int32_t Src1Val =
static_cast<int32_t
>(Src1->getImm());
716 if (Opc == AMDGPU::V_OR_B32_e64 ||
717 Opc == AMDGPU::V_OR_B32_e32 ||
718 Opc == AMDGPU::S_OR_B32) {
723 }
else if (Src1Val == -1) {
733 if (MI->
getOpcode() == AMDGPU::V_AND_B32_e64 ||
734 MI->
getOpcode() == AMDGPU::V_AND_B32_e32 ||
740 }
else if (Src1Val == -1) {
751 if (MI->
getOpcode() == AMDGPU::V_XOR_B32_e64 ||
752 MI->
getOpcode() == AMDGPU::V_XOR_B32_e32 ||
770 if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
771 Opc == AMDGPU::V_CNDMASK_B32_e64 ||
772 Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
800 bool FoldingImm = OpToFold.
isImm() || OpToFold.
isFI();
802 unsigned NumLiteralUses = 0;
804 int NonInlineUseOpNo = -1;
810 NextUse = std::next(
Use);
851 foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
853 if (++NumLiteralUses == 1) {
854 NonInlineUse = &*
Use;
855 NonInlineUseOpNo = OpNo;
860 if (NumLiteralUses == 1) {
862 foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace);
872 for (
auto U : UsesToProcess) {
876 FoldList, CopiesToReplace);
883 Copy->addImplicitDefUseOperands(*MF);
885 for (FoldCandidate &Fold : FoldList) {
889 assert(Fold.OpToFold && Fold.OpToFold->isReg());
893 MRI->clearKillFlags(Fold.OpToFold->getReg());
896 << static_cast<int>(Fold.UseOpNo) <<
" of " 897 << *Fold.UseMI <<
'\n');
899 }
else if (Fold.isCommuted()) {
901 TII->commuteInstruction(*Fold.UseMI,
false);
911 case AMDGPU::V_MAX_F32_e64:
912 case AMDGPU::V_MAX_F16_e64:
913 case AMDGPU::V_MAX_F64:
914 case AMDGPU::V_PK_MAX_F16: {
915 if (!
TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
924 Src0->
getSubReg() != AMDGPU::NoSubRegister)
928 if (
TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
932 =
TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
934 =
TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
939 if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
970 if (
TII->getClampMask(*Def) !=
TII->getClampMask(MI))
977 LLVM_DEBUG(
dbgs() <<
"Folding clamp " << *DefClamp <<
" into " << *Def
989 case AMDGPU::V_MUL_F32_e64: {
990 switch (static_cast<uint32_t>(Val)) {
1001 case AMDGPU::V_MUL_F16_e64: {
1002 switch (static_cast<uint16_t>(Val)) {
1021 std::pair<const MachineOperand *, int>
1025 case AMDGPU::V_MUL_F32_e64:
1026 case AMDGPU::V_MUL_F16_e64: {
1028 if ((Op == AMDGPU::V_MUL_F32_e64 &&
ST->hasFP32Denormals()) ||
1029 (Op == AMDGPU::V_MUL_F16_e64 &&
ST->hasFP16Denormals()))
1036 if (Src0->
isImm()) {
1039 }
else if (Src1->
isImm()) {
1047 TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
1048 TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
1049 TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
1050 TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
1053 return std::make_pair(RegOp, OMod);
1055 case AMDGPU::V_ADD_F32_e64:
1056 case AMDGPU::V_ADD_F16_e64: {
1058 if ((Op == AMDGPU::V_ADD_F32_e64 &&
ST->hasFP32Denormals()) ||
1059 (Op == AMDGPU::V_ADD_F16_e64 &&
ST->hasFP16Denormals()))
1068 !
TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
1069 !
TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
1070 !
TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
1071 !
TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
1085 std::tie(RegOp, OMod) = isOMod(MI);
1087 RegOp->
getSubReg() != AMDGPU::NoSubRegister ||
1098 if (
TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
1101 LLVM_DEBUG(
dbgs() <<
"Folding omod " << MI <<
" into " << *Def <<
'\n');
1115 TII =
ST->getInstrInfo();
1123 bool IsIEEEMode =
ST->enableIEEEBit(MF);
1128 for (I = MBB->begin(); I != MBB->end(); I = Next) {
1129 Next = std::next(I);
1134 if (!TII->isFoldableCopy(MI)) {
1144 bool FoldingImm = OpToFold.
isImm() || OpToFold.
isFI();
1147 if (!FoldingImm && !OpToFold.
isReg())
1150 if (OpToFold.
isReg() &&
1165 foldInstOperand(MI, OpToFold);
static bool isReg(const MCInst &MI, unsigned OpNo)
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
AMDGPU specific subclass of TargetSubtarget.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned getNumImplicitDefs() const
Return the number of implicit defs this instruction has.
This class represents lattice values for constants.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before...
static bool isUseMIInFoldList(ArrayRef< FoldCandidate > FoldList, const MachineInstr *MI)
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
static bool isUseSafeToFold(const SIInstrInfo *TII, const MachineInstr &MI, const MachineOperand &UseMO)
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
void setIsUndef(bool Val=true)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned getSubReg() const
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
bool isRegSequence() const
unsigned const TargetRegisterInfo * TRI
void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool isInlineConstant(const APInt &Imm) const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
const SIRegisterInfo & getRegisterInfo() const
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
static unsigned getMovOpc(bool IsScalar)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Returns the total number of operands.
static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg)
A Use represents the edge between a Value definition and its users.
static bool tryAddToFoldList(SmallVectorImpl< FoldCandidate > &FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static MachineOperand * getImmOrMaterializedImm(MachineRegisterInfo &MRI, MachineOperand &Op)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getID() const
Return the register class ID number.
static int getOModValue(unsigned Opc, int64_t Val)
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
uint8_t OperandType
Information about the type of the operand.
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
unsigned getOperandNo() const
Return the operand # of this use in its User.
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static const unsigned CommuteAnyOperandIndex
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Register is known to be fully dead.
Represent the analysis usage information of a pass.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setIsKill(bool Val=true)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool hasNoSignedZerosFPMath() const
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
static bool updateOperand(FoldCandidate &Fold, const SIInstrInfo &TII, const TargetRegisterInfo &TRI)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static bool tryFoldInst(const SIInstrInfo *TII, MachineInstr *MI)
static void stripExtraCopyOperands(MachineInstr &MI)
Remove any leftover implicit operands from mutating the instruction.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
The access may modify the value stored in memory.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Class for arbitrary precision integers.
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result, uint32_t LHS, uint32_t RHS)
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initializeSIFoldOperandsPass(PassRegistry &)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
FunctionPass * createSIFoldOperandsPass()
static unsigned UseReg(const MachineOperand &MO)
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
void setSubReg(unsigned subReg)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
static bool isInlineConstantIfFolded(const SIInstrInfo *TII, const MachineInstr &UseMI, unsigned OpNo, const MachineOperand &OpToFold)
Abstract Stack Frame Index.
constexpr bool isUInt< 16 >(uint64_t x)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< df_iterator< T > > depth_first(const T &G)
static use_instr_nodbg_iterator use_instr_nodbg_end()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
static bool isSDWA(const MachineInstr &MI)
const MCOperandInfo * OpInfo
void ChangeToFrameIndex(int Idx)
Replace this operand with a frame index.
StringRef - Represent a constant reference to a string, i.e.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, MachineInstr *MI, MachineOperand *ImmOp)
const MachineOperand & getOperand(unsigned i) const
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const SIRegisterInfo * getRegisterInfo() const override