56 #define DEBUG_TYPE "gcn-dpp-combine" 58 STATISTIC(NumDPPMovsCombined,
"Number of DPP moves combined.");
78 bool BoundCtrlZero)
const;
83 bool BoundCtrlZero)
const;
88 int64_t
Mask = -1)
const;
101 StringRef getPassName()
const override {
return "GCN DPP Combine"; }
113 char GCNDPPCombine::ID = 0;
118 return new GCNDPPCombine();
139 switch(
Def->getOpcode()) {
141 case AMDGPU::IMPLICIT_DEF:
144 case AMDGPU::V_MOV_B32_e32: {
145 auto &Op1 =
Def->getOperand(1);
157 bool BoundCtrlZero)
const {
180 assert(OldIdx == NumOperands);
182 DPPInst.addReg(OldOpndVGPR.
Reg, 0, OldOpndVGPR.
SubReg);
187 AMDGPU::OpName::src0_modifiers)) {
189 AMDGPU::OpName::src0_modifiers));
191 DPPInst.addImm(Mod0->getImm());
196 if (!TII->
isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
205 AMDGPU::OpName::src1_modifiers)) {
207 AMDGPU::OpName::src1_modifiers));
209 DPPInst.addImm(Mod1->getImm());
213 if (!TII->
isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
223 if (!TII->
isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
234 DPPInst.addImm(BoundCtrlZero ? 1 : 0);
238 DPPInst.getInstr()->eraseFromParent();
242 return DPPInst.getInstr();
252 case AMDGPU::V_MAX_U32_e32:
256 case AMDGPU::V_MAX_I32_e32:
260 case AMDGPU::V_MIN_I32_e32:
261 if (OldOpndValue.
getImm() == std::numeric_limits<int32_t>::min())
265 case AMDGPU::V_MUL_I32_I24_e32:
266 case AMDGPU::V_MUL_U32_U24_e32:
267 if (OldOpndValue.
getImm() == 1) {
269 assert(Src1 && Src1->isReg());
292 bool BoundCtrlZero)
const {
294 if (!BoundCtrlZero && OldOpndValue) {
296 OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
297 if (!OldOpndVGPR.Reg) {
298 LLVM_DEBUG(
dbgs() <<
" failed: old immediate cannot be folded\n");
302 return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
307 bool GCNDPPCombine::hasNoImmOrEqual(
MachineInstr &
MI,
unsigned OpndName,
314 return (Imm->getImm() &
Mask) == Value;
317 bool GCNDPPCombine::combineDPPMov(
MachineInstr &MovMI)
const {
319 auto *BCZOpnd = TII->
getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
320 assert(BCZOpnd && BCZOpnd->isImm());
321 bool BoundCtrlZero = 0 != BCZOpnd->getImm();
328 auto *OldOpndValue = getOldOpndValue(*OldOpnd);
329 assert(!OldOpndValue || OldOpndValue->
isImm() || OldOpndValue == OldOpnd);
332 OldOpndVGPR.Reg = AMDGPU::NoRegister;
333 OldOpndValue =
nullptr;
335 if (!OldOpndValue->
isImm()) {
336 LLVM_DEBUG(
dbgs() <<
" failed: old operand isn't an imm or undef\n");
339 if (OldOpndValue->
getImm() == 0) {
340 OldOpndVGPR.Reg = AMDGPU::NoRegister;
341 OldOpndValue =
nullptr;
342 BoundCtrlZero =
true;
352 dbgs() <<
", bound_ctrl=" << BoundCtrlZero <<
'\n');
354 std::vector<MachineInstr*> OrigMIs, DPPMIs;
355 if (!OldOpndVGPR.Reg) {
359 TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
360 DPPMIs.push_back(UndefInst.getInstr());
363 OrigMIs.push_back(&MovMI);
364 bool Rollback =
true;
369 auto &OrigMI = *
Use.getParent();
371 if (TII->
isVOP3(OrigOp)) {
378 if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
379 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
380 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
381 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
382 LLVM_DEBUG(
dbgs() <<
" failed: VOP3 has non-default modifiers\n");
385 }
else if (!TII->
isVOP1(OrigOp) && !TII->
isVOP2(OrigOp)) {
392 if (
auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
393 OldOpndValue, BoundCtrlZero)) {
394 DPPMIs.push_back(DPPInst);
401 BB->insert(OrigMI, NewMI);
402 if (TII->commuteInstruction(*NewMI)) {
404 if (
auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
405 OldOpndValue, BoundCtrlZero)) {
406 DPPMIs.push_back(DPPInst);
411 NewMI->eraseFromParent();
416 OrigMIs.push_back(&OrigMI);
419 for (
auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
431 TII =
ST.getInstrInfo();
435 bool Changed =
false;
436 for (
auto &MBB : MF) {
437 for (
auto I = MBB.rbegin(),
E = MBB.rend();
I !=
E;) {
439 if (MI.
getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
441 ++NumDPPMovsCombined;
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subreg-manipulation pseudos.
bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, const TargetRegisterClass &TRC, MachineRegisterInfo &MRI)
Returns true if a reg:subreg pair P has a TRC class.
static int getDPPOp(unsigned Op)
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
iterator_range< use_nodbg_iterator > use_nodbg_operands(unsigned Reg) const
unsigned getReg() const
getReg - Returns the register number.
STATISTIC(NumFunctions, "Total number of functions")
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
A Use represents the edge between a Value definition and its users.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
LLVM_READONLY int getDPPOp32(uint16_t Opcode)
TargetInstrInfo::RegSubRegPair RegSubRegPair
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
FunctionPass * createGCNDPPCombinePass()
Represent the analysis usage information of a pass.
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
FunctionPass class - This class is used to implement most global optimizations.
static bool isVOP2(const MachineInstr &MI)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has no parent, prev, or next.
MachineOperand class - Representation of each machine instruction operand.
A pair composed of a register and a sub-register index.
void setPreservesCFG()
This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function, (2) modify terminator instructions in any way.
void initializeGCNDPPCombinePass(PassRegistry &)
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isVOP3(const MachineInstr &MI)
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static bool isVOP1(const MachineInstr &MI)
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.