32 #define DEBUG_TYPE "si-optimize-exec-masking-pre-ra" 48 return "SI optimize exec mask operations pre-RA";
61 "SI optimize exec mask operations pre-RA",
false,
false)
66 char SIOptimizeExecMaskingPreRA::
ID = 0;
71 return new SIOptimizeExecMaskingPreRA();
75 return MI.
getOpcode() == AMDGPU::S_OR_B64 &&
86 if (
Op->isReg() &&
Op->getReg() != AMDGPU::EXEC)
89 if (
Op->isReg() &&
Op->getReg() != AMDGPU::EXEC)
91 return AMDGPU::NoRegister;
98 if (SavedExec == AMDGPU::NoRegister)
128 const unsigned AndOpc = AMDGPU::S_AND_B64;
129 const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
130 const unsigned CondReg = AMDGPU::VCC;
131 const unsigned ExecReg = AMDGPU::EXEC;
134 unsigned Opc =
MI.getOpcode();
135 return Opc == AMDGPU::S_CBRANCH_VCCZ ||
136 Opc == AMDGPU::S_CBRANCH_VCCNZ; });
138 return AMDGPU::NoRegister;
142 if (!And || And->getOpcode() != AndOpc ||
143 !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
144 return AMDGPU::NoRegister;
147 unsigned CmpReg = AndCC->
getReg();
149 if (CmpReg == ExecReg) {
150 AndCC = &And->getOperand(2);
153 }
else if (And->getOperand(2).getReg() != ExecReg) {
154 return AMDGPU::NoRegister;
158 if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
159 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
160 Cmp->getParent() != And->getParent())
161 return AMDGPU::NoRegister;
168 return AMDGPU::NoRegister;
170 unsigned SelReg = Op1->
getReg();
172 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
173 return AMDGPU::NoRegister;
180 return AMDGPU::NoRegister;
183 << *Cmp <<
'\t' << *And);
185 unsigned CCReg = CC->
getReg();
188 TII->get(Andn2Opc), And->getOperand(0).getReg())
199 MRI.use_nodbg_empty(CmpReg)) ||
200 (CmpReg == CondReg &&
203 return MI.readsRegister(CondReg, TRI); }))) {
207 Cmp->eraseFromParent();
211 MRI.use_nodbg_empty(SelReg)) {
215 Sel->eraseFromParent();
222 bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(
MachineFunction &MF) {
232 bool Changed =
false;
238 RecalcRegs.insert(AMDGPU::VCC_LO);
239 RecalcRegs.insert(AMDGPU::VCC_HI);
240 RecalcRegs.insert(AMDGPU::SCC);
245 if (MBB.succ_empty()) {
252 if (Term.
getOpcode() != AMDGPU::S_ENDPGM ||
258 while (!Blocks.empty()) {
260 auto I = CurBB->rbegin(),
E = CurBB->rend();
262 if (
I->isUnconditionalBranch() ||
I->getOpcode() == AMDGPU::S_ENDPGM)
264 else if (
I->isBranch())
269 if (
I->isDebugInstr()) {
274 if (
I->mayStore() ||
I->isBarrier() ||
I->isCall() ||
275 I->hasUnmodeledSideEffects() ||
I->hasOrderedMemoryRef())
279 <<
"Removing no effect instruction: " << *
I <<
'\n');
281 for (
auto &
Op :
I->operands()) {
283 RecalcRegs.insert(
Op.getReg());
286 auto Next = std::next(
I);
288 I->eraseFromParent();
298 for (
auto *Pred : CurBB->predecessors()) {
299 if (Pred->succ_size() == 1)
300 Blocks.push_back(Pred);
307 auto Lead = MBB.begin(),
E = MBB.end();
308 if (MBB.succ_size() != 1 || Lead ==
E || !
isEndCF(*Lead, TRI))
312 if (!MBB.isLayoutSuccessor(Succ))
315 auto I = std::next(Lead);
318 if (!TII->
isSALU(*
I) ||
I->readsRegister(AMDGPU::EXEC, TRI))
324 const auto NextLead = Succ->
begin();
325 if (NextLead == Succ->
end() || !
isEndCF(*NextLead, TRI) ||
329 LLVM_DEBUG(
dbgs() <<
"Redundant EXEC = S_OR_B64 found: " << *Lead <<
'\n');
333 for (
auto &
Op : Lead->operands()) {
335 RecalcRegs.insert(
Op.getReg());
339 Lead->eraseFromParent();
349 if (!SaveExec || !SaveExec->isFullCopy())
352 unsigned SavedExec = SaveExec->getOperand(0).getReg();
353 bool SafeToReplace =
true;
355 if (U.getParent() != SaveExec->getParent()) {
356 SafeToReplace =
false;
360 LLVM_DEBUG(
dbgs() <<
"Redundant EXEC COPY: " << *SaveExec <<
'\n');
365 SaveExec->eraseFromParent();
372 for (
auto Reg : RecalcRegs) {
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully defines or partially defines) the specified register...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
This class represents lattice values for constants.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned getSubReg() const
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static bool isFullExecCopy(const MachineInstr &MI)
SI optimize exec mask operations pre RA
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
MachineInstr * findReachingDef(unsigned Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
iterator_range< iterator > terminators()
char & SIOptimizeExecMaskingPreRAID
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
static MachineInstr * getOrExecSource(const MachineInstr &MI, const SIInstrInfo &TII, const MachineRegisterInfo &MRI)
SI optimize exec mask operations
static unsigned getOrNonExecReg(const MachineInstr &MI, const SIInstrInfo &TII)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIOptimizeExecMaskingPreRAPass()
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void removeInterval(unsigned Reg)
Interval removal.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(const ValueT &V)
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
self_iterator getIterator()
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
succ_iterator succ_begin()
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, const GCNSubtarget &ST, MachineRegisterInfo &MRI, LiveIntervals *LIS)
void removeRegUnit(unsigned Unit)
Remove computed live range for register unit Unit.
static bool isSALU(const MachineInstr &MI)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
LLVM_NODISCARD T pop_back_val()
static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI)
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
void setPreservesAll()
Set by analyses that do not transform their input at all.
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool reg_empty(unsigned RegNo) const
reg_empty - Return true if there are no instructions using or defining the specified register (it may...
StringRef - Represent a constant reference to a string, i.e.
const MachineOperand & getOperand(unsigned i) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(unsigned Reg) const
LiveInterval & createAndComputeVirtRegInterval(unsigned Reg)
const SIRegisterInfo * getRegisterInfo() const override