#define DEBUG_TYPE "si-optimize-exec-masking"

// ... (pass class boilerplate elided)
  StringRef getPassName() const override {
    return "SI optimize exec mask operations";
  }
// ...

INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
                      "SI optimize exec mask operations", false, false)
INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
                    "SI optimize exec mask operations", false, false)

char SIOptimizeExecMasking::ID = 0;
/// If \p MI is a copy from exec, return the register copied to.
static unsigned isCopyFromExec(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_term: {
    const MachineOperand &Src = MI.getOperand(1);
    if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
      return MI.getOperand(0).getReg();
  }
  }

  return AMDGPU::NoRegister;
}
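As a concrete illustration (a hand-written MIR fragment, not taken from any
particular test), isCopyFromExec would return $sgpr0_sgpr1 for:

  $sgpr0_sgpr1 = S_MOV_B64 $exec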
/// If \p MI is a copy to exec, return the register copied from.
static unsigned isCopyToExec(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64: {
    // ... (return the source register when the destination is exec)
    break;
  }
  case AMDGPU::S_MOV_B64_term:
    llvm_unreachable("should have been replaced");
  }

  return AMDGPU::NoRegister;
}
/// If \p MI is a logical operation on an exec value, return the register
/// copied to.
static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_AND_B64:
  case AMDGPU::S_OR_B64:
  case AMDGPU::S_XOR_B64:
  case AMDGPU::S_ANDN2_B64:
  case AMDGPU::S_ORN2_B64:
  case AMDGPU::S_NAND_B64:
  case AMDGPU::S_NOR_B64:
  case AMDGPU::S_XNOR_B64: {
    // If either source operand is exec, return the destination register.
    // ...
  }
  }

  return AMDGPU::NoRegister;
}
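Likewise (illustrative MIR), isLogicalOpOnExec returns the destination
$sgpr0_sgpr1 here, because one source operand is exec:

  $sgpr0_sgpr1 = S_AND_B64 $exec, $vcc, implicit-def $scc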
static unsigned getSaveExecOp(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::S_AND_B64:
    return AMDGPU::S_AND_SAVEEXEC_B64;
  case AMDGPU::S_OR_B64:
    return AMDGPU::S_OR_SAVEEXEC_B64;
  case AMDGPU::S_XOR_B64:
    return AMDGPU::S_XOR_SAVEEXEC_B64;
  case AMDGPU::S_ANDN2_B64:
    return AMDGPU::S_ANDN2_SAVEEXEC_B64;
  case AMDGPU::S_ORN2_B64:
    return AMDGPU::S_ORN2_SAVEEXEC_B64;
  case AMDGPU::S_NAND_B64:
    return AMDGPU::S_NAND_SAVEEXEC_B64;
  case AMDGPU::S_NOR_B64:
    return AMDGPU::S_NOR_SAVEEXEC_B64;
  case AMDGPU::S_XNOR_B64:
    return AMDGPU::S_XNOR_SAVEEXEC_B64;
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  }
}
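This table drives the core rewrite of the pass. The shape of the transform,
as the source comments describe it (register names are placeholders):

  x = COPY $exec
  z = S_<op>_B64 x, y
  $exec = COPY z
    =>
  x = S_<op>_SAVEEXEC_B64 y

The fused instruction saves the old exec mask into x and writes the logical
result into exec in a single operation.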
// The _term variants are terminators only to get correct spill code placement
// during register allocation; rewrite them back to the ordinary opcodes.
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(TII.get(AMDGPU::COPY));
    return true;
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
    return true;
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
    return true;
  default:
    return false;
  }
}
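Since setDesc only swaps the instruction descriptor, the operands are kept
as-is; for example (illustrative MIR), the terminator

  $exec = S_MOV_B64_term killed $sgpr0_sgpr1

simply becomes

  $exec = COPY killed $sgpr0_sgpr1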
static MachineBasicBlock::reverse_iterator
fixTerminators(const SIInstrInfo &TII, MachineBasicBlock &MBB) {
  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
  for (; I != E; ++I) {
    if (!I->isTerminator())
      return I;
    if (removeTerminatorBit(TII, *I))
      return I;
  }
  return E;
}
static MachineBasicBlock::reverse_iterator findExecCopy(
  const SIInstrInfo &TII, MachineBasicBlock &MBB,
  MachineBasicBlock::reverse_iterator I, unsigned CopyToExec) {
  // Only scan back a bounded number of instructions.
  const unsigned InstLimit = 25;

  auto E = MBB.rend();
  for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
    unsigned CopyFromExec = isCopyFromExec(*I);
    if (CopyFromExec != AMDGPU::NoRegister)
      return I;
  }
  return E;
}

// Check successors' live-ins directly (LivePhysRegs misreports availability
// here when a super-register with a lane mask is involved).
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
  for (MachineBasicBlock *Succ : MBB.successors()) {
    if (Succ->isLiveIn(Reg))
      return true;
  }
  return false;
}
bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
  // ... (skipFunction check and subtarget setup of TRI and TII elided)

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
    MachineBasicBlock::reverse_iterator E = MBB.rend();
    if (I == E)
      continue;

    unsigned CopyToExec = isCopyToExec(*I);
    if (CopyToExec == AMDGPU::NoRegister)
      continue;

    // Scan backwards to find the def.
    auto CopyToExecInst = &*I;
    auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
    if (CopyFromExecInst == E) {
      auto PrepareExecInst = std::next(I);
      if (PrepareExecInst == E)
        continue;
      // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec.
      if (CopyToExecInst->getOperand(1).isKill() &&
          isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
        PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
        CopyToExecInst->eraseFromParent();
      }
      continue;
    }
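A sketch of this fold on post-RA MIR (illustrative physical registers;
implicit operands abbreviated):

  $sgpr0_sgpr1 = S_AND_B64 $sgpr2_sgpr3, $exec, implicit-def $scc
  $exec = COPY killed $sgpr0_sgpr1
    =>
  $exec = S_AND_B64 $sgpr2_sgpr3, $exec, implicit-def $scc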
    // Skip if the copied register is live out of the block (it has another
    // use in a successor).
    if (isLiveOut(MBB, CopyToExec)) {
      LLVM_DEBUG(dbgs() << "Exec copy source register is live out\n");
      continue;
    }

    unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
    MachineInstr *SaveExecInst = nullptr;
    SmallVector<MachineInstr *, 4> OtherUseInsts;

    for (MachineBasicBlock::iterator
           J = std::next(CopyFromExecInst->getIterator()),
           JE = I->getIterator();
         J != JE; ++J) {
      if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
        LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
        // Make sure this is inserted after any VALU ops that may have been
        // scheduled in between.
        SaveExecInst = nullptr;
        break;
      }

      bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);

      if (J->modifiesRegister(CopyToExec, TRI)) {
        if (SaveExecInst) {
          LLVM_DEBUG(dbgs() << "Multiple instructions modify "
                            << printReg(CopyToExec, TRI) << '\n');
          SaveExecInst = nullptr;
          break;
        }

        unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
        if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
          break;

        if (ReadsCopyFromExec) {
          SaveExecInst = &*J;
          LLVM_DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
          continue;
        } else {
          LLVM_DEBUG(dbgs()
                     << "Instruction does not read exec copy: " << *J << '\n');
          break;
        }
      } else if (ReadsCopyFromExec && !SaveExecInst) {
        // Make sure no other instruction is trying to use this copy before it
        // is rewritten by the saveexec, i.e. hasOneUse.
        LLVM_DEBUG(dbgs() << "Found second use of save inst candidate: " << *J
                          << '\n');
        break;
      }

      if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
        assert(SaveExecInst != &*J);
        OtherUseInsts.push_back(&*J);
      }
    }

    if (!SaveExecInst)
      continue;

    LLVM_DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');

    MachineOperand &Src0 = SaveExecInst->getOperand(1);
    MachineOperand &Src1 = SaveExecInst->getOperand(2);
    MachineOperand *OtherOp = nullptr;

    if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
      OtherOp = &Src1;
    } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
      if (!SaveExecInst->isCommutable())
        break;
      OtherOp = &Src0;
    } else
      llvm_unreachable("unexpected");

    CopyFromExecInst->eraseFromParent();

    auto InsPt = SaveExecInst->getIterator();
    const DebugLoc &DL = SaveExecInst->getDebugLoc();

    BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
            CopyFromExec)
        .addReg(OtherOp->getReg());
    SaveExecInst->eraseFromParent();

    CopyToExecInst->eraseFromParent();

    for (MachineInstr *OtherInst : OtherUseInsts) {
      OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
                                    AMDGPU::NoSubRegister, *TRI);
    }
  }

  return true;
}
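Note the final loop: any instruction between the new saveexec and the deleted
copy that read CopyToExec is redirected to read $exec instead. Roughly
(illustrative MIR; SOME_USE is a placeholder and implicit operands are
elided):

  ; before
  $sgpr0_sgpr1 = COPY $exec
  $sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, $vcc, implicit-def $scc
  SOME_USE implicit $sgpr2_sgpr3
  $exec = COPY killed $sgpr2_sgpr3
  ; after
  $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit $exec
  SOME_USE implicit $exec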
Static helpers defined in this file:

  isCopyFromExec(MI)        - If MI is a copy from exec, return the register
                              copied to.
  isCopyToExec(MI)          - If MI is a copy to exec, return the register
                              copied from.
  isLogicalOpOnExec(MI)     - If MI is a logical operation on an exec value,
                              return the register copied to.
  getSaveExecOp(Opc)        - Map an S_<op>_B64 opcode to the matching
                              S_<op>_SAVEEXEC_B64 opcode, or
                              INSTRUCTION_LIST_END if there is none.
  removeTerminatorBit(TII, MI)
                            - Rewrite an S_*_term terminator back to the
                              ordinary opcode.
  fixTerminators(TII, MBB)  - Strip terminator bits at the bottom of MBB;
                              returns the first relevant instruction in
                              reverse order.
  findExecCopy(TII, MBB, I, CopyToExec)
                            - Scan backwards (up to InstLimit instructions)
                              for a copy from exec.
  isLiveOut(MBB, Reg)       - Return true if Reg is live into any successor
                              of MBB.

The pass is registered as "si-optimize-exec-masking" ("SI optimize exec mask
operations") via INITIALIZE_PASS_BEGIN/INITIALIZE_PASS_END, and exposed
through initializeSIOptimizeExecMaskingPass(PassRegistry &) and
llvm::SIOptimizeExecMaskingID.
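To exercise just this pass on a MIR file, the usual llc plumbing should work
(a sketch; assumes an LLVM build with the AMDGPU target, using the pass name
from DEBUG_TYPE above):

  llc -march=amdgcn -run-pass=si-optimize-exec-masking -verify-machineinstrs \
      -o - input.mir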