42 #define DEBUG_TYPE "si-insert-skips" 45 "amdgpu-skip-threshold",
46 cl::desc(
"Number of instructions before jumping over divergent control flow"),
55 unsigned SkipThreshold = 0;
79 return "SI insert s_cbranch_execz instructions";
92 "SI insert s_cbranch_execz instructions",
false,
false)
98 case TargetOpcode::IMPLICIT_DEF:
100 case TargetOpcode::BUNDLE:
101 case TargetOpcode::CFI_INSTRUCTION:
103 case TargetOpcode::GC_LABEL:
104 case TargetOpcode::DBG_VALUE:
116 unsigned NumInstr = 0;
120 MBBI != End && MBBI != ToI; ++MBBI) {
124 NumInstr < SkipThreshold &&
I !=
E; ++
I) {
134 if (
I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
135 I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
138 if (
TII->hasUnwantedEffectsWhenEXECEmpty(*
I))
142 if (NumInstr >= SkipThreshold)
163 BuildMI(&MBB, DL,
TII->get(AMDGPU::S_CBRANCH_EXECNZ))
169 BuildMI(*SkipBB, Insert, DL,
TII->get(AMDGPU::EXP_DONE))
180 BuildMI(*SkipBB, Insert, DL,
TII->get(AMDGPU::S_ENDPGM));
190 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
198 Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
202 Opcode = AMDGPU::V_CMPX_LT_F32_e64;
206 Opcode = AMDGPU::V_CMPX_LE_F32_e64;
210 Opcode = AMDGPU::V_CMPX_GT_F32_e64;
214 Opcode = AMDGPU::V_CMPX_GE_F32_e64;
218 Opcode = AMDGPU::V_CMPX_LG_F32_e64;
221 Opcode = AMDGPU::V_CMPX_O_F32_e64;
224 Opcode = AMDGPU::V_CMPX_U_F32_e64;
227 Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
230 Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
233 Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
236 Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
239 Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
242 Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
267 case AMDGPU::SI_KILL_I1_TERMINATOR: {
270 assert(KillVal == 0 || KillVal == -1);
274 int64_t Imm = Op.
getImm();
275 assert(Imm == 0 || Imm == -1);
278 BuildMI(MBB, &MI, DL,
TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
283 unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
284 BuildMI(MBB, &MI, DL,
TII->get(Opcode), AMDGPU::EXEC)
285 .addReg(AMDGPU::EXEC)
313 if (!shouldSkip(**SrcMBB.
succ_begin(), *DestBB))
319 BuildMI(SrcMBB, InsPt, DL,
TII->get(AMDGPU::S_CBRANCH_EXECZ))
325 bool SIInsertSkips::optimizeVccBranch(
MachineInstr &MI)
const {
332 bool Changed =
false;
334 const unsigned CondReg = AMDGPU::VCC;
335 const unsigned ExecReg = AMDGPU::EXEC;
336 const unsigned And = AMDGPU::S_AND_B64;
340 bool ReadsCond =
false;
342 for (++A ; A !=
E ; ++A) {
345 if (A->modifiesRegister(ExecReg,
TRI))
347 if (A->modifiesRegister(CondReg,
TRI)) {
348 if (!A->definesRegister(CondReg,
TRI) || A->getOpcode() != And)
352 ReadsCond |= A->readsRegister(CondReg,
TRI);
360 TII->commuteInstruction(*A);
363 if (Op1.
getReg() != ExecReg)
368 unsigned SReg = AMDGPU::NoRegister;
371 auto M = std::next(A);
372 bool ReadsSreg =
false;
373 for ( ; M !=
E ; ++M) {
374 if (M->definesRegister(SReg,
TRI))
376 if (M->modifiesRegister(SReg,
TRI))
378 ReadsSreg |= M->readsRegister(SReg,
TRI);
381 !M->isMoveImmediate() ||
382 !M->getOperand(1).isImm() ||
383 M->getOperand(1).getImm() != -1)
387 if (!ReadsSreg && Op2.
isKill()) {
388 A->getOperand(2).ChangeToImmediate(-1);
389 M->eraseFromParent();
393 if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
395 A->eraseFromParent();
397 bool IsVCCZ = MI.
getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
398 if (SReg == ExecReg) {
405 MI.
setDesc(
TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
406 : AMDGPU::S_CBRANCH_EXECNZ));
421 bool HaveKill =
false;
422 bool MadeChange =
false;
432 BI != BE; BI = NextBB) {
433 NextBB = std::next(BI);
435 bool HaveSkipBlock =
false;
437 if (!ExecBranchStack.
empty() && ExecBranchStack.
back() == &MBB) {
442 if (HaveKill && ExecBranchStack.
empty()) {
449 for (I = MBB.
begin(); I != MBB.
end(); I = Next) {
455 case AMDGPU::SI_MASK_BRANCH:
457 MadeChange |= skipMaskBranch(MI, MBB);
460 case AMDGPU::S_BRANCH:
465 }
else if (HaveSkipBlock) {
473 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
474 case AMDGPU::SI_KILL_I1_TERMINATOR:
478 if (ExecBranchStack.
empty()) {
479 if (NextBB != BE && skipIfDead(MI, *NextBB)) {
480 HaveSkipBlock =
true;
481 NextBB = std::next(BI);
491 case AMDGPU::SI_RETURN_TO_EPILOG:
500 if (!EmptyMBBAtEnd) {
507 .addMBB(EmptyMBBAtEnd);
508 I->eraseFromParent();
512 case AMDGPU::S_CBRANCH_VCCZ:
513 case AMDGPU::S_CBRANCH_VCCNZ:
514 MadeChange |= optimizeVccBranch(MI);
const MachineInstrBuilder & add(const MachineOperand &MO) const
AMDGPU specific subclass of TargetSubtarget.
MachineBasicBlock * getMBB() const
This class represents lattice values for constants.
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
char & SIInsertSkipsPassID
Represent the analysis usage information of a pass.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
self_iterator getIterator()
succ_iterator succ_begin()
Calling convention used for Mesa/AMDPAL pixel shaders.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Iterator for intrusive lists based on ilist_node.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
BlockVerifier::State From
LLVM_READONLY int getVOPe32(uint16_t Opcode)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
const MachineBasicBlock * getParent() const
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
reverse_self_iterator getReverseIterator()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
const MachineBasicBlock & back() const
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
StringRef - Represent a constant reference to a string, i.e.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
static cl::opt< unsigned > SkipThresholdFlag("amdgpu-skip-threshold", cl::desc("Number of instructions before jumping over divergent control flow"), cl::init(12), cl::Hidden)
static INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) char &llvm bool opcodeEmitsNoInsts(unsigned Opc)
const MachineOperand & getOperand(unsigned i) const
int findRegisterUseOperandIdx(unsigned Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found...
const SIRegisterInfo * getRegisterInfo() const override