//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass inserts branches on the 0 exec mask over divergent branches when
/// it's expected that jumping over the untaken control flow will be cheaper
/// than having every workitem no-op through it.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "si-insert-skips"

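// Threshold (in machine instructions) below which letting inactive lanes
// no-op through a divergent region is considered cheaper than branching
// over it.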
45  "amdgpu-skip-threshold",
46  cl::desc("Number of instructions before jumping over divergent control flow"),
47  cl::init(12), cl::Hidden);
48 
namespace {

class SIInsertSkips : public MachineFunctionPass {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  unsigned SkipThreshold = 0;

  bool shouldSkip(const MachineBasicBlock &From,
                  const MachineBasicBlock &To) const;

  bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);

  void kill(MachineInstr &MI);

  MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I) const;

  bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);

  bool optimizeVccBranch(MachineInstr &MI) const;

public:
  static char ID;

  SIInsertSkips() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "SI insert s_cbranch_execz instructions";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char SIInsertSkips::ID = 0;

INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
                "SI insert s_cbranch_execz instructions", false, false)

char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;

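// These opcodes are bookkeeping pseudo-instructions that expand to no machine
// code, so they must not count toward the skip threshold below.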
static bool opcodeEmitsNoInsts(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::BUNDLE:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::GC_LABEL:
  case TargetOpcode::DBG_VALUE:
    return true;
  default:
    return false;
  }
}

bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
                               const MachineBasicBlock &To) const {
  if (From.succ_empty())
    return false;

  unsigned NumInstr = 0;
  const MachineFunction *MF = From.getParent();

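  // Walk blocks in layout order from From toward To, counting instructions
  // that will actually be emitted. Hitting the threshold, a VCC branch, or an
  // instruction with unwanted effects when EXEC=0 makes the skip worthwhile.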
  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
       MBBI != End && MBBI != ToI; ++MBBI) {
    const MachineBasicBlock &MBB = *MBBI;

    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
         NumInstr < SkipThreshold && I != E; ++I) {
      if (opcodeEmitsNoInsts(I->getOpcode()))
        continue;

      // FIXME: Since this is required for correctness, this should be inserted
      // during SILowerControlFlow.

      // When a uniform loop is inside non-uniform control flow, the branch
      // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
      // when EXEC = 0. We should skip the loop lest it becomes infinite.
      if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
          I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
        return true;

      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
        return true;

      ++NumInstr;
      if (NumInstr >= SkipThreshold)
        return true;
    }
  }

  return false;
}

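// For pixel shaders, once every lane has been killed, the remainder of the
// shader can be skipped entirely: branch to a block that does a null export
// and terminates the wave, rather than executing the rest with EXEC=0.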
bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction *MF = MBB.getParent();

  if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS ||
      !shouldSkip(MBB, MBB.getParent()->back()))
    return false;

  MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());

  const DebugLoc &DL = MI.getDebugLoc();

  // If the exec mask is non-zero, skip the next two instructions
  BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
    .addMBB(&NextBB);

  MachineBasicBlock::iterator Insert = SkipBB->begin();

  // Exec mask is zero: Export to NULL target...
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
    .addImm(0x09)  // V_008DFC_SQ_EXP_NULL
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addReg(AMDGPU::VGPR0, RegState::Undef)
    .addImm(1)  // vm
    .addImm(0)  // compr
    .addImm(0); // en

  // ... and terminate wavefront.
  BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));

  return true;
}

void SIInsertSkips::kill(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
    unsigned Opcode = 0;

    // The opcodes are inverted because the inline immediate has to be
    // the first operand, e.g. from "x < imm" to "imm > x"
    switch (MI.getOperand(2).getImm()) {
    case ISD::SETOEQ:
    case ISD::SETEQ:
      Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      Opcode = AMDGPU::V_CMPX_LT_F32_e64;
      break;
    case ISD::SETOGE:
    case ISD::SETGE:
      Opcode = AMDGPU::V_CMPX_LE_F32_e64;
      break;
    case ISD::SETOLT:
    case ISD::SETLT:
      Opcode = AMDGPU::V_CMPX_GT_F32_e64;
      break;
    case ISD::SETOLE:
    case ISD::SETLE:
      Opcode = AMDGPU::V_CMPX_GE_F32_e64;
      break;
    case ISD::SETONE:
    case ISD::SETNE:
      Opcode = AMDGPU::V_CMPX_LG_F32_e64;
      break;
    case ISD::SETO:
      Opcode = AMDGPU::V_CMPX_O_F32_e64;
      break;
    case ISD::SETUO:
      Opcode = AMDGPU::V_CMPX_U_F32_e64;
      break;
    case ISD::SETUEQ:
      Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
      break;
    case ISD::SETUGT:
      Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
      break;
    case ISD::SETUGE:
      Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
      break;
    case ISD::SETULT:
      Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
      break;
    case ISD::SETULE:
      Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
      break;
    case ISD::SETUNE:
      Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
      break;
    default:
      llvm_unreachable("invalid ISD:SET cond code");
    }

    assert(MI.getOperand(0).isReg());

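    // If the condition operand lives in a VGPR, the compare can use the
    // shorter 32-bit encoding; otherwise fall back to the e64 form, which
    // takes explicit source-modifier and omod operands.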
    if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
                    MI.getOperand(0).getReg())) {
      Opcode = AMDGPU::getVOPe32(Opcode);
      BuildMI(MBB, &MI, DL, TII->get(Opcode))
        .add(MI.getOperand(1))
        .add(MI.getOperand(0));
    } else {
      BuildMI(MBB, &MI, DL, TII->get(Opcode))
        .addReg(AMDGPU::VCC, RegState::Define)
        .addImm(0)  // src0 modifiers
        .add(MI.getOperand(1))
        .addImm(0)  // src1 modifiers
        .add(MI.getOperand(0))
        .addImm(0); // omod
    }
    break;
  }
  case AMDGPU::SI_KILL_I1_TERMINATOR: {
    const MachineOperand &Op = MI.getOperand(0);
    int64_t KillVal = MI.getOperand(1).getImm();
    assert(KillVal == 0 || KillVal == -1);

    // Kill all threads if Op0 is an immediate and equal to the Kill value.
    if (Op.isImm()) {
      int64_t Imm = Op.getImm();
      assert(Imm == 0 || Imm == -1);

      if (Imm == KillVal)
        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
          .addImm(0);
      break;
    }

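    // Op is a lane mask: clear the EXEC bits where the mask matches KillVal.
    // Killing on 1 uses EXEC &= ~Op; killing on 0 uses EXEC &= Op.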
    unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
    BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
      .addReg(AMDGPU::EXEC)
      .add(Op);
    break;
  }
  default:
    llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
  }
}

MachineBasicBlock *SIInsertSkips::insertSkipBlock(
  MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
  MachineFunction *MF = MBB.getParent();

  MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;

  MF->insert(MBBI, SkipBB);
  MBB.addSuccessor(SkipBB);

  return SkipBB;
}

// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
                                   MachineBasicBlock &SrcMBB) {
  MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();

  if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());

  BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
    .addMBB(DestBB);

  return true;
}

bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
  // Match:
  // sreg = -1
  // vcc = S_AND_B64 exec, sreg
  // S_CBRANCH_VCC[N]Z
  // =>
  // S_CBRANCH_EXEC[N]Z
  bool Changed = false;
  MachineBasicBlock &MBB = *MI.getParent();
  const unsigned CondReg = AMDGPU::VCC;
  const unsigned ExecReg = AMDGPU::EXEC;
  const unsigned And = AMDGPU::S_AND_B64;

  MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
                                      E = MBB.rend();
  bool ReadsCond = false;
  unsigned Threshold = 5;
  for (++A; A != E; ++A) {
    if (!--Threshold)
      return false;
    if (A->modifiesRegister(ExecReg, TRI))
      return false;
    if (A->modifiesRegister(CondReg, TRI)) {
      if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
        return false;
      break;
    }
    ReadsCond |= A->readsRegister(CondReg, TRI);
  }
  if (A == E)
    return false;

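  // A now points at the S_AND_B64 defining vcc. Canonicalize so that EXEC is
  // the first source, then require the other source to be all ones: either an
  // immediate -1 or a register known to hold -1.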
  MachineOperand &Op1 = A->getOperand(1);
  MachineOperand &Op2 = A->getOperand(2);
  if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
    TII->commuteInstruction(*A);
    Changed = true;
  }
  if (Op1.getReg() != ExecReg)
    return Changed;
  if (Op2.isImm() && Op2.getImm() != -1)
    return Changed;

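  // The mask is a register: scan backward for its definition, which must be a
  // move of the all-ones immediate with no intervening clobber of the register.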
  unsigned SReg = AMDGPU::NoRegister;
  if (Op2.isReg()) {
    SReg = Op2.getReg();
    auto M = std::next(A);
    bool ReadsSreg = false;
    for (; M != E; ++M) {
      if (M->definesRegister(SReg, TRI))
        break;
      if (M->modifiesRegister(SReg, TRI))
        return Changed;
      ReadsSreg |= M->readsRegister(SReg, TRI);
    }
    if (M == E ||
        !M->isMoveImmediate() ||
        !M->getOperand(1).isImm() ||
        M->getOperand(1).getImm() != -1)
      return Changed;
    // If sreg is only used in the AND, fold the immediate into the AND
    // directly and delete the move.
    if (!ReadsSreg && Op2.isKill()) {
      A->getOperand(2).ChangeToImmediate(-1);
      M->eraseFromParent();
    }
  }

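  // If nothing else reads vcc or scc and the branch killed vcc, the S_AND is
  // now dead and can be removed as well.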
  if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
      MI.killsRegister(CondReg, TRI))
    A->eraseFromParent();

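  // When the mask is EXEC itself, vcc = exec & exec, so the branch reduces to
  // a test of EXEC: the VCCZ form (taken only when EXEC is zero) is deleted,
  // and the VCCNZ form becomes an unconditional branch.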
  bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
  if (SReg == ExecReg) {
    if (IsVCCZ) {
      MI.eraseFromParent();
      return true;
    }
    MI.setDesc(TII->get(AMDGPU::S_BRANCH));
  } else {
    MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
                               : AMDGPU::S_CBRANCH_EXECNZ));
  }

  MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
  MI.addImplicitDefUseOperands(*MBB.getParent());

  return true;
}

bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  SkipThreshold = SkipThresholdFlag;

  bool HaveKill = false;
  bool MadeChange = false;

  // Track depth of exec mask, divergent branches.
  SmallVector<MachineBasicBlock *, 16> ExecBranchStack;

  MachineFunction::iterator NextBB;

  MachineBasicBlock *EmptyMBBAtEnd = nullptr;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; BI = NextBB) {
    NextBB = std::next(BI);
    MachineBasicBlock &MBB = *BI;
    bool HaveSkipBlock = false;

    if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
      // Reached convergence point for last divergent branch.
      ExecBranchStack.pop_back();
    }

    if (HaveKill && ExecBranchStack.empty()) {
      HaveKill = false;

      // TODO: Insert skip if exec is 0?
    }

    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);

      MachineInstr &MI = *I;

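      // SI_MASK_BRANCH is a pseudo marking a divergent branch; its operand is
      // the block where control flow reconverges. Record that join point and
      // insert a real S_CBRANCH_EXECZ over the region when it is long enough
      // to be worth skipping.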
      switch (MI.getOpcode()) {
      case AMDGPU::SI_MASK_BRANCH:
        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
        MadeChange |= skipMaskBranch(MI, MBB);
        break;

      case AMDGPU::S_BRANCH:
        // Optimize out branches to the next block.
        // FIXME: Shouldn't this be handled by BranchFolding?
        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
          MI.eraseFromParent();
        } else if (HaveSkipBlock) {
          // A skip block has been inserted after this one. Remove the
          // unconditional branch so that, when the exec mask is non-zero, the
          // S_CBRANCH_EXECNZ jumps past the two instructions performing the
          // kill.
          MI.eraseFromParent();
        }
        break;

      case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
      case AMDGPU::SI_KILL_I1_TERMINATOR:
        MadeChange = true;
        kill(MI);

        if (ExecBranchStack.empty()) {
          if (NextBB != BE && skipIfDead(MI, *NextBB)) {
            HaveSkipBlock = true;
            NextBB = std::next(BI);
            BE = MF.end();
          }
        } else {
          HaveKill = true;
        }

        MI.eraseFromParent();
        break;

      case AMDGPU::SI_RETURN_TO_EPILOG:
        // FIXME: Should move somewhere else
        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());

        // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
        // because external bytecode will be appended at the end.
        if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
          // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block
          // at the end and jump there.
          if (!EmptyMBBAtEnd) {
            EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
            MF.insert(MF.end(), EmptyMBBAtEnd);
          }

          MBB.addSuccessor(EmptyMBBAtEnd);
          BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
            .addMBB(EmptyMBBAtEnd);
          I->eraseFromParent();
        }
        break;

      case AMDGPU::S_CBRANCH_VCCZ:
      case AMDGPU::S_CBRANCH_VCCNZ:
        MadeChange |= optimizeVccBranch(MI);
        break;

      default:
        break;
      }
    }
  }

  return MadeChange;
}