LLVM  8.0.1
SIOptimizeExecMasking.cpp
Go to the documentation of this file.
1 //===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPU.h"
11 #include "AMDGPUSubtarget.h"
12 #include "SIInstrInfo.h"
14 #include "llvm/ADT/SmallSet.h"
18 #include "llvm/Support/Debug.h"
19 
20 using namespace llvm;
21 
22 #define DEBUG_TYPE "si-optimize-exec-masking"
23 
24 namespace {
25 
26 class SIOptimizeExecMasking : public MachineFunctionPass {
27 public:
28  static char ID;
29 
30 public:
31  SIOptimizeExecMasking() : MachineFunctionPass(ID) {
33  }
34 
35  bool runOnMachineFunction(MachineFunction &MF) override;
36 
37  StringRef getPassName() const override {
38  return "SI optimize exec mask operations";
39  }
40 
41  void getAnalysisUsage(AnalysisUsage &AU) const override {
42  AU.setPreservesCFG();
44  }
45 };
46 
47 } // End anonymous namespace.
48 
// Pass registration: the BEGIN/END pair declares the pass under the command
// line name DEBUG_TYPE ("si-optimize-exec-masking") with the description
// string below.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of the INITIALIZE_PASS macros - confirm
// against PassSupport.h.
49 INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
50  "SI optimize exec mask operations", false, false)
52 INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
53  "SI optimize exec mask operations", false, false)
54 
// Storage for the pass identifier that the constructor hands to
// MachineFunctionPass.
55 char SIOptimizeExecMasking::ID = 0;
56 
// Exposes the identifier above outside of this file's anonymous namespace.
57 char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
58 
59 /// If \p MI is a copy from exec, return the register copied to.
60 static unsigned isCopyFromExec(const MachineInstr &MI) {
61  switch (MI.getOpcode()) {
62  case AMDGPU::COPY:
63  case AMDGPU::S_MOV_B64:
64  case AMDGPU::S_MOV_B64_term: {
65  const MachineOperand &Src = MI.getOperand(1);
66  if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
67  return MI.getOperand(0).getReg();
68  }
69  }
70 
71  return AMDGPU::NoRegister;
72 }
73 
74 /// If \p MI is a copy to exec, return the register copied from.
75 static unsigned isCopyToExec(const MachineInstr &MI) {
76  switch (MI.getOpcode()) {
77  case AMDGPU::COPY:
78  case AMDGPU::S_MOV_B64: {
79  const MachineOperand &Dst = MI.getOperand(0);
80  if (Dst.isReg() && Dst.getReg() == AMDGPU::EXEC && MI.getOperand(1).isReg())
81  return MI.getOperand(1).getReg();
82  break;
83  }
84  case AMDGPU::S_MOV_B64_term:
85  llvm_unreachable("should have been replaced");
86  }
87 
88  return AMDGPU::NoRegister;
89 }
90 
91 /// If \p MI is a logical operation on an exec value,
92 /// return the register copied to.
93 static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
94  switch (MI.getOpcode()) {
95  case AMDGPU::S_AND_B64:
96  case AMDGPU::S_OR_B64:
97  case AMDGPU::S_XOR_B64:
98  case AMDGPU::S_ANDN2_B64:
99  case AMDGPU::S_ORN2_B64:
100  case AMDGPU::S_NAND_B64:
101  case AMDGPU::S_NOR_B64:
102  case AMDGPU::S_XNOR_B64: {
103  const MachineOperand &Src1 = MI.getOperand(1);
104  if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
105  return MI.getOperand(0).getReg();
106  const MachineOperand &Src2 = MI.getOperand(2);
107  if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
108  return MI.getOperand(0).getReg();
109  }
110  }
111 
112  return AMDGPU::NoRegister;
113 }
114 
115 static unsigned getSaveExecOp(unsigned Opc) {
116  switch (Opc) {
117  case AMDGPU::S_AND_B64:
118  return AMDGPU::S_AND_SAVEEXEC_B64;
119  case AMDGPU::S_OR_B64:
120  return AMDGPU::S_OR_SAVEEXEC_B64;
121  case AMDGPU::S_XOR_B64:
122  return AMDGPU::S_XOR_SAVEEXEC_B64;
123  case AMDGPU::S_ANDN2_B64:
124  return AMDGPU::S_ANDN2_SAVEEXEC_B64;
125  case AMDGPU::S_ORN2_B64:
126  return AMDGPU::S_ORN2_SAVEEXEC_B64;
127  case AMDGPU::S_NAND_B64:
128  return AMDGPU::S_NAND_SAVEEXEC_B64;
129  case AMDGPU::S_NOR_B64:
130  return AMDGPU::S_NOR_SAVEEXEC_B64;
131  case AMDGPU::S_XNOR_B64:
132  return AMDGPU::S_XNOR_SAVEEXEC_B64;
133  default:
134  return AMDGPU::INSTRUCTION_LIST_END;
135  }
136 }
137 
138 // These are only terminators to get correct spill code placement during
139 // register allocation, so turn them back into normal instructions. Only one of
140 // these is expected per block.
142  switch (MI.getOpcode()) {
143  case AMDGPU::S_MOV_B64_term: {
144  MI.setDesc(TII.get(AMDGPU::COPY));
145  return true;
146  }
147  case AMDGPU::S_XOR_B64_term: {
148  // This is only a terminator to get the correct spill code placement during
149  // register allocation.
150  MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
151  return true;
152  }
153  case AMDGPU::S_ANDN2_B64_term: {
154  // This is only a terminator to get the correct spill code placement during
155  // register allocation.
156  MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
157  return true;
158  }
159  default:
160  return false;
161  }
162 }
163 
165  const SIInstrInfo &TII,
166  MachineBasicBlock &MBB) {
168  for (; I != E; ++I) {
169  if (!I->isTerminator())
170  return I;
171 
172  if (removeTerminatorBit(TII, *I))
173  return I;
174  }
175 
176  return E;
177 }
178 
180  const SIInstrInfo &TII,
181  MachineBasicBlock &MBB,
183  unsigned CopyToExec) {
184  const unsigned InstLimit = 25;
185 
186  auto E = MBB.rend();
187  for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
188  unsigned CopyFromExec = isCopyFromExec(*I);
189  if (CopyFromExec != AMDGPU::NoRegister)
190  return I;
191  }
192 
193  return E;
194 }
195 
196 // XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly
197 // repor tthe register as unavailable because a super-register with a lane mask
198 // as unavailable.
199 static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
200  for (MachineBasicBlock *Succ : MBB.successors()) {
201  if (Succ->isLiveIn(Reg))
202  return true;
203  }
204 
205  return false;
206 }
207 
208 bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
209  if (skipFunction(MF.getFunction()))
210  return false;
211 
212  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
213  const SIRegisterInfo *TRI = ST.getRegisterInfo();
214  const SIInstrInfo *TII = ST.getInstrInfo();
215 
216  // Optimize sequences emitted for control flow lowering. They are originally
217  // emitted as the separate operations because spill code may need to be
218  // inserted for the saved copy of exec.
219  //
220  // x = copy exec
221  // z = s_<op>_b64 x, y
222  // exec = copy z
223  // =>
224  // x = s_<op>_saveexec_b64 y
225  //
226 
227  for (MachineBasicBlock &MBB : MF) {
230  if (I == E)
231  continue;
232 
233  unsigned CopyToExec = isCopyToExec(*I);
234  if (CopyToExec == AMDGPU::NoRegister)
235  continue;
236 
237  // Scan backwards to find the def.
238  auto CopyToExecInst = &*I;
239  auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
240  if (CopyFromExecInst == E) {
241  auto PrepareExecInst = std::next(I);
242  if (PrepareExecInst == E)
243  continue;
244  // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
245  if (CopyToExecInst->getOperand(1).isKill() &&
246  isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
247  LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
248 
249  PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
250 
251  LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
252 
253  CopyToExecInst->eraseFromParent();
254  }
255 
256  continue;
257  }
258 
259  if (isLiveOut(MBB, CopyToExec)) {
260  // The copied register is live out and has a second use in another block.
261  LLVM_DEBUG(dbgs() << "Exec copy source register is live out\n");
262  continue;
263  }
264 
265  unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
266  MachineInstr *SaveExecInst = nullptr;
267  SmallVector<MachineInstr *, 4> OtherUseInsts;
268 
270  = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
271  J != JE; ++J) {
272  if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
273  LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
274  // Make sure this is inserted after any VALU ops that may have been
275  // scheduled in between.
276  SaveExecInst = nullptr;
277  break;
278  }
279 
280  bool ReadsCopyFromExec = J->readsRegister(CopyFromExec, TRI);
281 
282  if (J->modifiesRegister(CopyToExec, TRI)) {
283  if (SaveExecInst) {
284  LLVM_DEBUG(dbgs() << "Multiple instructions modify "
285  << printReg(CopyToExec, TRI) << '\n');
286  SaveExecInst = nullptr;
287  break;
288  }
289 
290  unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
291  if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
292  break;
293 
294  if (ReadsCopyFromExec) {
295  SaveExecInst = &*J;
296  LLVM_DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
297  continue;
298  } else {
299  LLVM_DEBUG(dbgs()
300  << "Instruction does not read exec copy: " << *J << '\n');
301  break;
302  }
303  } else if (ReadsCopyFromExec && !SaveExecInst) {
304  // Make sure no other instruction is trying to use this copy, before it
305  // will be rewritten by the saveexec, i.e. hasOneUse. There may have
306  // been another use, such as an inserted spill. For example:
307  //
308  // %sgpr0_sgpr1 = COPY %exec
309  // spill %sgpr0_sgpr1
310  // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
311  //
312  LLVM_DEBUG(dbgs() << "Found second use of save inst candidate: " << *J
313  << '\n');
314  break;
315  }
316 
317  if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
318  assert(SaveExecInst != &*J);
319  OtherUseInsts.push_back(&*J);
320  }
321  }
322 
323  if (!SaveExecInst)
324  continue;
325 
326  LLVM_DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');
327 
328  MachineOperand &Src0 = SaveExecInst->getOperand(1);
329  MachineOperand &Src1 = SaveExecInst->getOperand(2);
330 
331  MachineOperand *OtherOp = nullptr;
332 
333  if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
334  OtherOp = &Src1;
335  } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
336  if (!SaveExecInst->isCommutable())
337  break;
338 
339  OtherOp = &Src0;
340  } else
341  llvm_unreachable("unexpected");
342 
343  CopyFromExecInst->eraseFromParent();
344 
345  auto InsPt = SaveExecInst->getIterator();
346  const DebugLoc &DL = SaveExecInst->getDebugLoc();
347 
348  BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
349  CopyFromExec)
350  .addReg(OtherOp->getReg());
351  SaveExecInst->eraseFromParent();
352 
353  CopyToExecInst->eraseFromParent();
354 
355  for (MachineInstr *OtherInst : OtherUseInsts) {
356  OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
357  AMDGPU::NoSubRegister, *TRI);
358  }
359  }
360 
361  return true;
362 
363 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
void push_back(const T &Elt)
Definition: SmallVector.h:218
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
const SIInstrInfo * getInstrInfo() const override
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
static unsigned isLogicalOpOnExec(const MachineInstr &MI)
If MI is a logical operation on an exec value, return the register copied to.
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI)
static MachineBasicBlock::reverse_iterator fixTerminators(const SIInstrInfo &TII, MachineBasicBlock &MBB)
iterator_range< succ_iterator > successors()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static unsigned isCopyFromExec(const MachineInstr &MI)
If MI is a copy from exec, return the register copied to.
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
SI optimize exec mask operations
#define DEBUG_TYPE
reverse_iterator rend()
reverse_iterator rbegin()
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Represent the analysis usage information of a pass.
self_iterator getIterator()
Definition: ilist_node.h:82
static MachineBasicBlock::reverse_iterator findExecCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::reverse_iterator I, unsigned CopyToExec)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
char & SIOptimizeExecMaskingID
static unsigned isCopyToExec(const MachineInstr &MI)
If MI is a copy to exec, return the register copied from.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
MachineOperand class - Representation of each machine instruction operand.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
Interface definition for SIInstrInfo.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE, "SI optimize exec mask operations", false, false) INITIALIZE_PASS_END(SIOptimizeExecMasking
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
aarch64 promote const
static unsigned getSaveExecOp(unsigned Opc)
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MachineInstr.h:848
const SIRegisterInfo * getRegisterInfo() const override