LLVM  8.0.1
SIFixupVectorISel.cpp
Go to the documentation of this file.
1 //===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 /// SIFixupVectorISel pass cleans up post ISEL Vector issues.
10 /// Currently this will convert GLOBAL_{LOAD|STORE}_*
11 /// and GLOBAL_Atomic_* instructions into their _SADDR variants,
12 /// feeding the sreg into the saddr field of the new instruction.
13 /// We currently handle a REG_SEQUENCE feeding the vaddr
14 /// and decompose it into a base and index.
15 ///
16 /// Transform:
17 /// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
18 /// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
19 /// %24:vgpr_32, %19:sreg_64_xexec
20 /// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
21 /// %11:vreg_64 = COPY %16:vreg_64
22 /// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
23 /// Into:
24 /// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
25 /// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
26 /// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16...
27 ///
28 //===----------------------------------------------------------------------===//
29 //
30 
31 #include "AMDGPU.h"
32 #include "AMDGPUSubtarget.h"
34 #include "llvm/ADT/Statistic.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/LLVMContext.h"
40 #include "llvm/Support/Debug.h"
42 #define DEBUG_TYPE "si-fixup-vector-isel"
43 
44 using namespace llvm;
45 
47  "amdgpu-enable-global-sgpr-addr",
48  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
49  cl::init(false));
50 
51 STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
52 STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
53 
54 namespace {
55 
56 class SIFixupVectorISel : public MachineFunctionPass {
57 public:
58  static char ID;
59 
60 public:
61  SIFixupVectorISel() : MachineFunctionPass(ID) {
63  }
64 
65  bool runOnMachineFunction(MachineFunction &MF) override;
66 
67  void getAnalysisUsage(AnalysisUsage &AU) const override {
68  AU.setPreservesCFG();
70  }
71 };
72 
73 } // End anonymous namespace.
74 
75 INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
76  "SI Fixup Vector ISel", false, false)
77 
78 char SIFixupVectorISel::ID = 0;
79 
80 char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
81 
83  return new SIFixupVectorISel();
84 }
85 
87  unsigned &BaseReg,
88  unsigned &IndexReg,
90  const SIRegisterInfo *TRI) {
92  Worklist.push_back(Op);
93  while (!Worklist.empty()) {
94  MachineOperand *WOp = Worklist.pop_back_val();
95  if (!WOp->isReg() ||
97  continue;
98  MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
99  switch (DefInst->getOpcode()) {
100  default:
101  continue;
102  case AMDGPU::COPY:
103  Worklist.push_back(&DefInst->getOperand(1));
104  break;
105  case AMDGPU::REG_SEQUENCE:
106  if (DefInst->getNumOperands() != 5)
107  continue;
108  Worklist.push_back(&DefInst->getOperand(1));
109  Worklist.push_back(&DefInst->getOperand(3));
110  break;
111  case AMDGPU::V_ADD_I32_e64:
112  // The V_ADD_* and its analogous V_ADDCV_* are generated by
113  // a previous pass which lowered from an ADD_64_PSEUDO,
114  // which generates subregs to break up the 64 bit args.
115  if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
116  continue;
117  BaseReg = DefInst->getOperand(2).getReg();
118  if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
119  continue;
120  IndexReg = DefInst->getOperand(3).getReg();
121  // Chase the IndexReg.
122  MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
123  if (!MI || !MI->isCopy())
124  continue;
125  // Make sure the reg class is 64 bit for Index.
126  // If the Index register is a subreg, we want it to reference
127  // a 64 bit register which we will use as the Index reg.
128  const TargetRegisterClass *IdxRC, *BaseRC;
129  IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
130  if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
131  continue;
132  IndexReg = MI->getOperand(1).getReg();
133  // Chase the BaseReg.
134  MI = MRI.getUniqueVRegDef(BaseReg);
135  if (!MI || !MI->isCopy())
136  continue;
137  // Make sure the register class is 64 bit for Base.
138  BaseReg = MI->getOperand(1).getReg();
139  BaseRC = MRI.getRegClass(BaseReg);
140  if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
141  continue;
142  // Make sure Base is SReg and Index is VReg.
143  if (!TRI->isSGPRReg(MRI, BaseReg))
144  return false;
145  if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
146  return false;
147  // clear any killed flags on Index and Base regs, used later.
148  MRI.clearKillFlags(IndexReg);
149  MRI.clearKillFlags(BaseReg);
150  return true;
151  }
152  }
153  return false;
154 }
155 
156 // Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
158  MachineFunction &MF,
160  const GCNSubtarget &ST,
161  const SIInstrInfo *TII,
162  const SIRegisterInfo *TRI) {
164  return false;
165  bool FuncModified = false;
167  for (I = MBB.begin(); I != MBB.end(); I = Next) {
168  Next = std::next(I);
169  MachineInstr &MI = *I;
170  int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
171  if (NewOpcd < 0)
172  continue;
173  // Update our statistics on opportunities seen.
174  ++NumSGPRGlobalOccurs;
175  LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
176  // Need a Base and Index or we cant transform to _SADDR.
177  unsigned BaseReg = 0;
178  unsigned IndexReg = 0;
179  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
180  if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
181  continue;
182  ++NumSGPRGlobalSaddrs;
183  FuncModified = true;
184  // Create the new _SADDR Memory instruction.
185  bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
186  MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
187  MachineInstr *NewGlob = nullptr;
188  NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
189  if (HasVdst)
190  NewGlob->addOperand(MF, MI.getOperand(0));
191  NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
192  if (VData)
193  NewGlob->addOperand(MF, *VData);
194  NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
195  NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));
196 
197  MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
198  // Atomics dont have a GLC, so omit the field if not there.
199  if (Glc)
200  NewGlob->addOperand(MF, *Glc);
201  NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
202  // _D16 have an vdst_in operand, copy it in.
203  MachineOperand *VDstInOp = TII->getNamedOperand(MI,
204  AMDGPU::OpName::vdst_in);
205  if (VDstInOp)
206  NewGlob->addOperand(MF, *VDstInOp);
207  NewGlob->copyImplicitOps(MF, MI);
208  NewGlob->cloneMemRefs(MF, MI);
209  // Remove the old Global Memop instruction.
210  MI.eraseFromParent();
211  LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
212  }
213  return FuncModified;
214 }
215 
216 bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
217  if (skipFunction(MF.getFunction()))
218  return false;
219 
221  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
222  const SIInstrInfo *TII = ST.getInstrInfo();
223  const SIRegisterInfo *TRI = ST.getRegisterInfo();
224 
225  bool FuncModified = false;
226  for (MachineBasicBlock &MBB : MF) {
227  // Cleanup missed Saddr opportunites from ISel.
228  FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
229  }
230  return FuncModified;
231 }
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned getSubReg() const
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
const SIInstrInfo * getInstrInfo() const override
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
char & SIFixupVectorISelID
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:412
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
unsigned getID() const
Return the register class ID number.
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
void clearKillFlags(unsigned Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
FunctionPass * createSIFixupVectorISelPass()
void initializeSIFixupVectorISelPass(PassRegistry &)
#define DEBUG_TYPE
static bool fixupGlobalSaddr(MachineBasicBlock &MBB, MachineFunction &MF, MachineRegisterInfo &MRI, const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
bool hasVGPRs(const TargetRegisterClass *RC) const
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool isCopy() const
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:34
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it...
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static cl::opt< bool > EnableGlobalSGPRAddr("amdgpu-enable-global-sgpr-addr", cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"), cl::init(false))
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool findSRegBaseAndIndex(MachineOperand *Op, unsigned &BaseReg, unsigned &IndexReg, MachineRegisterInfo &MRI, const SIRegisterInfo *TRI)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
#define I(x, y, z)
Definition: MD5.cpp:58
bool isReg() const
isReg - Tests if this is a MO_Register operand.
IRTranslator LLVM IR MI
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
const SIRegisterInfo * getRegisterInfo() const override