LLVM  8.0.1
SIFormMemoryClauses.cpp
Go to the documentation of this file.
1 //===-- SIFormMemoryClauses.cpp -------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This pass creates bundles of SMEM and VMEM instructions forming memory
12 /// clauses if XNACK is enabled. Def operands of clauses are marked as early
13 /// clobber to make sure we will not override any source within a clause.
14 ///
15 //===----------------------------------------------------------------------===//
16 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "GCNRegPressure.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "si-form-memory-clauses"
31 
32 // Clauses longer then 15 instructions would overflow one of the counters
33 // and stall. They can stall even earlier if there are outstanding counters.
34 static cl::opt<unsigned>
35 MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15),
36  cl::desc("Maximum length of a memory clause, instructions"));
37 
38 namespace {
39 
40 class SIFormMemoryClauses : public MachineFunctionPass {
42 
43 public:
44  static char ID;
45 
46 public:
47  SIFormMemoryClauses() : MachineFunctionPass(ID) {
49  }
50 
51  bool runOnMachineFunction(MachineFunction &MF) override;
52 
53  StringRef getPassName() const override {
54  return "SI Form memory clauses";
55  }
56 
57  void getAnalysisUsage(AnalysisUsage &AU) const override {
59  AU.setPreservesAll();
61  }
62 
63 private:
64  template <typename Callable>
65  void forAllLanes(unsigned Reg, LaneBitmask LaneMask, Callable Func) const;
66 
67  bool canBundle(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
68  bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
69  void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
70  bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
72 
73  const GCNSubtarget *ST;
74  const SIRegisterInfo *TRI;
75  const MachineRegisterInfo *MRI;
77 
78  unsigned LastRecordedOccupancy;
79  unsigned MaxVGPRs;
80  unsigned MaxSGPRs;
81 };
82 
83 } // End anonymous namespace.
84 
85 INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE,
86  "SI Form memory clauses", false, false)
88 INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE,
89  "SI Form memory clauses", false, false)
90 
91 
92 char SIFormMemoryClauses::ID = 0;
93 
94 char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID;
95 
97  return new SIFormMemoryClauses();
98 }
99 
100 static bool isVMEMClauseInst(const MachineInstr &MI) {
101  return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
102 }
103 
104 static bool isSMEMClauseInst(const MachineInstr &MI) {
105  return SIInstrInfo::isSMRD(MI);
106 }
107 
108 // There no sense to create store clauses, they do not define anything,
109 // thus there is nothing to set early-clobber.
110 static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
111  if (MI.isDebugValue() || MI.isBundled())
112  return false;
113  if (!MI.mayLoad() || MI.mayStore())
114  return false;
115  if (AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1 ||
116  AMDGPU::getAtomicRetOp(MI.getOpcode()) != -1)
117  return false;
118  if (IsVMEMClause && !isVMEMClauseInst(MI))
119  return false;
120  if (!IsVMEMClause && !isSMEMClauseInst(MI))
121  return false;
122  return true;
123 }
124 
125 static unsigned getMopState(const MachineOperand &MO) {
126  unsigned S = 0;
127  if (MO.isImplicit())
128  S |= RegState::Implicit;
129  if (MO.isDead())
130  S |= RegState::Dead;
131  if (MO.isUndef())
132  S |= RegState::Undef;
133  if (MO.isKill())
134  S |= RegState::Kill;
135  if (MO.isEarlyClobber())
138  S |= RegState::Renamable;
139  return S;
140 }
141 
142 template <typename Callable>
143 void SIFormMemoryClauses::forAllLanes(unsigned Reg, LaneBitmask LaneMask,
144  Callable Func) const {
145  if (LaneMask.all() || TargetRegisterInfo::isPhysicalRegister(Reg) ||
146  LaneMask == MRI->getMaxLaneMaskForVReg(Reg)) {
147  Func(0);
148  return;
149  }
150 
151  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
152  unsigned E = TRI->getNumSubRegIndices();
154  for (unsigned Idx = 1; Idx < E; ++Idx) {
155  // Is this index even compatible with the given class?
156  if (TRI->getSubClassWithSubReg(RC, Idx) != RC)
157  continue;
158  LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
159  // Early exit if we found a perfect match.
160  if (SubRegMask == LaneMask) {
161  Func(Idx);
162  return;
163  }
164 
165  if ((SubRegMask & ~LaneMask).any() || (SubRegMask & LaneMask).none())
166  continue;
167 
168  CoveringSubregs.push_back(Idx);
169  }
170 
171  llvm::sort(CoveringSubregs, [this](unsigned A, unsigned B) {
172  LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(A);
173  LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(B);
174  unsigned NA = MaskA.getNumLanes();
175  unsigned NB = MaskB.getNumLanes();
176  if (NA != NB)
177  return NA > NB;
178  return MaskA.getHighestLane() > MaskB.getHighestLane();
179  });
180 
181  for (unsigned Idx : CoveringSubregs) {
182  LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(Idx);
183  if ((SubRegMask & ~LaneMask).any() || (SubRegMask & LaneMask).none())
184  continue;
185 
186  Func(Idx);
187  LaneMask &= ~SubRegMask;
188  if (LaneMask.none())
189  return;
190  }
191 
192  llvm_unreachable("Failed to find all subregs to cover lane mask");
193 }
194 
195 // Returns false if there is a use of a def already in the map.
196 // In this case we must break the clause.
197 bool SIFormMemoryClauses::canBundle(const MachineInstr &MI,
198  RegUse &Defs, RegUse &Uses) const {
199  // Check interference with defs.
200  for (const MachineOperand &MO : MI.operands()) {
201  // TODO: Prologue/Epilogue Insertion pass does not process bundled
202  // instructions.
203  if (MO.isFI())
204  return false;
205 
206  if (!MO.isReg())
207  continue;
208 
209  unsigned Reg = MO.getReg();
210 
211  // If it is tied we will need to write same register as we read.
212  if (MO.isTied())
213  return false;
214 
215  RegUse &Map = MO.isDef() ? Uses : Defs;
216  auto Conflict = Map.find(Reg);
217  if (Conflict == Map.end())
218  continue;
219 
221  return false;
222 
223  LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
224  if ((Conflict->second.second & Mask).any())
225  return false;
226  }
227 
228  return true;
229 }
230 
231 // Since all defs in the clause are early clobber we can run out of registers.
232 // Function returns false if pressure would hit the limit if instruction is
233 // bundled into a memory clause.
234 bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
235  GCNDownwardRPTracker &RPT) {
236  // NB: skip advanceBeforeNext() call. Since all defs will be marked
237  // early-clobber they will all stay alive at least to the end of the
238  // clause. Therefor we should not decrease pressure even if load
239  // pointer becomes dead and could otherwise be reused for destination.
240  RPT.advanceToNext();
241  GCNRegPressure MaxPressure = RPT.moveMaxPressure();
242  unsigned Occupancy = MaxPressure.getOccupancy(*ST);
243  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
244  MaxPressure.getVGPRNum() <= MaxVGPRs &&
245  MaxPressure.getSGPRNum() <= MaxSGPRs) {
246  LastRecordedOccupancy = Occupancy;
247  return true;
248  }
249  return false;
250 }
251 
252 // Collect register defs and uses along with their lane masks and states.
253 void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
254  RegUse &Defs, RegUse &Uses) const {
255  for (const MachineOperand &MO : MI.operands()) {
256  if (!MO.isReg())
257  continue;
258  unsigned Reg = MO.getReg();
259  if (!Reg)
260  continue;
261 
263  TRI->getSubRegIndexLaneMask(MO.getSubReg()) :
265  RegUse &Map = MO.isDef() ? Defs : Uses;
266 
267  auto Loc = Map.find(Reg);
268  unsigned State = getMopState(MO);
269  if (Loc == Map.end()) {
270  Map[Reg] = std::make_pair(State, Mask);
271  } else {
272  Loc->second.first |= State;
273  Loc->second.second |= Mask;
274  }
275  }
276 }
277 
278 // Check register def/use conflicts, occupancy limits and collect def/use maps.
279 // Return true if instruction can be bundled with previous. It it cannot
280 // def/use maps are not updated.
281 bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
282  RegUse &Defs, RegUse &Uses,
283  GCNDownwardRPTracker &RPT) {
284  if (!canBundle(MI, Defs, Uses))
285  return false;
286 
287  if (!checkPressure(MI, RPT))
288  return false;
289 
290  collectRegUses(MI, Defs, Uses);
291  return true;
292 }
293 
294 bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
295  if (skipFunction(MF.getFunction()))
296  return false;
297 
298  ST = &MF.getSubtarget<GCNSubtarget>();
299  if (!ST->isXNACKEnabled())
300  return false;
301 
302  const SIInstrInfo *TII = ST->getInstrInfo();
303  TRI = ST->getRegisterInfo();
304  MRI = &MF.getRegInfo();
305  MFI = MF.getInfo<SIMachineFunctionInfo>();
306  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
307  SlotIndexes *Ind = LIS->getSlotIndexes();
308  bool Changed = false;
309 
310  MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
311  MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
312 
313  for (MachineBasicBlock &MBB : MF) {
315  for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
316  MachineInstr &MI = *I;
317  Next = std::next(I);
318 
319  bool IsVMEM = isVMEMClauseInst(MI);
320 
321  if (!isValidClauseInst(MI, IsVMEM))
322  continue;
323 
324  RegUse Defs, Uses;
325  GCNDownwardRPTracker RPT(*LIS);
326  RPT.reset(MI);
327 
328  if (!processRegUses(MI, Defs, Uses, RPT))
329  continue;
330 
331  unsigned Length = 1;
332  for ( ; Next != E && Length < MaxClause; ++Next) {
333  if (!isValidClauseInst(*Next, IsVMEM))
334  break;
335 
336  // A load from pointer which was loaded inside the same bundle is an
337  // impossible clause because we will need to write and read the same
338  // register inside. In this case processRegUses will return false.
339  if (!processRegUses(*Next, Defs, Uses, RPT))
340  break;
341 
342  ++Length;
343  }
344  if (Length < 2)
345  continue;
346 
347  Changed = true;
348  MFI->limitOccupancy(LastRecordedOccupancy);
349 
350  auto B = BuildMI(MBB, I, DebugLoc(), TII->get(TargetOpcode::BUNDLE));
351  Ind->insertMachineInstrInMaps(*B);
352 
353  for (auto BI = I; BI != Next; ++BI) {
354  BI->bundleWithPred();
355  Ind->removeSingleMachineInstrFromMaps(*BI);
356 
357  for (MachineOperand &MO : BI->defs())
358  if (MO.readsReg())
359  MO.setIsInternalRead(true);
360  }
361 
362  for (auto &&R : Defs) {
363  forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
364  unsigned S = R.second.first | RegState::EarlyClobber;
365  if (!SubReg)
366  S &= ~(RegState::Undef | RegState::Dead);
367  B.addDef(R.first, S, SubReg);
368  });
369  }
370 
371  for (auto &&R : Uses) {
372  forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
373  B.addUse(R.first, R.second.first & ~RegState::Kill, SubReg);
374  });
375  }
376 
377  for (auto &&R : Defs) {
378  unsigned Reg = R.first;
379  Uses.erase(Reg);
381  continue;
382  LIS->removeInterval(Reg);
383  LIS->createAndComputeVirtRegInterval(Reg);
384  }
385 
386  for (auto &&R : Uses) {
387  unsigned Reg = R.first;
389  continue;
390  LIS->removeInterval(Reg);
391  LIS->createAndComputeVirtRegInterval(Reg);
392  }
393  }
394  }
395 
396  return Changed;
397 }
char & SIFormMemoryClausesID
unsigned getNumLanes() const
Definition: LaneBitmask.h:76
void bundleWithPred()
Bundle this instruction with its predecessor.
Interface definition for SIRegisterInfo.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
FunctionPass * createSIFormMemoryClausesPass()
void initializeSIFormMemoryClausesPass(PassRegistry &)
SI Form memory clauses
decltype(MaxPressure) moveMaxPressure()
bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs=nullptr)
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
unsigned getHighestLane() const
Definition: LaneBitmask.h:79
static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause)
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
LLVM_READONLY int getAtomicNoRetOp(uint16_t Opcode)
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
static bool isSMRD(const MachineInstr &MI)
Definition: SIInstrInfo.h:435
bool isEarlyClobber() const
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
LLVM_READONLY int getAtomicRetOp(uint16_t Opcode)
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:471
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned SubReg
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:84
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
unsigned getOccupancy(const GCNSubtarget &ST) const
SlotIndexes pass.
Definition: SlotIndexes.h:331
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:820
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static cl::opt< unsigned > MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15), cl::desc("Maximum length of a memory clause, instructions"))
bool isBundled() const
Return true if this instruction part of a bundle.
Definition: MachineInstr.h:356
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
unsigned getSGPRNum() const
Represent the analysis usage information of a pass.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
constexpr bool all() const
Definition: LaneBitmask.h:54
static unsigned getMopState(const MachineOperand &MO)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
Iterator for intrusive lists based on ilist_node.
bool isDebugValue() const
Definition: MachineInstr.h:997
MachineOperand class - Representation of each machine instruction operand.
unsigned getVGPRNum() const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void setPreservesAll()
Set by analyses that do not transform their input at all.
bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
#define DEBUG_TYPE
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool isVMEMClauseInst(const MachineInstr &MI)
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isSMEMClauseInst(const MachineInstr &MI)
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:331
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:807
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE, "SI Form memory clauses", false, false) INITIALIZE_PASS_END(SIFormMemoryClauses
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool isImplicit() const