LLVM  8.0.1
R600EmitClauseMarkers.cpp
Go to the documentation of this file.
1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
12 /// up to 128 ALU instructions; these instructions can access up to 4
13 /// prefetched lines of 16 registers from constant buffers. Such ALU clauses
14 /// are initiated by CF_ALU instructions.
15 //===----------------------------------------------------------------------===//
16 
17 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "R600Defines.h"
20 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
31 #include "llvm/Pass.h"
33 #include <cassert>
34 #include <cstdint>
35 #include <utility>
36 #include <vector>
37 
38 using namespace llvm;
39 
40 namespace llvm {
41 
43 
44 } // end namespace llvm
45 
46 namespace {
47 
48 class R600EmitClauseMarkers : public MachineFunctionPass {
49 private:
50  const R600InstrInfo *TII = nullptr;
51  int Address = 0;
52 
/// Returns the amount that \p MI is charged against the clause size budget;
/// callers accumulate this value into their AluInstCount.
///
/// - INTERP_PAIR_XY, INTERP_PAIR_ZW, INTERP_VEC_LOAD and DOT_4 count as 4.
/// - KILL counts as 0.
/// - Opcodes for which TII->isLDSRetInstr() holds count as 2.
/// - Vector, cube and reduction instructions (per TII) count as 4.
/// - Everything else counts as 1 plus one for each register operand that is
///   ALU_LITERAL_X.
53  unsigned OccupiedDwords(MachineInstr &MI) const {
54  switch (MI.getOpcode()) {
55  case R600::INTERP_PAIR_XY:
56  case R600::INTERP_PAIR_ZW:
57  case R600::INTERP_VEC_LOAD:
58  case R600::DOT_4:
59  return 4;
60  case R600::KILL:
61  return 0;
62  default:
63  break;
64  }
65 
66  // These will be expanded to two ALU instructions in the
67  // ExpandSpecialInstructions pass.
68  if (TII->isLDSRetInstr(MI.getOpcode()))
69  return 2;
70 
71  if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
72  TII->isReductionOp(MI.getOpcode()))
73  return 4;
74 
75  unsigned NumLiteral = 0;
// NOTE(review): the first line of this for-header (listing line 76, which
// declares `It`) is missing from this extract; presumably `It` starts at
// MI.operands_begin() -- confirm against the real source.
77  E = MI.operands_end();
78  It != E; ++It) {
79  MachineOperand &MO = *It;
// Each ALU_LITERAL_X register operand adds one to the result.
80  if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
81  ++NumLiteral;
82  }
83  return 1 + NumLiteral;
84  }
85 
86  bool isALU(const MachineInstr &MI) const {
87  if (TII->isALUInstr(MI.getOpcode()))
88  return true;
89  if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
90  return true;
91  switch (MI.getOpcode()) {
92  case R600::PRED_X:
93  case R600::INTERP_PAIR_XY:
94  case R600::INTERP_PAIR_ZW:
95  case R600::INTERP_VEC_LOAD:
96  case R600::COPY:
97  case R600::DOT_4:
98  return true;
99  default:
100  return false;
101  }
102  }
103 
104  bool IsTrivialInst(MachineInstr &MI) const {
105  switch (MI.getOpcode()) {
106  case R600::KILL:
107  case R600::RETURN:
108  case R600::IMPLICIT_DEF:
109  return true;
110  default:
111  return false;
112  }
113  }
114 
115  std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
116  // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
117  // (See also R600ISelLowering.cpp)
118  // ConstIndex value is in [0, 4095];
119  return std::pair<unsigned, unsigned>(
120  ((Sel >> 2) - 512) >> 12, // KC_BANK
121  // Line Number of ConstIndex
122  // A line contains 16 constant registers however KCX bank can lock
123  // two line at the same time ; thus we want to get an even line number.
124  // Line number can be retrieved with (>>4), using (>>5) <<1 generates
125  // an even number.
126  ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
127  }
128 
/// Tries to serve every ALU_CONST source of \p MI from at most two cached
/// (bank, line) entries, and optionally rewrites those sources to KCache
/// registers.
///
/// \param MI            Instruction to process. Instructions that are neither
///                      ALU instructions (per TII->isALUInstr) nor DOT_4 are
///                      accepted unchanged.
/// \param CachedConsts  In/out list of (bank, line) pairs in use for the
///                      current clause. New pairs are appended as they are
///                      needed, up to two entries; entries appended before a
///                      failure is detected are not removed.
/// \param UpdateInstr   When false, only CachedConsts is updated and \p MI is
///                      left untouched (dry run).
/// \returns false if some ALU_CONST source needs a (bank, line) pair that
///          matches neither of the two cached entries; true otherwise.
129  bool
130  SubstituteKCacheBank(MachineInstr &MI,
131  std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
132  bool UpdateInstr = true) const {
// One (cache slot, register index) entry per ALU_CONST source, in source
// order.
133  std::vector<std::pair<unsigned, unsigned>> UsedKCache;
134 
135  if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
136  return true;
137 
// NOTE(review): the line declaring `Consts` (listing line 138) is missing
// from this extract; it is initialised from TII->getSrcs(MI) below and is
// used as a sequence of (operand pointer, selector) pairs.
139  TII->getSrcs(MI);
// This condition is the negation of the early-return test above, so it
// always holds when reached.
140  assert(
141  (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
142  "Can't assign Const");
143  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
144  if (Consts[i].first->getReg() != R600::ALU_CONST)
145  continue;
146  unsigned Sel = Consts[i].second;
// Chan is the two low bits of Sel; Index is the low five bits of the
// de-biased constant index. Together they form the register index later
// passed to R600_KC0RegClass / R600_KC1RegClass.
147  unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
148  unsigned KCacheIndex = Index * 4 + Chan;
149  const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
// Slot 0 is free: claim it.
150  if (CachedConsts.empty()) {
151  CachedConsts.push_back(BankLine);
152  UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
153  continue;
154  }
// Slot 0 already holds this (bank, line).
155  if (CachedConsts[0] == BankLine) {
156  UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
157  continue;
158  }
// Slot 1 is free: claim it.
159  if (CachedConsts.size() == 1) {
160  CachedConsts.push_back(BankLine);
161  UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
162  continue;
163  }
// Slot 1 already holds this (bank, line).
164  if (CachedConsts[1] == BankLine) {
165  UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
166  continue;
167  }
// Both slots are taken by other (bank, line) pairs.
168  return false;
169  }
170 
171  if (!UpdateInstr)
172  return true;
173 
// Second pass: rewrite each ALU_CONST source. `j` walks UsedKCache, which
// was filled in the same source order by the loop above.
174  for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
175  if (Consts[i].first->getReg() != R600::ALU_CONST)
176  continue;
177  switch(UsedKCache[j].first) {
178  case 0:
179  Consts[i].first->setReg(
180  R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
181  break;
182  case 1:
183  Consts[i].first->setReg(
184  R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
185  break;
186  default:
187  llvm_unreachable("Wrong Cache Line");
188  }
189  j++;
190  }
191  return true;
192  }
193 
/// Checks whether the instruction at `Def` can join the clause being built
/// given that a clause-local register it defines must see its uses inside the
/// same clause.
///
/// \param AluInstCount  Size of the clause so far. Taken by value, so the
///                      caller's counter is not modified.
/// \param KCacheBanks   (bank, line) pairs in use so far. Taken by value, so
///                      the dry-run calls to SubstituteKCacheBank below do not
///                      affect the caller's list.
///
/// NOTE(review): the remaining parameters (listing lines 197-198) are missing
/// from this extract; the body uses `Def` and `BBEnd` as
/// MachineBasicBlock iterators -- confirm against the real source.
///
/// \returns true if `Def` has no register def operand that is clause-local
///          (i.e. for which TRI.isPhysRegLiveAcrossClauses() is false), or if
///          the forward scan for the first such operand stays within the
///          KCache and instruction-count limits; false otherwise.
194  bool canClauseLocalKillFitInClause(
195  unsigned AluInstCount,
196  std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
199  const R600RegisterInfo &TRI = TII->getRegisterInfo();
200  //TODO: change this to defs?
// NOTE(review): the first line of this for-header (listing line 201, which
// declares the type of MOI/MOE) is missing from this extract.
202  MOI = Def->operands_begin(),
203  MOE = Def->operands_end(); MOI != MOE; ++MOI) {
// Skip operands that are not register defs, and registers that are live
// across clauses.
204  if (!MOI->isReg() || !MOI->isDef() ||
205  TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
206  continue;
207 
208  // Def defines a clause local register, so check that its use will fit
209  // in the clause.
210  unsigned LastUseCount = 0;
// The scan starts at Def itself, so Def's own size is included in the count.
211  for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
212  AluInstCount += OccupiedDwords(*UseI);
213  // Make sure we won't need to end the clause due to KCache limitations.
214  if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
215  return false;
216 
217  // We have reached the maximum instruction limit before finding the
218  // use that kills this register, so we cannot use this def in the
219  // current clause.
220  if (AluInstCount >= TII->getMaxAlusPerClause())
221  return false;
222 
223  // TODO: Is this true? kill flag appears to work OK below
224  // Register kill flags have been cleared by the time we get to this
225  // pass, but it is safe to assume that all uses of this register
226  // occur in the same basic block as its definition, because
227  // it is illegal for the scheduler to schedule them in
228  // different blocks.
229  if (UseI->readsRegister(MOI->getReg(), &TRI))
230  LastUseCount = AluInstCount;
231 
232  // Exit early if the current use kills the register
233  if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
234  break;
235  }
// The function returns (or hits llvm_unreachable) here, so only the first
// clause-local def operand of Def is examined.
236  if (LastUseCount)
237  return LastUseCount <= TII->getMaxAlusPerClause();
238  llvm_unreachable("Clause local register live at end of clause.");
239  }
240  return true;
241  }
242 
/// Builds one ALU clause starting at \p I in \p MBB and inserts the clause
/// marker (CF_ALU, or CF_ALU_PUSH_BEFORE when a PRED_X with MO_FLAG_PUSH was
/// seen) in front of the first instruction of the clause.
///
/// The scan advances from \p I and stops at the end of the block, at the
/// first non-trivial instruction that is not an ALU instruction, when the
/// running count exceeds TII->getMaxAlusPerClause(), at a PRED_X that is not
/// the first counted instruction, right after an instruction that must be
/// last in a clause, or when the clause-local-register or KCache checks fail.
///
/// \returns the iterator at which the scan stopped, i.e. the first
///          instruction that was not consumed by this clause.
///
/// NOTE(review): the return-type line (listing line 243) is missing from this
/// extract; the function returns `I`, a MachineBasicBlock::iterator.
244  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
245  MachineBasicBlock::iterator ClauseHead = I;
246  std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
247  bool PushBeforeModifier = false;
248  unsigned AluInstCount = 0;
249  for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
250  if (IsTrivialInst(*I))
251  continue;
252  if (!isALU(*I))
253  break;
254  if (AluInstCount > TII->getMaxAlusPerClause())
255  break;
256  if (I->getOpcode() == R600::PRED_X) {
257  // We put PRED_X in its own clause to ensure that ifcvt won't create
258  // clauses with more than 128 insts.
259  // IfCvt is indeed checking that "then" and "else" branches of an if
260  // statement have less than ~60 insts thus converted clauses can't be
261  // bigger than ~121 insts (predicate setter needs to be in the same
262  // clause as predicated alus).
263  if (AluInstCount > 0)
264  break;
265  if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
266  PushBeforeModifier = true;
267  AluInstCount ++;
268  continue;
269  }
270  // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
271  //
272  // * KILL or INTERP instructions
273  // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
274  // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
275  //
276  // XXX: These checks have not been implemented yet.
// NOTE(review): the instruction is consumed by the clause (I is advanced
// past it) but nothing is added to AluInstCount for it on this path.
277  if (TII->mustBeLastInClause(I->getOpcode())) {
278  I++;
279  break;
280  }
281 
282  // If this instruction defines a clause local register, make sure
283  // its use can fit in this clause.
284  if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
285  break;
286 
// Called with the default UpdateInstr = true, so the ALU_CONST sources of *I
// are rewritten to KCache registers here.
287  if (!SubstituteKCacheBank(*I, KCacheBanks))
288  break;
289  AluInstCount += OccupiedDwords(*I);
290  }
291  unsigned Opcode = PushBeforeModifier ?
292  R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
293  BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
294  // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
295  // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
296  // pass may assume that identical ALU clause starter at the beginning of a
297  // true and false branch can be factorized which is not the case.
298  .addImm(Address++) // ADDR
299  .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
300  .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
301  .addImm(KCacheBanks.empty()?0:2) // KM0
302  .addImm((KCacheBanks.size() < 2)?0:2) // KM1
303  .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
304  .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
305  .addImm(AluInstCount) // COUNT
306  .addImm(1); // Enabled
307  return I;
308  }
309 
310 public:
311  static char ID;
312 
/// Constructs the pass, passing the static pass ID to MachineFunctionPass.
// NOTE(review): the constructor body (listing line 314) is missing from this
// extract.
313  R600EmitClauseMarkers() : MachineFunctionPass(ID) {
315  }
316 
/// Pass entry point: walks every basic block of \p MF and calls
/// MakeALUClause each time an ALU instruction is met, resuming the walk from
/// the iterator MakeALUClause returns.
///
/// A block is skipped when the instruction at `I` is already a CF_ALU.
///
/// NOTE(review): the declarations of `ST` (listing line 318) and `I` (listing
/// line 324) are missing from this extract; `I` is presumably the block's
/// first instruction -- confirm against the real source.
///
/// \returns false unconditionally. NOTE(review): MakeALUClause inserts
/// instructions through BuildMI, so the function may have been modified even
/// though false is reported -- confirm that this is intentional.
317  bool runOnMachineFunction(MachineFunction &MF) override {
319  TII = ST.getInstrInfo();
320 
321  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
322  BB != BB_E; ++BB) {
323  MachineBasicBlock &MBB = *BB;
325  if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
326  continue; // BB was already parsed
327  for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
328  if (isALU(*I)) {
329  auto next = MakeALUClause(MBB, I);
// MakeALUClause must make progress, otherwise this loop would never end.
330  assert(next != I);
331  I = next;
332  } else
333  ++I;
334  }
335  }
336  return false;
337  }
338 
339  StringRef getPassName() const override {
340  return "R600 Emit Clause Markers Pass";
341  }
342 };
343 
345 
346 } // end anonymous namespace
347 
348 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
349  "R600 Emit Clause Markters", false, false)
350 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
351  "R600 Emit Clause Markters", false, false)
352 
354  return new R600EmitClauseMarkers();
355 }
mop_iterator operands_end()
Definition: MachineInstr.h:454
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
void initializeR600EmitClauseMarkersPass(PassRegistry &)
unsigned getReg() const
getReg - Returns the register number.
unsigned second
unsigned const TargetRegisterInfo * TRI
Interface definition for R600RegisterInfo.
R600 Emit Clause Markters
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", "R600 Emit Clause Markters", false, false) INITIALIZE_PASS_END(R600EmitClauseMarkers
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
MachineOperand & getFlagOp(MachineInstr &MI, unsigned SrcIdx=0, unsigned Flag=0) const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
unsigned getMaxAlusPerClause() const
bool mustBeLastInClause(unsigned Opcode) const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
const R600InstrInfo * getInstrInfo() const override
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:72
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned first
bool isVector(const MachineInstr &MI) const
Vector instructions are instructions that must fill all instruction slots within an instruction group...
Iterator for intrusive lists based on ilist_node.
bool isPhysRegLiveAcrossClauses(unsigned Reg) const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
FunctionPass * createR600EmitClauseMarkers()
bool isALUInstr(unsigned Opcode) const
bool isLDSRetInstr(unsigned Opcode) const
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
bool isReductionOp(unsigned opcode) const
#define I(x, y, z)
Definition: MD5.cpp:58
SmallVector< std::pair< MachineOperand *, int64_t >, 3 > getSrcs(MachineInstr &MI) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
mop_iterator operands_begin()
Definition: MachineInstr.h:453
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
#define MO_FLAG_PUSH
Definition: R600Defines.h:21
bool isCubeOp(unsigned opcode) const