LLVM  8.0.1
R600ControlFlowFinalizer.cpp
Go to the documentation of this file.
1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This pass turns all control flow pseudo instructions into native ones,
12 /// computing their addresses on the fly; it also sets the STACK_SIZE info.
13 //
14 //===----------------------------------------------------------------------===//
15 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <set>
#include <utility>
#include <vector>
46 
47 using namespace llvm;
48 
49 #define DEBUG_TYPE "r600cf"
50 
51 namespace {
52 
53 struct CFStack {
54  enum StackItem {
55  ENTRY = 0,
56  SUB_ENTRY = 1,
57  FIRST_NON_WQM_PUSH = 2,
58  FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
59  };
60 
61  const R600Subtarget *ST;
62  std::vector<StackItem> BranchStack;
63  std::vector<StackItem> LoopStack;
64  unsigned MaxStackSize;
65  unsigned CurrentEntries = 0;
66  unsigned CurrentSubEntries = 0;
67 
68  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
69  // We need to reserve a stack entry for CALL_FS in vertex shaders.
70  MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
71 
72  unsigned getLoopDepth();
73  bool branchStackContains(CFStack::StackItem);
74  bool requiresWorkAroundForInst(unsigned Opcode);
75  unsigned getSubEntrySize(CFStack::StackItem Item);
76  void updateMaxStackSize();
77  void pushBranch(unsigned Opcode, bool isWQM = false);
78  void pushLoop();
79  void popBranch();
80  void popLoop();
81 };
82 
83 unsigned CFStack::getLoopDepth() {
84  return LoopStack.size();
85 }
86 
87 bool CFStack::branchStackContains(CFStack::StackItem Item) {
88  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
89  E = BranchStack.end(); I != E; ++I) {
90  if (*I == Item)
91  return true;
92  }
93  return false;
94 }
95 
96 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
97  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
98  getLoopDepth() > 1)
99  return true;
100 
101  if (!ST->hasCFAluBug())
102  return false;
103 
104  switch(Opcode) {
105  default: return false;
106  case R600::CF_ALU_PUSH_BEFORE:
107  case R600::CF_ALU_ELSE_AFTER:
108  case R600::CF_ALU_BREAK:
109  case R600::CF_ALU_CONTINUE:
110  if (CurrentSubEntries == 0)
111  return false;
112  if (ST->getWavefrontSize() == 64) {
113  // We are being conservative here. We only require this work-around if
114  // CurrentSubEntries > 3 &&
115  // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
116  //
117  // We have to be conservative, because we don't know for certain that
118  // our stack allocation algorithm for Evergreen/NI is correct. Applying this
119  // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
120  // resources without any problems.
121  return CurrentSubEntries > 3;
122  } else {
123  assert(ST->getWavefrontSize() == 32);
124  // We are being conservative here. We only require the work-around if
125  // CurrentSubEntries > 7 &&
126  // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
127  // See the comment on the wavefront size == 64 case for why we are
128  // being conservative.
129  return CurrentSubEntries > 7;
130  }
131  }
132 }
133 
134 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
135  switch(Item) {
136  default:
137  return 0;
138  case CFStack::FIRST_NON_WQM_PUSH:
139  assert(!ST->hasCaymanISA());
140  if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
141  // +1 For the push operation.
142  // +2 Extra space required.
143  return 3;
144  } else {
145  // Some documentation says that this is not necessary on Evergreen,
146  // but experimentation has show that we need to allocate 1 extra
147  // sub-entry for the first non-WQM push.
148  // +1 For the push operation.
149  // +1 Extra space required.
150  return 2;
151  }
152  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
153  assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
154  // +1 For the push operation.
155  // +1 Extra space required.
156  return 2;
157  case CFStack::SUB_ENTRY:
158  return 1;
159  }
160 }
161 
162 void CFStack::updateMaxStackSize() {
163  unsigned CurrentStackSize =
164  CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
165  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
166 }
167 
168 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
169  CFStack::StackItem Item = CFStack::ENTRY;
170  switch(Opcode) {
171  case R600::CF_PUSH_EG:
172  case R600::CF_ALU_PUSH_BEFORE:
173  if (!isWQM) {
174  if (!ST->hasCaymanISA() &&
175  !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
176  Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
177  // See comment in
178  // CFStack::getSubEntrySize()
179  else if (CurrentEntries > 0 &&
180  ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
181  !ST->hasCaymanISA() &&
182  !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
183  Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
184  else
185  Item = CFStack::SUB_ENTRY;
186  } else
187  Item = CFStack::ENTRY;
188  break;
189  }
190  BranchStack.push_back(Item);
191  if (Item == CFStack::ENTRY)
192  CurrentEntries++;
193  else
194  CurrentSubEntries += getSubEntrySize(Item);
195  updateMaxStackSize();
196 }
197 
198 void CFStack::pushLoop() {
199  LoopStack.push_back(CFStack::ENTRY);
200  CurrentEntries++;
201  updateMaxStackSize();
202 }
203 
204 void CFStack::popBranch() {
205  CFStack::StackItem Top = BranchStack.back();
206  if (Top == CFStack::ENTRY)
207  CurrentEntries--;
208  else
209  CurrentSubEntries-= getSubEntrySize(Top);
210  BranchStack.pop_back();
211 }
212 
213 void CFStack::popLoop() {
214  CurrentEntries--;
215  LoopStack.pop_back();
216 }
217 
218 class R600ControlFlowFinalizer : public MachineFunctionPass {
219 private:
220  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
221 
222  enum ControlFlowInstruction {
223  CF_TC,
224  CF_VC,
225  CF_CALL_FS,
226  CF_WHILE_LOOP,
227  CF_END_LOOP,
228  CF_LOOP_BREAK,
229  CF_LOOP_CONTINUE,
230  CF_JUMP,
231  CF_ELSE,
232  CF_POP,
233  CF_END
234  };
235 
236  const R600InstrInfo *TII = nullptr;
237  const R600RegisterInfo *TRI = nullptr;
238  unsigned MaxFetchInst;
239  const R600Subtarget *ST = nullptr;
240 
241  bool IsTrivialInst(MachineInstr &MI) const {
242  switch (MI.getOpcode()) {
243  case R600::KILL:
244  case R600::RETURN:
245  return true;
246  default:
247  return false;
248  }
249  }
250 
251  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
252  unsigned Opcode = 0;
253  bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
254  switch (CFI) {
255  case CF_TC:
256  Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
257  break;
258  case CF_VC:
259  Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
260  break;
261  case CF_CALL_FS:
262  Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
263  break;
264  case CF_WHILE_LOOP:
265  Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
266  break;
267  case CF_END_LOOP:
268  Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
269  break;
270  case CF_LOOP_BREAK:
271  Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
272  break;
273  case CF_LOOP_CONTINUE:
274  Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
275  break;
276  case CF_JUMP:
277  Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
278  break;
279  case CF_ELSE:
280  Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
281  break;
282  case CF_POP:
283  Opcode = isEg ? R600::POP_EG : R600::POP_R600;
284  break;
285  case CF_END:
286  if (ST->hasCaymanISA()) {
287  Opcode = R600::CF_END_CM;
288  break;
289  }
290  Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
291  break;
292  }
293  assert (Opcode && "No opcode selected");
294  return TII->get(Opcode);
295  }
296 
297  bool isCompatibleWithClause(const MachineInstr &MI,
298  std::set<unsigned> &DstRegs) const {
299  unsigned DstMI, SrcMI;
301  E = MI.operands_end();
302  I != E; ++I) {
303  const MachineOperand &MO = *I;
304  if (!MO.isReg())
305  continue;
306  if (MO.isDef()) {
307  unsigned Reg = MO.getReg();
308  if (R600::R600_Reg128RegClass.contains(Reg))
309  DstMI = Reg;
310  else
311  DstMI = TRI->getMatchingSuperReg(Reg,
313  &R600::R600_Reg128RegClass);
314  }
315  if (MO.isUse()) {
316  unsigned Reg = MO.getReg();
317  if (R600::R600_Reg128RegClass.contains(Reg))
318  SrcMI = Reg;
319  else
320  SrcMI = TRI->getMatchingSuperReg(Reg,
322  &R600::R600_Reg128RegClass);
323  }
324  }
325  if ((DstRegs.find(SrcMI) == DstRegs.end())) {
326  DstRegs.insert(DstMI);
327  return true;
328  } else
329  return false;
330  }
331 
332  ClauseFile
333  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
334  const {
335  MachineBasicBlock::iterator ClauseHead = I;
336  std::vector<MachineInstr *> ClauseContent;
337  unsigned AluInstCount = 0;
338  bool IsTex = TII->usesTextureCache(*ClauseHead);
339  std::set<unsigned> DstRegs;
340  for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
341  if (IsTrivialInst(*I))
342  continue;
343  if (AluInstCount >= MaxFetchInst)
344  break;
345  if ((IsTex && !TII->usesTextureCache(*I)) ||
346  (!IsTex && !TII->usesVertexCache(*I)))
347  break;
348  if (!isCompatibleWithClause(*I, DstRegs))
349  break;
350  AluInstCount ++;
351  ClauseContent.push_back(&*I);
352  }
353  MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
354  getHWInstrDesc(IsTex?CF_TC:CF_VC))
355  .addImm(0) // ADDR
356  .addImm(AluInstCount - 1); // COUNT
357  return ClauseFile(MIb, std::move(ClauseContent));
358  }
359 
360  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
361  static const unsigned LiteralRegs[] = {
362  R600::ALU_LITERAL_X,
363  R600::ALU_LITERAL_Y,
364  R600::ALU_LITERAL_Z,
365  R600::ALU_LITERAL_W
366  };
368  TII->getSrcs(MI);
369  for (const auto &Src:Srcs) {
370  if (Src.first->getReg() != R600::ALU_LITERAL_X)
371  continue;
372  int64_t Imm = Src.second;
373  std::vector<MachineOperand *>::iterator It =
374  llvm::find_if(Lits, [&](MachineOperand *val) {
375  return val->isImm() && (val->getImm() == Imm);
376  });
377 
378  // Get corresponding Operand
379  MachineOperand &Operand = MI.getOperand(
380  TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
381 
382  if (It != Lits.end()) {
383  // Reuse existing literal reg
384  unsigned Index = It - Lits.begin();
385  Src.first->setReg(LiteralRegs[Index]);
386  } else {
387  // Allocate new literal reg
388  assert(Lits.size() < 4 && "Too many literals in Instruction Group");
389  Src.first->setReg(LiteralRegs[Lits.size()]);
390  Lits.push_back(&Operand);
391  }
392  }
393  }
394 
395  MachineBasicBlock::iterator insertLiterals(
396  MachineBasicBlock::iterator InsertPos,
397  const std::vector<unsigned> &Literals) const {
398  MachineBasicBlock *MBB = InsertPos->getParent();
399  for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
400  unsigned LiteralPair0 = Literals[i];
401  unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
402  InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
403  TII->get(R600::LITERALS))
404  .addImm(LiteralPair0)
405  .addImm(LiteralPair1);
406  }
407  return InsertPos;
408  }
409 
410  ClauseFile
411  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
412  const {
413  MachineInstr &ClauseHead = *I;
414  std::vector<MachineInstr *> ClauseContent;
415  I++;
416  for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
417  if (IsTrivialInst(*I)) {
418  ++I;
419  continue;
420  }
421  if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
422  break;
423  std::vector<MachineOperand *>Literals;
424  if (I->isBundle()) {
425  MachineInstr &DeleteMI = *I;
427  while (++BI != E && BI->isBundledWithPred()) {
428  BI->unbundleFromPred();
429  for (MachineOperand &MO : BI->operands()) {
430  if (MO.isReg() && MO.isInternalRead())
431  MO.setIsInternalRead(false);
432  }
433  getLiteral(*BI, Literals);
434  ClauseContent.push_back(&*BI);
435  }
436  I = BI;
437  DeleteMI.eraseFromParent();
438  } else {
439  getLiteral(*I, Literals);
440  ClauseContent.push_back(&*I);
441  I++;
442  }
443  for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
444  MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
445  TII->get(R600::LITERALS));
446  if (Literals[i]->isImm()) {
447  MILit.addImm(Literals[i]->getImm());
448  } else {
449  MILit.addGlobalAddress(Literals[i]->getGlobal(),
450  Literals[i]->getOffset());
451  }
452  if (i + 1 < e) {
453  if (Literals[i + 1]->isImm()) {
454  MILit.addImm(Literals[i + 1]->getImm());
455  } else {
456  MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
457  Literals[i + 1]->getOffset());
458  }
459  } else
460  MILit.addImm(0);
461  ClauseContent.push_back(MILit);
462  }
463  }
464  assert(ClauseContent.size() < 128 && "ALU clause is too big");
465  ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
466  return ClauseFile(&ClauseHead, std::move(ClauseContent));
467  }
468 
469  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
470  const DebugLoc &DL, ClauseFile &Clause,
471  unsigned &CfCount) {
472  CounterPropagateAddr(*Clause.first, CfCount);
473  MachineBasicBlock *BB = Clause.first->getParent();
474  BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
475  for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
476  BB->splice(InsertPos, BB, Clause.second[i]);
477  }
478  CfCount += 2 * Clause.second.size();
479  }
480 
481  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
482  ClauseFile &Clause, unsigned &CfCount) {
483  Clause.first->getOperand(0).setImm(0);
484  CounterPropagateAddr(*Clause.first, CfCount);
485  MachineBasicBlock *BB = Clause.first->getParent();
486  BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
487  for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
488  BB->splice(InsertPos, BB, Clause.second[i]);
489  }
490  CfCount += Clause.second.size();
491  }
492 
493  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
494  MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
495  }
496  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
497  unsigned Addr) const {
498  for (MachineInstr *MI : MIs) {
499  CounterPropagateAddr(*MI, Addr);
500  }
501  }
502 
503 public:
504  static char ID;
505 
506  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
507 
508  bool runOnMachineFunction(MachineFunction &MF) override {
509  ST = &MF.getSubtarget<R600Subtarget>();
510  MaxFetchInst = ST->getTexVTXClauseSize();
511  TII = ST->getInstrInfo();
512  TRI = ST->getRegisterInfo();
513 
515 
516  CFStack CFStack(ST, MF.getFunction().getCallingConv());
517  for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
518  ++MB) {
519  MachineBasicBlock &MBB = *MB;
520  unsigned CfCount = 0;
521  std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
522  std::vector<MachineInstr * > IfThenElseStack;
524  BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
525  getHWInstrDesc(CF_CALL_FS));
526  CfCount++;
527  }
528  std::vector<ClauseFile> FetchClauses, AluClauses;
529  std::vector<MachineInstr *> LastAlu(1);
530  std::vector<MachineInstr *> ToPopAfter;
531 
532  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
533  I != E;) {
534  if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
535  LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
536  FetchClauses.push_back(MakeFetchClause(MBB, I));
537  CfCount++;
538  LastAlu.back() = nullptr;
539  continue;
540  }
541 
543  if (MI->getOpcode() != R600::ENDIF)
544  LastAlu.back() = nullptr;
545  if (MI->getOpcode() == R600::CF_ALU)
546  LastAlu.back() = &*MI;
547  I++;
548  bool RequiresWorkAround =
549  CFStack.requiresWorkAroundForInst(MI->getOpcode());
550  switch (MI->getOpcode()) {
551  case R600::CF_ALU_PUSH_BEFORE:
552  if (RequiresWorkAround) {
553  LLVM_DEBUG(dbgs()
554  << "Applying bug work-around for ALU_PUSH_BEFORE\n");
555  BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
556  .addImm(CfCount + 1)
557  .addImm(1);
558  MI->setDesc(TII->get(R600::CF_ALU));
559  CfCount++;
560  CFStack.pushBranch(R600::CF_PUSH_EG);
561  } else
562  CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
564  case R600::CF_ALU:
565  I = MI;
566  AluClauses.push_back(MakeALUClause(MBB, I));
567  LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
568  CfCount++;
569  break;
570  case R600::WHILELOOP: {
571  CFStack.pushLoop();
572  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
573  getHWInstrDesc(CF_WHILE_LOOP))
574  .addImm(1);
575  std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
576  std::set<MachineInstr *>());
577  Pair.second.insert(MIb);
578  LoopStack.push_back(std::move(Pair));
579  MI->eraseFromParent();
580  CfCount++;
581  break;
582  }
583  case R600::ENDLOOP: {
584  CFStack.popLoop();
585  std::pair<unsigned, std::set<MachineInstr *>> Pair =
586  std::move(LoopStack.back());
587  LoopStack.pop_back();
588  CounterPropagateAddr(Pair.second, CfCount);
589  BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
590  .addImm(Pair.first + 1);
591  MI->eraseFromParent();
592  CfCount++;
593  break;
594  }
595  case R600::IF_PREDICATE_SET: {
596  LastAlu.push_back(nullptr);
597  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
598  getHWInstrDesc(CF_JUMP))
599  .addImm(0)
600  .addImm(0);
601  IfThenElseStack.push_back(MIb);
602  LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
603  MI->eraseFromParent();
604  CfCount++;
605  break;
606  }
607  case R600::ELSE: {
608  MachineInstr * JumpInst = IfThenElseStack.back();
609  IfThenElseStack.pop_back();
610  CounterPropagateAddr(*JumpInst, CfCount);
611  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
612  getHWInstrDesc(CF_ELSE))
613  .addImm(0)
614  .addImm(0);
615  LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
616  IfThenElseStack.push_back(MIb);
617  MI->eraseFromParent();
618  CfCount++;
619  break;
620  }
621  case R600::ENDIF: {
622  CFStack.popBranch();
623  if (LastAlu.back()) {
624  ToPopAfter.push_back(LastAlu.back());
625  } else {
626  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
627  getHWInstrDesc(CF_POP))
628  .addImm(CfCount + 1)
629  .addImm(1);
630  (void)MIb;
631  LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
632  CfCount++;
633  }
634 
635  MachineInstr *IfOrElseInst = IfThenElseStack.back();
636  IfThenElseStack.pop_back();
637  CounterPropagateAddr(*IfOrElseInst, CfCount);
638  IfOrElseInst->getOperand(1).setImm(1);
639  LastAlu.pop_back();
640  MI->eraseFromParent();
641  break;
642  }
643  case R600::BREAK: {
644  CfCount ++;
645  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
646  getHWInstrDesc(CF_LOOP_BREAK))
647  .addImm(0);
648  LoopStack.back().second.insert(MIb);
649  MI->eraseFromParent();
650  break;
651  }
652  case R600::CONTINUE: {
653  MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
654  getHWInstrDesc(CF_LOOP_CONTINUE))
655  .addImm(0);
656  LoopStack.back().second.insert(MIb);
657  MI->eraseFromParent();
658  CfCount++;
659  break;
660  }
661  case R600::RETURN: {
662  DebugLoc DL = MBB.findDebugLoc(MI);
663  BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
664  CfCount++;
665  if (CfCount % 2) {
666  BuildMI(MBB, I, DL, TII->get(R600::PAD));
667  CfCount++;
668  }
669  MI->eraseFromParent();
670  for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
671  EmitFetchClause(I, DL, FetchClauses[i], CfCount);
672  for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
673  EmitALUClause(I, DL, AluClauses[i], CfCount);
674  break;
675  }
676  default:
677  if (TII->isExport(MI->getOpcode())) {
678  LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
679  CfCount++;
680  }
681  break;
682  }
683  }
684  for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
685  MachineInstr *Alu = ToPopAfter[i];
687  TII->get(R600::CF_ALU_POP_AFTER))
688  .addImm(Alu->getOperand(0).getImm())
689  .addImm(Alu->getOperand(1).getImm())
690  .addImm(Alu->getOperand(2).getImm())
691  .addImm(Alu->getOperand(3).getImm())
692  .addImm(Alu->getOperand(4).getImm())
693  .addImm(Alu->getOperand(5).getImm())
694  .addImm(Alu->getOperand(6).getImm())
695  .addImm(Alu->getOperand(7).getImm())
696  .addImm(Alu->getOperand(8).getImm());
697  Alu->eraseFromParent();
698  }
699  MFI->CFStackSize = CFStack.MaxStackSize;
700  }
701 
702  return false;
703  }
704 
705  StringRef getPassName() const override {
706  return "R600 Control Flow Finalizer Pass";
707  }
708 };
709 
710 } // end anonymous namespace
711 
712 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
713  "R600 Control Flow Finalizer", false, false)
714 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
715  "R600 Control Flow Finalizer", false, false)
716 
717 char R600ControlFlowFinalizer::ID = 0;
718 
719 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
720 
722  return new R600ControlFlowFinalizer();
723 }
mop_iterator operands_end()
Definition: MachineInstr.h:454
static unsigned getSubRegFromChannel(unsigned Channel)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
instr_iterator instr_end()
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
Interface definition for R600RegisterInfo.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Generation getGeneration() const
return AArch64::GPR64RegClass contains(Reg)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
#define ENTRY(ASMNAME, ENUM)
char & R600ControlFlowFinalizerID
R600 Control Flow Finalizer
bool usesVertexCache(unsigned Opcode) const
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
bool isExport(unsigned Opcode) const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define DEBUG_TYPE
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
Annotate SI Control Flow
FunctionPass * createR600ControlFlowFinalizer()
const R600InstrInfo * getInstrInfo() const override
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1214
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const R600RegisterInfo * getRegisterInfo() const override
Iterator for intrusive lists based on ilist_node.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool hasCaymanISA() const
int64_t getImm() const
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
bool isALUInstr(unsigned Opcode) const
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, "R600 Control Flow Finalizer", false, false) INITIALIZE_PASS_END(R600ControlFlowFinalizer
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
short getTexVTXClauseSize() const
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
#define I(x, y, z)
Definition: MD5.cpp:58
SmallVector< std::pair< MachineOperand *, int64_t >, 3 > getSrcs(MachineInstr &MI) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
mop_iterator operands_begin()
Definition: MachineInstr.h:453
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
bool usesTextureCache(unsigned Opcode) const