49 #define DEBUG_TYPE "r600cf" 57 FIRST_NON_WQM_PUSH = 2,
58 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
62 std::vector<StackItem> BranchStack;
63 std::vector<StackItem> LoopStack;
64 unsigned MaxStackSize;
65 unsigned CurrentEntries = 0;
66 unsigned CurrentSubEntries = 0;
72 unsigned getLoopDepth();
73 bool branchStackContains(CFStack::StackItem);
74 bool requiresWorkAroundForInst(
unsigned Opcode);
75 unsigned getSubEntrySize(CFStack::StackItem Item);
76 void updateMaxStackSize();
77 void pushBranch(
unsigned Opcode,
bool isWQM =
false);
83 unsigned CFStack::getLoopDepth() {
84 return LoopStack.size();
87 bool CFStack::branchStackContains(CFStack::StackItem Item) {
88 for (std::vector<CFStack::StackItem>::const_iterator
I = BranchStack.begin(),
89 E = BranchStack.end();
I !=
E; ++
I) {
96 bool CFStack::requiresWorkAroundForInst(
unsigned Opcode) {
97 if (Opcode == R600::CF_ALU_PUSH_BEFORE &&
ST->hasCaymanISA() &&
101 if (!
ST->hasCFAluBug())
105 default:
return false;
106 case R600::CF_ALU_PUSH_BEFORE:
107 case R600::CF_ALU_ELSE_AFTER:
108 case R600::CF_ALU_BREAK:
109 case R600::CF_ALU_CONTINUE:
110 if (CurrentSubEntries == 0)
112 if (
ST->getWavefrontSize() == 64) {
121 return CurrentSubEntries > 3;
123 assert(
ST->getWavefrontSize() == 32);
129 return CurrentSubEntries > 7;
134 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
138 case CFStack::FIRST_NON_WQM_PUSH:
152 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
157 case CFStack::SUB_ENTRY:
162 void CFStack::updateMaxStackSize() {
163 unsigned CurrentStackSize =
164 CurrentEntries + (
alignTo(CurrentSubEntries, 4) / 4);
165 MaxStackSize =
std::max(CurrentStackSize, MaxStackSize);
168 void CFStack::pushBranch(
unsigned Opcode,
bool isWQM) {
171 case R600::CF_PUSH_EG:
172 case R600::CF_ALU_PUSH_BEFORE:
174 if (!
ST->hasCaymanISA() &&
175 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
176 Item = CFStack::FIRST_NON_WQM_PUSH;
179 else if (CurrentEntries > 0 &&
181 !
ST->hasCaymanISA() &&
182 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
183 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
185 Item = CFStack::SUB_ENTRY;
190 BranchStack.push_back(Item);
194 CurrentSubEntries += getSubEntrySize(Item);
195 updateMaxStackSize();
198 void CFStack::pushLoop() {
201 updateMaxStackSize();
204 void CFStack::popBranch() {
205 CFStack::StackItem Top = BranchStack.back();
209 CurrentSubEntries-= getSubEntrySize(Top);
210 BranchStack.pop_back();
213 void CFStack::popLoop() {
215 LoopStack.pop_back();
220 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
222 enum ControlFlowInstruction {
238 unsigned MaxFetchInst;
251 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI)
const {
256 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
259 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
262 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
265 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
268 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
271 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
273 case CF_LOOP_CONTINUE:
274 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
277 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
280 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
283 Opcode = isEg ? R600::POP_EG : R600::POP_R600;
287 Opcode = R600::CF_END_CM;
290 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
293 assert (Opcode &&
"No opcode selected");
294 return TII->get(Opcode);
298 std::set<unsigned> &DstRegs)
const {
299 unsigned DstMI, SrcMI;
308 if (R600::R600_Reg128RegClass.
contains(Reg))
311 DstMI = TRI->getMatchingSuperReg(Reg,
313 &R600::R600_Reg128RegClass);
317 if (R600::R600_Reg128RegClass.
contains(Reg))
320 SrcMI = TRI->getMatchingSuperReg(Reg,
322 &R600::R600_Reg128RegClass);
325 if ((DstRegs.find(SrcMI) == DstRegs.end())) {
326 DstRegs.insert(DstMI);
336 std::vector<MachineInstr *> ClauseContent;
337 unsigned AluInstCount = 0;
339 std::set<unsigned> DstRegs;
341 if (IsTrivialInst(*I))
343 if (AluInstCount >= MaxFetchInst)
348 if (!isCompatibleWithClause(*I, DstRegs))
351 ClauseContent.push_back(&*I);
354 getHWInstrDesc(IsTex?CF_TC:CF_VC))
356 .
addImm(AluInstCount - 1);
357 return ClauseFile(MIb, std::move(ClauseContent));
360 void getLiteral(
MachineInstr &MI, std::vector<MachineOperand *> &Lits)
const {
361 static const unsigned LiteralRegs[] = {
369 for (
const auto &Src:Srcs) {
370 if (Src.first->getReg() != R600::ALU_LITERAL_X)
372 int64_t Imm = Src.second;
373 std::vector<MachineOperand *>::iterator It =
382 if (It != Lits.end()) {
384 unsigned Index = It - Lits.begin();
385 Src.first->setReg(LiteralRegs[Index]);
388 assert(Lits.size() < 4 &&
"Too many literals in Instruction Group");
389 Src.first->setReg(LiteralRegs[Lits.size()]);
390 Lits.push_back(&Operand);
397 const std::vector<unsigned> &Literals)
const {
399 for (
unsigned i = 0, e = Literals.size(); i < e; i+=2) {
400 unsigned LiteralPair0 = Literals[i];
401 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
402 InsertPos =
BuildMI(MBB, InsertPos->getDebugLoc(),
403 TII->get(R600::LITERALS))
404 .addImm(LiteralPair0)
414 std::vector<MachineInstr *> ClauseContent;
417 if (IsTrivialInst(*I)) {
421 if (!I->isBundle() && !TII->
isALUInstr(I->getOpcode()))
423 std::vector<MachineOperand *>Literals;
427 while (++BI !=
E && BI->isBundledWithPred()) {
428 BI->unbundleFromPred();
430 if (MO.isReg() && MO.isInternalRead())
431 MO.setIsInternalRead(
false);
433 getLiteral(*BI, Literals);
434 ClauseContent.push_back(&*BI);
439 getLiteral(*I, Literals);
440 ClauseContent.push_back(&*I);
443 for (
unsigned i = 0, e = Literals.size(); i < e; i += 2) {
445 TII->get(R600::LITERALS));
446 if (Literals[i]->isImm()) {
447 MILit.
addImm(Literals[i]->getImm());
453 if (Literals[i + 1]->isImm()) {
454 MILit.
addImm(Literals[i + 1]->getImm());
461 ClauseContent.push_back(MILit);
464 assert(ClauseContent.size() < 128 &&
"ALU clause is too big");
466 return ClauseFile(&ClauseHead, std::move(ClauseContent));
470 const DebugLoc &DL, ClauseFile &Clause,
472 CounterPropagateAddr(*Clause.first, CfCount);
474 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
475 for (
unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
476 BB->
splice(InsertPos, BB, Clause.second[i]);
478 CfCount += 2 * Clause.second.size();
482 ClauseFile &Clause,
unsigned &CfCount) {
483 Clause.first->getOperand(0).setImm(0);
484 CounterPropagateAddr(*Clause.first, CfCount);
486 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
487 for (
unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
488 BB->
splice(InsertPos, BB, Clause.second[i]);
490 CfCount += Clause.second.size();
493 void CounterPropagateAddr(
MachineInstr &MI,
unsigned Addr)
const {
496 void CounterPropagateAddr(
const std::set<MachineInstr *> &MIs,
497 unsigned Addr)
const {
499 CounterPropagateAddr(*MI, Addr);
520 unsigned CfCount = 0;
521 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
522 std::vector<MachineInstr * > IfThenElseStack;
525 getHWInstrDesc(CF_CALL_FS));
528 std::vector<ClauseFile> FetchClauses, AluClauses;
529 std::vector<MachineInstr *> LastAlu(1);
530 std::vector<MachineInstr *> ToPopAfter;
536 FetchClauses.push_back(MakeFetchClause(MBB, I));
538 LastAlu.back() =
nullptr;
543 if (MI->getOpcode() != R600::ENDIF)
544 LastAlu.back() =
nullptr;
545 if (MI->getOpcode() == R600::CF_ALU)
546 LastAlu.back() = &*
MI;
548 bool RequiresWorkAround =
549 CFStack.requiresWorkAroundForInst(MI->getOpcode());
550 switch (MI->getOpcode()) {
551 case R600::CF_ALU_PUSH_BEFORE:
552 if (RequiresWorkAround) {
554 <<
"Applying bug work-around for ALU_PUSH_BEFORE\n");
558 MI->setDesc(TII->get(R600::CF_ALU));
560 CFStack.pushBranch(R600::CF_PUSH_EG);
562 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
566 AluClauses.push_back(MakeALUClause(MBB, I));
570 case R600::WHILELOOP: {
573 getHWInstrDesc(CF_WHILE_LOOP))
575 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
576 std::set<MachineInstr *>());
577 Pair.second.insert(MIb);
578 LoopStack.push_back(std::move(Pair));
579 MI->eraseFromParent();
583 case R600::ENDLOOP: {
585 std::pair<unsigned, std::set<MachineInstr *>> Pair =
586 std::move(LoopStack.back());
587 LoopStack.pop_back();
588 CounterPropagateAddr(Pair.second, CfCount);
590 .addImm(Pair.first + 1);
591 MI->eraseFromParent();
595 case R600::IF_PREDICATE_SET: {
596 LastAlu.push_back(
nullptr);
598 getHWInstrDesc(CF_JUMP))
601 IfThenElseStack.push_back(MIb);
603 MI->eraseFromParent();
609 IfThenElseStack.pop_back();
610 CounterPropagateAddr(*JumpInst, CfCount);
612 getHWInstrDesc(CF_ELSE))
616 IfThenElseStack.push_back(MIb);
617 MI->eraseFromParent();
623 if (LastAlu.back()) {
624 ToPopAfter.push_back(LastAlu.back());
627 getHWInstrDesc(CF_POP))
636 IfThenElseStack.pop_back();
637 CounterPropagateAddr(*IfOrElseInst, CfCount);
640 MI->eraseFromParent();
646 getHWInstrDesc(CF_LOOP_BREAK))
648 LoopStack.back().second.insert(MIb);
649 MI->eraseFromParent();
652 case R600::CONTINUE: {
654 getHWInstrDesc(CF_LOOP_CONTINUE))
656 LoopStack.back().second.insert(MIb);
657 MI->eraseFromParent();
663 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
666 BuildMI(MBB, I, DL, TII->get(R600::PAD));
669 MI->eraseFromParent();
670 for (
unsigned i = 0, e = FetchClauses.size(); i < e; i++)
671 EmitFetchClause(I, DL, FetchClauses[i], CfCount);
672 for (
unsigned i = 0, e = AluClauses.size(); i < e; i++)
673 EmitALUClause(I, DL, AluClauses[i], CfCount);
677 if (TII->
isExport(MI->getOpcode())) {
684 for (
unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
687 TII->get(R600::CF_ALU_POP_AFTER))
688 .addImm(Alu->getOperand(0).getImm())
689 .addImm(Alu->getOperand(1).getImm())
690 .addImm(Alu->getOperand(2).getImm())
691 .addImm(Alu->getOperand(3).getImm())
692 .addImm(Alu->getOperand(4).getImm())
693 .addImm(Alu->getOperand(5).getImm())
694 .addImm(Alu->getOperand(6).getImm())
695 .addImm(Alu->getOperand(7).getImm())
696 .addImm(Alu->getOperand(8).getImm());
697 Alu->eraseFromParent();
706 return "R600 Control Flow Finalizer Pass";
713 "R600 Control Flow Finalizer",
false,
false)
717 char R600ControlFlowFinalizer::
ID = 0;
722 return new R600ControlFlowFinalizer();
mop_iterator operands_end()
static unsigned getSubRegFromChannel(unsigned Channel)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
instr_iterator instr_end()
This class represents lattice values for constants.
Interface definition for R600InstrInfo.
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
unsigned const TargetRegisterInfo * TRI
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
Interface definition for R600RegisterInfo.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Generation getGeneration() const
return AArch64::GPR64RegClass contains(Reg)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define ENTRY(ASMNAME, ENUM)
char & R600ControlFlowFinalizerID
R600 Control Flow Finalizer
bool usesVertexCache(unsigned Opcode) const
instr_iterator getInstrIterator() const
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
bool isExport(unsigned Opcode) const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
FunctionPass * createR600ControlFlowFinalizer()
const R600InstrInfo * getInstrInfo() const override
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
const R600RegisterInfo * getRegisterInfo() const override
Iterator for intrusive lists based on ilist_node.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
bool hasCaymanISA() const
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isALUInstr(unsigned Opcode) const
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, "R600 Control Flow Finalizer", false, false) INITIALIZE_PASS_END(R600ControlFlowFinalizer
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
short getTexVTXClauseSize() const
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
SmallVector< std::pair< MachineOperand *, int64_t >, 3 > getSrcs(MachineInstr &MI) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
mop_iterator operands_begin()
StringRef - Represent a constant reference to a string, i.e.
const MachineOperand & getOperand(unsigned i) const
bool usesTextureCache(unsigned Opcode) const