90 #define DEBUG_TYPE "si-wqm" 104 explicit PrintState(
int State) : State(State) {}
109 if (PS.State & StateWQM)
111 if (PS.State & StateWWM) {
112 if (PS.State & StateWQM)
116 if (PS.State & StateExact) {
117 if (PS.State & (StateWQM | StateWWM))
142 WorkItem() =
default;
163 std::vector<WorkItem> &Worklist);
164 void markInstructionUses(
const MachineInstr &MI,
char Flag,
165 std::vector<WorkItem> &Worklist);
166 char scanInstructions(
MachineFunction &MF, std::vector<WorkItem> &Worklist);
167 void propagateInstruction(
MachineInstr &MI, std::vector<WorkItem> &Worklist);
171 bool requiresCorrectState(
const MachineInstr &MI)
const;
180 unsigned SaveWQM,
unsigned LiveMaskReg);
189 void lowerLiveMaskQueries(
unsigned LiveMaskReg);
190 void lowerCopyInstrs();
/// Returns the fixed, human-readable name reported for this pass: the
/// string literal "SI Whole Quad Mode". Marked `override`, so it replaces
/// a virtual declared in a base class that is not visible in this excerpt.
200 StringRef getPassName()
const override {
return "SI Whole Quad Mode"; }
222 return new SIWholeQuadMode;
227 for (
const auto &BII : Blocks) {
230 <<
" InNeeds = " << PrintState(BII.second.InNeeds)
231 <<
", Needs = " << PrintState(BII.second.Needs)
232 <<
", OutNeeds = " << PrintState(BII.second.OutNeeds) <<
"\n\n";
235 auto III = Instructions.find(&MI);
236 if (III == Instructions.end())
239 dbgs() <<
" " << MI <<
" Needs = " << PrintState(III->second.Needs)
240 <<
", OutNeeds = " << PrintState(III->second.OutNeeds) <<
'\n';
247 std::vector<WorkItem> &Worklist) {
248 InstrInfo &II = Instructions[&
MI];
250 assert(!(Flag & StateExact) && Flag != 0);
256 Flag &= ~II.Disabled;
260 if ((II.Needs & Flag) == Flag)
264 Worklist.push_back(&MI);
268 void SIWholeQuadMode::markInstructionUses(
const MachineInstr &MI,
char Flag,
269 std::vector<WorkItem> &Worklist) {
271 if (!
Use.isReg() || !
Use.isUse())
274 unsigned Reg =
Use.getReg();
280 if (Reg == AMDGPU::EXEC)
284 LiveRange &LR = LIS->getRegUnit(*RegUnit);
294 markInstruction(*LIS->getInstructionFromIndex(Value->
def),
Flag,
302 markInstruction(
DefMI, Flag, Worklist);
309 std::vector<WorkItem> &Worklist) {
310 char GlobalFlags = 0;
319 for (
auto BI = RPOT.
begin(), BE = RPOT.
end(); BI != BE; ++BI) {
321 BlockInfo &BBI = Blocks[&MBB];
323 for (
auto II = MBB.
begin(),
IE = MBB.
end(); II !=
IE; ++II) {
325 InstrInfo &III = Instructions[&
MI];
329 if (
TII->isWQM(Opcode)) {
333 markInstructionUses(MI, StateWQM, Worklist);
334 GlobalFlags |= StateWQM;
340 LowerToCopyInstrs.push_back(&MI);
341 }
else if (Opcode == AMDGPU::WWM) {
345 markInstructionUses(MI, StateWWM, Worklist);
346 GlobalFlags |= StateWWM;
347 LowerToCopyInstrs.push_back(&MI);
349 }
else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
350 Opcode == AMDGPU::V_SET_INACTIVE_B64) {
351 III.Disabled = StateWWM;
353 if (Inactive.
isReg()) {
355 LowerToCopyInstrs.push_back(&MI);
360 markInstruction(
DefMI, StateWWM, Worklist);
366 }
else if (
TII->isDisableWQM(MI)) {
367 BBI.Needs |= StateExact;
368 if (!(BBI.InNeeds & StateExact)) {
369 BBI.InNeeds |= StateExact;
370 Worklist.push_back(&MBB);
372 GlobalFlags |= StateExact;
373 III.Disabled = StateWQM | StateWWM;
376 if (Opcode == AMDGPU::SI_PS_LIVE) {
377 LiveMaskQueries.push_back(&MI);
378 }
else if (WQMOutputs) {
386 unsigned Reg = MO.getReg();
388 if (!
TRI->isVirtualRegister(Reg) &&
389 TRI->hasVGPRs(
TRI->getPhysRegClass(Reg))) {
400 markInstruction(MI, Flags, Worklist);
401 GlobalFlags |= Flags;
408 if (GlobalFlags & StateWQM) {
410 markInstruction(*MI, StateWQM, Worklist);
416 void SIWholeQuadMode::propagateInstruction(
MachineInstr &MI,
417 std::vector<WorkItem>& Worklist) {
419 InstrInfo II = Instructions[&
MI];
420 BlockInfo &BI = Blocks[MBB];
424 if ((II.OutNeeds & StateWQM) && !(II.Disabled & StateWQM) &&
426 Instructions[&
MI].Needs = StateWQM;
431 if (II.Needs & StateWQM) {
432 BI.Needs |= StateWQM;
433 if (!(BI.InNeeds & StateWQM)) {
434 BI.InNeeds |= StateWQM;
435 Worklist.push_back(MBB);
441 char InNeeds = (II.Needs & ~StateWWM) | II.OutNeeds;
442 if (!PrevMI->isPHI()) {
443 InstrInfo &PrevII = Instructions[PrevMI];
444 if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
445 PrevII.OutNeeds |= InNeeds;
446 Worklist.push_back(PrevMI);
452 assert(!(II.Needs & StateExact));
455 markInstructionUses(MI, II.Needs, Worklist);
459 if (II.Needs & StateWWM)
460 BI.Needs |= StateWWM;
464 std::vector<WorkItem>& Worklist) {
465 BlockInfo BI = Blocks[&MBB];
470 InstrInfo &LastII = Instructions[LastMI];
471 if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
472 LastII.OutNeeds |= BI.OutNeeds;
473 Worklist.push_back(LastMI);
479 BlockInfo &PredBI = Blocks[Pred];
480 if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
483 PredBI.OutNeeds |= BI.InNeeds;
484 PredBI.InNeeds |= BI.InNeeds;
485 Worklist.push_back(Pred);
490 BlockInfo &SuccBI = Blocks[Succ];
491 if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
494 SuccBI.InNeeds |= BI.OutNeeds;
495 Worklist.push_back(Succ);
500 std::vector<WorkItem> Worklist;
501 char GlobalFlags = scanInstructions(MF, Worklist);
503 while (!Worklist.empty()) {
504 WorkItem WI = Worklist.back();
508 propagateInstruction(*WI.MI, Worklist);
510 propagateBlock(*WI.MBB, Worklist);
521 bool SIWholeQuadMode::requiresCorrectState(
const MachineInstr &MI)
const {
526 if (
TII->isScalarUnit(MI))
549 unsigned SaveReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
553 .addReg(AMDGPU::SCC);
558 LIS->InsertMachineInstrInMaps(*Save);
559 LIS->InsertMachineInstrInMaps(*Restore);
560 LIS->createAndComputeVirtRegInterval(SaveReg);
572 return PreferLast ? Last : First;
575 auto MBBE = MBB.
end();
576 SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
577 : LIS->getMBBEndIdx(&MBB);
579 Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
580 SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
603 if (
MachineInstr *MI = LIS->getInstructionFromIndex(Idx))
606 assert(Idx == LIS->getMBBEndIdx(&MBB));
611 MBBI = saveSCC(MBB, MBBI);
618 unsigned SaveWQM,
unsigned LiveMaskReg) {
624 .addReg(LiveMaskReg);
628 .addReg(AMDGPU::EXEC)
632 LIS->InsertMachineInstrInMaps(*MI);
646 .addReg(AMDGPU::EXEC);
649 LIS->InsertMachineInstrInMaps(*MI);
661 LIS->InsertMachineInstrInMaps(*MI);
666 unsigned SavedOrig) {
672 LIS->InsertMachineInstrInMaps(*MI);
677 auto BII = Blocks.find(&MBB);
678 if (BII == Blocks.end())
681 const BlockInfo &BI = BII->second;
685 if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
691 unsigned SavedWQMReg = 0;
692 unsigned SavedNonWWMReg = 0;
693 bool WQMFromExec = isEntry;
694 char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
695 char NonWWMState = 0;
712 char Needs = StateExact | StateWQM;
726 if (requiresCorrectState(MI)) {
727 auto III = Instructions.find(&MI);
728 if (III != Instructions.end()) {
729 if (III->second.Needs & StateWWM)
731 else if (III->second.Needs & StateWQM)
734 Needs &= ~III->second.Disabled;
735 OutNeeds = III->second.OutNeeds;
740 Needs = StateExact | StateWQM | StateWWM;
746 if (MI.
getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
752 if (BI.OutNeeds & StateWQM)
754 else if (BI.OutNeeds == StateExact)
757 Needs = StateWQM | StateExact;
761 if (!(Needs & State)) {
763 if (State == StateWWM || Needs == StateWWM) {
772 prepareInsertion(MBB, First, II, Needs == StateWQM,
773 Needs == StateExact || WQMFromExec);
775 if (State == StateWWM) {
777 fromWWM(MBB, Before, SavedNonWWMReg);
781 if (Needs == StateWWM) {
783 SavedNonWWMReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
784 toWWM(MBB, Before, SavedNonWWMReg);
787 if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
788 if (!WQMFromExec && (OutNeeds & StateWQM))
789 SavedWQMReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
791 toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
793 }
else if (State == StateExact && (Needs & StateWQM) &&
794 !(Needs & StateExact)) {
795 assert(WQMFromExec == (SavedWQMReg == 0));
797 toWQM(MBB, Before, SavedWQMReg);
800 LIS->createAndComputeVirtRegInterval(SavedWQMReg);
812 if (Needs != (StateExact | StateWQM | StateWWM)) {
813 if (Needs != (StateExact | StateWQM))
824 void SIWholeQuadMode::lowerLiveMaskQueries(
unsigned LiveMaskReg) {
830 .addReg(LiveMaskReg);
832 LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
837 void SIWholeQuadMode::lowerCopyInstrs() {
846 Instructions.clear();
848 LiveMaskQueries.clear();
849 LowerToCopyInstrs.clear();
857 LIS = &getAnalysis<LiveIntervals>();
859 char GlobalFlags = analyzeFunction(MF);
860 unsigned LiveMaskReg = 0;
861 if (!(GlobalFlags & StateWQM)) {
862 lowerLiveMaskQueries(AMDGPU::EXEC);
863 if (!(GlobalFlags & StateWWM))
864 return !LiveMaskQueries.empty();
870 if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
871 LiveMaskReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
873 TII->
get(AMDGPU::COPY), LiveMaskReg)
874 .addReg(AMDGPU::EXEC);
875 LIS->InsertMachineInstrInMaps(*MI);
878 lowerLiveMaskQueries(LiveMaskReg);
880 if (GlobalFlags == StateWQM) {
884 .addReg(AMDGPU::EXEC);
897 for (
auto BII : Blocks)
898 processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.
begin());
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
AMDGPU specific subclass of TargetSubtarget.
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
SlotIndex def
The index of the defining instruction.
This class represents lattice values for constants.
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false, false) INITIALIZE_PASS_END(SIWholeQuadMode
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
const SIInstrInfo * getInstrInfo() const override
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
This represents a simple continuous liveness interval for a value.
unsigned const TargetRegisterInfo * TRI
VNInfo - Value Number Information.
iterator_range< succ_iterator > successors()
This class represents the liveness of a register, stack slot, etc.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
SlotIndex getNextIndex() const
Returns the next index.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
A Use represents the edge between a Value definition and its users.
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
reverse_iterator rbegin()
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
FunctionPass * createSIWholeQuadModePass()
Represent the analysis usage information of a pass.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
iterator_range< pred_iterator > predecessors()
const MachineBasicBlock & front() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
MachineInstrBuilder MachineInstrBuilder & DefMI
void setPreservesCFG()
This function should be called by the pass, iff they do not:
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Interface definition for SIInstrInfo.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream...
StringRef - Represent a constant reference to a string, i.e.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
const MachineOperand & getOperand(unsigned i) const
SlotIndex - An opaque wrapper around machine indexes.
const SIRegisterInfo * getRegisterInfo() const override