40 #define DEBUG_TYPE "si-i1-copies" 70 StringRef getPassName()
const override {
return "SI Lower i1 Copies"; }
80 void lowerCopiesFromI1();
82 void lowerCopiesToI1();
83 bool isConstantLaneMask(
unsigned Reg,
bool &Val)
const;
86 unsigned DstReg,
unsigned PrevReg,
unsigned CurReg);
90 bool isLaneMaskReg(
unsigned Reg)
const {
119 class PhiIncomingAnalysis {
135 return ReachableMap.
find(&MBB)->second;
143 ReachableMap.
clear();
144 ReachableOrdered.
clear();
145 Predecessors.
clear();
153 if (MBB == &DefBlock) {
154 ReachableMap[&DefBlock] =
true;
163 bool Divergent =
false;
165 if (
MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
166 MI.getOpcode() == AMDGPU::SI_IF ||
167 MI.getOpcode() == AMDGPU::SI_ELSE ||
168 MI.getOpcode() == AMDGPU::SI_LOOP) {
174 if (Divergent && PDT.
dominates(&DefBlock, MBB)) {
180 while (!Stack.
empty()) {
191 bool HaveReachablePred =
false;
193 if (ReachableMap.
count(Pred)) {
194 HaveReachablePred =
true;
199 if (!HaveReachablePred)
200 ReachableMap[MBB] =
true;
201 if (HaveReachablePred) {
203 if (
llvm::find(Predecessors, UnreachablePred) == Predecessors.
end())
263 unsigned FoundLoopLevel = ~0u;
271 : DT(DT), PDT(PDT) {}
275 CommonDominators.
clear();
278 VisitedPostDom =
nullptr;
279 FoundLoopLevel = ~0u;
295 while (PDNode->
getBlock() != PostDom) {
296 if (PDNode->
getBlock() == VisitedPostDom)
300 if (FoundLoopLevel == Level)
318 if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
324 if (!inLoopLevel(*Pred, LoopLevel, Blocks))
333 auto DomIt = Visited.
find(&MBB);
334 if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
343 void advanceLevel() {
346 if (!VisitedPostDom) {
347 VisitedPostDom = DefBlock;
348 VisitedDom = DefBlock;
352 VisitedDom = CommonDominators.
back();
354 for (
unsigned i = 0; i < NextLevel.
size();) {
355 if (PDT.
dominates(VisitedPostDom, NextLevel[i])) {
358 NextLevel[i] = NextLevel.
back();
366 unsigned Level = CommonDominators.
size();
367 while (!Stack.
empty()) {
372 Visited[MBB] =
Level;
376 if (Succ == DefBlock) {
377 if (MBB == VisitedPostDom)
378 FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
380 FoundLoopLevel = std::min(FoundLoopLevel, Level);
385 if (MBB == VisitedPostDom)
406 char SILowerI1Copies::ID = 0;
411 return new SILowerI1Copies();
441 DT = &getAnalysis<MachineDominatorTree>();
442 PDT = &getAnalysis<MachinePostDominatorTree>();
445 TII = ST->getInstrInfo();
451 for (
unsigned Reg : ConstrainRegs)
453 ConstrainRegs.clear();
458 void SILowerI1Copies::lowerCopiesFromI1() {
463 if (
MI.getOpcode() != AMDGPU::COPY)
466 unsigned DstReg =
MI.getOperand(0).getReg();
467 unsigned SrcReg =
MI.getOperand(1).getReg();
469 MRI->
getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
472 if (isLaneMaskReg(DstReg) ||
474 MRI->
getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
482 assert(!
MI.getOperand(0).getSubReg());
484 ConstrainRegs.
insert(SrcReg);
485 BuildMI(MBB,
MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
493 MI->eraseFromParent();
498 void SILowerI1Copies::lowerPhis() {
500 LoopFinder LF(*DT, *PDT);
501 PhiIncomingAnalysis PIA(*PDT);
511 unsigned DstReg =
MI.getOperand(0).getReg();
512 if (MRI->
getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
517 MRI->
setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
520 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
521 assert(i + 1 <
MI.getNumOperands());
522 unsigned IncomingReg =
MI.getOperand(i).getReg();
526 if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
528 assert(isLaneMaskReg(IncomingReg));
530 }
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
548 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
552 if (FoundLoopLevel) {
553 LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
555 for (
unsigned i = 0; i < IncomingRegs.
size(); ++i) {
558 IncomingUpdated.
back());
561 for (
unsigned i = 0; i < IncomingRegs.
size(); ++i) {
564 IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
570 PIA.analyze(MBB, IncomingBlocks);
575 for (
unsigned i = 0; i < IncomingRegs.
size(); ++i) {
577 if (PIA.isSource(IMBB)) {
586 for (
unsigned i = 0; i < IncomingRegs.
size(); ++i) {
587 if (!IncomingUpdated[i])
592 IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
598 if (NewReg != DstReg) {
603 MI.getOperand(0).setReg(NewReg);
607 IncomingBlocks.
clear();
608 IncomingRegs.
clear();
609 IncomingUpdated.
clear();
613 MI->eraseFromParent();
618 void SILowerI1Copies::lowerCopiesToI1() {
620 LoopFinder LF(*DT, *PDT);
627 if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
628 MI.getOpcode() != AMDGPU::COPY)
631 unsigned DstReg =
MI.getOperand(0).getReg();
633 MRI->
getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
643 MRI->
setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
644 if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
648 unsigned SrcReg =
MI.getOperand(1).getReg();
649 assert(!
MI.getOperand(1).getSubReg());
652 !isLaneMaskReg(SrcReg)) {
655 BuildMI(MBB,
MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
658 MI.getOperand(1).setReg(TmpReg);
670 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
671 if (FoundLoopLevel) {
674 LF.addLoopEntries(FoundLoopLevel, SSAUpdater);
676 buildMergeLaneMasks(MBB,
MI, DL, DstReg,
683 MI->eraseFromParent();
688 bool SILowerI1Copies::isConstantLaneMask(
unsigned Reg,
bool &Val)
const {
698 if (!isLaneMaskReg(Reg))
702 if (MI->
getOpcode() != AMDGPU::S_MOV_B64)
726 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
740 bool TerminatorsUseSCC =
false;
741 for (
auto I = InsertionPt,
E = MBB.
end();
I !=
E; ++
I) {
744 if (TerminatorsUseSCC || DefsSCC)
748 if (!TerminatorsUseSCC)
751 while (InsertionPt != MBB.
begin()) {
766 const DebugLoc &DL,
unsigned DstReg,
767 unsigned PrevReg,
unsigned CurReg) {
769 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
771 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
773 if (PrevConstant && CurConstant) {
774 if (PrevVal == CurVal) {
775 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
777 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
779 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
780 .addReg(AMDGPU::EXEC)
786 unsigned PrevMaskedReg = 0;
787 unsigned CurMaskedReg = 0;
789 if (CurConstant && CurVal) {
790 PrevMaskedReg = PrevReg;
793 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
800 if (PrevConstant && PrevVal) {
801 CurMaskedReg = CurReg;
804 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
810 if (PrevConstant && !PrevVal) {
811 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
812 .addReg(CurMaskedReg);
813 }
else if (CurConstant && !CurVal) {
814 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
815 .addReg(PrevMaskedReg);
816 }
else if (PrevConstant && PrevVal) {
817 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
818 .addReg(CurMaskedReg)
821 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
822 .addReg(PrevMaskedReg)
823 .
addReg(CurMaskedReg ? CurMaskedReg : (
unsigned)AMDGPU::EXEC);
Helper class for SSA formation on a set of values defined in multiple blocks.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
AMDGPU specific subclass of TargetSubtarget.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
This class represents lattice values for constants.
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
void push_back(const T &Elt)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false, false) INITIALIZE_PASS_END(SILowerI1Copies
unsigned getSubReg() const
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const SIInstrInfo * getInstrInfo() const override
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
iterator_range< mop_iterator > operands()
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
const SIRegisterInfo & getRegisterInfo() const
iterator_range< succ_iterator > successors()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const
A Use represents the edge between a Value definition and its users.
iterator_range< iterator > terminators()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static bool isSource(Value *V)
Return true if the given value is a source in the use-def chain, producing a narrow 'TypeSize' value...
FunctionPass * createSILowerI1CopiesPass()
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B...
void Initialize(unsigned V)
Initialize - Reset this object to get ready for a new set of SSA updates.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator find(const_arg_type_t< KeyT > Val)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(const ValueT &V)
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
iterator_range< pred_iterator > predecessors()
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
unsigned getWavefrontSize() const
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
LLVM_NODISCARD T pop_back_val()
pred_range predecessors(BasicBlock *BB)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringRef > StandardNames)
Initialize the set of available library functions based on the specified target triple.
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
void replaceRegWith(unsigned FromReg, unsigned ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Interface definition for SIInstrInfo.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static unsigned createLaneMaskReg(MachineFunction &MF)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
static unsigned insertUndefLaneMask(MachineBasicBlock &MBB)
unsigned GetValueInMiddleOfBlock(MachineBasicBlock *BB)
GetValueInMiddleOfBlock - Construct SSA form, materializing a value that is live in the middle of the...
void AddAvailableValue(MachineBasicBlock *BB, unsigned V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
iterator_range< use_instr_iterator > use_instructions(unsigned Reg) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
StringRef - Represent a constant reference to a string, i.e.
void initializeSILowerI1CopiesPass(PassRegistry &)
const MachineOperand & getOperand(unsigned i) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...