43 #define DEBUG_TYPE "aarch64-ccmp" 49 cl::desc(
"Maximum number of instructions per speculated block."));
// Pass statistics. Each STATISTIC entry below names one counter and gives its
// human-readable description string. The set covers: candidates considered,
// one counter per rejection reason, and the conversions actually performed.
// NOTE(review): the sites that increment these counters are not visible in
// this excerpt — confirm against the full file.

// Total candidates examined.
55 STATISTIC(NumConsidered,
"Number of ccmps considered");
// Rejection counters: the parenthesised text in each description names the
// reason the candidate was rejected.
56 STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
57 STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
58 STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
59 STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
60 STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
61 STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
62 STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
63 STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
64 STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
65 STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
67 STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
// Successful conversions: ccmp instructions created, and cbz/cbnz branches
// converted.
69 STATISTIC(NumConverted,
"Number of ccmp instructions created");
70 STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
173 bool trivialTailPHIs();
176 void updateTailPHIs();
179 bool isDeadDef(
unsigned DstReg);
210 int expectedCodeSizeDelta()
const;
216 bool SSACCmpConv::trivialTailPHIs() {
217 for (
auto &
I : *Tail) {
220 unsigned HeadReg = 0, CmpBBReg = 0;
222 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
224 unsigned Reg =
I.getOperand(oi).getReg();
226 assert((!HeadReg || HeadReg == Reg) &&
"Inconsistent PHI operands");
230 assert((!CmpBBReg || CmpBBReg == Reg) &&
"Inconsistent PHI operands");
234 if (HeadReg != CmpBBReg)
242 void SSACCmpConv::updateTailPHIs() {
243 for (
auto &
I : *Tail) {
247 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
249 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
250 I.RemoveOperand(oi - 1);
251 I.RemoveOperand(oi - 2);
259 bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
261 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
275 if (Cond[0].getImm() != -1) {
276 assert(Cond.
size() == 1 &&
"Unknown Cond array format");
281 switch (Cond[1].getImm()) {
288 assert(Cond.
size() == 3 &&
"Unknown Cond array format");
293 assert(Cond.
size() == 3 &&
"Unknown Cond array format");
304 if (!I->readsRegister(AArch64::NZCV)) {
305 switch (I->getOpcode()) {
321 assert(!I->isTerminator() &&
"Spurious terminator");
322 switch (I->getOpcode()) {
324 case AArch64::SUBSWri:
325 case AArch64::SUBSXri:
327 case AArch64::ADDSWri:
328 case AArch64::ADDSXri:
331 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
337 case AArch64::SUBSWrr:
338 case AArch64::SUBSXrr:
339 case AArch64::ADDSWrr:
340 case AArch64::ADDSXrr:
341 if (isDeadDef(I->getOperand(0).getReg()))
343 LLVM_DEBUG(
dbgs() <<
"Can't convert compare with live destination: " 347 case AArch64::FCMPSrr:
348 case AArch64::FCMPDrr:
349 case AArch64::FCMPESrr:
350 case AArch64::FCMPEDrr:
355 MIOperands::PhysRegInfo PRI =
362 LLVM_DEBUG(
dbgs() <<
"Can't create ccmp with multiple uses: " << *I);
367 if (PRI.Defined || PRI.Clobbered) {
397 if (
I.isDebugInstr())
421 bool DontMoveAcrossStore =
true;
422 if (!
I.isSafeToMove(
nullptr, DontMoveAcrossStore)) {
428 if (&
I != CmpMI &&
I.modifiesRegister(AArch64::NZCV, TRI)) {
441 Tail = CmpBB =
nullptr;
474 if (!trivialTailPHIs()) {
480 if (!Tail->livein_empty()) {
495 LLVM_DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
511 if (!TBB || HeadCond.
empty()) {
513 dbgs() <<
"AnalyzeBranch didn't find conditional branch in Head.\n");
526 assert(TBB == Tail &&
"Unexpected TBB");
538 if (!TBB || CmpBBCond.
empty()) {
540 dbgs() <<
"AnalyzeBranch didn't find conditional branch in CmpBB.\n");
545 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
556 <<
", CmpBB->Tail on " 559 CmpMI = findConvertibleCompare(CmpBB);
563 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
599 Head2Tail + Head2CmpBB * CmpBB2Tail);
618 if (HeadCond[0].getImm() == -1) {
621 switch (HeadCond[1].getImm()) {
624 Opc = AArch64::SUBSWri;
628 Opc = AArch64::SUBSXri;
653 unsigned FirstOp = 1;
654 bool isZBranch =
false;
658 case AArch64::SUBSWri: Opc = AArch64::CCMPWi;
break;
659 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr;
break;
660 case AArch64::SUBSXri: Opc = AArch64::CCMPXi;
break;
661 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr;
break;
662 case AArch64::ADDSWri: Opc = AArch64::CCMNWi;
break;
663 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr;
break;
664 case AArch64::ADDSXri: Opc = AArch64::CCMNXi;
break;
665 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr;
break;
666 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
667 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
668 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
669 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
672 Opc = AArch64::CCMPWi;
678 Opc = AArch64::CCMPXi;
707 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
722 int SSACCmpConv::expectedCodeSizeDelta()
const {
727 if (HeadCond[0].getImm() == -1) {
728 switch (HeadCond[1].getImm()) {
784 return "AArch64 Conditional Compares";
791 void invalidateTraces();
799 "AArch64 CCMP Pass",
false,
false)
807 return new AArch64ConditionalCompares();
810 void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
822 void AArch64ConditionalCompares::updateDomTree(
829 assert(Node != HeadNode &&
"Cannot erase the head node");
830 assert(Node->
getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
832 DomTree->changeImmediateDominator(Node->
getChildren().back(), HeadNode);
833 DomTree->eraseNode(RemovedMBB);
843 Loops->removeBlock(RemovedMBB);
847 void AArch64ConditionalCompares::invalidateTraces() {
848 Traces->invalidate(CmpConv.Head);
849 Traces->invalidate(CmpConv.CmpBB);
867 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
868 LLVM_DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
871 if (CodeSizeDelta < 0)
873 if (CodeSizeDelta > 0) {
874 LLVM_DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
886 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
891 unsigned CmpBBDepth =
894 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
895 if (CmpBBDepth > HeadDepth + DelayLimit) {
896 LLVM_DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
909 if (ResDepth > HeadDepth) {
917 bool Changed =
false;
921 CmpConv.convert(RemovedBlocks);
923 updateDomTree(RemovedBlocks);
924 updateLoops(RemovedBlocks);
929 bool AArch64ConditionalCompares::runOnMachineFunction(
MachineFunction &MF) {
930 LLVM_DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n" 931 <<
"********** Function: " << MF.
getName() <<
'\n');
939 DomTree = &getAnalysis<MachineDominatorTree>();
940 Loops = getAnalysisIfAvailable<MachineLoopInfo>();
941 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
942 Traces = &getAnalysis<MachineTraceMetrics>();
946 bool Changed =
false;
947 CmpConv.runOnMachineFunction(MF, MBPI);
955 if (tryConvert(
I->getBlock()))
Pass interface - Implemented by all 'passes'.
const MachineInstrBuilder & add(const MachineOperand &MO) const
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
This class represents lattice values for constants.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
static CondCode getInvertedCondCode(CondCode Code)
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks which refer to FromMBB to refer to this block.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum resource height'.
FunctionPass * createAArch64ConditionalCompares()
static unsigned InstrCount
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
AnalysisUsage & addRequired()
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
#define INITIALIZE_PASS_DEPENDENCY(depName)
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum, or NULL.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static const char * getCondCodeName(CondCode Code)
PhysRegInfo analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI)
analyzePhysReg - Analyze how the current instruction or bundle uses a physical register.
Select the trace through a block that has the fewest instructions.
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
const std::vector< DomTreeNodeBase * > & getChildren() const
virtual const TargetInstrInfo * getInstrInfo() const
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
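constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class, but only if the new class has at least MinNumRegs registers.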
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
TargetInstrInfo - Interface to description of machine instruction set.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
initializer< Ty > init(const Ty &Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
size_t size() const
size - Get the array size.
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
bool livein_empty() const
succ_iterator succ_begin()
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e.g. it's a switch dispatch or isn't implemented for a target).
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
A trace represents a plausible sequence of executed basic blocks that passes through the current basic block.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_t getNumChildren() const
void updateTerminator()
Update the terminator instructions in block to account for changes to the layout. ...
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
MIOperands - Iterate over operands of a single instruction.
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
unsigned succ_size() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Representation of each machine instruction.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
iterator_range< df_iterator< T > > depth_first(const T &G)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
const MachineOperand & getOperand(unsigned i) const
Machine model for scheduling, bundling, and heuristics.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.