#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);
  StringRef getPassName() const override { return "X86 cmov Conversion"; }

  // ...
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);
  // ...
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);
void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
  // ...
}

bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  // ...
  bool Changed = false;
  // ...
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);
  // ...
  CmovGroups AllCmovGroups;
  if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
    for (auto &Group : AllCmovGroups) {
      // ...
      convertCmovInstsToBranches(Group);
    }
  }

  // Gather all loops, then visit only the innermost ones.
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }
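// Taken together, runOnMachineFunction makes two passes over the function: it
// first (under ForceMemOperand) rewrites every CMOV group that contains a
// memory-operand CMOV, since such a CMOV serializes the load with the select,
// and it then walks only the innermost loops and converts the remaining
// register-operand CMOV groups that survive the profitability checks below.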
bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  // ...
  for (auto *MBB : Blocks) {
    // ...
    // Indicator of a non-CMOV instruction in the currently processed range.
    bool FoundNonCMOVInst = false;
    // Indicator that the current CMOV group should be skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;
      // ...
          FoundNonCMOVInst = false;
      // ...
      // Skip the group if it was interrupted by a non-CMOV instruction or if
      // it mixes condition codes other than FirstCC and its opposite.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          SkipGroup = true;
      // ...
      // Memory-operand CMOVs must all use the same condition code.
          else if (CC != MemOpCC)
            SkipGroup = true;
      // ...
      // Also skip groups whose result feeds a SUBREG_TO_REG, i.e. code that
      // relies on the zero-extending behavior of the CMOV.
        if (llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          SkipGroup = true;
      // ...
      FoundNonCMOVInst = true;
      // An EFLAGS def ends the current range; emit or skip the pending group.
      if (I.definesRegister(X86::EFLAGS)) {
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        // ...
      }
    }

    // The end of the basic block also ends the range.
    // ...
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  return (TrueOpDepth + FalseOpDepth + 1) / 2;
}
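// With no branch-weight information, each operand is assumed to be selected
// half of the time, so the depth of the converted CMOV is modeled as the
// rounded-up average of its two operand depths. A purely illustrative
// calculation: for TrueOpDepth = 3 and FalseOpDepth = 6 this gives
// (3 + 6 + 1) / 2 = 5 cycles, whereas the unconverted CMOV must wait for the
// deeper operand, i.e. at least 6 cycles plus its own latency.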
bool X86CmovConverterPass::checkForProfitableCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
  // ...
  // Depth is modeled over two consecutive loop iterations so that loop-carried
  // dependencies are reflected in the second iteration's numbers.
  static const unsigned LoopIterations = 2;
  // ...
  DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
  enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
  // ...
  // The depth of an unknown def (e.g. a value defined outside the loop) is zero.
  DepthMap[nullptr] = {0, 0};

  SmallPtrSet<MachineInstr *, 4> CmovInstructions;
  for (auto &Group : CmovInstGroups)
    CmovInstructions.insert(Group.begin(), Group.end());
  for (unsigned I = 0; I < LoopIterations; ++I) {
    DepthInfo &MaxDepth = LoopDepth[I];
    for (auto *MBB : Blocks) {
      // Clear the physical-register def map between basic blocks.
      RegDefMaps[PhyRegType].clear();
      for (MachineInstr &MI : *MBB) {
        // Skip debug instructions.
        if (MI.isDebugInstr())
          continue;
        unsigned MIDepth = 0;
        unsigned MIDepthOpt = 0;
        bool IsCMOV = CmovInstructions.count(&MI);
        for (auto &MO : MI.uses()) {
          // Check isUse() because uses() also returns implicit definitions.
          if (!MO.isReg() || !MO.isUse())
            continue;
          unsigned Reg = MO.getReg();
          auto &RDM = RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)];
          if (MachineInstr *DefMI = RDM.lookup(Reg)) {
            OperandToDefMap[&MO] = DefMI;
            DepthInfo Info = DepthMap.lookup(DefMI);
            MIDepth = std::max(MIDepth, Info.Depth);
            if (!IsCMOV)
              MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
          }
        }

        // For a CMOV, the optimized depth is what it would cost after being
        // converted into a branch.
        if (IsCMOV)
          MIDepthOpt = getDepthOfOptCmov(
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
              DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);

        // Iterate over all operands to record implicit definitions as well.
        for (auto &MO : MI.operands()) {
          if (!MO.isReg() || !MO.isDef())
            continue;
          unsigned Reg = MO.getReg();
          RegDefMaps[TargetRegisterInfo::isVirtualRegister(Reg)][Reg] = &MI;
        }

        unsigned Latency = TSchedModel.computeInstrLatency(&MI);
        DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
        MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
        MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
      }
    }
  }
  unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
                                   LoopDepth[1].Depth - LoopDepth[1].OptDepth};
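// Diff[i] is the number of depth cycles the loop would save per iteration if
// all candidate CMOVs were converted. The check below accepts the loop only
// when that gain is at least one eighth of the loop's critical-path depth
// (and, when the gain grows between the two modeled iterations, when the
// growth covers at least half of the added depth). Illustrative numbers only:
// with LoopDepth[0].Depth = 40 and LoopDepth[0].OptDepth = 34, Diff[0] = 6 and
// 6 * 8 = 48 >= 40, so the loop is considered worth optimizing.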
  bool WorthOptLoop = false;
  if (Diff[1] == Diff[0])
    WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
  else if (Diff[1] > Diff[0])
    WorthOptLoop =
        (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
        (Diff[1] * 8 >= LoopDepth[1].Depth);

  if (!WorthOptLoop)
    return false;
  ++NumOfLoopCandidate;
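// Next, each CMOV group is checked on its own: the condition (operand 3) must
// become available sufficiently later than the value operands, otherwise the
// select is effectively free and a branch only adds misprediction risk. The
// "* 4 < MispredictPenalty" test below requires the condition to trail the
// values by at least a quarter of the mispredict penalty; with a hypothetical
// MispredictPenalty of 20 cycles (illustrative only, the real value comes from
// the scheduling model) the condition would have to be ready at least 5 cycles
// after the values.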
  unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
  CmovGroups TempGroups;
  std::swap(TempGroups, CmovInstGroups);
  for (auto &Group : TempGroups) {
    bool WorthOpGroup = true;
    for (auto *MI : Group) {
      // Avoid a CMOV whose only use is a load address: such code (e.g. a tree
      // search) tends to have an unpredictable branch.
      auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
      if (UIs.begin() != UIs.end() && ++UIs.begin() == UIs.end()) {
        unsigned Op = UIs.begin()->getOpcode();
        if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
          WorthOpGroup = false;
          break;
        }
      }

      unsigned CondCost =
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth;
      unsigned ValCost = getDepthOfOptCmov(
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
          DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
      if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
        WorthOpGroup = false;
        break;
      }
    }

    if (WorthOpGroup)
      CmovInstGroups.push_back(Group);
  }

  return !CmovInstGroups.empty();
}
static bool checkEFLAGSLive(MachineInstr *MI) {
  // ...
  // Scan forward through the block for a use or def of EFLAGS.
  for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
    if (I->readsRegister(X86::EFLAGS))
      return true;
    if (I->definesRegister(X86::EFLAGS))
      return false;
  }

  // We hit the end of the block: check whether EFLAGS is live into a successor.
  for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I)
    if ((*I)->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
598 "Last instruction in a CMOV group must be a CMOV instruction");
602 if (
I->isDebugInstr())
608 for (
auto *
MI : DBGInstructions)
void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;
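  // The rewrite carried out below (block and value names here are illustrative)
  // replaces a group such as
  //
  //   MBB:
  //     cond = cmp ...
  //     v1 = CMOVcc t1, f1      ; uses EFLAGS
  //
  // with a conditional branch over an otherwise-empty false block and a PHI:
  //
  //   MBB:      cond = cmp ... ; jcc %SinkMBB
  //   FalseMBB: (unfolded loads, if any) ; falls through
  //   SinkMBB:  v1 = phi [ one value, %FalseMBB ], [ the other, %MBB ]
  //
  // A CMOV in the group that tests the opposite condition simply gets its PHI
  // operands swapped rather than requiring a second branch.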
    // Track, for each CMOV destination, the register that holds its value on
    // the false path, chasing through earlier CMOVs in the group.
      auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
      if (FRIt == FalseBBRegRewriteTable.end())
        break;
      FalseReg = FRIt->second;
    // ...
    FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
733 "Can only handle memory-operand cmov instructions with a condition " 734 "opposite to the selected branch direction.");
    unsigned TmpReg = MRI->createVirtualRegister(RC);

    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    assert(Unfolded && "Should never fail to unfold a loading cmov!");
    // ...
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
775 if (&*MIItBegin == &MI)
780 for (
auto *NewMI : NewMIs) {
782 FalseMBB->
insert(FalseInsertionPoint, NewMI);
784 for (
auto &MOp : NewMI->uses()) {
787 auto It = FalseBBRegRewriteTable.
find(MOp.getReg());
788 if (It == FalseBBRegRewriteTable.
end())
791 MOp.setReg(It->second);
797 MOp.setIsKill(
false);
804 FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
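    // At this point the load produced by unfolding lives in FalseMBB, so it
    // only executes when control actually falls through to the false path, and
    // TmpReg now stands for the former memory operand: later CMOVs in the
    // group that consumed the old destination register are redirected to it
    // through FalseBBRegRewriteTable.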
    unsigned DestReg = MIIt->getOperand(0).getReg();
    unsigned Op1Reg = MIIt->getOperand(1).getReg();
    unsigned Op2Reg = MIIt->getOperand(2).getReg();
    // ...
    // If an earlier CMOV in the group defined one of these operands, use the
    // value already routed through that CMOV's PHI instead.
    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);

    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
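    // Recording DestReg here lets a later CMOV in the same group that read
    // this CMOV's result build its PHI from the per-predecessor values
    // (Op1Reg on the FalseMBB edge, Op2Reg on the MBB edge): the earlier PHI's
    // result is only defined in SinkMBB itself, so it cannot serve as an
    // incoming value from either predecessor.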
851 MBB->
erase(MIItBegin, MIItEnd);
861 return new X86CmovConverterPass();