33 #define DEBUG_TYPE "machine-combiner" 35 STATISTIC(NumInstCombined,
"Number of machineinst combined");
39 cl::desc(
"Incremental depth computation will be used for basic " 40 "blocks with more instructions."),
cl::init(500));
43 cl::desc(
"Dump all substituted intrs"),
46 #ifdef EXPENSIVE_CHECKS 48 "machine-combiner-verify-pattern-order",
cl::Hidden,
50 "Verify that the generated patterns are ordered by increasing latency"),
54 "machine-combiner-verify-pattern-order",
cl::Hidden,
56 "Verify that the generated patterns are ordered by increasing latency"),
83 StringRef getPassName()
const override {
return "Machine InstCombiner"; }
86 bool doSubstitute(
unsigned NewSize,
unsigned OldSize);
107 std::pair<unsigned, unsigned>
122 "Machine InstCombiner",
false,
false)
128 void MachineCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
129 AU.setPreservesCFG();
142 DefInstr =
MRI->getUniqueVRegDef(MO.
getReg());
144 if (DefInstr && DefInstr->
isPHI())
162 assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
163 "Missing machine model\n");
168 for (
auto *InstrPtr : InsInstrs) {
176 unsigned DepthOp = 0;
177 unsigned LatencyOp = 0;
180 if (II != InstrIdxForVirtReg.
end()) {
182 assert(II->second < InstrDepth.
size() &&
"Bad Index");
185 "There must be a definition for a new virtual register");
186 DepthOp = InstrDepth[II->second];
187 int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.
getReg());
188 int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.
getReg());
189 LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
195 LatencyOp = TSchedModel.computeOperandLatency(
197 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.
getReg()));
200 IDepth =
std::max(IDepth, DepthOp + LatencyOp);
204 unsigned NewRootIdx = InsInstrs.size() - 1;
205 return InstrDepth[NewRootIdx];
219 assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
220 "Missing machine model\n");
223 unsigned NewRootLatency = 0;
234 if (RI ==
MRI->reg_end())
237 unsigned LatencyOp = 0;
239 LatencyOp = TSchedModel.computeOperandLatency(
243 LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
245 NewRootLatency =
std::max(NewRootLatency, LatencyOp);
247 return NewRootLatency;
275 std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
279 assert(!InsInstrs.
empty() &&
"Only support sequences that insert instrs.");
280 unsigned NewRootLatency = 0;
283 for (
unsigned i = 0; i < InsInstrs.
size() - 1; i++)
284 NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
285 NewRootLatency +=
getLatency(&MI, NewRoot, BlockTrace);
287 unsigned RootLatency = 0;
288 for (
auto I : DelInstrs)
289 RootLatency += TSchedModel.computeInstrLatency(
I);
291 return {NewRootLatency, RootLatency};
299 bool MachineCombiner::improvesCriticalPathLen(
306 bool SlackIsAccurate) {
307 assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
308 "Missing machine model\n");
310 unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
313 LLVM_DEBUG(
dbgs() <<
" Dependence data for " << *Root <<
"\tNewRootDepth: " 314 << NewRootDepth <<
"\tRootDepth: " << RootDepth);
324 ?
dbgs() <<
"\t and it does it\n" 325 :
dbgs() <<
"\t but it does NOT do it\n");
326 return NewRootDepth < RootDepth;
334 unsigned NewRootLatency, RootLatency;
335 std::tie(NewRootLatency, RootLatency) =
336 getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
339 unsigned NewCycleCount = NewRootDepth + NewRootLatency;
340 unsigned OldCycleCount =
341 RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
343 <<
"\tRootLatency: " << RootLatency <<
"\n\tRootSlack: " 344 << RootSlack <<
" SlackIsAccurate=" << SlackIsAccurate
345 <<
"\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
346 <<
"\n\tRootDepth + RootLatency + RootSlack = " 349 ?
dbgs() <<
"\n\t It IMPROVES PathLen because" 350 :
dbgs() <<
"\n\t It DOES NOT improve PathLen because");
352 <<
", OldCycleCount = " << OldCycleCount <<
"\n");
354 return NewCycleCount <= OldCycleCount;
358 void MachineCombiner::instr2instrSC(
361 for (
auto *InstrPtr : Instrs) {
362 unsigned Opc = InstrPtr->getOpcode();
363 unsigned Idx =
TII->get(Opc).getSchedClass();
370 bool MachineCombiner::preservesResourceLen(
374 if (!TSchedModel.hasInstrSchedModel())
388 instr2instrSC(InsInstrs, InsInstrsSC);
389 instr2instrSC(DelInstrs, DelInstrsSC);
395 unsigned ResLenAfterCombine =
399 << ResLenBeforeCombine
400 <<
" and after: " << ResLenAfterCombine <<
"\n";);
402 ResLenAfterCombine <= ResLenBeforeCombine
403 ?
dbgs() <<
"\t\t As result it IMPROVES/PRESERVES Resource Length\n" 404 :
dbgs() <<
"\t\t As result it DOES NOT improve/preserve Resource " 407 return ResLenAfterCombine <= ResLenBeforeCombine;
412 bool MachineCombiner::doSubstitute(
unsigned NewSize,
unsigned OldSize) {
413 if (OptSize && (NewSize < OldSize))
415 if (!TSchedModel.hasInstrSchedModelOrItineraries())
436 bool IncrementalUpdate) {
437 for (
auto *InstrPtr : InsInstrs)
440 for (
auto *InstrPtr : DelInstrs) {
441 InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
443 for (
auto I = RegUnits.
begin();
I != RegUnits.
end(); ) {
444 if (
I->MI == InstrPtr)
451 if (IncrementalUpdate)
452 for (
auto *InstrPtr : InsInstrs)
462 void MachineCombiner::verifyPatternOrder(
466 (void)PrevLatencyDiff;
467 for (
auto P : Patterns) {
471 TII->genAlternativeCodeSequence(Root,
P, InsInstrs, DelInstrs,
476 if (InsInstrs.
empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
479 unsigned NewRootLatency, RootLatency;
480 std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
481 Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
482 long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
483 assert(CurrentLatencyDiff <= PrevLatencyDiff &&
484 "Current pattern is better than previous pattern.");
485 PrevLatencyDiff = CurrentLatencyDiff;
497 bool Changed =
false;
500 bool IncrementalUpdate =
false;
501 auto BlockIter = MBB->
begin();
502 decltype(BlockIter) LastUpdate;
511 while (BlockIter != MBB->
end()) {
512 auto &MI = *BlockIter++;
541 if (!
TII->getMachineCombinerPatterns(MI, Patterns))
545 verifyPatternOrder(MBB, MI, Patterns);
547 for (
auto P : Patterns) {
551 TII->genAlternativeCodeSequence(MI,
P, InsInstrs, DelInstrs,
553 unsigned NewInstCount = InsInstrs.
size();
554 unsigned OldInstCount = DelInstrs.
size();
562 dbgs() <<
"\tFor the Pattern (" << (int)
P <<
") these instructions could be removed\n";
563 for (
auto const *InstrPtr : DelInstrs) {
564 dbgs() <<
"\t\t" << STI->getSchedInfoStr(*InstrPtr) <<
": ";
565 InstrPtr->print(
dbgs(),
false,
false,
false,
TII);
567 dbgs() <<
"\tThese instructions could replace the removed ones\n";
568 for (
auto const *InstrPtr : InsInstrs) {
569 dbgs() <<
"\t\t" << STI->getSchedInfoStr(*InstrPtr) <<
": ";
570 InstrPtr->print(
dbgs(),
false,
false,
false,
TII);
574 bool SubstituteAlways =
false;
575 if (ML &&
TII->isThroughputPattern(
P))
576 SubstituteAlways =
true;
578 if (IncrementalUpdate) {
580 MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
581 LastUpdate = BlockIter;
588 if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
590 RegUnits, IncrementalUpdate);
601 Traces->verifyAnalysis();
602 if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
603 InstrIdxForVirtReg,
P,
604 !IncrementalUpdate) &&
605 preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
608 IncrementalUpdate =
true;
609 LastUpdate = BlockIter;
613 RegUnits, IncrementalUpdate);
622 for (
auto *InstrPtr : InsInstrs)
625 InstrIdxForVirtReg.
clear();
629 if (Changed && IncrementalUpdate)
630 Traces->invalidate(MBB);
636 TII = STI->getInstrInfo();
637 TRI = STI->getRegisterInfo();
638 SchedModel = STI->getSchedModel();
639 TSchedModel.init(STI);
641 MLI = &getAnalysis<MachineLoopInfo>();
642 Traces = &getAnalysis<MachineTraceMetrics>();
647 if (!
TII->useMachineCombiner()) {
650 <<
" Skipping pass: Target does not support machine combiner\n");
654 bool Changed =
false;
658 Changed |= combineInstructions(&MBB);
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
void push_back(const T &Elt)
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
iterator_range< mop_iterator > operands()
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
#define INITIALIZE_PASS_DEPENDENCY(depName)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
The core instruction combiner logic.
Select the trace through a block that has the fewest instructions.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
COFF::MachineTypes Machine
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
TargetInstrInfo - Interface to description of machine instruction set.
static CombinerObjective getCombinerObjective(MachineCombinerPattern P)
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, SmallVector< MachineInstr *, 16 > InsInstrs, SmallVector< MachineInstr *, 16 > DelInstrs, MachineTraceMetrics::Ensemble *MinInstr, SparseSet< LiveRegUnit > &RegUnits, bool IncrementalUpdate)
Inserts InsInstrs and deletes DelInstrs.
iterator find(const_arg_type_t< KeyT > Val)
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, cl::desc("Dump all substituted intrs"), cl::init(false))
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
MachineCombinerPattern
These are instruction patterns matched by the machine combiner pass.
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
const_iterator end() const
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
const_iterator begin() const
CHAIN = SC CHAIN, Imm128 - System call.
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static cl::opt< unsigned > inc_threshold("machine-combiner-inc-threshold", cl::Hidden, cl::desc("Incremental depth computation will be used for basic " "blocks with more instructions."), cl::init(500))
int findRegisterDefOperandIdx(unsigned Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found...
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
Representation of each machine instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
SparseSet - Fast set implmentation for objects that can be identified by small unsigned keys...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) INITIALIZE_PASS_END(MachineCombiner
bool isReg() const
isReg - Tests if this is a MO_Register operand.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getResourceLength(ArrayRef< const MachineBasicBlock *> Extrablocks=None, ArrayRef< const MCSchedClassDesc *> ExtraInstrs=None, ArrayRef< const MCSchedClassDesc *> RemoveInstrs=None) const
Return the resource length of the trace.
void initializeMachineCombinerPass(PassRegistry &)
StringRef - Represent a constant reference to a string, i.e.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Machine model for scheduling, bundling, and heuristics.
int findRegisterUseOperandIdx(unsigned Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found...
static cl::opt< bool > VerifyPatternOrder("machine-combiner-verify-pattern-order", cl::Hidden, cl::desc("Verify that the generated patterns are ordered by increasing latency"), cl::init(false))
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...