23 #define DEBUG_TYPE "llvm-mca" 32 : STI(sti), MCII(mcii),
MRI(mri), MCIA(mcia), FirstCallInst(
true),
33 FirstReturnInst(
true) {
46 using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
47 std::vector<ResourcePlusCycles> Worklist;
62 APInt Buffers(NumProcResources, 0);
64 bool AllInOrderResources =
true;
65 bool AnyDispatchHazards =
false;
71 AllInOrderResources =
false;
79 Worklist.emplace_back(ResourcePlusCycles(Mask,
ResourceUsage(RCy)));
81 uint64_t Super = ProcResourceMasks[PR.
SuperIdx];
82 SuperResources[Super] += PRE->
Cycles;
90 sort(Worklist, [](
const ResourcePlusCycles &A,
const ResourcePlusCycles &
B) {
93 if (popcntA < popcntB)
95 if (popcntA > popcntB)
97 return A.first < B.first;
100 uint64_t UsedResourceUnits = 0;
103 for (
unsigned I = 0,
E = Worklist.size();
I <
E; ++
I) {
104 ResourcePlusCycles &A = Worklist[
I];
105 if (!A.second.size()) {
106 A.second.NumUnits = 0;
107 A.second.setReserved();
113 uint64_t NormalizedMask = A.first;
115 UsedResourceUnits |= A.first;
121 for (
unsigned J =
I + 1; J <
E; ++J) {
122 ResourcePlusCycles &B = Worklist[J];
123 if ((NormalizedMask & B.first) == NormalizedMask) {
124 B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
148 for (ResourcePlusCycles &RPC : ID.
Resources) {
152 if ((Mask & UsedResourceUnits) == Mask)
153 RPC.second.setReserved();
158 for (
const std::pair<uint64_t, unsigned> &SR : SuperResources) {
159 for (
unsigned I = 1, E = NumProcResources;
I <
E; ++
I) {
164 uint64_t
Mask = ProcResourceMasks[
I];
165 if (Mask != SR.first && ((Mask & SR.first) == SR.first))
173 for (
unsigned I = 0, E = NumProcResources;
I < E && NumBuffers; ++
I) {
176 ID.
Buffers[NumBuffers] = ProcResourceMasks[
I];
182 for (
const std::pair<uint64_t, ResourceUsage> &R : ID.
Resources)
184 <<
"cy=" << R.second.size() <<
'\n';
185 for (
const uint64_t R : ID.
Buffers)
208 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
215 if (NumExplicitDefs) {
216 return make_error<InstructionError<MCInst>>(
217 "Expected more register operand definitions.", MCI);
224 std::string Message =
225 "expected a register operand for an optional definition. Instruction " 226 "has not been correctly analyzed.";
227 return make_error<InstructionError<MCInst>>(Message, MCI);
235 unsigned SchedClassID) {
282 unsigned NumExplicitDefs = MCDesc.
getNumDefs();
285 unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
290 ID.
Writes.resize(TotalDefs + NumVariadicOps);
294 unsigned CurrentDef = 0;
296 for (; i < MCI.
getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
303 if (CurrentDef < NumWriteLatencyEntries) {
318 <<
", Latency=" << Write.
Latency 324 assert(CurrentDef == NumExplicitDefs &&
325 "Expected more register operand definitions.");
326 for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
327 unsigned Index = NumExplicitDefs + CurrentDef;
331 if (Index < NumWriteLatencyEntries) {
349 <<
", Latency=" << Write.
Latency 363 <<
", Latency=" << Write.
Latency 380 CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.
hasOptionalDef();
382 I < NumVariadicOps && !AssumeUsesOnly; ++
I, ++OpIndex) {
396 <<
", Latency=" << Write.
Latency 401 ID.
Writes.resize(CurrentDef);
405 unsigned SchedClassID) {
413 unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
414 ID.
Reads.resize(TotalUses);
415 unsigned CurrentUse = 0;
416 for (
unsigned I = 0, OpIndex = MCDesc.
getNumDefs();
I < NumExplicitUses;
428 <<
", UseIndex=" << Read.
UseIndex <<
'\n');
433 for (
unsigned I = 0;
I < NumImplicitUses; ++
I) {
440 <<
", UseIndex=" << Read.
UseIndex <<
", RegisterID=" 444 CurrentUse += NumImplicitUses;
454 I < NumVariadicOps && !AssumeDefsOnly; ++
I, ++OpIndex) {
461 Read.
UseIndex = NumExplicitUses + NumImplicitUses +
I;
465 <<
", UseIndex=" << Read.
UseIndex <<
'\n');
468 ID.
Reads.resize(CurrentUse);
472 const MCInst &MCI)
const {
478 bool UsesResources = !ID.
Resources.empty();
479 if (!UsesMemory && !UsesBuffers && !UsesResources)
484 Message =
"found an inconsistent instruction that decodes " 485 "into zero opcodes and that consumes load/store " 488 Message =
"found an inconsistent instruction that decodes " 489 "to zero opcodes and that consumes scheduler " 493 return make_error<InstructionError<MCInst>>(Message, MCI);
497 InstrBuilder::createInstrDescImpl(
const MCInst &MCI) {
499 "Itineraries are not yet supported!");
517 return make_error<InstructionError<MCInst>>(
518 "unable to resolve scheduling class for write variant.", MCI);
525 return make_error<InstructionError<MCInst>>(
526 "found an unsupported instruction in the input assembly sequence.",
531 LLVM_DEBUG(
dbgs() <<
"\t\tSchedClassID=" << SchedClassID <<
'\n');
534 std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
537 if (MCDesc.
isCall() && FirstCallInst) {
541 <<
"Assume a latency of 100cy.\n";
542 FirstCallInst =
false;
545 if (MCDesc.
isReturn() && FirstReturnInst) {
547 <<
" assembly sequence.\n";
549 FirstReturnInst =
false;
552 ID->MayLoad = MCDesc.
mayLoad();
555 ID->BeginGroup = SCDesc.BeginGroup;
556 ID->EndGroup = SCDesc.EndGroup;
562 return std::move(Err);
564 populateWrites(*ID, MCI, SchedClassID);
565 populateReads(*ID, MCI, SchedClassID);
567 LLVM_DEBUG(
dbgs() <<
"\t\tMaxLatency=" << ID->MaxLatency <<
'\n');
568 LLVM_DEBUG(
dbgs() <<
"\t\tNumMicroOps=" << ID->NumMicroOps <<
'\n');
571 if (
Error Err = verifyInstrDesc(*ID, MCI))
572 return std::move(Err);
577 if (!IsVariadic && !IsVariant) {
578 Descriptors[MCI.
getOpcode()] = std::move(ID);
582 VariantDescriptors[&MCI] = std::move(ID);
583 return *VariantDescriptors[&MCI];
587 InstrBuilder::getOrCreateInstrDesc(
const MCInst &MCI) {
588 if (Descriptors.find_as(MCI.
getOpcode()) != Descriptors.end())
591 if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
592 return *VariantDescriptors[&MCI];
594 return createInstrDescImpl(MCI);
603 std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(
D);
608 bool IsZeroIdiom =
false;
609 bool IsDepBreaking =
false;
612 IsZeroIdiom = MCIA->
isZeroIdiom(MCI, Mask, ProcID);
616 NewIS->setOptimizableMove();
639 assert(RegID > 0 &&
"Invalid register ID found!");
640 NewIS->getUses().emplace_back(RD, RegID);
646 if (Mask.isNullValue()) {
655 if (Mask.getBitWidth() > RD.
UseIndex) {
666 return std::move(NewIS);
678 unsigned WriteIndex = 0;
688 assert(RegID &&
"Expected a valid register ID!");
689 NewIS->getDefs().emplace_back(WD, RegID,
690 WriteMask[WriteIndex],
695 return std::move(NewIS);
Expected< std::unique_ptr< Instruction > > createInstruction(const MCInst &MCI)
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
unsigned getNumImplicitDefs() const
Return the number of implicit defs this instruction has.
This class represents lattice values for constants.
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
SmallVector< uint64_t, 4 > Buffers
Subclass of Error for the sole purpose of identifying the success path in the type system...
bool isImplicitRead() const
A register read descriptor.
const MCPhysReg * getImplicitUses() const
Return a list of registers that are potentially read by any instance of this machine instruction...
Describe properties that are true of each instruction in the target description file.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
unsigned SClassOrWriteResourceID
static raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
block Block Frequency true
unsigned getProcessorID() const
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Error takeError()
Take ownership of the stored error.
bool mayLoad() const
Return true if this instruction could possibly read memory.
bool isReturn() const
Return true if the instruction is a return.
void setBit(unsigned BitPosition)
Set a given bit to 1.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Tracks register operand latency in cycles.
SmallVector< ReadDescriptor, 4 > Reads
static raw_ostream & note()
Convenience method for printing "note: " to stderr.
Tagged union holding either a T or a Error.
This file implements a class to represent arbitrary precision integral constant values and operations...
unsigned getReg() const
Returns the register number.
virtual bool isOptimizableRegisterMove(const MCInst &MI, unsigned CPUID) const
Returns true if MI is a candidate for move elimination.
void setIndependentFromDef()
Helper used by class InstrDesc to describe how hardware resources are used.
uint16_t NumWriteProcResEntries
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
A register write descriptor.
void computeProcResourceMasks(const MCSchedModel &SM, MutableArrayRef< uint64_t > Masks)
Populates vector Masks with processor resource masks.
Instances of this class represent a single low-level machine instruction.
const char * getName(unsigned RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register...
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCPhysReg * getImplicitDefs() const
Return a list of registers that are potentially written by any instance of this machine instruction...
virtual bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Writes) const
Returns true if at least one of the register writes performed by Inst implicitly clears the upper portion of all super-registers.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
unsigned countPopulation() const
Count the number of bits set.
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g. ARM instructions which can set condition code if 's' bit is set.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Interface to description of machine instruction set.
virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking instruction for the subtarget associated with CPUID ...
unsigned getNumOperands() const
SmallVector< std::pair< uint64_t, ResourceUsage >, 4 > Resources
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
void sort(IteratorTy Start, IteratorTy End)
static const unsigned short InvalidNumMicroOps
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Specify the latency in cpu cycles for a particular scheduling class and def index.
bool isImplicitWrite() const
Define a kind of processor resource that will be modeled by the scheduler.
const MCOperand & getOperand(unsigned i) const
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Class for arbitrary precision integers.
bool mayStore() const
Return true if this instruction could possibly modify memory.
bool MustIssueImmediately
virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking zero-idiom for the given subtarget.
An instruction descriptor.
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI)
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
LLVM_NODISCARD bool empty() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
bool isCall() const
Return true if the instruction is a call.
Generic base class for all target subtargets.
virtual unsigned resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, unsigned CPUID) const
Resolve a variant scheduling class for the given MCInst and CPU.
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A builder class for instructions that are statically analyzed by llvm-mca.
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Lightweight error class with error context and mandatory checking.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
uint16_t NumWriteLatencyEntries
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
unsigned getOpcode() const
Instances of this class represent operands of the MCInst class.
static void initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI, ArrayRef< uint64_t > ProcResourceMasks)
Machine model for scheduling, bundling, and heuristics.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned getNumProcResourceKinds() const
SmallVector< WriteDescriptor, 4 > Writes