LLVM  8.0.1
TargetSchedule.cpp
Go to the documentation of this file.
1 //===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a wrapper around MCSchedModel that allows the interface
11 // to benefit from information currently only available in TargetInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
22 #include "llvm/MC/MCInstrDesc.h"
24 #include "llvm/MC/MCSchedule.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstdint>
31 
32 using namespace llvm;
33 
34 static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
35  cl::desc("Use TargetSchedModel for latency lookup"));
36 
37 static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
38  cl::desc("Use InstrItineraryData for latency lookup"));
39 
41  return EnableSchedModel && SchedModel.hasInstrSchedModel();
42 }
43 
45  return EnableSchedItins && !InstrItins.isEmpty();
46 }
47 
/// Greatest common divisor of two unsigned values (Euclid's algorithm).
///
/// The arguments need not be ordered: when Dividend < Divisor the first
/// iteration swaps them. gcd(X, 0) == X, and gcd(0, 0) == 0.
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
  while (Divisor) {
    unsigned Rem = Dividend % Divisor;
    Dividend = Divisor;
    Divisor = Rem;
  }
  return Dividend;
}

/// Least common multiple of two non-zero unsigned values.
///
/// The result is computed in 64 bits and must fit in `unsigned`; this is
/// checked by comparing the narrowed value against the wide one. (Checking
/// only `LCM >= A && LCM >= B` after truncation is not sufficient: e.g.
/// lcm(100000, 99999) wraps to a value that is still larger than both inputs.)
///
/// A zero operand is a caller error and asserts. In builds without assertions
/// a zero operand yields 0 instead of dividing by gcd(0, 0) == 0.
static unsigned lcm(unsigned A, unsigned B) {
  assert(A != 0 && B != 0 && "LCM of zero");
  if (A == 0 || B == 0)
    return 0;

  // Divide before multiplying; A is always a multiple of gcd(A, B).
  const uint64_t Wide = uint64_t(A / gcd(A, B)) * B;
  const unsigned LCM = static_cast<unsigned>(Wide);
  assert(LCM == Wide && "LCM overflow");
  return LCM;
}
63 
65  STI = TSInfo;
66  SchedModel = TSInfo->getSchedModel();
67  TII = TSInfo->getInstrInfo();
68  STI->initInstrItins(InstrItins);
69 
70  unsigned NumRes = SchedModel.getNumProcResourceKinds();
71  ResourceFactors.resize(NumRes);
72  ResourceLCM = SchedModel.IssueWidth;
73  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
74  unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
75  if (NumUnits > 0)
76  ResourceLCM = lcm(ResourceLCM, NumUnits);
77  }
78  MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
79  for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
80  unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
81  ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
82  }
83 }
84 
85 /// Returns true only if instruction is specified as single issue.
87  const MCSchedClassDesc *SC) const {
88  if (hasInstrSchedModel()) {
89  if (!SC)
90  SC = resolveSchedClass(MI);
91  if (SC->isValid())
92  return SC->BeginGroup;
93  }
94  return false;
95 }
96 
98  const MCSchedClassDesc *SC) const {
99  if (hasInstrSchedModel()) {
100  if (!SC)
101  SC = resolveSchedClass(MI);
102  if (SC->isValid())
103  return SC->EndGroup;
104  }
105  return false;
106 }
107 
109  const MCSchedClassDesc *SC) const {
110  if (hasInstrItineraries()) {
111  int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
112  return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
113  }
114  if (hasInstrSchedModel()) {
115  if (!SC)
116  SC = resolveSchedClass(MI);
117  if (SC->isValid())
118  return SC->NumMicroOps;
119  }
120  return MI->isTransient() ? 0 : 1;
121 }
122 
// A machine model is allowed to give a negative ("invalid") cycle count for a
// write, which stands for an effectively unbounded latency. Clients of the
// TargetSchedule API are not prepared for that, so such values are replaced
// by a large sentinel that stands out when inspecting the DAG yet is small
// enough not to overflow when latencies are summed.
static unsigned capLatency(int Cycles) {
  if (Cycles < 0)
    return 1000;
  return static_cast<unsigned>(Cycles);
}
130 
131 /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
132 /// evaluation of predicates that depend on instruction operands or flags.
135  // Get the definition's scheduling class descriptor from this machine model.
136  unsigned SchedClass = MI->getDesc().getSchedClass();
137  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
138  if (!SCDesc->isValid())
139  return SCDesc;
140 
141 #ifndef NDEBUG
142  unsigned NIter = 0;
143 #endif
144  while (SCDesc->isVariant()) {
145  assert(++NIter < 6 && "Variants are nested deeper than the magic number");
146 
147  SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
148  SCDesc = SchedModel.getSchedClassDesc(SchedClass);
149  }
150  return SCDesc;
151 }
152 
153 /// Find the def index of this operand. This index maps to the machine model and
154 /// is independent of use operands. Def operands may be reordered with uses or
155 /// merged with uses without affecting the def index (e.g. before/after
156 /// regalloc). However, an instruction's def operands must never be reordered
157 /// with respect to each other.
158 static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
159  unsigned DefIdx = 0;
160  for (unsigned i = 0; i != DefOperIdx; ++i) {
161  const MachineOperand &MO = MI->getOperand(i);
162  if (MO.isReg() && MO.isDef())
163  ++DefIdx;
164  }
165  return DefIdx;
166 }
167 
168 /// Find the use index of this operand. This is independent of the instruction's
169 /// def operands.
170 ///
171 /// Note that uses are not determined by the operand's isUse property, which
172 /// is simply the inverse of isDef. Here we consider any readsReg operand to be
173 /// a "use". The machine model allows an operand to be both a Def and Use.
174 static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
175  unsigned UseIdx = 0;
176  for (unsigned i = 0; i != UseOperIdx; ++i) {
177  const MachineOperand &MO = MI->getOperand(i);
178  if (MO.isReg() && MO.readsReg() && !MO.isDef())
179  ++UseIdx;
180  }
181  return UseIdx;
182 }
183 
184 // Top-level API for clients that know the operand indices.
186  const MachineInstr *DefMI, unsigned DefOperIdx,
187  const MachineInstr *UseMI, unsigned UseOperIdx) const {
188 
190  return TII->defaultDefLatency(SchedModel, *DefMI);
191 
192  if (hasInstrItineraries()) {
193  int OperLatency = 0;
194  if (UseMI) {
195  OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
196  *UseMI, UseOperIdx);
197  }
198  else {
199  unsigned DefClass = DefMI->getDesc().getSchedClass();
200  OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
201  }
202  if (OperLatency >= 0)
203  return OperLatency;
204 
205  // No operand latency was found.
206  unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
207 
208  // Expected latency is the max of the stage latency and itinerary props.
209  // Rather than directly querying InstrItins stage latency, we call a TII
210  // hook to allow subtargets to specialize latency. This hook is only
211  // applicable to the InstrItins model. InstrSchedModel should model all
212  // special cases without TII hooks.
213  InstrLatency =
214  std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
215  return InstrLatency;
216  }
217  // hasInstrSchedModel()
218  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
219  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
220  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
221  // Lookup the definition's write latency in SubtargetInfo.
222  const MCWriteLatencyEntry *WLEntry =
223  STI->getWriteLatencyEntry(SCDesc, DefIdx);
224  unsigned WriteID = WLEntry->WriteResourceID;
225  unsigned Latency = capLatency(WLEntry->Cycles);
226  if (!UseMI)
227  return Latency;
228 
229  // Lookup the use's latency adjustment in SubtargetInfo.
230  const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
231  if (UseDesc->NumReadAdvanceEntries == 0)
232  return Latency;
233  unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
234  int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
235  if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
236  return 0;
237  return Latency - Advance;
238  }
239  // If DefIdx does not exist in the model (e.g. implicit defs), then return
240  // unit latency (defaultDefLatency may be too conservative).
241 #ifndef NDEBUG
242  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
243  && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
244  && SchedModel.isComplete()) {
245  errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
246  << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
247  llvm_unreachable("incomplete machine model");
248  }
249 #endif
250  // FIXME: Automatically giving all implicit defs defaultDefLatency is
251  // undesirable. We should only do it for defs that are known to the MC
252  // desc like flags. Truly implicit defs should get 1 cycle latency.
253  return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
254 }
255 
256 unsigned
257 TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
258  return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));
259 }
260 
261 unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
262  assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
263  unsigned SCIdx = TII->get(Opcode).getSchedClass();
264  return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx));
265 }
266 
267 unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const {
268  if (hasInstrSchedModel())
269  return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst));
270  return computeInstrLatency(Inst.getOpcode());
271 }
272 
273 unsigned
274 TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
275  bool UseDefaultDefLatency) const {
276  // For the itinerary model, fall back to the old subtarget hook.
277  // Allow subtargets to compute Bundle latencies outside the machine model.
278  if (hasInstrItineraries() || MI->isBundle() ||
279  (!hasInstrSchedModel() && !UseDefaultDefLatency))
280  return TII->getInstrLatency(&InstrItins, *MI);
281 
282  if (hasInstrSchedModel()) {
283  const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
284  if (SCDesc->isValid())
285  return computeInstrLatency(*SCDesc);
286  }
287  return TII->defaultDefLatency(SchedModel, *MI);
288 }
289 
290 unsigned TargetSchedModel::
291 computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
292  const MachineInstr *DepMI) const {
293  if (!SchedModel.isOutOfOrder())
294  return 1;
295 
296  // Out-of-order processor can dispatch WAW dependencies in the same cycle.
297 
298  // Treat predication as a data dependency for out-of-order cpus. In-order
299  // cpus do not need to treat predicated writes specially.
300  //
301  // TODO: The following hack exists because predication passes do not
302  // correctly append imp-use operands, and readsReg() strangely returns false
303  // for predicated defs.
304  unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
305  const MachineFunction &MF = *DefMI->getMF();
307  if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
308  return computeInstrLatency(DefMI);
309 
310  // If we have a per operand scheduling model, check if this def is writing
311  // an unbuffered resource. If so, it treated like an in-order cpu.
312  if (hasInstrSchedModel()) {
313  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
314  if (SCDesc->isValid()) {
315  for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
316  *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
317  if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
318  return 1;
319  }
320  }
321  }
322  return 0;
323 }
324 
325 double
327  if (hasInstrItineraries()) {
328  unsigned SchedClass = MI->getDesc().getSchedClass();
329  return MCSchedModel::getReciprocalThroughput(SchedClass,
331  }
332 
333  if (hasInstrSchedModel())
335 
336  return 0.0;
337 }
338 
339 double
341  unsigned SchedClass = TII->get(Opcode).getSchedClass();
342  if (hasInstrItineraries())
343  return MCSchedModel::getReciprocalThroughput(SchedClass,
345  if (hasInstrSchedModel()) {
346  const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass);
347  if (SCDesc.isValid() && !SCDesc.isVariant())
348  return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);
349  }
350 
351  return 0.0;
352 }
353 
354 double
356  if (hasInstrSchedModel())
357  return SchedModel.getReciprocalThroughput(*STI, *TII, MI);
359 }
360 
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
double computeReciprocalThroughput(const MachineInstr *MI) const
Compute the reciprocal throughput of the given instruction.
static unsigned lcm(unsigned A, unsigned B)
unsigned IssueWidth
Definition: MCSchedule.h:256
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
uint16_t NumReadAdvanceEntries
Definition: MCSchedule.h:125
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
unsigned const TargetRegisterInfo * TRI
static unsigned gcd(unsigned Dividend, unsigned Divisor)
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
unsigned defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr &DefMI) const
Return the default expected latency for a def based on its opcode.
virtual unsigned resolveSchedClass(unsigned SchedClass, const MachineInstr *MI, const TargetSchedModel *SchedModel) const
Resolve a SchedClass at runtime, where SchedClass identifies an MCSchedClassDesc with the isVariant p...
bool isComplete() const
Return true if this machine model data for all instructions with a scheduling class (itinerary class ...
Definition: MCSchedule.h:330
static cl::opt< bool > EnableSchedItins("scheditins", cl::Hidden, cl::init(true), cl::desc("Use InstrItineraryData for latency lookup"))
const InstrItineraryData * getInstrItineraries() const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
bool isBundle() const
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const
Compute the instruction latency of a given instruction.
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
virtual const TargetInstrInfo * getInstrInfo() const
bool isValid() const
Definition: MCSchedule.h:127
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:161
void initInstrItins(InstrItineraryData &InstrItins) const
Initialize an InstrItineraryData instance.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:577
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
bool isOptionalDef() const
Set if this operand is a optional def.
Definition: MCInstrDesc.h:96
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
MachineInstrBuilder & UseMI
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const
Return the number of u-operations the given machine instruction will be decoded to on the target cpu...
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
virtual bool isPredicated(const MachineInstr &MI) const
Returns true if the instruction is already predicated.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
Specify the latency in cpu cycles for a particular scheduling class and def index.
Definition: MCSchedule.h:78
MachineOperand class - Representation of each machine instruction operand.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
MachineInstrBuilder MachineInstrBuilder & DefMI
CHAIN = SC CHAIN, Imm128 - System call.
bool isOutOfOrder() const
Return true if machine supports out of order execution.
Definition: MCSchedule.h:333
bool isVariant() const
Definition: MCSchedule.h:130
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool hasInstrItineraries() const
Return true if this machine model includes cycle-to-cycle itinerary data.
static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx)
Find the use index of this operand.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
bool isEmpty() const
Returns true if there are no itineraries.
TargetSubtargetInfo - Generic base class for all target subtargets.
static double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Definition: MCSchedule.cpp:89
Representation of each machine instruction.
Definition: MachineInstr.h:64
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
Definition: MCSchedule.cpp:41
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
static cl::opt< bool > EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), cl::desc("Use TargetSchedModel for latency lookup"))
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
int getReadAdvanceCycles(const MCSchedClassDesc *SC, unsigned UseIdx, unsigned WriteResID) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MCOperandInfo * OpInfo
Definition: MCInstrDesc.h:175
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
IRTranslator LLVM IR MI
unsigned getOpcode() const
Definition: MCInst.h:174
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx)
Find the def index of this operand.
virtual int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
static unsigned capLatency(int Cycles)
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
bool isImplicit() const
void resize(size_type N)
Definition: SmallVector.h:351