LLVM  8.0.1
SystemZHazardRecognizer.cpp
Go to the documentation of this file.
1 //=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
26 // A block that has only one predecessor continues scheduling with that
27 // predecessor's state (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42  cl::desc("The OOO window for processor "
43  "resources during scheduling."),
44  cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
47 getNumDecoderSlots(SUnit *SU) const {
48  const MCSchedClassDesc *SC = getSchedClass(SU);
49  if (!SC->isValid())
50  return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52  assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
53  "Only cracked instruction can have 2 uops.");
54  assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
55  "Expanded instructions always group alone.");
56  assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
57  "Expanded instructions fill the group(s).");
58 
59  return SC->NumMicroOps;
60 }
61 
62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63  unsigned Idx = CurrGroupSize;
64  if (GrpCount % 2)
65  Idx += 3;
66 
67  if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68  if (Idx == 1 || Idx == 2)
69  Idx = 3;
70  else if (Idx == 4 || Idx == 5)
71  Idx = 0;
72  }
73 
74  return Idx;
75 }
76 
78 getHazardType(SUnit *m, int Stalls) {
79  return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
80 }
81 
83  CurrGroupSize = 0;
84  CurrGroupHas4RegOps = false;
85  clearProcResCounters();
86  GrpCount = 0;
87  LastFPdOpCycleIdx = UINT_MAX;
88  LastEmittedMI = nullptr;
89  LLVM_DEBUG(CurGroupDbg = "";);
90 }
91 
92 bool
93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
94  const MCSchedClassDesc *SC = getSchedClass(SU);
95  if (!SC->isValid())
96  return true;
97 
98  // A cracked instruction only fits into schedule if the current
99  // group is empty.
100  if (SC->BeginGroup)
101  return (CurrGroupSize == 0);
102 
103  // An instruction with 4 register operands will not fit in last slot.
104  assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
105  "Current decoder group is already full!");
106  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
107  return false;
108 
109  // Since a full group is handled immediately in EmitInstruction(),
110  // SU should fit into current group. NumSlots should be 1 or 0,
111  // since it is not a cracked or expanded instruction.
112  assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
113  "Expected normal instruction to fit in non-full group!");
114 
115  return true;
116 }
117 
118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
119  const MachineFunction &MF = *MI->getParent()->getParent();
120  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
121  const MCInstrDesc &MID = MI->getDesc();
122  unsigned Count = 0;
123  for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
124  const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
125  if (RC == nullptr)
126  continue;
127  if (OpIdx >= MID.getNumDefs() &&
128  MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
129  continue;
130  Count++;
131  }
132  return Count >= 4;
133 }
134 
135 void SystemZHazardRecognizer::nextGroup() {
136  if (CurrGroupSize == 0)
137  return;
138 
139  LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
140  LLVM_DEBUG(CurGroupDbg = "";);
141 
142  int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
143  assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
144  "Current decoder group bad.");
145 
146  // Reset counter for next group.
147  CurrGroupSize = 0;
148  CurrGroupHas4RegOps = false;
149 
150  GrpCount += ((unsigned) NumGroups);
151 
152  // Decrease counters for execution units.
153  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
154  ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
155  ? (ProcResourceCounters[i] - NumGroups)
156  : 0);
157 
158  // Clear CriticalResourceIdx if it is now below the threshold.
159  if (CriticalResourceIdx != UINT_MAX &&
160  (ProcResourceCounters[CriticalResourceIdx] <=
162  CriticalResourceIdx = UINT_MAX;
163 
164  LLVM_DEBUG(dumpState(););
165 }
166 
167 #ifndef NDEBUG // Debug output
169  OS << "SU(" << SU->NodeNum << "):";
170  OS << TII->getName(SU->getInstr()->getOpcode());
171 
172  const MCSchedClassDesc *SC = getSchedClass(SU);
173  if (!SC->isValid())
174  return;
175 
177  PI = SchedModel->getWriteProcResBegin(SC),
178  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
179  const MCProcResourceDesc &PRD =
180  *SchedModel->getProcResource(PI->ProcResourceIdx);
181  std::string FU(PRD.Name);
182  // trim e.g. Z13_FXaUnit -> FXa
183  FU = FU.substr(FU.find("_") + 1);
184  size_t Pos = FU.find("Unit");
185  if (Pos != std::string::npos)
186  FU.resize(Pos);
187  if (FU == "LS") // LSUnit -> LSU
188  FU = "LSU";
189  OS << "/" << FU;
190 
191  if (PI->Cycles > 1)
192  OS << "(" << PI->Cycles << "cyc)";
193  }
194 
195  if (SC->NumMicroOps > 1)
196  OS << "/" << SC->NumMicroOps << "uops";
197  if (SC->BeginGroup && SC->EndGroup)
198  OS << "/GroupsAlone";
199  else if (SC->BeginGroup)
200  OS << "/BeginsGroup";
201  else if (SC->EndGroup)
202  OS << "/EndsGroup";
203  if (SU->isUnbuffered)
204  OS << "/Unbuffered";
205  if (has4RegOps(SU->getInstr()))
206  OS << "/4RegOps";
207 }
208 
209 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
210  dbgs() << "++ " << Msg;
211  dbgs() << ": ";
212 
213  if (CurGroupDbg.empty())
214  dbgs() << " <empty>\n";
215  else {
216  dbgs() << "{ " << CurGroupDbg << " }";
217  dbgs() << " (" << CurrGroupSize << " decoder slot"
218  << (CurrGroupSize > 1 ? "s":"")
219  << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
220  << ")\n";
221  }
222 }
223 
225  bool any = false;
226 
227  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
228  if (ProcResourceCounters[i] > 0) {
229  any = true;
230  break;
231  }
232 
233  if (!any)
234  return;
235 
236  dbgs() << "++ | Resource counters: ";
237  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
238  if (ProcResourceCounters[i] > 0)
239  dbgs() << SchedModel->getProcResource(i)->Name
240  << ":" << ProcResourceCounters[i] << " ";
241  dbgs() << "\n";
242 
243  if (CriticalResourceIdx != UINT_MAX)
244  dbgs() << "++ | Critical resource: "
245  << SchedModel->getProcResource(CriticalResourceIdx)->Name
246  << "\n";
247 }
248 
250  dumpCurrGroup("| Current decoder group");
251  dbgs() << "++ | Current cycle index: "
252  << getCurrCycleIdx() << "\n";
254  if (LastFPdOpCycleIdx != UINT_MAX)
255  dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
256 }
257 
258 #endif //NDEBUG
259 
260 void SystemZHazardRecognizer::clearProcResCounters() {
261  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
262  CriticalResourceIdx = UINT_MAX;
263 }
264 
265 static inline bool isBranchRetTrap(MachineInstr *MI) {
266  return (MI->isBranch() || MI->isReturn() ||
267  MI->getOpcode() == SystemZ::CondTrap);
268 }
269 
270 // Update state with SU as the next scheduled unit.
273  const MCSchedClassDesc *SC = getSchedClass(SU);
274  LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
275  dbgs() << "\n";);
276  LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
277 
278  // If scheduling an SU that must begin a new decoder group, move on
279  // to next group.
280  if (!fitsIntoCurrentGroup(SU))
281  nextGroup();
282 
284  if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
285 
286  LastEmittedMI = SU->getInstr();
287 
288  // After returning from a call, we don't know much about the state.
289  if (SU->isCall) {
290  LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
291  Reset();
292  LastEmittedMI = SU->getInstr();
293  return;
294  }
295 
296  // Increase counter for execution unit(s).
298  PI = SchedModel->getWriteProcResBegin(SC),
299  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
300  // Don't handle FPd together with the other resources.
301  if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
302  continue;
303  int &CurrCounter =
304  ProcResourceCounters[PI->ProcResourceIdx];
305  CurrCounter += PI->Cycles;
306  // Check if this is now the new critical resource.
307  if ((CurrCounter > ProcResCostLim) &&
308  (CriticalResourceIdx == UINT_MAX ||
309  (PI->ProcResourceIdx != CriticalResourceIdx &&
310  CurrCounter >
311  ProcResourceCounters[CriticalResourceIdx]))) {
312  LLVM_DEBUG(
313  dbgs() << "++ New critical resource: "
314  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
315  << "\n";);
316  CriticalResourceIdx = PI->ProcResourceIdx;
317  }
318  }
319 
320  // Make note of an instruction that uses a blocking resource (FPd).
321  if (SU->isUnbuffered) {
322  LastFPdOpCycleIdx = getCurrCycleIdx(SU);
323  LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
324  << "\n";);
325  }
326 
327  // Insert SU into current group by increasing number of slots used
328  // in current group.
329  CurrGroupSize += getNumDecoderSlots(SU);
330  CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
331  unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
332  assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
333  && "SU does not fit into decoder group!");
334 
335  // Check if current group is now full/ended. If so, move on to next
336  // group to be ready to evaluate more candidates.
337  if (CurrGroupSize >= GroupLim || SC->EndGroup)
338  nextGroup();
339 }
340 
342  const MCSchedClassDesc *SC = getSchedClass(SU);
343  if (!SC->isValid())
344  return 0;
345 
346  // If SU begins new group, it can either break a current group early
347  // or fit naturally if current group is empty (negative cost).
348  if (SC->BeginGroup) {
349  if (CurrGroupSize)
350  return 3 - CurrGroupSize;
351  return -1;
352  }
353 
354  // Similarly, a group-ending SU may either fit well (last in group), or
355  // end the group prematurely.
356  if (SC->EndGroup) {
357  unsigned resultingGroupSize =
358  (CurrGroupSize + getNumDecoderSlots(SU));
359  if (resultingGroupSize < 3)
360  return (3 - resultingGroupSize);
361  return -1;
362  }
363 
364  // An instruction with 4 register operands will not fit in last slot.
365  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
366  return 1;
367 
368  // Most instructions can be placed in any decoder slot.
369  return 0;
370 }
371 
372 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
373  assert (SU->isUnbuffered);
374  // If this is the first FPd op, it should be scheduled high.
375  if (LastFPdOpCycleIdx == UINT_MAX)
376  return true;
377  // If this is not the first PFd op, it should go into the other side
378  // of the processor to use the other FPd unit there. This should
379  // generally happen if two FPd ops are placed with 2 other
380  // instructions between them (modulo 6).
381  unsigned SUCycleIdx = getCurrCycleIdx(SU);
382  if (LastFPdOpCycleIdx > SUCycleIdx)
383  return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
384  return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
385 }
386 
389  int Cost = 0;
390 
391  const MCSchedClassDesc *SC = getSchedClass(SU);
392  if (!SC->isValid())
393  return 0;
394 
395  // For a FPd op, either return min or max value as indicated by the
396  // distance to any prior FPd op.
397  if (SU->isUnbuffered)
398  Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
399  // For other instructions, give a cost to the use of the critical resource.
400  else if (CriticalResourceIdx != UINT_MAX) {
402  PI = SchedModel->getWriteProcResBegin(SC),
403  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
404  if (PI->ProcResourceIdx == CriticalResourceIdx)
405  Cost = PI->Cycles;
406  }
407 
408  return Cost;
409 }
410 
412  bool TakenBranch) {
413  // Make a temporary SUnit.
414  SUnit SU(MI, 0);
415 
416  // Set interesting flags.
417  SU.isCall = MI->isCall();
418 
419  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
420  for (const MCWriteProcResEntry &PRE :
421  make_range(SchedModel->getWriteProcResBegin(SC),
422  SchedModel->getWriteProcResEnd(SC))) {
423  switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
424  case 0:
425  SU.hasReservedResource = true;
426  break;
427  case 1:
428  SU.isUnbuffered = true;
429  break;
430  default:
431  break;
432  }
433  }
434 
435  unsigned GroupSizeBeforeEmit = CurrGroupSize;
436  EmitInstruction(&SU);
437 
438  if (!TakenBranch && isBranchRetTrap(MI)) {
439  // NT Branch on second slot ends group.
440  if (GroupSizeBeforeEmit == 1)
441  nextGroup();
442  }
443 
444  if (TakenBranch && CurrGroupSize > 0)
445  nextGroup();
446 
447  assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
448  "Scheduler: unhandled terminator!");
449 }
450 
453  // Current decoder group
454  CurrGroupSize = Incoming->CurrGroupSize;
455  LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
456 
457  // Processor resources
458  ProcResourceCounters = Incoming->ProcResourceCounters;
459  CriticalResourceIdx = Incoming->CriticalResourceIdx;
460 
461  // FPd
462  LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
463  GrpCount = Incoming->GrpCount;
464 }
const SystemZRegisterInfo & getRegisterInfo() const
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:633
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
unsigned const TargetRegisterInfo * TRI
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void Reset() override
Reset - This callback is invoked when a new block of instructions is about to be schedule.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:211
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:649
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
static cl::opt< int > ProcResCostLim("procres-cost-lim", cl::Hidden, cl::desc("The OOO window for processor " "resources during scheduling."), cl::init(8))
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:423
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:293
HazardType getHazardType(SUnit *m, int Stalls=0) override
getHazardType - Return the hazard type of emitting this node.
void EmitInstruction(SUnit *SU) override
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
bool isValid() const
Definition: MCSchedule.h:127
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:657
static bool isBranchRetTrap(MachineInstr *MI)
void copyState(SystemZHazardRecognizer *Incoming)
Copy counters from end of single predecessor.
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:623
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:292
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isCall
Is a function call.
Definition: ScheduleDAG.h:279
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:377
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
void dumpCurrGroup(std::string Msg="") const
void emitInstruction(MachineInstr *MI, bool TakenBranch=false)
Wrap a non-scheduled instruction in an SU and emit it.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:188
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
CHAIN = SC CHAIN, Imm128 - System call.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:226
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:268
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
IRTranslator LLVM IR MI
#define LLVM_DEBUG(X)
Definition: Debug.h:123
void dumpSU(SUnit *SU, raw_ostream &OS) const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:246