#define DEBUG_TYPE "machine-scheduler"

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  // ...
  CurInstKind = IDOther;
  OccupedSlotsMask = 31;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;
  // ...
}
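// Note on the initial state: OccupedSlotsMask = 31 marks all five VLIW
// slots (X, Y, Z, W and Trans) as occupied, so the first pickAlu() call
// starts a fresh instruction group. InstKindLimit caps how many
// instructions a clause of each kind may contain; the fetch-clause limit
// (elided above) comes from the subtarget's getTexVTXClauseSize().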
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst) {
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount && "GPRCount cannot be 0");
  return 248 / GPRCount;
}
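// 248 is presumably the usable per-SIMD GPR budget on this hardware
// generation (a 256-entry register file minus a few reserved entries);
// dividing it by the per-wavefront GPR requirement yields how many
// wavefronts can be resident at once.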
SUnit *R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = nullptr;
  NextInstKind = IDOther;
  IsTopNode = false;

  // Check whether we should switch the kind of clause being built.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
                          (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
                            (!Available[IDFetch].empty() ||
                             !Available[IDOther].empty());

  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
    // Heuristic from the AMD Accelerated Parallel Processing OpenCL
    // Programming Guide: the approximate number of wavefronts needed for
    // TEX instructions to hide ALU latency is
    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU)).
    float ALUFetchRationEstimate =
        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
        (FetchInstCount + Available[IDFetch].size());
    if (ALUFetchRationEstimate == 0) {
      AllowSwitchFromAlu = true;
    } else {
      unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
      LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
      unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
      if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
        AllowSwitchFromAlu = true;
    }
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
              (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a fetch instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick any other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  LLVM_DEBUG(if (SU) {
    dbgs() << " ** Pick node **\n";
    DAG->dumpNode(*SU);
  } else {
    dbgs() << "NO NODE \n";
    for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
      const SUnit &S = DAG->SUnits[i];
      if (!S.isScheduled)
        DAG->dumpNode(S);
    }
  });

  return SU;
}
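// A note on the ratio above: 62.5f folds the guide's numbers together
// (500 TEX cycles / 8 ALU cycles). The operands of the division are all
// integer counts, so ALUFetchRationEstimate is truncated before being
// stored in the float; a region with more fetch than ALU work truncates
// to 0 and is handled by the explicit == 0 check. The denominator cannot
// be zero because the branch is guarded by !Available[IDFetch].empty().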
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask |= 31;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    ++AluInstCount;
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4; // fills all four vector slots
      break;
    case AluDiscarded:
      break;
    default:
      ++CurEmitted;
      // Each ALU literal takes an extra slot in the clause.
      for (MachineOperand &MO : SU->getInstr()->operands())
        if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
          ++CurEmitted;
    }
  } else {
    ++CurEmitted;
  }

  LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch)
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  else
    ++FetchInstCount;
}
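// Moving Pending[IDFetch] into Available[IDFetch] after every non-fetch
// instruction appears intended to keep newly ready fetch work visible to
// the ALU/fetch balancing heuristic in pickNode(), which only inspects
// the Available queues.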
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }
  int IK = getInstKind(SU);
  // There is no export clause, so an IDOther node can be scheduled as soon
  // as it is ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return RC->contains(Reg);
  return MRI->getRegClass(Reg) == RC;
}
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();
  // ...
  switch (MI->getOpcode()) {
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
    return AluT_XYZW;
  // ...
  }

  // Instructions that must fill an instruction group on their own.
  if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
      MI->getOpcode() == R600::GROUP_BARRIER)
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  // ... R600::sub0..sub3 map to AluT_X..AluT_W ...
  }

  // Is the result already constrained to an X/Y/Z/W register class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}
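// This classification drives pickAlu()/AttemptFillSlot(): an AluT_<C> unit
// may only occupy channel C of the group, AluT_XYZW consumes all four
// vector slots, AluTrans targets the scalar Trans slot, and AluAny can be
// placed anywhere and gets pinned to a channel later via AssignSlot().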
int R600SchedStrategy::getInstKind(SUnit *SU) {
  int Opcode = SU->getInstr()->getOpcode();
  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;
  if (TII->isALUInstr(Opcode))
    return IDAlu;
  switch (Opcode) {
  // ...
  case R600::CONST_COPY:
  case R600::INTERP_PAIR_XY:
  case R600::INTERP_PAIR_ZW:
  case R600::INTERP_VEC_LOAD:
    return IDAlu; // these are lowered to ALU instructions later
  default:
    return IDOther;
  }
}
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
  if (Q.empty())
    return nullptr;
  // Scan from the back of the queue for an instruction that still fits the
  // constant-read limits of the group being built.
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) &&
        (!AnyALU || !TII->isVectorOnly(SU->getInstr()->getOpcode()))) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    }
    InstructionsGroupCandidate.pop_back();
  }
  return nullptr;
}
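// (It + 1).base() is the standard idiom for turning a reverse_iterator
// into the forward iterator that addresses the same element, which is the
// form std::vector::erase() requires.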
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}
void R600SchedStrategy::PrepareNextSlot() {
  LLVM_DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}
void R600SchedStrategy::AssignSlot(MachineInstr *MI, unsigned Slot) {
  int DstIndex = TII->getOperandIdx(*MI, R600::OpName::dst);
  if (DstIndex == -1)
    return;
  // ... constrain the destination register's class so the result is
  // assigned to the requested channel ...
}
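// A sketch of the elided body, assuming it mirrors the register classes
// used in getAluKind(): Slot 0..3 would constrain the destination register
// to R600_TReg32_{X,Y,Z,W}RegClass via MRI->constrainRegClass(), tying a
// previously unconstrained (AluAny) result to the channel it was just
// scheduled into.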
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  // Prefer an instruction already tied to this channel.
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
  if (SlotedSU)
    return SlotedSU;
  // Otherwise take an unconstrained one and pin it to the channel.
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}
unsigned R600SchedStrategy::AvailablesAluCount() const {
  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
         AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
         AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
         AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
         AvailableAlus[AluPredX].size();
}
SUnit *R600SchedStrategy::pickAlu() {
  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
    if (!OccupedSlotsMask) {
      // A predicate write takes an instruction group by itself.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluPredX], false);
      }
      // Flush physical-register copies (register allocation will discard
      // them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask |= 31;
        return PopInst(AvailableAlus[AluDiscarded], false);
      }
      // If an instruction needs all of T_XYZW, schedule it alone.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask |= 15;
        return PopInst(AvailableAlus[AluT_XYZW], false);
      }
    }
    bool TransSlotOccuped = OccupedSlotsMask & 16;
    if (!TransSlotOccuped && VLIW5) {
      if (!AvailableAlus[AluTrans].empty()) {
        OccupedSlotsMask |= 16;
        return PopInst(AvailableAlus[AluTrans], false);
      }
      SUnit *SU = AttemptFillSlot(3, true);
      if (SU) {
        OccupedSlotsMask |= 16;
        return SU;
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan, false);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return nullptr;
}
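// OccupedSlotsMask is a five-bit occupancy map of the current VLIW bundle:
// bits 0-3 are the X/Y/Z/W vector slots and bit 4 (value 16) is the scalar
// Trans slot, which exists only on VLIW5 parts (VLIW5 is false on Cayman,
// a VLIW4 design). Setting all bits (|= 31) forces PrepareNextSlot() to
// open a new group before anything else can be placed.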
SUnit *R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = nullptr;
  std::vector<SUnit *> &AQ = Available[QID];
  if (AQ.empty())
    MoveUnits(Pending[QID], AQ);
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.pop_back();
  }
  return SU;
}
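// pickOther() drains the Available queue from the back and only refills it
// from Pending once it runs dry, so fetch and "other" instructions tend to
// be issued in batches, which suits the clause-oriented issue model of the
// hardware.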