26 #define DEBUG_TYPE "machine-scheduler" 30 class GCNMinRegScheduler {
35 Candidate(
const SUnit *SU_,
int Priority_ = 0)
36 : SU(SU_), Priority(Priority_) {}
43 std::vector<unsigned> NumPreds;
45 bool isScheduled(
const SUnit *SU)
const {
50 void setIsScheduled(
const SUnit *SU) {
55 unsigned getNumPreds(
const SUnit *SU)
const {
61 unsigned decNumPreds(
const SUnit *SU) {
69 int getReadySuccessors(
const SUnit *SU)
const;
70 int getNotReadySuccessors(
const SUnit *SU)
const;
72 template <
typename Calc>
73 unsigned findMax(
unsigned Num, Calc
C);
75 Candidate* pickCandidate();
77 void bumpPredsPriority(
const SUnit *SchedSU,
int Priority);
78 void releaseSuccessors(
const SUnit* SU,
int Priority);
88 NumPreds.resize(SUnits.size());
89 for (
unsigned I = 0;
I < SUnits.size(); ++
I)
90 NumPreds[
I] = SUnits[
I].NumPredsLeft;
93 int GCNMinRegScheduler::getReadySuccessors(
const SUnit *SU)
const {
94 unsigned NumSchedSuccs = 0;
96 bool wouldBeScheduled =
true;
98 auto PSU = PDep.getSUnit();
99 assert(!PSU->isBoundaryNode());
100 if (PSU != SU && !isScheduled(PSU)) {
101 wouldBeScheduled =
false;
105 NumSchedSuccs += wouldBeScheduled ? 1 : 0;
107 return NumSchedSuccs;
110 int GCNMinRegScheduler::getNotReadySuccessors(
const SUnit *SU)
const {
111 return SU->
Succs.size() - getReadySuccessors(SU);
114 template <
typename Calc>
115 unsigned GCNMinRegScheduler::findMax(
unsigned Num, Calc
C) {
116 assert(!RQ.empty() && Num <= RQ.size());
118 using T = decltype(
C(*RQ.begin())) ;
120 T Max = std::numeric_limits<T>::min();
122 for (
auto I = RQ.begin(); Num; --Num) {
140 GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
142 unsigned Num = RQ.size();
145 LLVM_DEBUG(
dbgs() <<
"\nSelecting max priority candidates among " << Num
147 Num = findMax(Num, [=](
const Candidate &
C) {
return C.Priority; });
150 LLVM_DEBUG(
dbgs() <<
"\nSelecting min non-ready producing candidate among " 152 Num = findMax(Num, [=](
const Candidate &C) {
154 int Res = getNotReadySuccessors(SU);
156 << Res <<
" successors, metric = " << -Res <<
'\n');
161 LLVM_DEBUG(
dbgs() <<
"\nSelecting most producing candidate among " << Num
163 Num = findMax(Num, [=](
const Candidate &C) {
165 auto Res = getReadySuccessors(SU);
167 <<
" successors, metric = " << Res <<
'\n');
172 Num = Num ? Num : RQ.size();
175 <<
"\nCan't find best candidate, selecting in program order among " 177 Num = findMax(Num, [=](
const Candidate &C) {
return -(int64_t)C.SU->NodeNum; });
184 void GCNMinRegScheduler::bumpPredsPriority(
const SUnit *SchedSU,
int Priority) {
186 for (
const auto &S : SchedSU->
Succs) {
187 if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
190 for (
const auto &
P : S.getSUnit()->Preds) {
191 auto PSU =
P.getSUnit();
192 assert(!PSU->isBoundaryNode());
193 if (PSU != SchedSU && !isScheduled(PSU)) {
199 while (!Worklist.empty()) {
200 auto SU = Worklist.pop_back_val();
202 for (
const auto &
P : SU->
Preds) {
203 if (!
P.getSUnit()->isBoundaryNode() && !isScheduled(
P.getSUnit()) &&
205 Worklist.push_back(
P.getSUnit());
209 <<
")'s non-ready successors of " << Priority
210 <<
" priority in ready queue: ");
211 const auto SetEnd = Set.
end();
213 if (Set.
find(
C.SU) != SetEnd) {
214 C.Priority = Priority;
221 void GCNMinRegScheduler::releaseSuccessors(
const SUnit* SU,
int Priority) {
222 for (
const auto &S : SU->
Succs) {
223 auto SuccSU = S.getSUnit();
226 assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
227 if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
228 RQ.push_front(*new (Alloc.
Allocate()) Candidate(SuccSU, Priority));
232 std::vector<const SUnit*>
235 const auto &SUnits = DAG.
SUnits;
236 std::vector<const SUnit*> Schedule;
237 Schedule.reserve(SUnits.size());
239 initNumPreds(SUnits);
243 for (
auto SU : TopRoots) {
244 RQ.push_back(*
new (Alloc.
Allocate()) Candidate(SU, StepNo));
246 releaseSuccessors(&DAG.
EntrySU, StepNo);
248 while (!RQ.empty()) {
254 <<
' ' <<
C.SU->NodeNum <<
"(P" <<
C.Priority <<
')';
257 auto C = pickCandidate();
263 releaseSuccessors(SU, StepNo);
264 Schedule.push_back(SU);
267 if (getReadySuccessors(SU) == 0)
268 bumpPredsPriority(SU, StepNo);
272 assert(SUnits.size() == Schedule.size());
281 GCNMinRegScheduler S;
282 return S.schedule(TopRoots, DAG);
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
virtual void dumpNode(const SUnit &SU) const =0
SmallVector< SDep, 4 > Preds
All sunit predecessors.
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
iterator find(ConstPtrType Ptr) const
Regular data dependence (aka true-dependence).
A simple intrusive list implementation.
std::vector< const SUnit * > makeMinRegSchedule(ArrayRef< const SUnit *> TopRoots, const ScheduleDAG &DAG)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * Allocate(size_t Size, size_t Alignment)
Allocate space at the specified alignment.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
SUnit EntrySU
Special node for the region entry.
unsigned NodeNum
Entry # of node in the node vector.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SmallVector< SDep, 4 > Succs
All sunit successors.
std::vector< SUnit > SUnits
The scheduling units.
Scheduling unit. This is a node in the scheduling DAG.