42 #define DEBUG_TYPE "machine-scheduler" 146 case NoCand:
return "NOCAND";
148 case Latency:
return "LATENCY";
150 case Depth:
return "DEPTH";
164 if (TryVal < CandVal) {
168 if (TryVal > CandVal) {
181 if (TryVal > CandVal) {
185 if (TryVal < CandVal) {
199 NodeNum2Index[SU->
NodeNum] = SUnits.size();
200 SUnits.push_back(SU);
204 void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
211 void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
212 SISchedCandidate &TryCand) {
214 if (!Cand.isValid()) {
219 if (Cand.SGPRUsage > 60 &&
240 Cand.HasLowLatencyNonWaitedParent,
248 if (TryCand.IsLowLatency &&
258 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
263 SUnit* SIScheduleBlock::pickNode() {
264 SISchedCandidate TopCand;
266 for (
SUnit* SU : TopReadySUs) {
267 SISchedCandidate TryCand;
268 std::vector<unsigned> pressure;
269 std::vector<unsigned> MaxPressure;
272 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
273 TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
274 TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
275 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
276 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
277 TryCand.HasLowLatencyNonWaitedParent =
278 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
279 tryCandidateTopDown(TopCand, TryCand);
280 if (TryCand.Reason !=
NoCand)
281 TopCand.setBest(TryCand);
294 for (
SUnit* SU : SUnits) {
295 if (!SU->NumPredsLeft)
296 TopReadySUs.push_back(SU);
299 while (!TopReadySUs.empty()) {
300 SUnit *SU = TopReadySUs[0];
301 ScheduledSUnits.push_back(SU);
320 if (InstSlot >= First && InstSlot <= Last)
332 DAG->initRPTracker(TopRPTracker);
333 DAG->initRPTracker(BotRPTracker);
334 DAG->initRPTracker(RPTracker);
338 for (
SUnit* SU : ScheduledSUnits) {
339 RPTracker.setPos(SU->getInstr());
344 RPTracker.closeRegion();
347 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
348 BotRPTracker.
addLiveRegs(RPTracker.getPressure().LiveOutRegs);
351 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
353 LiveInRegs.insert(RegMaskPair.RegUnit);
378 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
379 unsigned Reg = RegMaskPair.RegUnit;
384 LiveOutRegs.insert(Reg);
392 LiveInPressure = TopPressure.MaxSetPressure;
396 TopRPTracker.closeTop();
405 initRegPressure(BeginBlock, EndBlock);
412 for (
SUnit* SU : SUnits) {
413 if (!SU->NumPredsLeft)
414 TopReadySUs.push_back(SU);
417 while (!TopReadySUs.empty()) {
418 SUnit *SU = pickNode();
419 ScheduledSUnits.push_back(SU);
420 TopRPTracker.setPos(SU->
getInstr());
421 TopRPTracker.advance();
426 InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
430 assert(SUnits.size() == ScheduledSUnits.size() &&
431 TopReadySUs.empty());
432 for (
SUnit* SU : SUnits) {
434 SU->NumPredsLeft == 0);
441 void SIScheduleBlock::undoSchedule() {
442 for (
SUnit* SU : SUnits) {
443 SU->isScheduled =
false;
444 for (
SDep& Succ : SU->Succs) {
446 undoReleaseSucc(SU, &Succ);
449 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
450 ScheduledSUnits.clear();
454 void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
464 void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
473 dbgs() <<
"*** Scheduling failed! ***\n";
474 DAG->dumpNode(*SuccSU);
475 dbgs() <<
" has been released too many times!\n";
484 void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
488 if (SuccSU->
NodeNum >= DAG->SUnits.size())
491 if (BC->isSUInBlock(SuccSU,
ID) != InOrOutBlock)
494 releaseSucc(SU, &Succ);
496 TopReadySUs.push_back(SuccSU);
500 void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
503 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
504 if (I == TopReadySUs.end()) {
505 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
508 TopReadySUs.erase(I);
510 releaseSuccessors(SU,
true);
513 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
514 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
516 if (DAG->IsLowLatencySU[SU->
NodeNum]) {
518 std::map<unsigned, unsigned>::iterator I =
520 if (I != NodeNum2Index.end())
521 HasLowLatencyNonWaitedParent[I->second] = 1;
529 for (
SUnit* SU : SUnits) {
530 releaseSuccessors(SU,
false);
531 if (DAG->IsHighLatencySU[SU->
NodeNum])
532 HighLatencyBlock =
true;
534 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
539 unsigned PredID = Pred->
getID();
543 if (PredID ==
P->getID())
546 Preds.push_back(Pred);
551 return PredID == S.first->getID();
553 "Loop in the Block Graph!");
558 unsigned SuccID = Succ->
getID();
561 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
562 if (SuccID == S.first->getID()) {
570 ++NumHighLatencySuccessors;
571 Succs.push_back(std::make_pair(Succ, Kind));
575 "Loop in the Block Graph!");
580 dbgs() <<
"Block (" <<
ID <<
")\n";
584 dbgs() <<
"\nContains High Latency Instruction: " 585 << HighLatencyBlock <<
'\n';
586 dbgs() <<
"\nDepends On:\n";
588 P->printDebug(
false);
591 dbgs() <<
"\nSuccessors:\n";
592 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
594 dbgs() <<
"(Data Dep) ";
595 S.first->printDebug(
false);
599 dbgs() <<
"LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] <<
' ' 600 << LiveInPressure[DAG->getVGPRSetID()] <<
'\n';
601 dbgs() <<
"LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] <<
' ' 602 << LiveOutPressure[DAG->getVGPRSetID()] <<
"\n\n";
603 dbgs() <<
"LiveIns:\n";
604 for (
unsigned Reg : LiveInRegs)
607 dbgs() <<
"\nLiveOuts:\n";
608 for (
unsigned Reg : LiveOutRegs)
612 dbgs() <<
"\nInstructions:\n";
614 for (
const SUnit* SU : SUnits)
617 for (
const SUnit* SU : SUnits)
621 dbgs() <<
"///////////////////////\n";
635 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
636 Blocks.find(BlockVariant);
637 if (B == Blocks.end()) {
639 createBlocksForVariant(BlockVariant);
641 scheduleInsideBlocks();
643 Res.
Blocks = CurrentBlocks;
646 Blocks[BlockVariant] = Res;
656 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
659 void SIScheduleBlockCreator::colorHighLatenciesAlone() {
660 unsigned DAGSize = DAG->
SUnits.size();
662 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
665 CurrentColoring[SU->
NodeNum] = NextReservedID++;
672 for (
const auto &PredDep : SU.
Preds) {
673 if (PredDep.getSUnit() == &FromSU &&
680 void SIScheduleBlockCreator::colorHighLatenciesGroups() {
681 unsigned DAGSize = DAG->
SUnits.size();
682 unsigned NumHighLatencies = 0;
684 int Color = NextReservedID;
686 std::set<unsigned> FormingGroup;
688 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
694 if (NumHighLatencies == 0)
697 if (NumHighLatencies <= 6)
699 else if (NumHighLatencies <= 12)
707 unsigned CompatibleGroup =
true;
708 int ProposedColor =
Color;
709 std::vector<int> AdditionalElements;
721 for (
unsigned j : FormingGroup) {
723 std::vector<int> SubGraph;
736 else if (SubGraph.size() > 5) {
738 CompatibleGroup =
false;
743 for (
unsigned k : SubGraph) {
749 (CurrentColoring[k] != ProposedColor &&
750 CurrentColoring[k] != 0)) {
751 CompatibleGroup =
false;
757 CompatibleGroup =
false;
761 if (!CompatibleGroup)
765 CompatibleGroup =
false;
773 AdditionalElements.insert(AdditionalElements.end(),
774 SubGraph.begin(), SubGraph.end());
777 if (CompatibleGroup) {
778 FormingGroup.insert(SU.
NodeNum);
779 for (
unsigned j : AdditionalElements)
780 CurrentColoring[j] = ProposedColor;
781 CurrentColoring[SU.
NodeNum] = ProposedColor;
787 if (!CompatibleGroup) {
788 FormingGroup.clear();
789 Color = ++NextReservedID;
790 ProposedColor =
Color;
791 FormingGroup.insert(SU.
NodeNum);
792 CurrentColoring[SU.
NodeNum] = ProposedColor;
794 }
else if (Count == GroupSize) {
795 FormingGroup.clear();
796 Color = ++NextReservedID;
797 ProposedColor =
Color;
804 void SIScheduleBlockCreator::colorComputeReservedDependencies() {
805 unsigned DAGSize = DAG->
SUnits.size();
806 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
808 CurrentTopDownReservedDependencyColoring.clear();
809 CurrentBottomUpReservedDependencyColoring.clear();
811 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
812 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
819 std::set<unsigned> SUColors;
822 if (CurrentColoring[SU->
NodeNum]) {
823 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
832 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
833 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
836 if (SUColors.empty())
839 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
840 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
843 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
844 ColorCombinations.find(SUColors);
845 if (Pos != ColorCombinations.end()) {
846 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
848 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
850 ColorCombinations[SUColors] = NextNonReservedID++;
855 ColorCombinations.clear();
861 std::set<unsigned> SUColors;
864 if (CurrentColoring[SU->
NodeNum]) {
865 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
874 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
875 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
878 if (SUColors.empty())
881 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
882 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
885 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
886 ColorCombinations.find(SUColors);
887 if (Pos != ColorCombinations.end()) {
888 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
890 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
892 ColorCombinations[SUColors] = NextNonReservedID++;
898 void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
899 unsigned DAGSize = DAG->
SUnits.size();
900 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
905 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
907 std::pair<unsigned, unsigned> SUColors;
910 if (CurrentColoring[SU->
NodeNum])
913 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->
NodeNum];
914 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->
NodeNum];
916 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
917 ColorCombinations.find(SUColors);
918 if (Pos != ColorCombinations.end()) {
919 CurrentColoring[SU->
NodeNum] = Pos->second;
921 CurrentColoring[SU->
NodeNum] = NextNonReservedID;
922 ColorCombinations[SUColors] = NextNonReservedID++;
927 void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
928 unsigned DAGSize = DAG->
SUnits.size();
929 std::vector<int> PendingColoring = CurrentColoring;
932 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
933 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
936 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
937 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
938 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
939 CurrentTopDownReservedDependencyColoring.end()) == 0)
944 std::set<unsigned> SUColors;
945 std::set<unsigned> SUColorsPending;
947 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
950 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
951 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
958 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
959 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
960 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
961 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
966 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
967 PendingColoring[SU->
NodeNum] = *SUColors.begin();
970 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
972 CurrentColoring = PendingColoring;
976 void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
977 unsigned DAGSize = DAG->
SUnits.size();
978 unsigned PreviousColor;
979 std::set<unsigned> SeenColors;
984 PreviousColor = CurrentColoring[0];
986 for (
unsigned i = 1, e = DAGSize; i != e; ++i) {
988 unsigned CurrentColor = CurrentColoring[i];
989 unsigned PreviousColorSave = PreviousColor;
992 if (CurrentColor != PreviousColor)
993 SeenColors.insert(PreviousColor);
994 PreviousColor = CurrentColor;
996 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
999 if (SeenColors.find(CurrentColor) == SeenColors.end())
1002 if (PreviousColorSave != CurrentColor)
1003 CurrentColoring[i] = NextNonReservedID++;
1005 CurrentColoring[i] = CurrentColoring[i-1];
1009 void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
1010 unsigned DAGSize = DAG->
SUnits.size();
1014 std::set<unsigned> SUColors;
1016 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1028 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1030 if (SUColors.size() == 1)
1031 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1035 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1036 unsigned DAGSize = DAG->
SUnits.size();
1040 std::set<unsigned> SUColors;
1042 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1049 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1051 if (SUColors.size() == 1)
1052 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1056 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1057 unsigned DAGSize = DAG->
SUnits.size();
1061 std::set<unsigned> SUColors;
1063 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1070 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1072 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1073 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1077 void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1078 unsigned DAGSize = DAG->
SUnits.size();
1079 std::map<unsigned, unsigned> ColorCount;
1083 unsigned color = CurrentColoring[SU->
NodeNum];
1084 ++ColorCount[color];
1089 unsigned color = CurrentColoring[SU->
NodeNum];
1090 std::set<unsigned> SUColors;
1092 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1095 if (ColorCount[color] > 1)
1102 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1104 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1105 --ColorCount[color];
1106 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1107 ++ColorCount[*SUColors.begin()];
1112 void SIScheduleBlockCreator::cutHugeBlocks() {
1116 void SIScheduleBlockCreator::regroupNoUserInstructions() {
1117 unsigned DAGSize = DAG->
SUnits.size();
1118 int GroupID = NextNonReservedID++;
1122 bool hasSuccessor =
false;
1124 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1131 hasSuccessor =
true;
1134 CurrentColoring[SU->
NodeNum] = GroupID;
1138 void SIScheduleBlockCreator::colorExports() {
1139 unsigned ExportColor = NextNonReservedID++;
1159 for (
unsigned j : ExpGroup) {
1161 std::vector<int> SubGraph;
1177 for (
unsigned k : SubGraph) {
1185 ExpGroup.push_back(SUNum);
1190 for (
unsigned j : ExpGroup)
1191 CurrentColoring[j] = ExportColor;
1195 unsigned DAGSize = DAG->
SUnits.size();
1196 std::map<unsigned,unsigned> RealID;
1198 CurrentBlocks.clear();
1199 CurrentColoring.clear();
1200 CurrentColoring.resize(DAGSize, 0);
1201 Node2CurrentBlock.clear();
1207 NextNonReservedID = DAGSize + 1;
1212 colorHighLatenciesGroups();
1214 colorHighLatenciesAlone();
1215 colorComputeReservedDependencies();
1216 colorAccordingToReservedDependencies();
1217 colorEndsAccordingToDependencies();
1219 colorForceConsecutiveOrderInGroup();
1220 regroupNoUserInstructions();
1221 colorMergeConstantLoadsNextGroup();
1222 colorMergeIfPossibleNextGroupOnlyForReserved();
1226 Node2CurrentBlock.resize(DAGSize, -1);
1227 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1230 if (RealID.find(Color) == RealID.end()) {
1231 int ID = CurrentBlocks.size();
1232 BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG,
this, ID));
1233 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1236 CurrentBlocks[RealID[
Color]]->addUnit(SU);
1241 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1243 int SUID = Node2CurrentBlock[i];
1248 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1249 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1256 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1257 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1262 for (
unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1267 for (
unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1279 for (; I != End; ++
I) {
1280 if (!I->isDebugInstr())
1286 void SIScheduleBlockCreator::topologicalSort() {
1287 unsigned DAGSize = CurrentBlocks.size();
1288 std::vector<int> WorkList;
1292 WorkList.reserve(DAGSize);
1293 TopDownIndex2Block.resize(DAGSize);
1294 TopDownBlock2Index.resize(DAGSize);
1295 BottomUpIndex2Block.resize(DAGSize);
1297 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1299 unsigned Degree = Block->
getSuccs().size();
1300 TopDownBlock2Index[i] = Degree;
1302 WorkList.push_back(i);
1307 while (!WorkList.empty()) {
1308 int i = WorkList.back();
1310 WorkList.pop_back();
1311 TopDownBlock2Index[i] = --
Id;
1312 TopDownIndex2Block[
Id] = i;
1314 if (!--TopDownBlock2Index[Pred->getID()])
1315 WorkList.push_back(Pred->getID());
1321 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1324 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1325 "Wrong Top Down topological sorting");
1330 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1331 TopDownIndex2Block.rend());
1334 void SIScheduleBlockCreator::scheduleInsideBlocks() {
1335 unsigned DAGSize = CurrentBlocks.size();
1341 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1342 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1352 std::vector<MachineBasicBlock::iterator> PosOld;
1353 std::vector<MachineBasicBlock::iterator> PosNew;
1354 PosOld.reserve(DAG->
SUnits.size());
1355 PosNew.reserve(DAG->
SUnits.size());
1357 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1358 int BlockIndice = TopDownIndex2Block[i];
1362 for (
SUnit* SU : SUs) {
1365 PosOld.push_back(Pos);
1366 if (&*CurrentTopFastSched == MI) {
1367 PosNew.push_back(Pos);
1368 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1380 PosNew.push_back(CurrentTopFastSched);
1389 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1392 Block->
schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1397 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1409 LLVM_DEBUG(
for (
unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1415 void SIScheduleBlockCreator::fillStats() {
1416 unsigned DAGSize = CurrentBlocks.size();
1418 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1419 int BlockIndice = TopDownIndex2Block[i];
1426 if (Depth < Pred->
Depth + Pred->getCost())
1427 Depth = Pred->Depth + Pred->getCost();
1433 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
1434 int BlockIndice = BottomUpIndex2Block[i];
1439 unsigned Height = 0;
1441 Height =
std::max(Height, Succ.first->Height + Succ.first->getCost());
1452 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1453 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1454 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1466 LiveOutRegsNumUsages.resize(Blocks.size());
1467 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1473 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1474 std::set<unsigned>::iterator RegPos = PredOutRegs.find(
Reg);
1476 if (RegPos != PredOutRegs.end()) {
1488 ++LiveOutRegsNumUsages[PredID][
Reg];
1492 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1493 BlockNumPredsLeft.resize(Blocks.size());
1494 BlockNumSuccsLeft.resize(Blocks.size());
1496 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1498 BlockNumPredsLeft[i] = Block->
getPreds().size();
1499 BlockNumSuccsLeft[i] = Block->
getSuccs().size();
1503 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1509 std::set<unsigned> InRegs = DAG->
getInRegs();
1510 addLiveRegs(InRegs);
1516 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1520 const std::set<unsigned> &OutRegs = Block->
getOutRegs();
1522 if (OutRegs.find(
Reg) == OutRegs.end())
1525 ++LiveOutRegsNumUsages[
ID][
Reg];
1532 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1537 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1538 std::set<unsigned>::iterator RegPos = PredOutRegs.find(
Reg);
1540 if (RegPos != PredOutRegs.end()) {
1547 ++LiveRegsConsumers[
Reg];
1551 for (
unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1553 if (BlockNumPredsLeft[i] == 0) {
1554 ReadyBlocks.push_back(Block);
1559 BlocksScheduled.push_back(Block);
1560 blockScheduled(Block);
1564 : BlocksScheduled) {
1565 dbgs() <<
' ' << Block->getID();
1569 bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1570 SIBlockSchedCandidate &TryCand) {
1571 if (!Cand.isValid()) {
1578 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1585 TryCand, Cand,
Depth))
1588 Cand.NumHighLatencySuccessors,
1594 bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1595 SIBlockSchedCandidate &TryCand) {
1596 if (!Cand.isValid()) {
1605 Cand.NumSuccessors > 0,
1617 SIBlockSchedCandidate Cand;
1618 std::vector<SIScheduleBlock*>::iterator Best;
1620 if (ReadyBlocks.empty())
1624 VregCurrentUsage, SregCurrentUsage);
1625 if (VregCurrentUsage > maxVregUsage)
1626 maxVregUsage = VregCurrentUsage;
1627 if (SregCurrentUsage > maxSregUsage)
1628 maxSregUsage = SregCurrentUsage;
1631 : ReadyBlocks)
dbgs()
1632 << Block->
getID() <<
' ';
1633 dbgs() <<
"\nCurrent Live:\n";
1638 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1639 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1641 Cand.Block =
nullptr;
1642 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1643 E = ReadyBlocks.end();
I !=
E; ++
I) {
1644 SIBlockSchedCandidate TryCand;
1646 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1647 TryCand.VGPRUsageDiff =
1648 checkRegUsageImpact(TryCand.Block->getInRegs(),
1650 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1651 TryCand.NumHighLatencySuccessors =
1652 TryCand.Block->getNumHighLatencySuccessors();
1653 TryCand.LastPosHighLatParentScheduled =
1654 (
unsigned int) std::max<int> (0,
1655 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1656 LastPosWaitedHighLatency);
1657 TryCand.Height = TryCand.Block->Height;
1659 if (VregCurrentUsage > 120 ||
1661 if (!tryCandidateRegUsage(Cand, TryCand) &&
1663 tryCandidateLatency(Cand, TryCand);
1665 if (!tryCandidateLatency(Cand, TryCand))
1666 tryCandidateRegUsage(Cand, TryCand);
1668 if (TryCand.Reason !=
NoCand) {
1669 Cand.setBest(TryCand);
1671 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' ' 1677 dbgs() <<
"Is a block with high latency instruction: " 1678 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1679 dbgs() <<
"Position of last high latency dependency: " 1680 << Cand.LastPosHighLatParentScheduled <<
'\n';
1681 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1685 ReadyBlocks.erase(Best);
1691 void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1692 for (
unsigned Reg : Regs) {
1697 (void) LiveRegs.insert(
Reg);
1701 void SIScheduleBlockScheduler::decreaseLiveRegs(
SIScheduleBlock *Block,
1702 std::set<unsigned> &Regs) {
1703 for (
unsigned Reg : Regs) {
1705 std::set<unsigned>::iterator Pos = LiveRegs.find(
Reg);
1706 assert (Pos != LiveRegs.end() &&
1707 LiveRegsConsumers.find(
Reg) != LiveRegsConsumers.end() &&
1708 LiveRegsConsumers[
Reg] >= 1);
1709 --LiveRegsConsumers[
Reg];
1710 if (LiveRegsConsumers[
Reg] == 0)
1711 LiveRegs.erase(Pos);
1715 void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1716 for (
const auto &Block : Parent->
getSuccs()) {
1717 if (--BlockNumPredsLeft[Block.first->
getID()] == 0)
1718 ReadyBlocks.push_back(Block.first);
1722 LastPosHighLatencyParentScheduled[Block.first->
getID()] = NumBlockScheduled;
1726 void SIScheduleBlockScheduler::blockScheduled(
SIScheduleBlock *Block) {
1727 decreaseLiveRegs(Block, Block->
getInRegs());
1729 releaseBlockSuccs(Block);
1730 for (std::map<unsigned, unsigned>::iterator RegI =
1731 LiveOutRegsNumUsages[Block->
getID()].begin(),
1732 E = LiveOutRegsNumUsages[Block->
getID()].end(); RegI !=
E; ++RegI) {
1733 std::pair<unsigned, unsigned> RegP = *RegI;
1735 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1736 LiveRegsConsumers[RegP.first] == 0);
1737 LiveRegsConsumers[RegP.first] += RegP.second;
1739 if (LastPosHighLatencyParentScheduled[Block->
getID()] >
1740 (
unsigned)LastPosWaitedHighLatency)
1741 LastPosWaitedHighLatency =
1742 LastPosHighLatencyParentScheduled[Block->
getID()];
1743 ++NumBlockScheduled;
1747 SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1748 std::set<unsigned> &OutRegs) {
1749 std::vector<int> DiffSetPressure;
1752 for (
unsigned Reg : InRegs) {
1756 if (LiveRegsConsumers[
Reg] > 1)
1759 for (; PSetI.
isValid(); ++PSetI) {
1760 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1764 for (
unsigned Reg : OutRegs) {
1769 for (; PSetI.
isValid(); ++PSetI) {
1770 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1774 return DiffSetPressure;
1780 SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1781 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1784 std::vector<SIScheduleBlock*> ScheduledBlocks;
1787 ScheduledBlocks = Scheduler.
getBlocks();
1789 for (
unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1793 for (
SUnit* SU : SUs)
1818 void SIScheduleDAGMI::topologicalSort() {
1831 void SIScheduleDAGMI::moveLowLatencies() {
1832 unsigned DAGSize =
SUnits.size();
1833 int LastLowLatencyUser = -1;
1834 int LastLowLatencyPos = -1;
1836 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1838 bool IsLowLatencyUser =
false;
1839 unsigned MinPos = 0;
1844 IsLowLatencyUser =
true;
1848 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1849 if (PredPos >= MinPos)
1850 MinPos = PredPos + 1;
1854 unsigned BestPos = LastLowLatencyUser + 1;
1855 if ((
int)BestPos <= LastLowLatencyPos)
1856 BestPos = LastLowLatencyPos + 1;
1857 if (BestPos < MinPos)
1860 for (
unsigned u = i; u > BestPos; --u) {
1861 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1862 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1864 ScheduledSUnits[BestPos] = SU->
NodeNum;
1865 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1867 LastLowLatencyPos = BestPos;
1868 if (IsLowLatencyUser)
1869 LastLowLatencyUser = BestPos;
1870 }
else if (IsLowLatencyUser) {
1871 LastLowLatencyUser = i;
1875 bool CopyForLowLat =
false;
1879 CopyForLowLat =
true;
1885 for (
unsigned u = i; u > MinPos; --u) {
1886 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1887 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1889 ScheduledSUnits[MinPos] = SU->
NodeNum;
1890 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1897 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1898 SUnits[i].isScheduled =
false;
1899 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1900 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1901 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1902 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1907 template<
typename _Iterator>
void 1909 unsigned &VgprUsage,
unsigned &SgprUsage) {
1912 for (_Iterator RegI = First; RegI != End; ++RegI) {
1913 unsigned Reg = *RegI;
1918 for (; PSetI.
isValid(); ++PSetI) {
1919 if (*PSetI == VGPRSetID)
1921 else if (*PSetI == SGPRSetID)
1947 SUnitsLinksBackup =
SUnits;
1956 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1988 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
2009 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
2016 ScheduledSUnits = Best.
SUs;
2017 ScheduledSUnitsInv.resize(
SUnits.size());
2019 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
2020 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
2030 for (std::vector<unsigned>::iterator
I = ScheduledSUnits.begin(),
2031 E = ScheduledSUnits.end();
I !=
E; ++
I) {
2045 dbgs() <<
"*** Final schedule for "
Interface definition for SIRegisterInfo.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
std::vector< SIScheduleBlock * > Blocks
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
SIScheduleDAGMI(MachineSchedContext *C)
void dumpSchedule() const
dump the scheduled Sequence.
This class represents lattice values for constants.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries...
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
SIScheduleCandReason Reason
void dump() const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
std::vector< unsigned > IsLowLatencySU
void addUnit(SUnit *SU)
Functions for Block construction.
std::vector< SIScheduleBlock * > getBlocks()
static MachineBasicBlock::iterator nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::const_iterator End)
Non-const version.
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
std::unique_ptr< MachineSchedStrategy > SchedImpl
SmallVector< SDep, 4 > Preds
All sunit predecessors.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
bool isScheduled
True once scheduled.
std::vector< unsigned > LowLatencyOffset
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Regular data dependence (aka true-dependence).
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
static const char * getReasonStr(SIScheduleCandReason Reason)
std::set< unsigned > getOutRegs()
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
std::set< unsigned > getInRegs()
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
static def_instr_iterator def_instr_end()
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
unsigned NumPredsLeft
of preds not scheduled.
std::vector< int > TopDownIndex2Block
struct SIScheduleBlockResult scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, SISchedulerBlockSchedulerVariant ScheduleVariant)
SI Machine Scheduler interface.
void printDebug(bool Full)
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
reverse_iterator rbegin()
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
unsigned const MachineRegisterInfo * MRI
~SIScheduleDAGMI() override
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
bool isSUInBlock(SUnit *SU, unsigned ID)
std::vector< SUnit * > getScheduledUnits()
std::vector< int > TopDownIndex2SU
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
MachineRegisterInfo * getMRI()
MachineBasicBlock * getBB()
Track the current register pressure at some position in the instruction stream, and remember the high...
unsigned getSGPRPressureSet() const
const TargetRegisterInfo * getTRI()
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
MachineBasicBlock::iterator getCurrentBottom()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
unsigned getVGPRPressureSet() const
static bool isEXP(const MachineInstr &MI)
unsigned WeakPredsLeft
of weak preds not scheduled.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::set< unsigned > & getOutRegs()
Color
A "color", which is either even or odd.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
bool isDebugValue() const
std::vector< unsigned > IsHighLatencySU
MachineOperand class - Representation of each machine instruction operand.
std::vector< unsigned > SUs
SISchedulerBlockCreatorVariant
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineBasicBlock::iterator getCurrentTop()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
bool isHighLatencyBlock()
def_instr_iterator def_instr_begin(unsigned RegNo) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
ScheduleDAGTopologicalSort * GetTopo()
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Interface definition for SIInstrInfo.
void addPred(SIScheduleBlock *Pred)
const TargetRegisterInfo * TRI
Target processor register info.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
std::vector< int > BottomUpIndex2SU
Iterate over the pressure sets affected by the given physical or virtual register.
PSetIterator getPressureSets(unsigned RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
bool isHighLatencyInstruction(const MachineInstr &MI) const
bool isLowLatencyInstruction(const MachineInstr &MI) const
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
const TargetInstrInfo * TII
Target instruction information.
void restoreSULinksLeft()
unsigned NodeNum
Entry # of node in the node vector.
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
std::set< unsigned > & getInRegs()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::vector< int > TopDownBlock2Index
const std::vector< SIScheduleBlock * > & getPreds() const
void setRepeat(SIScheduleCandReason R)
SmallVector< SDep, 4 > Succs
All sunit successors.
~SIScheduleBlockCreator()
static const Function * getParent(const Value *V)
bool isWeak() const
Tests if this a weak dependence.
Machine Instruction Scheduler
void setPos(MachineBasicBlock::const_iterator Pos)
unsigned getWeight() const
MachineRegisterInfo & MRI
Virtual/real register map.
bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const final
std::vector< SUnit > SUnits
The scheduling units.
for(unsigned i=Desc.getNumOperands(), e=OldMI.getNumOperands();i !=e;++i)
SlotIndex - An opaque wrapper around machine indexes.
unsigned getVGPRSetID() const
RegPressureTracker TopRPTracker
SISchedulerBlockSchedulerVariant
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block...
Scheduling unit. This is a node in the scheduling DAG.