66 #include "llvm/Config/llvm-config.h" 95 #define DEBUG_TYPE "pipeliner" 97 STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
98 STATISTIC(NumPipelined,
"Number of loops software pipelined");
99 STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
104 cl::desc(
"Enable Software Pipelining"));
113 cl::desc(
"Size limit for the MII."),
119 cl::desc(
"Maximum stages allowed in the generated scheduled."),
126 cl::desc(
"Prune dependences between unrelated Phi nodes."),
133 cl::desc(
"Prune loop carried order dependences."),
150 cl::desc(
"Enable CopyToPhi DAG Mutation"));
154 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
162 "Modulo Software Pipelining",
false,
false)
172 if (skipFunction(mf.getFunction()))
178 if (mf.getFunction().getAttributes().hasAttribute(
184 MLI = &getAnalysis<MachineLoopInfo>();
185 MDT = &getAnalysis<MachineDominatorTree>();
186 TII = MF->getSubtarget().getInstrInfo();
187 RegClassInfo.runOnMachineFunction(*MF);
199 bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
200 bool Changed =
false;
201 for (
auto &InnerLoop : L)
202 Changed |= scheduleLoop(*InnerLoop);
214 if (!canPipelineLoop(L))
219 Changed = swingModuloScheduler(L);
227 bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
254 SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
261 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
272 auto Copy =
BuildMI(PredB, At, DL,
TII->
get(TargetOpcode::COPY), NewReg)
286 bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
287 assert(L.
getBlocks().size() == 1 &&
"SMS works on single blocks only.");
309 return SMS.hasNewSchedule();
317 addLoopCarriedDependences(AA);
318 updatePhiDependences();
319 Topo.InitDAGTopologicalSorting();
325 findCircuits(NodeSets);
329 unsigned ResMII = calculateResMII();
330 unsigned RecMII = calculateRecMII(NodeSets);
340 <<
", res=" << ResMII <<
")\n");
350 computeNodeFunctions(NodeSets);
352 registerPressureFilter(NodeSets);
354 colocateNodeSets(NodeSets);
356 checkNodeSets(NodeSets);
359 for (
auto &
I : NodeSets) {
360 dbgs() <<
" Rec NodeSet ";
365 std::stable_sort(NodeSets.
begin(), NodeSets.
end(), std::greater<NodeSet>());
367 groupRemainingNodes(NodeSets);
369 removeDuplicateNodes(NodeSets);
372 for (
auto &
I : NodeSets) {
373 dbgs() <<
" NodeSet ";
378 computeNodeOrder(NodeSets);
381 checkValidNodeOrder(Circuits);
384 Scheduled = schedulePipeline(Schedule);
398 generatePipelinedLoop(Schedule);
415 unsigned &InitVal,
unsigned &LoopVal) {
426 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
450 while (!Worklist.
empty()) {
455 if (Visited.
count(SuccSU))
487 for (
Value *V : Objs) {
500 void SwingSchedulerDAG::addLoopCarriedDependences(
AliasAnalysis *AA) {
502 Value *UnknownValue =
504 for (
auto &SU : SUnits) {
507 PendingLoads.
clear();
513 for (
auto V : Objs) {
522 for (
auto V : Objs) {
524 PendingLoads.
find(V);
525 if (I == PendingLoads.
end())
527 for (
auto Load : I->second) {
535 int64_t Offset1, Offset2;
539 (int)Offset1 < (
int)Offset2) {
541 "What happened to the chain edge?");
595 void SwingSchedulerDAG::updatePhiDependences() {
603 unsigned HasPhiUse = 0;
604 unsigned HasPhiDef = 0;
612 unsigned Reg = MOI->getReg();
616 UI =
MRI.use_instr_begin(Reg),
617 UE =
MRI.use_instr_end();
620 SUnit *SU = getSUnit(UseMI);
621 if (SU !=
nullptr && UseMI->
isPHI()) {
630 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
635 }
else if (MOI->isUse()) {
638 if (DefMI ==
nullptr)
640 SUnit *SU = getSUnit(DefMI);
641 if (SU !=
nullptr && DefMI->
isPHI()) {
651 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
660 for (
auto &PI :
I.Preds) {
663 if (
I.getInstr()->isPHI()) {
672 for (
int i = 0, e = RemoveDeps.
size(); i != e; ++i)
673 I.removePred(RemoveDeps[i]);
679 void SwingSchedulerDAG::changeDependences() {
684 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
685 int64_t NewOffset = 0;
686 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
691 unsigned OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
695 SUnit *DefSU = getSUnit(DefMI);
702 SUnit *LastSU = getSUnit(LastMI);
706 if (Topo.IsReachable(&
I, LastSU))
713 if (
P->getSUnit() == DefSU)
715 for (
int i = 0, e = Deps.
size(); i != e; i++) {
716 Topo.RemovePred(&
I, Deps[i].getSUnit());
717 I.removePred(Deps[i]);
721 for (
auto &
P : LastSU->
Preds)
724 for (
int i = 0, e = Deps.
size(); i != e; i++) {
725 Topo.RemovePred(LastSU, Deps[i].getSUnit());
732 Topo.AddPred(LastSU, &
I);
737 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
745 struct FuncUnitSorter {
754 unsigned minFuncUnits(
const MachineInstr *Inst,
unsigned &
F)
const {
756 unsigned min = UINT_MAX;
760 unsigned funcUnits = IS->getUnits();
762 if (numAlternatives < min) {
763 min = numAlternatives;
780 unsigned FuncUnits = IS->getUnits();
782 Resources[FuncUnits]++;
788 unsigned F1 = 0, F2 = 0;
789 unsigned MFUs1 = minFuncUnits(IS1, F1);
790 unsigned MFUs2 = minFuncUnits(IS2, F2);
791 if (MFUs1 == 1 && MFUs2 == 1)
793 return MFUs1 > MFUs2;
805 unsigned SwingSchedulerDAG::calculateResMII() {
817 FUS.calcCriticalResources(*
I);
824 FuncUnitOrder.push(&*
I);
826 while (!FuncUnitOrder.empty()) {
833 unsigned NumCycles = getSUnit(MI)->Latency;
834 unsigned ReservedCycles = 0;
837 for (
unsigned C = 0;
C < NumCycles; ++
C)
839 if ((*RI++)->canReserveResources(*MI)) {
845 for (
unsigned C = 0;
C < ReservedCycles; ++
C) {
847 (*RI)->reserveResources(*MI);
850 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
858 int Resmii = Resources.
size();
874 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
877 for (
NodeSet &Nodes : NodeSets) {
881 unsigned Delay = Nodes.getLatency();
882 unsigned Distance = 1;
885 unsigned CurMII = (Delay + Distance - 1) / Distance;
886 Nodes.setRecMII(CurMII);
898 for (
unsigned i = 0, e = SUnits.size(); i != e; ++i) {
899 SUnit *SU = &SUnits[i];
904 DepsAdded.
push_back(std::make_pair(SU, *IP));
924 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
928 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
931 for (
auto &
SI : SUnits[i].Succs) {
935 int N =
SI.getSUnit()->NodeNum;
937 auto Dep = OutputDeps.find(BackEdge);
938 if (Dep != OutputDeps.end()) {
939 BackEdge = Dep->second;
940 OutputDeps.erase(Dep);
942 OutputDeps[
N] = BackEdge;
946 if (
SI.getSUnit()->isBoundaryNode() ||
SI.isArtificial() ||
947 (
SI.getKind() ==
SDep::Anti && !
SI.getSUnit()->getInstr()->isPHI()))
949 int N =
SI.getSUnit()->NodeNum;
950 if (!Added.test(N)) {
951 AdjK[i].push_back(N);
957 for (
auto &PI : SUnits[i].Preds) {
958 if (!SUnits[i].getInstr()->mayStore() ||
961 if (PI.getKind() ==
SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
962 int N = PI.getSUnit()->NodeNum;
963 if (!Added.test(N)) {
964 AdjK[i].push_back(N);
971 for (
auto &OD : OutputDeps)
972 if (!Added.test(OD.second)) {
973 AdjK[OD.first].push_back(OD.second);
974 Added.set(OD.second);
980 bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
982 SUnit *SV = &SUnits[V];
987 for (
auto W : AdjK[V]) {
988 if (NumPaths > MaxPaths)
994 NodeSets.push_back(
NodeSet(Stack.begin(), Stack.end()));
998 }
else if (!Blocked.test(
W)) {
999 if (circuit(
W, S, NodeSets,
1000 Node2Idx->at(
W) < Node2Idx->at(V) ?
true : HasBackedge))
1008 for (
auto W : AdjK[V]) {
1020 void SwingSchedulerDAG::Circuits::unblock(
int U) {
1023 while (!BU.
empty()) {
1025 assert(SI != BU.
end() &&
"Invalid B set.");
1028 if (Blocked.test(W->NodeNum))
1029 unblock(W->NodeNum);
1035 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1040 Circuits Cir(SUnits, Topo);
1042 Cir.createAdjacencyStructure(
this);
1043 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1045 Cir.circuit(i, i, NodeSets);
1081 for (
auto &Dep : SU.
Preds) {
1082 SUnit *TmpSU = Dep.getSUnit();
1094 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
1100 for (
auto I = PHISUs.
begin();
I != PHISUs.
end(); ++
I) {
1101 for (
auto &Dep : (*I)->Succs) {
1105 SUnit *TmpSU = Dep.getSUnit();
1115 if (UseSUs.
size() == 0)
1120 for (
auto I : UseSUs) {
1121 for (
auto Src : SrcSUs) {
1146 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1147 ScheduleInfo.resize(SUnits.size());
1153 const SUnit &SU = SUnits[*
I];
1164 int zeroLatencyDepth = 0;
1167 EP = SU->
Preds.end();
1170 if (IP->getLatency() == 0)
1172 std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1175 asap =
std::max(asap, (
int)(getASAP(pred) + IP->getLatency() -
1176 getDistance(pred, SU, *IP) * MII));
1179 ScheduleInfo[*
I].ASAP = asap;
1180 ScheduleInfo[*
I].ZeroLatencyDepth = zeroLatencyDepth;
1188 int zeroLatencyHeight = 0;
1191 ES = SU->
Succs.end();
1193 SUnit *succ = IS->getSUnit();
1194 if (IS->getLatency() == 0)
1196 std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1199 alap = std::min(alap, (
int)(getALAP(succ) - IS->getLatency() +
1200 getDistance(SU, succ, *IS) * MII));
1203 ScheduleInfo[*
I].ALAP = alap;
1204 ScheduleInfo[*
I].ZeroLatencyHeight = zeroLatencyHeight;
1209 I.computeNodeSetInfo(
this);
1212 for (
unsigned i = 0; i < SUnits.size(); i++) {
1213 dbgs() <<
"\tNode " << i <<
":\n";
1214 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
1215 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
1216 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
1217 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
1218 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
1219 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
1220 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
1236 if (S && S->count(PI->getSUnit()) == 0)
1240 if (NodeOrder.
count(PI->getSUnit()) == 0)
1241 Preds.
insert(PI->getSUnit());
1245 ES = (*I)->Succs.end();
1249 if (S && S->count(IS->getSUnit()) == 0)
1251 if (NodeOrder.
count(IS->getSUnit()) == 0)
1252 Preds.
insert(IS->getSUnit());
1255 return !Preds.
empty();
1269 if (S && S->count(
SI->getSUnit()) == 0)
1273 if (NodeOrder.
count(
SI->getSUnit()) == 0)
1277 PE = (*I)->Preds.end();
1281 if (S && S->count(PI->getSUnit()) == 0)
1283 if (NodeOrder.
count(PI->getSUnit()) == 0)
1284 Succs.
insert(PI->getSUnit());
1287 return !Succs.
empty();
1298 if (Exclude.
count(Cur) != 0)
1300 if (DestNodes.
count(Cur) != 0)
1302 if (!Visited.
insert(Cur).second)
1303 return Path.
count(Cur) != 0;
1304 bool FoundPath =
false;
1306 FoundPath |=
computePath(
SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1307 for (
auto &PI : Cur->
Preds)
1310 computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1317 template <
class S1Ty,
class S2Ty>
static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1318 for (
typename S1Ty::iterator
I = Set1.begin(),
E = Set1.end();
I !=
E; ++
I)
1319 if (Set2.count(*
I) == 0)
1333 for (
SUnit *SU : NS) {
1338 if (MO.isReg() && MO.isUse()) {
1339 unsigned Reg = MO.getReg();
1347 for (
SUnit *SU : NS)
1349 if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1350 unsigned Reg = MO.getReg();
1352 if (!Uses.
count(Reg))
1357 if (!Uses.
count(*Units))
1367 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1368 for (
auto &NS : NodeSets) {
1378 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1383 for (
auto &SU : SUnits) {
1389 RecRPTracker.
setPos(std::next(CurInstI));
1398 dbgs() <<
"Excess register pressure: SU(" << SU->NodeNum <<
") " 1401 NS.setExceedPressure(SU);
1411 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1412 unsigned Colocate = 0;
1413 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
1418 for (
int j = i + 1; j < e; ++j) {
1439 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1444 for (
auto &NS : NodeSets) {
1445 if (NS.getRecMII() > 2)
1447 if (NS.getMaxDepth() > MII)
1457 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1467 for (
SUnit *NI : N) {
1476 if (
succ_L(NodesAdded, N)) {
1478 for (
SUnit *NI : N) {
1485 NodesAdded.
insert(
I.begin(),
I.end());
1492 if (
succ_L(NodesAdded, N))
1494 addConnectedNodes(
I, NewSet, NodesAdded);
1495 if (!NewSet.
empty())
1496 NodeSets.push_back(NewSet);
1501 if (
pred_L(NodesAdded, N))
1503 addConnectedNodes(
I, NewSet, NodesAdded);
1504 if (!NewSet.
empty())
1505 NodeSets.push_back(NewSet);
1509 for (
unsigned i = 0; i < SUnits.size(); ++i) {
1510 SUnit *SU = &SUnits[i];
1511 if (NodesAdded.
count(SU) == 0) {
1513 addConnectedNodes(SU, NewSet, NodesAdded);
1514 if (!NewSet.
empty())
1515 NodeSets.push_back(NewSet);
1521 void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
1527 if (!
SI.isArtificial() && NodesAdded.
count(Successor) == 0)
1528 addConnectedNodes(Successor, NewSet, NodesAdded);
1530 for (
auto &PI : SU->
Preds) {
1531 SUnit *Predecessor = PI.getSUnit();
1532 if (!PI.isArtificial() && NodesAdded.
count(Predecessor) == 0)
1533 addConnectedNodes(Predecessor, NewSet, NodesAdded);
1542 for (
unsigned i = 0, e = Set1.
size(); i != e; ++i) {
1543 SUnit *SU = Set1[i];
1544 if (Set2.
count(SU) != 0)
1547 return !Result.
empty();
1551 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1552 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
1555 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
1573 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1574 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
1576 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
1577 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
1592 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1596 for (
auto &Nodes : NodeSets) {
1613 }
else if (NodeSets.size() == 1) {
1614 for (
auto &N : Nodes)
1615 if (N->Succs.
size() == 0)
1621 SUnit *maxASAP =
nullptr;
1622 for (
SUnit *SU : Nodes) {
1623 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
1624 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
1632 while (!R.
empty()) {
1633 if (Order == TopDown) {
1637 while (!R.
empty()) {
1638 SUnit *maxHeight =
nullptr;
1640 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
1642 else if (getHeight(
I) == getHeight(maxHeight) &&
1643 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
1645 else if (getHeight(
I) == getHeight(maxHeight) &&
1646 getZeroLatencyHeight(
I) ==
1647 getZeroLatencyHeight(maxHeight) &&
1648 getMOV(
I) < getMOV(maxHeight))
1653 R.remove(maxHeight);
1654 for (
const auto &
I : maxHeight->
Succs) {
1655 if (Nodes.count(
I.getSUnit()) == 0)
1661 R.insert(
I.getSUnit());
1664 for (
const auto &
I : maxHeight->
Preds) {
1667 if (Nodes.count(
I.getSUnit()) == 0)
1671 R.insert(
I.getSUnit());
1683 while (!R.
empty()) {
1684 SUnit *maxDepth =
nullptr;
1686 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
1688 else if (getDepth(
I) == getDepth(maxDepth) &&
1689 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
1691 else if (getDepth(
I) == getDepth(maxDepth) &&
1692 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
1693 getMOV(
I) < getMOV(maxDepth))
1699 if (Nodes.isExceedSU(maxDepth)) {
1702 R.insert(Nodes.getNode(0));
1705 for (
const auto &
I : maxDepth->
Preds) {
1706 if (Nodes.count(
I.getSUnit()) == 0)
1710 R.insert(
I.getSUnit());
1713 for (
const auto &
I : maxDepth->
Succs) {
1716 if (Nodes.count(
I.getSUnit()) == 0)
1720 R.insert(
I.getSUnit());
1734 dbgs() <<
"Node order: ";
1736 dbgs() <<
" " <<
I->NodeNum <<
" ";
1743 bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
1747 bool scheduleFound =
false;
1749 for (
unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
1761 int EarlyStart = INT_MIN;
1762 int LateStart = INT_MAX;
1765 int SchedEnd = INT_MAX;
1766 int SchedStart = INT_MIN;
1767 Schedule.
computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1775 dbgs() <<
"\tes: " << EarlyStart <<
" ls: " << LateStart
1776 <<
" me: " << SchedEnd <<
" ms: " << SchedStart <<
"\n";
1779 if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
1780 SchedStart > LateStart)
1781 scheduleFound =
false;
1782 else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
1783 SchedEnd = std::min(SchedEnd, EarlyStart + (
int)II - 1);
1784 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd, II);
1785 }
else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
1786 SchedStart =
std::max(SchedStart, LateStart - (
int)II + 1);
1787 scheduleFound = Schedule.
insert(SU, LateStart, SchedStart, II);
1788 }
else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
1790 std::min(SchedEnd, std::min(LateStart, EarlyStart + (
int)II - 1));
1795 scheduleFound = Schedule.
insert(SU, SchedEnd, EarlyStart, II);
1797 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd, II);
1800 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
1801 FirstCycle + getASAP(SU) + II - 1, II);
1808 scheduleFound =
false;
1812 dbgs() <<
"\tCan't schedule\n";
1814 }
while (++NI != NE && scheduleFound);
1821 LLVM_DEBUG(
dbgs() <<
"Schedule Found? " << scheduleFound <<
"\n");
1836 void SwingSchedulerDAG::generatePipelinedLoop(
SMSchedule &Schedule) {
1846 ValueMapTy *VRMap =
new ValueMapTy[(MaxStageCount + 1) * 2];
1847 InstrMapTy InstrMap;
1851 generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
1852 MF.
insert(BB->getIterator(), KernelBB);
1858 Cycle <= LastCycle; ++Cycle) {
1861 for (
SUnit *CI : CycleInstrs) {
1862 if (CI->getInstr()->isPHI())
1864 unsigned StageNum = Schedule.
stageScheduled(getSUnit(CI->getInstr()));
1865 MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
1866 updateInstruction(NewMI,
false, MaxStageCount, StageNum, Schedule, VRMap);
1868 InstrMap[NewMI] = CI->getInstr();
1875 E = BB->instr_end();
1878 updateInstruction(NewMI,
false, MaxStageCount, 0, Schedule, VRMap);
1880 InstrMap[NewMI] = &*
I;
1886 generateExistingPhis(KernelBB, PrologBBs.
back(), KernelBB, KernelBB, Schedule,
1887 VRMap, InstrMap, MaxStageCount, MaxStageCount,
false);
1888 generatePhis(KernelBB, PrologBBs.
back(), KernelBB, KernelBB, Schedule, VRMap,
1889 InstrMap, MaxStageCount, MaxStageCount,
false);
1895 generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
1900 splitLifetimes(KernelBB, EpilogBBs, Schedule);
1903 removeDeadInstructions(KernelBB, EpilogBBs);
1906 addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
1910 LIS.RemoveMachineInstrFromMaps(
I);
1912 BB->eraseFromParent();
1918 void SwingSchedulerDAG::generateProlog(
SMSchedule &Schedule,
unsigned LastStage,
1921 MBBVectorTy &PrologBBs) {
1923 assert(PreheaderBB !=
nullptr &&
1924 "Need to add code to handle loops w/o preheader");
1926 InstrMapTy InstrMap;
1931 for (
unsigned i = 0; i < LastStage; ++i) {
1936 MF.
insert(BB->getIterator(), NewBB);
1943 for (
int StageNum = i; StageNum >= 0; --StageNum) {
1945 BBE = BB->getFirstTerminator();
1946 BBI != BBE; ++BBI) {
1951 cloneAndChangeInstr(&*BBI, i, (
unsigned)StageNum, Schedule);
1952 updateInstruction(NewMI,
false, i, (
unsigned)StageNum, Schedule,
1955 InstrMap[NewMI] = &*BBI;
1959 rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
1961 dbgs() <<
"prolog:\n";
1980 void SwingSchedulerDAG::generateEpilog(
SMSchedule &Schedule,
unsigned LastStage,
1983 MBBVectorTy &EpilogBBs,
1984 MBBVectorTy &PrologBBs) {
1990 assert(!checkBranch &&
"generateEpilog must be able to analyze the branch");
1995 if (*LoopExitI == KernelBB)
1997 assert(LoopExitI != KernelBB->
succ_end() &&
"Expecting a successor");
2002 InstrMapTy InstrMap;
2007 int EpilogStage = LastStage + 1;
2008 for (
unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2011 MF.
insert(BB->getIterator(), NewBB);
2016 if (EpilogStart == LoopExitBB)
2017 EpilogStart = NewBB;
2021 for (
unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2022 for (
auto &BBI : *BB) {
2030 updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2032 InstrMap[NewMI] =
In;
2036 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2037 VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2038 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2039 InstrMap, LastStage, EpilogStage, i == 1);
2043 dbgs() <<
"epilog:\n";
2052 for (
unsigned i = 2, e =
MI.getNumOperands() + 1; i != e; i += 2) {
2064 if (EpilogBBs.size() > 0) {
2096 if (
I->getParent()->getParent() != BB)
2104 void SwingSchedulerDAG::generateExistingPhis(
2107 InstrMapTy &InstrMap,
unsigned LastStageNum,
unsigned CurStageNum,
2112 unsigned PrologStage = 0;
2113 unsigned PrevStage = 0;
2114 bool InKernel = (LastStageNum == CurStageNum);
2116 PrologStage = LastStageNum - 1;
2117 PrevStage = CurStageNum;
2119 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2120 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2124 BBE = BB->getFirstNonPHI();
2125 BBI != BBE; ++BBI) {
2126 unsigned Def = BBI->getOperand(0).getReg();
2128 unsigned InitVal = 0;
2129 unsigned LoopVal = 0;
2132 unsigned PhiOp1 = 0;
2135 unsigned PhiOp2 = LoopVal;
2136 if (VRMap[LastStageNum].
count(LoopVal))
2137 PhiOp2 = VRMap[LastStageNum][LoopVal];
2143 if (NumStages == 0) {
2146 unsigned NewReg = VRMap[PrevStage][LoopVal];
2147 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2148 Def, InitVal, NewReg);
2149 if (VRMap[CurStageNum].
count(LoopVal))
2150 VRMap[CurStageNum][
Def] = VRMap[CurStageNum][LoopVal];
2156 unsigned MaxPhis = PrologStage + 2;
2157 if (!InKernel && (
int)PrologStage <= LoopValStage)
2158 MaxPhis =
std::max((
int)MaxPhis - (
int)LoopValStage, 1);
2159 unsigned NumPhis = std::min(NumStages, MaxPhis);
2161 unsigned NewReg = 0;
2162 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2169 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2174 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2175 StageDiff = StageScheduled - LoopValStage;
2176 for (
unsigned np = 0; np < NumPhis; ++np) {
2180 if (np > PrologStage || StageScheduled >= (
int)LastStageNum)
2183 else if (PrologStage >= AccessStage + StageDiff + np &&
2184 VRMap[PrologStage - StageDiff - np].
count(LoopVal) != 0)
2185 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2188 else if (PrologStage >= AccessStage + StageDiff + np) {
2194 while (InstOp1 && InstOp1->
isPHI() && InstOp1->
getParent() == BB) {
2196 if ((
int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2200 InstOp1 =
MRI.getVRegDef(PhiOp1);
2202 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2203 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2204 VRMap[PrologStage - StageAdj - Indirects - np].
count(PhiOp1)) {
2205 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2215 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2219 bool LoopDefIsPhi = PhiInst && PhiInst->
isPHI();
2224 int StageDiffAdj = 0;
2225 if (LoopValStage != -1 && StageScheduled > LoopValStage)
2226 StageDiffAdj = StageScheduled - LoopValStage;
2229 if (np == 0 && PrevStage == LastStageNum &&
2230 (StageScheduled != 0 || LoopValStage != 0) &&
2231 VRMap[PrevStage - StageDiffAdj].
count(LoopVal))
2232 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2235 else if (np > 0 && PrevStage == LastStageNum &&
2236 VRMap[PrevStage - np + 1].
count(Def))
2237 PhiOp2 = VRMap[PrevStage - np + 1][
Def];
2239 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2240 VRMap[PrevStage - StageDiffAdj - np].
count(LoopVal))
2241 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2244 else if (VRMap[PrevStage - np].
count(Def) &&
2245 (!LoopDefIsPhi || PrevStage != LastStageNum))
2246 PhiOp2 = VRMap[PrevStage - np][
Def];
2254 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2256 int StageDiff = (StageScheduled - LoopValStage);
2257 LVNumStages -= StageDiff;
2259 if (LVNumStages > (
int)np && VRMap[CurStageNum].count(LoopVal)) {
2261 unsigned ReuseStage = CurStageNum;
2263 ReuseStage -= LVNumStages;
2266 if (VRMap[ReuseStage - np].
count(LoopVal)) {
2267 NewReg = VRMap[ReuseStage - np][LoopVal];
2269 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2270 &*BBI, Def, NewReg);
2272 VRMap[CurStageNum - np][
Def] = NewReg;
2274 if (VRMap[LastStageNum - np - 1].
count(LoopVal))
2275 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2277 if (IsLast && np == NumPhis - 1)
2283 if (InKernel && StageDiff > 0 &&
2284 VRMap[CurStageNum - StageDiff - np].
count(LoopVal))
2285 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2289 NewReg =
MRI.createVirtualRegister(RC);
2293 TII->
get(TargetOpcode::PHI), NewReg);
2297 InstrMap[NewPhi] = &*BBI;
2302 unsigned PrevReg = 0;
2303 if (InKernel && VRMap[PrevStage - np].
count(LoopVal))
2304 PrevReg = VRMap[PrevStage - np][LoopVal];
2305 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2306 Def, NewReg, PrevReg);
2308 if (VRMap[CurStageNum - np].
count(Def)) {
2309 unsigned R = VRMap[CurStageNum - np][
Def];
2310 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2317 if (IsLast && np == NumPhis - 1)
2325 VRMap[CurStageNum - np][
Def] = NewReg;
2328 while (NumPhis++ < NumStages) {
2329 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2330 &*BBI, Def, NewReg, 0);
2335 if (NumStages == 0 && IsLast && VRMap[CurStageNum].
count(LoopVal))
2343 void SwingSchedulerDAG::generatePhis(
2346 InstrMapTy &InstrMap,
unsigned LastStageNum,
unsigned CurStageNum,
2350 unsigned PrologStage = 0;
2351 unsigned PrevStage = 0;
2352 unsigned StageDiff = CurStageNum - LastStageNum;
2353 bool InKernel = (StageDiff == 0);
2355 PrologStage = LastStageNum - 1;
2356 PrevStage = CurStageNum;
2358 PrologStage = LastStageNum - StageDiff;
2359 PrevStage = LastStageNum + StageDiff - 1;
2363 BBE = BB->instr_end();
2364 BBI != BBE; ++BBI) {
2365 for (
unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2372 assert(StageScheduled != -1 &&
"Expecting scheduled instruction.");
2378 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2381 if (!InKernel && (
unsigned)StageScheduled > PrologStage)
2384 unsigned PhiOp2 = VRMap[PrevStage][
Def];
2386 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2390 if (NumPhis > PrologStage + 1 - StageScheduled)
2391 NumPhis = PrologStage + 1 - StageScheduled;
2392 for (
unsigned np = 0; np < NumPhis; ++np) {
2393 unsigned PhiOp1 = VRMap[PrologStage][
Def];
2394 if (np <= PrologStage)
2395 PhiOp1 = VRMap[PrologStage - np][
Def];
2397 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2399 if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2403 PhiOp2 = VRMap[PrevStage - np][
Def];
2406 unsigned NewReg =
MRI.createVirtualRegister(RC);
2410 TII->
get(TargetOpcode::PHI), NewReg);
2414 InstrMap[NewPhi] = &*BBI;
2419 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2420 &*BBI, PhiOp1, NewReg);
2421 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2422 &*BBI, PhiOp2, NewReg);
2425 VRMap[PrevStage - np - 1][
Def] = NewReg;
2427 VRMap[CurStageNum - np][
Def] = NewReg;
2428 if (np == NumPhis - 1)
2429 rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2430 &*BBI, Def, NewReg);
2432 if (IsLast && np == NumPhis - 1)
2444 MBBVectorTy &EpilogBBs) {
2447 for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2448 MBE = EpilogBBs.rend();
2451 ME = (*MBB)->instr_rend();
2454 if (
MI->isInlineAsm()) {
2458 bool SawStore =
false;
2461 if (!
MI->isSafeToMove(
nullptr, SawStore) && !
MI->isPHI()) {
2467 MOE =
MI->operands_end();
2468 MOI != MOE; ++MOI) {
2469 if (!MOI->isReg() || !MOI->isDef())
2471 unsigned reg = MOI->getReg();
2474 used = !MOI->isDead();
2479 unsigned realUses = 0;
2485 if (UI->getParent()->getParent() != BB) {
2496 LIS.RemoveMachineInstrFromMaps(*
MI);
2497 MI++->eraseFromParent();
2510 if (
MRI.use_begin(reg) ==
MRI.use_end()) {
2511 LIS.RemoveMachineInstrFromMaps(*MI);
2528 MBBVectorTy &EpilogBBs,
2531 for (
auto &PHI : KernelBB->
phis()) {
2532 unsigned Def = PHI.getOperand(0).getReg();
2536 E =
MRI.use_instr_end();
2538 if (
I->isPHI() &&
I->getParent() == KernelBB) {
2548 unsigned SplitReg = 0;
2551 if (BBJ.readsRegister(Def)) {
2553 if (SplitReg == 0) {
2554 SplitReg =
MRI.createVirtualRegister(
MRI.getRegClass(Def));
2556 TII->
get(TargetOpcode::COPY), SplitReg)
2559 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2564 for (
auto &Epilog : EpilogBBs)
2565 for (
auto &
I : *Epilog)
2566 if (
I.readsRegister(Def))
2567 I.substituteRegister(Def, SplitReg, 0, *TRI);
2579 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2)
2580 if (
MI.getOperand(i + 1).getMBB() == Incoming) {
2581 MI.RemoveOperand(i + 1);
2582 MI.RemoveOperand(i);
2591 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
2593 MBBVectorTy &EpilogBBs,
2595 assert(PrologBBs.size() == EpilogBBs.size() &&
"Prolog/Epilog mismatch");
2604 unsigned MaxIter = PrologBBs.
size() - 1;
2605 unsigned LC = UINT_MAX;
2606 unsigned LCMin = UINT_MAX;
2607 for (
unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
2623 if (LCMin == UINT_MAX)
2626 unsigned numAdded = 0;
2630 }
else if (j >= LCMin) {
2637 if (LastPro != LastEpi) {
2651 I !=
E && numAdded > 0; ++
I, --numAdded)
2652 updateInstruction(&*
I,
false, j, 0, Schedule, VRMap);
2658 bool SwingSchedulerDAG::computeDelta(
MachineInstr &
MI,
unsigned &Delta) {
2665 if (!BaseOp->
isReg())
2668 unsigned BaseReg = BaseOp->
getReg();
2673 if (BaseDef && BaseDef->
isPHI()) {
2691 void SwingSchedulerDAG::updateMemOperands(
MachineInstr &NewMI,
2701 if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
2702 (!MMO->getValue())) {
2708 int64_t AdjOffset = Delta * Num;
2722 unsigned CurStageNum,
2723 unsigned InstStageNum) {
2730 if (MO.isReg() && MO.isUse())
2736 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2744 unsigned CurStageNum,
2745 unsigned InstStageNum,
2749 InstrChanges.
find(getSUnit(OldMI));
2750 if (It != InstrChanges.
end()) {
2751 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2752 unsigned BasePos, OffsetPos;
2756 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
2757 if (Schedule.
stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
2758 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
2761 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2767 void SwingSchedulerDAG::updateInstruction(
MachineInstr *NewMI,
bool LastDef,
2768 unsigned CurStageNum,
2769 unsigned InstrStageNum,
2771 ValueMapTy *VRMap) {
2776 unsigned reg = MO.
getReg();
2780 unsigned NewReg =
MRI.createVirtualRegister(RC);
2782 VRMap[CurStageNum][reg] = NewReg;
2785 }
else if (MO.
isUse()) {
2789 unsigned StageNum = CurStageNum;
2790 if (DefStageNum != -1 && (
int)InstrStageNum > DefStageNum) {
2792 unsigned StageDiff = (InstrStageNum - DefStageNum);
2794 StageNum -= StageDiff;
2796 if (VRMap[StageNum].
count(reg))
2797 MO.
setReg(VRMap[StageNum][reg]);
2808 while (Def->
isPHI()) {
2809 if (!Visited.
insert(Def).second)
2821 unsigned SwingSchedulerDAG::getPrevMapVal(
unsigned StageNum,
unsigned PhiStage,
2822 unsigned LoopVal,
unsigned LoopStage,
2825 unsigned PrevVal = 0;
2826 if (StageNum > PhiStage) {
2828 if (PhiStage == LoopStage && VRMap[StageNum - 1].
count(LoopVal))
2830 PrevVal = VRMap[StageNum - 1][LoopVal];
2831 else if (VRMap[StageNum].
count(LoopVal))
2834 PrevVal = VRMap[StageNum][LoopVal];
2838 else if (StageNum == PhiStage + 1)
2841 else if (StageNum > PhiStage + 1 && LoopInst->
getParent() == BB)
2844 getPrevMapVal(StageNum - 1, PhiStage,
getLoopPhiReg(*LoopInst, BB),
2845 LoopStage, VRMap, BB);
2858 InstrMapTy &InstrMap) {
2859 for (
auto &PHI : BB->
phis()) {
2860 unsigned InitVal = 0;
2861 unsigned LoopVal = 0;
2863 unsigned PhiDef = PHI.getOperand(0).getReg();
2867 unsigned LoopStage =
2870 if (NumPhis > StageNum)
2872 for (
unsigned np = 0; np <= NumPhis; ++np) {
2874 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
2877 rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
2886 void SwingSchedulerDAG::rewriteScheduledInstr(
2888 unsigned CurStageNum,
unsigned PhiNum,
MachineInstr *Phi,
unsigned OldReg,
2889 unsigned NewReg,
unsigned PrevReg) {
2902 if (UseMI->
isPHI()) {
2908 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
2909 assert(OrigInstr != InstrMap.end() &&
"Instruction not scheduled.");
2910 SUnit *OrigMISU = getSUnit(OrigInstr->second);
2913 unsigned ReplaceReg = 0;
2915 if (StagePhi == StageSched && Phi->
isPHI()) {
2917 if (PrevReg && InProlog)
2918 ReplaceReg = PrevReg;
2920 (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
2921 ReplaceReg = PrevReg;
2923 ReplaceReg = NewReg;
2927 if (!InProlog && StagePhi + 1 == StageSched &&
2929 ReplaceReg = NewReg;
2930 if (StagePhi > StageSched && Phi->
isPHI())
2931 ReplaceReg = NewReg;
2932 if (!InProlog && !Phi->
isPHI() && StagePhi < StageSched)
2933 ReplaceReg = NewReg;
2935 MRI.constrainRegClass(ReplaceReg,
MRI.getRegClass(OldReg));
2936 UseOp.
setReg(ReplaceReg);
2948 bool SwingSchedulerDAG::canUseLastOffsetValue(
MachineInstr *MI,
2950 unsigned &OffsetPos,
2956 unsigned BasePosLd, OffsetPosLd;
2964 if (!Phi || !Phi->
isPHI())
2973 if (!PrevDef || PrevDef == MI)
2979 unsigned BasePos1 = 0, OffsetPos1 = 0;
2995 BasePos = BasePosLd;
2996 OffsetPos = OffsetPosLd;
2998 Offset = StoreOffset;
3006 SUnit *SU = getSUnit(MI);
3008 InstrChanges.
find(SU);
3009 if (It != InstrChanges.
end()) {
3010 std::pair<unsigned, int64_t> RegAndOffset = It->second;
3011 unsigned BasePos, OffsetPos;
3020 if (BaseStageNum < DefStageNum) {
3022 int OffsetDiff = DefStageNum - BaseStageNum;
3023 if (DefCycleNum < BaseCycleNum) {
3032 MISUnitMap[NewMI] = SU;
3033 NewMIs.insert(NewMI);
3057 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3068 unsigned DeltaS, DeltaD;
3073 int64_t OffsetS, OffsetD;
3085 if (!Def || !Def->
isPHI())
3087 unsigned InitVal = 0;
3088 unsigned LoopVal = 0;
3100 if (OffsetS >= OffsetD)
3101 return OffsetS + AccessSizeS > DeltaS;
3103 return OffsetD + AccessSizeD > DeltaD;
3108 void SwingSchedulerDAG::postprocessDAG() {
3109 for (
auto &M : Mutations)
3119 bool forward =
true;
3120 if (StartCycle > EndCycle)
3124 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3125 for (
int curCycle = StartCycle; curCycle != termCycle;
3126 forward ? ++curCycle : --curCycle) {
3129 Resources->clearResources();
3130 for (
int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3131 checkCycle <= LastCycle; checkCycle += II) {
3132 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3134 for (std::deque<SUnit *>::iterator
I = cycleInstrs.begin(),
3135 E = cycleInstrs.end();
3137 if (
ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3139 assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3140 "These instructions have already been scheduled.");
3141 Resources->reserveResources(*(*I)->getInstr());
3145 Resources->canReserveResources(*SU->
getInstr())) {
3147 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3151 ScheduledInstrs[curCycle].push_back(SU);
3152 InstrToCycle.insert(std::make_pair(SU, curCycle));
3153 if (curCycle > LastCycle)
3154 LastCycle = curCycle;
3155 if (curCycle < FirstCycle)
3156 FirstCycle = curCycle;
3160 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3172 int EarlyCycle = INT_MAX;
3173 while (!Worklist.
empty()) {
3176 if (Visited.
count(PrevSU))
3178 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3179 if (it == InstrToCycle.end())
3181 EarlyCycle = std::min(EarlyCycle, it->second);
3182 for (
const auto &PI : PrevSU->
Preds)
3195 int LateCycle = INT_MIN;
3196 while (!Worklist.
empty()) {
3199 if (Visited.
count(SuccSU))
3201 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3202 if (it == InstrToCycle.end())
3204 LateCycle =
std::max(LateCycle, it->second);
3205 for (
const auto &
SI : SuccSU->
Succs)
3217 for (
auto &
P : SU->
Preds)
3218 if (DAG->
isBackedge(SU,
P) &&
P.getSUnit()->getInstr()->isPHI())
3219 for (
auto &S :
P.getSUnit()->Succs)
3220 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3221 return P.getSUnit();
3228 int *MinEnd,
int *MaxStart,
int II,
3233 for (
int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3236 for (
SUnit *
I : getInstructions(cycle)) {
3239 for (
unsigned i = 0, e = (
unsigned)SU->
Preds.size(); i != e; ++i) {
3245 *MaxEarlyStart =
std::max(*MaxEarlyStart, EarlyStart);
3247 int End = earliestCycleInChain(Dep) + (II - 1);
3248 *MinEnd = std::min(*MinEnd, End);
3253 *MinLateStart = std::min(*MinLateStart, LateStart);
3261 *MinLateStart = std::min(*MinLateStart, cycle);
3263 for (
unsigned i = 0, e = (
unsigned)SU->
Succs.size(); i != e; ++i) {
3264 if (SU->
Succs[i].getSUnit() ==
I) {
3269 *MinLateStart = std::min(*MinLateStart, LateStart);
3271 int Start = latestCycleInChain(Dep) + 1 - II;
3272 *MaxStart =
std::max(*MaxStart, Start);
3277 *MaxEarlyStart =
std::max(*MaxEarlyStart, EarlyStart);
3289 std::deque<SUnit *> &Insts) {
3291 bool OrderBeforeUse =
false;
3292 bool OrderAfterDef =
false;
3293 bool OrderBeforeDef =
false;
3294 unsigned MoveDef = 0;
3295 unsigned MoveUse = 0;
3296 int StageInst1 = stageScheduled(SU);
3299 for (std::deque<SUnit *>::iterator
I = Insts.begin(),
E = Insts.end();
I !=
E;
3306 unsigned Reg = MO.
getReg();
3307 unsigned BasePos, OffsetPos;
3308 if (
ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3313 std::tie(Reads, Writes) =
3314 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3315 if (MO.
isDef() && Reads && stageScheduled(*
I) <= StageInst1) {
3316 OrderBeforeUse =
true;
3319 }
else if (MO.
isDef() && Reads && stageScheduled(*
I) > StageInst1) {
3321 OrderAfterDef =
true;
3323 }
else if (MO.
isUse() && Writes && stageScheduled(*
I) == StageInst1) {
3324 if (cycleScheduled(*
I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3325 OrderBeforeUse =
true;
3329 OrderAfterDef =
true;
3332 }
else if (MO.
isUse() && Writes && stageScheduled(*
I) > StageInst1) {
3333 OrderBeforeUse =
true;
3337 OrderAfterDef =
true;
3340 }
else if (MO.
isUse() && Writes && stageScheduled(*
I) < StageInst1) {
3342 OrderBeforeUse =
true;
3345 }
else if (MO.
isUse() && stageScheduled(*
I) == StageInst1 &&
3346 isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3348 OrderBeforeDef =
true;
3355 for (
auto &S : SU->
Succs) {
3356 if (S.getSUnit() != *
I)
3358 if (S.getKind() ==
SDep::Order && stageScheduled(*
I) == StageInst1) {
3359 OrderBeforeUse =
true;
3364 for (
auto &
P : SU->
Preds) {
3365 if (
P.getSUnit() != *
I)
3367 if (
P.getKind() ==
SDep::Order && stageScheduled(*
I) == StageInst1) {
3368 OrderAfterDef =
true;
3375 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3376 OrderBeforeUse =
false;
3381 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3385 if (OrderBeforeUse && OrderAfterDef) {
3386 SUnit *UseSU = Insts.at(MoveUse);
3387 SUnit *DefSU = Insts.at(MoveDef);
3388 if (MoveUse > MoveDef) {
3389 Insts.erase(Insts.begin() + MoveUse);
3390 Insts.erase(Insts.begin() + MoveDef);
3392 Insts.erase(Insts.begin() + MoveDef);
3393 Insts.erase(Insts.begin() + MoveUse);
3395 orderDependence(SSD, UseSU, Insts);
3396 orderDependence(SSD, SU, Insts);
3397 orderDependence(SSD, DefSU, Insts);
3403 Insts.push_front(SU);
3405 Insts.push_back(SU);
3414 unsigned DefCycle = cycleScheduled(DefSU);
3415 int DefStage = stageScheduled(DefSU);
3417 unsigned InitVal = 0;
3418 unsigned LoopVal = 0;
3423 if (UseSU->getInstr()->isPHI())
3425 unsigned LoopCycle = cycleScheduled(UseSU);
3426 int LoopStage = stageScheduled(UseSU);
3427 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3446 if (!isLoopCarried(SSD, *Phi))
3453 if (DMO.
getReg() == LoopReg)
3464 for (
int i = 0, e = SSD->
SUnits.size(); i < e; ++i) {
3468 int StageDef = stageScheduled(&SU);
3469 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3471 if (
SI.isAssignedRegDep())
3472 if (
ST.getRegisterInfo()->isPhysicalRegister(
SI.getReg()))
3473 if (stageScheduled(
SI.getSUnit()) != StageDef)
3489 void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3492 typedef std::pair<SUnit *, unsigned> UnitIndex;
3493 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3495 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3496 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3498 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3499 return std::get<0>(i1) < std::get<0>(i2);
3512 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3516 bool PredBefore =
false;
3517 bool SuccBefore =
false;
3526 unsigned PredIndex =
3528 std::make_pair(PredSU, 0), CompareKey));
3538 unsigned SuccIndex =
3540 std::make_pair(SuccSU, 0), CompareKey));
3552 Circuits.begin(), Circuits.end(),
3553 [SU](
const NodeSet &Circuit) {
return Circuit.count(SU); });
3558 NumNodeOrderIssues++;
3562 <<
" are scheduled before node " << SU->
NodeNum 3569 dbgs() <<
"Invalid node order found!\n";
3580 unsigned OverlapReg = 0;
3581 unsigned NewBaseReg = 0;
3582 for (
SUnit *SU : Instrs) {
3592 InstrChanges.
find(SU);
3593 if (It != InstrChanges.
end()) {
3594 unsigned BasePos, OffsetPos;
3603 MISUnitMap[NewMI] = SU;
3604 NewMIs.insert(NewMI);
3613 unsigned TiedUseIdx = 0;
3630 for (
int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3631 for (
int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3633 std::deque<SUnit *> &cycleInstrs =
3634 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3635 for (std::deque<SUnit *>::reverse_iterator
I = cycleInstrs.rbegin(),
3636 E = cycleInstrs.rend();
3638 ScheduledInstrs[cycle].push_front(*
I);
3643 for (
auto &
I : InstrToCycle) {
3644 int DefStage = stageScheduled(
I.first);
3651 unsigned Reg = Op.
getReg();
3652 unsigned MaxDiff = 0;
3653 bool PhiIsSwapped =
false;
3660 int UseStage = stageScheduled(SUnitUse);
3662 if (UseStage != -1 && UseStage >= DefStage)
3663 Diff = UseStage - DefStage;
3665 if (isLoopCarried(SSD, *MI))
3668 PhiIsSwapped =
true;
3672 RegToStageDiff[
Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3678 for (
int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3679 ScheduledInstrs.erase(cycle);
3683 for (
int i = 0, e = SSD->
SUnits.size(); i != e; ++i) {
3690 for (
int Cycle = getFirstCycle(),
E = getFinalCycle(); Cycle <=
E; ++Cycle) {
3691 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3692 std::deque<SUnit *> newOrderPhi;
3693 for (
unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3694 SUnit *SU = cycleInstrs[i];
3696 newOrderPhi.push_back(SU);
3698 std::deque<SUnit *> newOrderI;
3699 for (
unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3700 SUnit *SU = cycleInstrs[i];
3702 orderDependence(SSD, SU, newOrderI);
3705 cycleInstrs.swap(newOrderPhi);
3706 cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3714 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3715 <<
" depth " <<
MaxDepth <<
" col " << Colocate <<
"\n";
3716 for (
const auto &
I : Nodes)
3717 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3721 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3725 for (
int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3728 for (
SUnit *CI : cycleInstrs->second) {
3729 os <<
"cycle " << cycle <<
" (" << stageScheduled(CI) <<
") ";
3730 os <<
"(" << CI->
NodeNum <<
") ";
Pass interface - Implemented by all 'passes'.
std::vector< int >::const_reverse_iterator const_reverse_iterator
A parsed version of the target data layout string in and methods for querying it. ...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
MachineInstr * LoopInductionVar
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned NumPreds
of SDep::Data preds.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
cl::opt< bool > SwpEnableCopyToPhi
void push_back(const T &Elt)
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool isValidSchedule(SwingSchedulerDAG *SSD)
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
const MachineLoopInfo * MLI
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
block Block Frequency true
unsigned getReg() const
Returns the register associated with this edge.
Kind
These are the different kinds of scheduling dependencies.
iterator_range< mop_iterator > operands()
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
SmallVectorImpl< SDep >::iterator succ_iterator
SUnit * getNode(unsigned i) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
virtual bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
A register anti-dependence (aka WAR).
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
void apply(Opt *O, const Mod &M, const Mods &... Ms)
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
A register output-dependence (aka WAW).
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
bool insert(const value_type &X)
Insert a new element into the SetVector.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Itinerary data supplied by a subtarget to be used by a target.
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
unsigned count(SUnit *SU) const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
initializer< Ty > init(const Ty &Val)
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
MachineInstrBuilder & UseMI
unsigned getStagesForPhi(int Reg)
The number of stages for a Phi is a little different than other instructions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
static Type * getVoidTy(LLVMContext &C)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
LLVM_DUMP_METHOD void dump() const
succ_iterator succ_begin()
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
typename vector_type::const_iterator iterator
A SetVector that performs no allocations if smaller than a certain size.
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
unsigned countPopulation(T Value)
Count the number of set bits in a value.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MachineInstr * LoopCompare
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
std::vector< int >::const_iterator const_iterator
void clear()
Completely clear the SetVector.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
This class represents the scheduled code.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
unsigned getStagesForReg(int Reg, unsigned CurStage)
Return the max.
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
void setLatency(unsigned Lat)
Sets the latency for this edge.
const MachineBasicBlock * getParent() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
bool empty() const
Determine if the SetVector is empty or not.
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Store the effects of a change in pressure on things that MI scheduler cares about.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr's memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
mop_iterator operands_begin()
A vector that has set insertion semantics.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Arbitrary strong DAG edge (no real dependence).
This class implements an extremely fast bulk output stream that can only output to a stream...
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void setPos(MachineBasicBlock::const_iterator Pos)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
const MachineOperand & getOperand(unsigned i) const
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.