LLVM  8.0.1
AMDGPUMachineCFGStructurizer.cpp
Go to the documentation of this file.
1 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the machine instruction level CFG structurizer pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUSubtarget.h"
16 #include "SIInstrInfo.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Config/llvm-config.h"
35 #include "llvm/IR/DebugLoc.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/Debug.h"
41 #include <cassert>
42 #include <tuple>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "amdgpucfgstructurizer"
48 
49 namespace {
50 
51 class PHILinearizeDestIterator;
52 
53 class PHILinearize {
54  friend class PHILinearizeDestIterator;
55 
56 public:
57  using PHISourceT = std::pair<unsigned, MachineBasicBlock *>;
58 
59 private:
60  using PHISourcesT = DenseSet<PHISourceT>;
61  using PHIInfoElementT = struct {
62  unsigned DestReg;
63  DebugLoc DL;
64  PHISourcesT Sources;
65  };
66  using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>;
67  PHIInfoT PHIInfo;
68 
69  static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
70  static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef);
71  static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info);
72  static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg,
73  MachineBasicBlock *SourceMBB);
74  static void phiInfoElementRemoveSource(PHIInfoElementT *Info,
75  unsigned SourceReg,
76  MachineBasicBlock *SourceMBB);
77  PHIInfoElementT *findPHIInfoElement(unsigned DestReg);
78  PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg,
79  MachineBasicBlock *SourceMBB);
80 
81 public:
82  bool findSourcesFromMBB(MachineBasicBlock *SourceMBB,
83  SmallVector<unsigned, 4> &Sources);
84  void addDest(unsigned DestReg, const DebugLoc &DL);
85  void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
86  void deleteDef(unsigned DestReg);
87  void addSource(unsigned DestReg, unsigned SourceReg,
88  MachineBasicBlock *SourceMBB);
89  void removeSource(unsigned DestReg, unsigned SourceReg,
90  MachineBasicBlock *SourceMBB = nullptr);
91  bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
92  unsigned &DestReg);
93  bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr);
94  unsigned getNumSources(unsigned DestReg);
96  void clear();
97 
98  using source_iterator = PHISourcesT::iterator;
99  using dest_iterator = PHILinearizeDestIterator;
100 
101  dest_iterator dests_begin();
102  dest_iterator dests_end();
103 
104  source_iterator sources_begin(unsigned Reg);
105  source_iterator sources_end(unsigned Reg);
106 };
107 
108 class PHILinearizeDestIterator {
109 private:
111 
112 public:
113  PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
114 
115  unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
116  PHILinearizeDestIterator &operator++() {
117  ++Iter;
118  return *this;
119  }
120  bool operator==(const PHILinearizeDestIterator &I) const {
121  return I.Iter == Iter;
122  }
123  bool operator!=(const PHILinearizeDestIterator &I) const {
124  return I.Iter != Iter;
125  }
126 };
127 
128 } // end anonymous namespace
129 
130 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
131  return Info->DestReg;
132 }
133 
134 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
135  unsigned NewDef) {
136  Info->DestReg = NewDef;
137 }
138 
140 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
141  return Info->Sources;
142 }
143 
144 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
145  unsigned SourceReg,
146  MachineBasicBlock *SourceMBB) {
147  // Assertion ensures we don't use the same SourceMBB for the
148  // sources, because we cannot have different registers with
149  // identical predecessors, but we can have the same register for
150  // multiple predecessors.
151 #if !defined(NDEBUG)
152  for (auto SI : phiInfoElementGetSources(Info)) {
153  assert((SI.second != SourceMBB || SourceReg == SI.first));
154  }
155 #endif
156 
157  phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
158 }
159 
160 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
161  unsigned SourceReg,
162  MachineBasicBlock *SourceMBB) {
163  auto &Sources = phiInfoElementGetSources(Info);
164  SmallVector<PHISourceT, 4> ElimiatedSources;
165  for (auto SI : Sources) {
166  if (SI.first == SourceReg &&
167  (SI.second == nullptr || SI.second == SourceMBB)) {
168  ElimiatedSources.push_back(PHISourceT(SI.first, SI.second));
169  }
170  }
171 
172  for (auto &Source : ElimiatedSources) {
173  Sources.erase(Source);
174  }
175 }
176 
177 PHILinearize::PHIInfoElementT *
178 PHILinearize::findPHIInfoElement(unsigned DestReg) {
179  for (auto I : PHIInfo) {
180  if (phiInfoElementGetDest(I) == DestReg) {
181  return I;
182  }
183  }
184  return nullptr;
185 }
186 
187 PHILinearize::PHIInfoElementT *
188 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
189  MachineBasicBlock *SourceMBB) {
190  for (auto I : PHIInfo) {
191  for (auto SI : phiInfoElementGetSources(I)) {
192  if (SI.first == SourceReg &&
193  (SI.second == nullptr || SI.second == SourceMBB)) {
194  return I;
195  }
196  }
197  }
198  return nullptr;
199 }
200 
201 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
202  SmallVector<unsigned, 4> &Sources) {
203  bool FoundSource = false;
204  for (auto I : PHIInfo) {
205  for (auto SI : phiInfoElementGetSources(I)) {
206  if (SI.second == SourceMBB) {
207  FoundSource = true;
208  Sources.push_back(SI.first);
209  }
210  }
211  }
212  return FoundSource;
213 }
214 
215 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
216  assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
217  PHISourcesT EmptySet;
218  PHIInfoElementT *NewElement = new PHIInfoElementT();
219  NewElement->DestReg = DestReg;
220  NewElement->DL = DL;
221  NewElement->Sources = EmptySet;
222  PHIInfo.insert(NewElement);
223 }
224 
225 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
226  phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
227 }
228 
229 void PHILinearize::deleteDef(unsigned DestReg) {
230  PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
231  PHIInfo.erase(InfoElement);
232  delete InfoElement;
233 }
234 
235 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
236  MachineBasicBlock *SourceMBB) {
237  phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
238 }
239 
240 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
241  MachineBasicBlock *SourceMBB) {
242  phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
243 }
244 
245 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
246  unsigned &DestReg) {
247  PHIInfoElementT *InfoElement =
248  findPHIInfoElementFromSource(SourceReg, SourceMBB);
249  if (InfoElement != nullptr) {
250  DestReg = phiInfoElementGetDest(InfoElement);
251  return true;
252  }
253  return false;
254 }
255 
256 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) {
257  unsigned DestReg;
258  return findDest(Reg, SourceMBB, DestReg);
259 }
260 
261 unsigned PHILinearize::getNumSources(unsigned DestReg) {
262  return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size();
263 }
264 
265 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
268  dbgs() << "=PHIInfo Start=\n";
269  for (auto PII : this->PHIInfo) {
270  PHIInfoElementT &Element = *PII;
271  dbgs() << "Dest: " << printReg(Element.DestReg, TRI)
272  << " Sources: {";
273  for (auto &SI : Element.Sources) {
274  dbgs() << printReg(SI.first, TRI) << '(' << printMBBReference(*SI.second)
275  << "),";
276  }
277  dbgs() << "}\n";
278  }
279  dbgs() << "=PHIInfo End=\n";
280 }
281 #endif
282 
283 void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
284 
285 PHILinearize::dest_iterator PHILinearize::dests_begin() {
286  return PHILinearizeDestIterator(PHIInfo.begin());
287 }
288 
289 PHILinearize::dest_iterator PHILinearize::dests_end() {
290  return PHILinearizeDestIterator(PHIInfo.end());
291 }
292 
293 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
294  auto InfoElement = findPHIInfoElement(Reg);
295  return phiInfoElementGetSources(InfoElement).begin();
296 }
297 
298 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
299  auto InfoElement = findPHIInfoElement(Reg);
300  return phiInfoElementGetSources(InfoElement).end();
301 }
302 
303 static unsigned getPHINumInputs(MachineInstr &PHI) {
304  assert(PHI.isPHI());
305  return (PHI.getNumOperands() - 1) / 2;
306 }
307 
309  assert(PHI.isPHI());
310  return PHI.getOperand(Index * 2 + 2).getMBB();
311 }
312 
313 static void setPhiPred(MachineInstr &PHI, unsigned Index,
314  MachineBasicBlock *NewPred) {
315  PHI.getOperand(Index * 2 + 2).setMBB(NewPred);
316 }
317 
318 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) {
319  assert(PHI.isPHI());
320  return PHI.getOperand(Index * 2 + 1).getReg();
321 }
322 
323 static unsigned getPHIDestReg(MachineInstr &PHI) {
324  assert(PHI.isPHI());
325  return PHI.getOperand(0).getReg();
326 }
327 
328 namespace {
329 
330 class RegionMRT;
331 class MBBMRT;
332 
333 class LinearizedRegion {
334 protected:
335  MachineBasicBlock *Entry;
336  // The exit block is part of the region, and is the last
337  // merge block before exiting the region.
338  MachineBasicBlock *Exit;
339  DenseSet<unsigned> LiveOuts;
341  bool HasLoop;
342  LinearizedRegion *Parent;
343  RegionMRT *RMRT;
344 
345  void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
346  MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
347  const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
348 
349  void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
350  MachineInstr *DefInstr,
351  const MachineRegisterInfo *MRI,
352  const TargetRegisterInfo *TRI,
353  PHILinearize &PHIInfo);
354 
355  void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
356  const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
357  RegionMRT *TopRegion);
358 
359  void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
360  const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
361 
362  void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI,
363  const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
364  RegionMRT *TopRegion = nullptr);
365 
366 public:
367  LinearizedRegion();
368  LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
369  const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
370  ~LinearizedRegion() = default;
371 
372  void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
373 
374  RegionMRT *getRegionMRT() { return RMRT; }
375 
376  void setParent(LinearizedRegion *P) { Parent = P; }
377 
378  LinearizedRegion *getParent() { return Parent; }
379 
380  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr);
381 
382  void setBBSelectRegIn(unsigned Reg);
383 
384  unsigned getBBSelectRegIn();
385 
386  void setBBSelectRegOut(unsigned Reg, bool IsLiveOut);
387 
388  unsigned getBBSelectRegOut();
389 
390  void setHasLoop(bool Value);
391 
392  bool getHasLoop();
393 
394  void addLiveOut(unsigned VReg);
395 
396  void removeLiveOut(unsigned Reg);
397 
398  void replaceLiveOut(unsigned OldReg, unsigned NewReg);
399 
400  void replaceRegister(unsigned Register, unsigned NewRegister,
401  MachineRegisterInfo *MRI, bool ReplaceInside,
402  bool ReplaceOutside, bool IncludeLoopPHIs);
403 
404  void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister,
405  bool IncludeLoopPHIs,
406  MachineRegisterInfo *MRI);
407 
408  void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister,
409  bool IncludeLoopPHIs,
410  MachineRegisterInfo *MRI);
411 
412  DenseSet<unsigned> *getLiveOuts();
413 
414  void setEntry(MachineBasicBlock *NewEntry);
415 
416  MachineBasicBlock *getEntry();
417 
418  void setExit(MachineBasicBlock *NewExit);
419 
420  MachineBasicBlock *getExit();
421 
422  void addMBB(MachineBasicBlock *MBB);
423 
424  void addMBBs(LinearizedRegion *InnerRegion);
425 
426  bool contains(MachineBasicBlock *MBB);
427 
428  bool isLiveOut(unsigned Reg);
429 
430  bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
431 
432  void removeFalseRegisterKills(MachineRegisterInfo *MRI);
433 
434  void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
435  const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
436 };
437 
438 class MRT {
439 protected:
440  RegionMRT *Parent;
441  unsigned BBSelectRegIn;
442  unsigned BBSelectRegOut;
443 
444 public:
445  virtual ~MRT() = default;
446 
447  unsigned getBBSelectRegIn() { return BBSelectRegIn; }
448 
449  unsigned getBBSelectRegOut() { return BBSelectRegOut; }
450 
451  void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; }
452 
453  void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; }
454 
455  virtual RegionMRT *getRegionMRT() { return nullptr; }
456 
457  virtual MBBMRT *getMBBMRT() { return nullptr; }
458 
459  bool isRegion() { return getRegionMRT() != nullptr; }
460 
461  bool isMBB() { return getMBBMRT() != nullptr; }
462 
463  bool isRoot() { return Parent == nullptr; }
464 
465  void setParent(RegionMRT *Region) { Parent = Region; }
466 
467  RegionMRT *getParent() { return Parent; }
468 
469  static MachineBasicBlock *
470  initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
472 
473  static RegionMRT *buildMRT(MachineFunction &MF,
474  const MachineRegionInfo *RegionInfo,
475  const SIInstrInfo *TII,
476  MachineRegisterInfo *MRI);
477 
478  virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0;
479 
480  void dumpDepth(int depth) {
481  for (int i = depth; i > 0; --i) {
482  dbgs() << " ";
483  }
484  }
485 };
486 
487 class MBBMRT : public MRT {
488  MachineBasicBlock *MBB;
489 
490 public:
491  MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
492  setParent(nullptr);
493  setBBSelectRegOut(0);
494  setBBSelectRegIn(0);
495  }
496 
497  MBBMRT *getMBBMRT() override { return this; }
498 
499  MachineBasicBlock *getMBB() { return MBB; }
500 
501  void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
502  dumpDepth(depth);
503  dbgs() << "MBB: " << getMBB()->getNumber();
504  dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
505  dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
506  }
507 };
508 
509 class RegionMRT : public MRT {
510 protected:
512  LinearizedRegion *LRegion = nullptr;
513  MachineBasicBlock *Succ = nullptr;
514  SetVector<MRT *> Children;
515 
516 public:
517  RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) {
518  setParent(nullptr);
519  setBBSelectRegOut(0);
520  setBBSelectRegIn(0);
521  }
522 
523  ~RegionMRT() override {
524  if (LRegion) {
525  delete LRegion;
526  }
527 
528  for (auto CI : Children) {
529  delete &(*CI);
530  }
531  }
532 
533  RegionMRT *getRegionMRT() override { return this; }
534 
535  void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
536  LRegion = LinearizeRegion;
537  }
538 
539  LinearizedRegion *getLinearizedRegion() { return LRegion; }
540 
541  MachineRegion *getMachineRegion() { return Region; }
542 
543  unsigned getInnerOutputRegister() {
544  return (*(Children.begin()))->getBBSelectRegOut();
545  }
546 
547  void addChild(MRT *Tree) { Children.insert(Tree); }
548 
549  SetVector<MRT *> *getChildren() { return &Children; }
550 
551  void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
552  dumpDepth(depth);
553  dbgs() << "Region: " << (void *)Region;
554  dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
555  dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
556 
557  dumpDepth(depth);
558  if (getSucc())
559  dbgs() << "Succ: " << getSucc()->getNumber() << "\n";
560  else
561  dbgs() << "Succ: none \n";
562  for (auto MRTI : Children) {
563  MRTI->dump(TRI, depth + 1);
564  }
565  }
566 
567  MRT *getEntryTree() { return Children.back(); }
568 
569  MRT *getExitTree() { return Children.front(); }
570 
571  MachineBasicBlock *getEntry() {
572  MRT *Tree = Children.back();
573  return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry()
574  : Tree->getMBBMRT()->getMBB();
575  }
576 
577  MachineBasicBlock *getExit() {
578  MRT *Tree = Children.front();
579  return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit()
580  : Tree->getMBBMRT()->getMBB();
581  }
582 
583  void setSucc(MachineBasicBlock *MBB) { Succ = MBB; }
584 
585  MachineBasicBlock *getSucc() { return Succ; }
586 
587  bool contains(MachineBasicBlock *MBB) {
588  for (auto CI : Children) {
589  if (CI->isMBB()) {
590  if (MBB == CI->getMBBMRT()->getMBB()) {
591  return true;
592  }
593  } else {
594  if (CI->getRegionMRT()->contains(MBB)) {
595  return true;
596  } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr &&
597  CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) {
598  return true;
599  }
600  }
601  }
602  return false;
603  }
604 
605  void replaceLiveOutReg(unsigned Register, unsigned NewRegister) {
606  LinearizedRegion *LRegion = getLinearizedRegion();
607  LRegion->replaceLiveOut(Register, NewRegister);
608  for (auto &CI : Children) {
609  if (CI->isRegion()) {
610  CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
611  }
612  }
613  }
614 };
615 
616 } // end anonymous namespace
617 
618 static unsigned createBBSelectReg(const SIInstrInfo *TII,
619  MachineRegisterInfo *MRI) {
621 }
622 
624 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
626  for (auto &MFI : MF) {
627  MachineBasicBlock *ExitMBB = &MFI;
628  if (ExitMBB->succ_size() == 0) {
629  return ExitMBB;
630  }
631  }
632  llvm_unreachable("CFG has no exit block");
633  return nullptr;
634 }
635 
636 RegionMRT *MRT::buildMRT(MachineFunction &MF,
637  const MachineRegionInfo *RegionInfo,
638  const SIInstrInfo *TII, MachineRegisterInfo *MRI) {
639  SmallPtrSet<MachineRegion *, 4> PlacedRegions;
641  MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion();
642  RegionMRT *Result = new RegionMRT(TopLevelRegion);
643  RegionMap[TopLevelRegion] = Result;
644 
645  // Insert the exit block first, we need it to be the merge node
646  // for the top level region.
647  MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap);
648 
649  unsigned BBSelectRegIn = createBBSelectReg(TII, MRI);
650  MBBMRT *ExitMRT = new MBBMRT(Exit);
651  RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT);
652  ExitMRT->setBBSelectRegIn(BBSelectRegIn);
653 
654  for (auto MBBI : post_order(&(MF.front()))) {
655  MachineBasicBlock *MBB = &(*MBBI);
656 
657  // Skip Exit since we already added it
658  if (MBB == Exit) {
659  continue;
660  }
661 
662  LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n");
663  MBBMRT *NewMBB = new MBBMRT(MBB);
664  MachineRegion *Region = RegionInfo->getRegionFor(MBB);
665 
666  // Ensure we have the MRT region
667  if (RegionMap.count(Region) == 0) {
668  RegionMRT *NewMRTRegion = new RegionMRT(Region);
669  RegionMap[Region] = NewMRTRegion;
670 
671  // Ensure all parents are in the RegionMap
672  MachineRegion *Parent = Region->getParent();
673  while (RegionMap.count(Parent) == 0) {
674  RegionMRT *NewMRTParent = new RegionMRT(Parent);
675  NewMRTParent->addChild(NewMRTRegion);
676  NewMRTRegion->setParent(NewMRTParent);
677  RegionMap[Parent] = NewMRTParent;
678  NewMRTRegion = NewMRTParent;
679  Parent = Parent->getParent();
680  }
681  RegionMap[Parent]->addChild(NewMRTRegion);
682  NewMRTRegion->setParent(RegionMap[Parent]);
683  }
684 
685  // Add MBB to Region MRT
686  RegionMap[Region]->addChild(NewMBB);
687  NewMBB->setParent(RegionMap[Region]);
688  RegionMap[Region]->setSucc(Region->getExit());
689  }
690  return Result;
691 }
692 
693 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
694  MachineInstr *DefInstr,
695  const MachineRegisterInfo *MRI,
696  const TargetRegisterInfo *TRI,
697  PHILinearize &PHIInfo) {
698  if (TRI->isVirtualRegister(Reg)) {
699  LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
700  << "\n");
701  // If this is a source register to a PHI we are chaining, it
702  // must be live out.
703  if (PHIInfo.isSource(Reg)) {
704  LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n");
705  addLiveOut(Reg);
706  } else {
707  // If this is live out of the MBB
708  for (auto &UI : MRI->use_operands(Reg)) {
709  if (UI.getParent()->getParent() != MBB) {
710  LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB)
711  << "): " << printReg(Reg, TRI) << "\n");
712  addLiveOut(Reg);
713  } else {
714  // If the use is in the same MBB we have to make sure
715  // it is after the def, otherwise it is live out in a loop
716  MachineInstr *UseInstr = UI.getParent();
718  MII = UseInstr->getIterator(),
719  MIE = UseInstr->getParent()->instr_end();
720  MII != MIE; ++MII) {
721  if ((&(*MII)) == DefInstr) {
722  LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI)
723  << "\n");
724  addLiveOut(Reg);
725  }
726  }
727  }
728  }
729  }
730  }
731 }
732 
733 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
734  MachineInstr *DefInstr,
735  const MachineRegisterInfo *MRI,
736  const TargetRegisterInfo *TRI,
737  PHILinearize &PHIInfo) {
738  if (TRI->isVirtualRegister(Reg)) {
739  LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
740  << "\n");
741  for (auto &UI : MRI->use_operands(Reg)) {
742  if (!Region->contains(UI.getParent()->getParent())) {
743  LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
744  << "): " << printReg(Reg, TRI) << "\n");
745  addLiveOut(Reg);
746  }
747  }
748  }
749 }
750 
751 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
752  const MachineRegisterInfo *MRI,
753  const TargetRegisterInfo *TRI,
754  PHILinearize &PHIInfo) {
755  LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB)
756  << ")-\n");
757  for (auto &II : *MBB) {
758  for (auto &RI : II.defs()) {
759  storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
760  }
761  for (auto &IRI : II.implicit_operands()) {
762  if (IRI.isDef()) {
763  storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
764  }
765  }
766  }
767 
768  // If we have a successor with a PHI, source coming from this MBB we have to
769  // add the register as live out
770  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
771  E = MBB->succ_end();
772  SI != E; ++SI) {
773  for (auto &II : *(*SI)) {
774  if (II.isPHI()) {
775  MachineInstr &PHI = II;
776  int numPreds = getPHINumInputs(PHI);
777  for (int i = 0; i < numPreds; ++i) {
778  if (getPHIPred(PHI, i) == MBB) {
779  unsigned PHIReg = getPHISourceReg(PHI, i);
780  LLVM_DEBUG(dbgs()
781  << "Add LiveOut (PhiSource " << printMBBReference(*MBB)
782  << " -> " << printMBBReference(*(*SI))
783  << "): " << printReg(PHIReg, TRI) << "\n");
784  addLiveOut(PHIReg);
785  }
786  }
787  }
788  }
789  }
790 
791  LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n");
792 }
793 
794 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
795  const MachineRegisterInfo *MRI,
796  const TargetRegisterInfo *TRI,
797  PHILinearize &PHIInfo,
798  RegionMRT *TopRegion) {
799  for (auto &II : *MBB) {
800  for (auto &RI : II.defs()) {
801  storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
802  PHIInfo);
803  }
804  for (auto &IRI : II.implicit_operands()) {
805  if (IRI.isDef()) {
806  storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
807  TRI, PHIInfo);
808  }
809  }
810  }
811 }
812 
813 void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
814  const MachineRegisterInfo *MRI,
815  const TargetRegisterInfo *TRI,
816  PHILinearize &PHIInfo,
817  RegionMRT *CurrentTopRegion) {
818  MachineBasicBlock *Exit = Region->getSucc();
819 
820  RegionMRT *TopRegion =
821  CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
822 
823  // Check if exit is end of function, if so, no live outs.
824  if (Exit == nullptr)
825  return;
826 
827  auto Children = Region->getChildren();
828  for (auto CI : *Children) {
829  if (CI->isMBB()) {
830  auto MBB = CI->getMBBMRT()->getMBB();
831  storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
832  } else {
833  LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
834  // We should be limited to only store registers that are live out from the
835  // lineaized region
836  for (auto MBBI : SubRegion->MBBs) {
837  storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
838  }
839  }
840  }
841 
842  if (CurrentTopRegion == nullptr) {
843  auto Succ = Region->getSucc();
844  for (auto &II : *Succ) {
845  if (II.isPHI()) {
846  MachineInstr &PHI = II;
847  int numPreds = getPHINumInputs(PHI);
848  for (int i = 0; i < numPreds; ++i) {
849  if (Region->contains(getPHIPred(PHI, i))) {
850  unsigned PHIReg = getPHISourceReg(PHI, i);
851  LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
852  << "): " << printReg(PHIReg, TRI) << "\n");
853  addLiveOut(PHIReg);
854  }
855  }
856  }
857  }
858  }
859 }
860 
861 #ifndef NDEBUG
863  OS << "Linearized Region {";
864  bool IsFirst = true;
865  for (const auto &MBB : MBBs) {
866  if (IsFirst) {
867  IsFirst = false;
868  } else {
869  OS << " ,";
870  }
871  OS << MBB->getNumber();
872  }
873  OS << "} (" << Entry->getNumber() << ", "
874  << (Exit == nullptr ? -1 : Exit->getNumber())
875  << "): In:" << printReg(getBBSelectRegIn(), TRI)
876  << " Out:" << printReg(getBBSelectRegOut(), TRI) << " {";
877  for (auto &LI : LiveOuts) {
878  OS << printReg(LI, TRI) << " ";
879  }
880  OS << "} \n";
881 }
882 #endif
883 
884 unsigned LinearizedRegion::getBBSelectRegIn() {
885  return getRegionMRT()->getBBSelectRegIn();
886 }
887 
888 unsigned LinearizedRegion::getBBSelectRegOut() {
889  return getRegionMRT()->getBBSelectRegOut();
890 }
891 
892 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
893 
894 bool LinearizedRegion::getHasLoop() { return HasLoop; }
895 
896 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
897 
898 void LinearizedRegion::removeLiveOut(unsigned Reg) {
899  if (isLiveOut(Reg))
900  LiveOuts.erase(Reg);
901 }
902 
903 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
904  if (isLiveOut(OldReg)) {
905  removeLiveOut(OldReg);
906  addLiveOut(NewReg);
907  }
908 }
909 
910 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
911  MachineRegisterInfo *MRI,
912  bool ReplaceInside, bool ReplaceOutside,
913  bool IncludeLoopPHI) {
914  assert(Register != NewRegister && "Cannot replace a reg with itself");
915 
916  LLVM_DEBUG(
917  dbgs() << "Pepareing to replace register (region): "
918  << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
919  << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
920 
921  // If we are replacing outside, we also need to update the LiveOuts
922  if (ReplaceOutside &&
923  (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
924  LinearizedRegion *Current = this;
925  while (Current != nullptr && Current->getEntry() != nullptr) {
926  LLVM_DEBUG(dbgs() << "Region before register replace\n");
927  LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
928  Current->replaceLiveOut(Register, NewRegister);
929  LLVM_DEBUG(dbgs() << "Region after register replace\n");
930  LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
931  Current = Current->getParent();
932  }
933  }
934 
935  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
936  E = MRI->reg_end();
937  I != E;) {
938  MachineOperand &O = *I;
939  ++I;
940 
941  // We don't rewrite defs.
942  if (O.isDef())
943  continue;
944 
945  bool IsInside = contains(O.getParent()->getParent());
946  bool IsLoopPHI = IsInside && (O.getParent()->isPHI() &&
947  O.getParent()->getParent() == getEntry());
948  bool ShouldReplace = (IsInside && ReplaceInside) ||
949  (!IsInside && ReplaceOutside) ||
950  (IncludeLoopPHI && IsLoopPHI);
951  if (ShouldReplace) {
952 
953  if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
954  LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
955  << printReg(NewRegister, MRI->getTargetRegisterInfo())
956  << "\n");
957  llvm_unreachable("Cannot substitute physical registers");
958  } else {
959  LLVM_DEBUG(dbgs() << "Replacing register (region): "
960  << printReg(Register, MRI->getTargetRegisterInfo())
961  << " with "
962  << printReg(NewRegister, MRI->getTargetRegisterInfo())
963  << "\n");
964  O.setReg(NewRegister);
965  }
966  }
967  }
968 }
969 
970 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register,
971  unsigned NewRegister,
972  bool IncludeLoopPHIs,
973  MachineRegisterInfo *MRI) {
974  replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs);
975 }
976 
977 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register,
978  unsigned NewRegister,
979  bool IncludeLoopPHIs,
980  MachineRegisterInfo *MRI) {
981  replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs);
982 }
983 
984 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; }
985 
986 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) {
987  Entry = NewEntry;
988 }
989 
990 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; }
991 
992 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; }
993 
994 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
995 
996 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
997 
998 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
999  for (const auto &MBB : InnerRegion->MBBs) {
1000  addMBB(MBB);
1001  }
1002 }
1003 
1005  return MBBs.count(MBB) == 1;
1006 }
1007 
1008 bool LinearizedRegion::isLiveOut(unsigned Reg) {
1009  return LiveOuts.count(Reg) == 1;
1010 }
1011 
1012 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
1013  return MRI->def_begin(Reg) == MRI->def_end();
1014 }
1015 
1016 // After the code has been structurized, what was flagged as kills
1017 // before are no longer register kills.
1018 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
1019  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1020  for (auto MBBI : MBBs) {
1021  MachineBasicBlock *MBB = MBBI;
1022  for (auto &II : *MBB) {
1023  for (auto &RI : II.uses()) {
1024  if (RI.isReg()) {
1025  unsigned Reg = RI.getReg();
1026  if (TRI->isVirtualRegister(Reg)) {
1027  if (hasNoDef(Reg, MRI))
1028  continue;
1029  if (!MRI->hasOneDef(Reg)) {
1030  LLVM_DEBUG(this->getEntry()->getParent()->dump());
1031  LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n");
1032  }
1033 
1034  if (MRI->def_begin(Reg) == MRI->def_end()) {
1035  LLVM_DEBUG(dbgs() << "Register "
1036  << printReg(Reg, MRI->getTargetRegisterInfo())
1037  << " has NO defs\n");
1038  } else if (!MRI->hasOneDef(Reg)) {
1039  LLVM_DEBUG(dbgs() << "Register "
1040  << printReg(Reg, MRI->getTargetRegisterInfo())
1041  << " has multiple defs\n");
1042  }
1043 
1044  assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1045  MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1046  MachineOperand *UseOperand = &(RI);
1047  bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
1048  if (UseIsOutsideDefMBB && UseOperand->isKill()) {
1049  LLVM_DEBUG(dbgs() << "Removing kill flag on register: "
1050  << printReg(Reg, TRI) << "\n");
1051  UseOperand->setIsKill(false);
1052  }
1053  }
1054  }
1055  }
1056  }
1057  }
1058 }
1059 
1060 void LinearizedRegion::initLiveOut(RegionMRT *Region,
1061  const MachineRegisterInfo *MRI,
1062  const TargetRegisterInfo *TRI,
1063  PHILinearize &PHIInfo) {
1064  storeLiveOuts(Region, MRI, TRI, PHIInfo);
1065 }
1066 
1067 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB,
1068  const MachineRegisterInfo *MRI,
1069  const TargetRegisterInfo *TRI,
1070  PHILinearize &PHIInfo) {
1071  setEntry(MBB);
1072  setExit(MBB);
1073  storeLiveOuts(MBB, MRI, TRI, PHIInfo);
1074  MBBs.insert(MBB);
1075  Parent = nullptr;
1076 }
1077 
1078 LinearizedRegion::LinearizedRegion() {
1079  setEntry(nullptr);
1080  setExit(nullptr);
1081  Parent = nullptr;
1082 }
1083 
1084 namespace {
1085 
1086 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
1087 private:
1088  const MachineRegionInfo *Regions;
1089  const SIInstrInfo *TII;
1090  const TargetRegisterInfo *TRI;
1092  unsigned BBSelectRegister;
1093  PHILinearize PHIInfo;
1095  RegionMRT *RMRT;
1096 
1097  void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
1098  SmallVector<unsigned, 2> &RegionIndices);
1099  void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1100  SmallVector<unsigned, 2> &RegionIndices);
1101  void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1102  SmallVector<unsigned, 2> &PHINonRegionIndices);
1103 
1104  void storePHILinearizationInfoDest(
1105  unsigned LDestReg, MachineInstr &PHI,
1106  SmallVector<unsigned, 2> *RegionIndices = nullptr);
1107 
1108  unsigned storePHILinearizationInfo(MachineInstr &PHI,
1109  SmallVector<unsigned, 2> *RegionIndices);
1110 
1111  void extractKilledPHIs(MachineBasicBlock *MBB);
1112 
1113  bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
1114  unsigned *ReplaceReg);
1115 
1116  bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1117  MachineBasicBlock *SourceMBB,
1118  SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg);
1119 
1120  void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1121  MachineBasicBlock *LastMerge,
1122  SmallVector<unsigned, 2> &PHIRegionIndices);
1123  void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1124  MachineBasicBlock *IfMBB,
1125  SmallVector<unsigned, 2> &PHIRegionIndices);
1126  void replaceLiveOutRegs(MachineInstr &PHI,
1127  SmallVector<unsigned, 2> &PHIRegionIndices,
1128  unsigned CombinedSourceReg,
1129  LinearizedRegion *LRegion);
1130  void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge,
1131  MachineInstr &PHI, LinearizedRegion *LRegion);
1132 
1133  void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge,
1134  LinearizedRegion *LRegion);
1135  void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB,
1136  MachineInstr &PHI);
1137  void rewriteRegionEntryPHIs(LinearizedRegion *Region,
1138  MachineBasicBlock *IfMBB);
1139 
1140  bool regionIsSimpleIf(RegionMRT *Region);
1141 
1142  void transformSimpleIfRegion(RegionMRT *Region);
1143 
1144  void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
1145 
1146  void insertUnconditionalBranch(MachineBasicBlock *MBB,
1147  MachineBasicBlock *Dest,
1148  const DebugLoc &DL = DebugLoc());
1149 
1150  MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
1151 
1152  void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1153  MachineBasicBlock *MergeBB, unsigned DestRegister,
1154  unsigned IfSourceRegister, unsigned CodeSourceRegister,
1155  bool IsUndefIfSource = false);
1156 
1157  MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB,
1158  MachineBasicBlock *CodeBBStart,
1159  MachineBasicBlock *CodeBBEnd,
1160  MachineBasicBlock *SelectBB, unsigned IfReg,
1161  bool InheritPreds);
1162 
1163  void prunePHIInfo(MachineBasicBlock *MBB);
1164  void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg);
1165 
1166  void createEntryPHIs(LinearizedRegion *CurrentRegion);
1167  void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
1168 
1169  void replaceRegisterWith(unsigned Register, unsigned NewRegister);
1170 
1171  MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
1172  MachineBasicBlock *CodeBB,
1173  LinearizedRegion *LRegion,
1174  unsigned BBSelectRegIn,
1175  unsigned BBSelectRegOut);
1176 
1178  createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion,
1179  LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
1180  unsigned BBSelectRegIn, unsigned BBSelectRegOut);
1181  void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond);
1182 
1183  void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1184  MachineBasicBlock *MergeBB,
1185  unsigned BBSelectReg);
1186 
1187  MachineInstr *getDefInstr(unsigned Reg);
1188  void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1189  MachineBasicBlock *MergeBB,
1190  LinearizedRegion *InnerRegion, unsigned DestReg,
1191  unsigned SourceReg);
1192  bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion,
1193  unsigned Register);
1194  void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1195  MachineBasicBlock *MergeBB,
1196  LinearizedRegion *InnerRegion,
1197  LinearizedRegion *LRegion);
1198 
1199  void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry,
1200  MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion);
1201  void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc,
1202  LinearizedRegion *LRegion);
1203 
1204  MachineBasicBlock *splitExit(LinearizedRegion *LRegion);
1205 
1206  MachineBasicBlock *splitEntry(LinearizedRegion *LRegion);
1207 
1208  LinearizedRegion *initLinearizedRegion(RegionMRT *Region);
1209 
1210  bool structurizeComplexRegion(RegionMRT *Region);
1211 
1212  bool structurizeRegion(RegionMRT *Region);
1213 
1214  bool structurizeRegions(RegionMRT *Region, bool isTopRegion);
1215 
1216 public:
1217  static char ID;
1218 
1219  AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
1221  }
1222 
1223  void getAnalysisUsage(AnalysisUsage &AU) const override {
1226  }
1227 
1228  void initFallthroughMap(MachineFunction &MF);
1229 
1230  void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
1231 
1232  unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg,
1233  MachineRegisterInfo *MRI,
1234  const SIInstrInfo *TII);
1235 
1236  void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
1237 
1238  RegionMRT *getRegionMRT() { return RMRT; }
1239 
1240  bool runOnMachineFunction(MachineFunction &MF) override;
1241 };
1242 
1243 } // end anonymous namespace
1244 
1246 
1247 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
1248  MachineBasicBlock *Entry = Region->getEntry();
1249  MachineBasicBlock *Succ = Region->getSucc();
1250  bool FoundBypass = false;
1251  bool FoundIf = false;
1252 
1253  if (Entry->succ_size() != 2) {
1254  return false;
1255  }
1256 
1258  E = Entry->succ_end();
1259  SI != E; ++SI) {
1260  MachineBasicBlock *Current = *SI;
1261 
1262  if (Current == Succ) {
1263  FoundBypass = true;
1264  } else if ((Current->succ_size() == 1) &&
1265  *(Current->succ_begin()) == Succ) {
1266  FoundIf = true;
1267  }
1268  }
1269 
1270  return FoundIf && FoundBypass;
1271 }
1272 
1273 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
1274  MachineBasicBlock *Entry = Region->getEntry();
1275  MachineBasicBlock *Exit = Region->getExit();
1276  TII->convertNonUniformIfRegion(Entry, Exit);
1277 }
1278 
1280  if (MBB->succ_size() == 1) {
1281  auto *Succ = *(MBB->succ_begin());
1282  for (auto &TI : MBB->terminators()) {
1283  for (auto &UI : TI.uses()) {
1284  if (UI.isMBB() && UI.getMBB() != Succ) {
1285  UI.setMBB(Succ);
1286  }
1287  }
1288  }
1289  }
1290 }
1291 
1292 static void fixRegionTerminator(RegionMRT *Region) {
1293  MachineBasicBlock *InternalSucc = nullptr;
1294  MachineBasicBlock *ExternalSucc = nullptr;
1295  LinearizedRegion *LRegion = Region->getLinearizedRegion();
1296  auto Exit = LRegion->getExit();
1297 
1299  for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
1300  SE = Exit->succ_end();
1301  SI != SE; ++SI) {
1302  MachineBasicBlock *Succ = *SI;
1303  if (LRegion->contains(Succ)) {
1304  // Do not allow re-assign
1305  assert(InternalSucc == nullptr);
1306  InternalSucc = Succ;
1307  } else {
1308  // Do not allow re-assign
1309  assert(ExternalSucc == nullptr);
1310  ExternalSucc = Succ;
1311  }
1312  }
1313 
1314  for (auto &TI : Exit->terminators()) {
1315  for (auto &UI : TI.uses()) {
1316  if (UI.isMBB()) {
1317  auto Target = UI.getMBB();
1318  if (Target != InternalSucc && Target != ExternalSucc) {
1319  UI.setMBB(ExternalSucc);
1320  }
1321  }
1322  }
1323  }
1324 }
1325 
1326 // If a region region is just a sequence of regions (and the exit
1327 // block in the case of the top level region), we can simply skip
1328 // linearizing it, because it is already linear
1329 bool regionIsSequence(RegionMRT *Region) {
1330  auto Children = Region->getChildren();
1331  for (auto CI : *Children) {
1332  if (!CI->isRegion()) {
1333  if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
1334  return false;
1335  }
1336  }
1337  }
1338  return true;
1339 }
1340 
1341 void fixupRegionExits(RegionMRT *Region) {
1342  auto Children = Region->getChildren();
1343  for (auto CI : *Children) {
1344  if (!CI->isRegion()) {
1345  fixMBBTerminator(CI->getMBBMRT()->getMBB());
1346  } else {
1347  fixRegionTerminator(CI->getRegionMRT());
1348  }
1349  }
1350 }
1351 
1352 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1353  RegionMRT *Region, MachineInstr &PHI,
1354  SmallVector<unsigned, 2> &PHIRegionIndices) {
1355  unsigned NumInputs = getPHINumInputs(PHI);
1356  for (unsigned i = 0; i < NumInputs; ++i) {
1357  MachineBasicBlock *Pred = getPHIPred(PHI, i);
1358  if (Region->contains(Pred)) {
1359  PHIRegionIndices.push_back(i);
1360  }
1361  }
1362 }
1363 
1364 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1365  LinearizedRegion *Region, MachineInstr &PHI,
1366  SmallVector<unsigned, 2> &PHIRegionIndices) {
1367  unsigned NumInputs = getPHINumInputs(PHI);
1368  for (unsigned i = 0; i < NumInputs; ++i) {
1369  MachineBasicBlock *Pred = getPHIPred(PHI, i);
1370  if (Region->contains(Pred)) {
1371  PHIRegionIndices.push_back(i);
1372  }
1373  }
1374 }
1375 
1376 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
1377  LinearizedRegion *Region, MachineInstr &PHI,
1378  SmallVector<unsigned, 2> &PHINonRegionIndices) {
1379  unsigned NumInputs = getPHINumInputs(PHI);
1380  for (unsigned i = 0; i < NumInputs; ++i) {
1381  MachineBasicBlock *Pred = getPHIPred(PHI, i);
1382  if (!Region->contains(Pred)) {
1383  PHINonRegionIndices.push_back(i);
1384  }
1385  }
1386 }
1387 
1388 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
1389  unsigned LDestReg, MachineInstr &PHI,
1390  SmallVector<unsigned, 2> *RegionIndices) {
1391  if (RegionIndices) {
1392  for (auto i : *RegionIndices) {
1393  PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1394  }
1395  } else {
1396  unsigned NumInputs = getPHINumInputs(PHI);
1397  for (unsigned i = 0; i < NumInputs; ++i) {
1398  PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1399  }
1400  }
1401 }
1402 
1403 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
1404  MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
1405  unsigned DestReg = getPHIDestReg(PHI);
1406  unsigned LinearizeDestReg =
1407  MRI->createVirtualRegister(MRI->getRegClass(DestReg));
1408  PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
1409  storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
1410  return LinearizeDestReg;
1411 }
1412 
1413 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
1414  // We need to create a new chain for the killed phi, but there is no
1415  // need to do the renaming outside or inside the block.
1418  E = MBB->instr_end();
1419  I != E; ++I) {
1420  MachineInstr &Instr = *I;
1421  if (Instr.isPHI()) {
1422  unsigned PHIDestReg = getPHIDestReg(Instr);
1423  LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
1424  LLVM_DEBUG(Instr.dump());
1425  PHIs.insert(&Instr);
1426  PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
1427  storePHILinearizationInfoDest(PHIDestReg, Instr);
1428  }
1429  }
1430 
1431  for (auto PI : PHIs) {
1432  PI->eraseFromParent();
1433  }
1434 }
1435 
1436 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
1437  unsigned Index) {
1438  for (auto i : PHIRegionIndices) {
1439  if (i == Index)
1440  return true;
1441  }
1442  return false;
1443 }
1444 
1445 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1446  SmallVector<unsigned, 2> &PHIIndices,
1447  unsigned *ReplaceReg) {
1448  return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
1449 }
1450 
1451 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1452  unsigned CombinedSourceReg,
1453  MachineBasicBlock *SourceMBB,
1454  SmallVector<unsigned, 2> &PHIIndices,
1455  unsigned *ReplaceReg) {
1456  LLVM_DEBUG(dbgs() << "Shrink PHI: ");
1457  LLVM_DEBUG(PHI.dump());
1458  LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI)
1459  << " = PHI(");
1460 
1461  bool Replaced = false;
1462  unsigned NumInputs = getPHINumInputs(PHI);
1463  int SingleExternalEntryIndex = -1;
1464  for (unsigned i = 0; i < NumInputs; ++i) {
1465  if (!isPHIRegionIndex(PHIIndices, i)) {
1466  if (SingleExternalEntryIndex == -1) {
1467  // Single entry
1468  SingleExternalEntryIndex = i;
1469  } else {
1470  // Multiple entries
1471  SingleExternalEntryIndex = -2;
1472  }
1473  }
1474  }
1475 
1476  if (SingleExternalEntryIndex > -1) {
1477  *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
1478  // We should not rewrite the code, we should only pick up the single value
1479  // that represents the shrunk PHI.
1480  Replaced = true;
1481  } else {
1482  MachineBasicBlock *MBB = PHI.getParent();
1483  MachineInstrBuilder MIB =
1484  BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1485  getPHIDestReg(PHI));
1486  if (SourceMBB) {
1487  MIB.addReg(CombinedSourceReg);
1488  MIB.addMBB(SourceMBB);
1489  LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1490  << printMBBReference(*SourceMBB));
1491  }
1492 
1493  for (unsigned i = 0; i < NumInputs; ++i) {
1494  if (isPHIRegionIndex(PHIIndices, i)) {
1495  continue;
1496  }
1497  unsigned SourceReg = getPHISourceReg(PHI, i);
1498  MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1499  MIB.addReg(SourceReg);
1500  MIB.addMBB(SourcePred);
1501  LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1502  << printMBBReference(*SourcePred));
1503  }
1504  LLVM_DEBUG(dbgs() << ")\n");
1505  }
1506  PHI.eraseFromParent();
1507  return Replaced;
1508 }
1509 
1510 void AMDGPUMachineCFGStructurizer::replacePHI(
1511  MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
1512  SmallVector<unsigned, 2> &PHIRegionIndices) {
1513  LLVM_DEBUG(dbgs() << "Replace PHI: ");
1514  LLVM_DEBUG(PHI.dump());
1515  LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI)
1516  << " = PHI(");
1517 
1518  bool HasExternalEdge = false;
1519  unsigned NumInputs = getPHINumInputs(PHI);
1520  for (unsigned i = 0; i < NumInputs; ++i) {
1521  if (!isPHIRegionIndex(PHIRegionIndices, i)) {
1522  HasExternalEdge = true;
1523  }
1524  }
1525 
1526  if (HasExternalEdge) {
1527  MachineBasicBlock *MBB = PHI.getParent();
1528  MachineInstrBuilder MIB =
1529  BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1530  getPHIDestReg(PHI));
1531  MIB.addReg(CombinedSourceReg);
1532  MIB.addMBB(LastMerge);
1533  LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1534  << printMBBReference(*LastMerge));
1535  for (unsigned i = 0; i < NumInputs; ++i) {
1536  if (isPHIRegionIndex(PHIRegionIndices, i)) {
1537  continue;
1538  }
1539  unsigned SourceReg = getPHISourceReg(PHI, i);
1540  MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1541  MIB.addReg(SourceReg);
1542  MIB.addMBB(SourcePred);
1543  LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1544  << printMBBReference(*SourcePred));
1545  }
1546  LLVM_DEBUG(dbgs() << ")\n");
1547  } else {
1548  replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
1549  }
1550  PHI.eraseFromParent();
1551 }
1552 
1553 void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
1554  MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
1555  SmallVector<unsigned, 2> &PHIRegionIndices) {
1556  LLVM_DEBUG(dbgs() << "Replace entry PHI: ");
1557  LLVM_DEBUG(PHI.dump());
1558  LLVM_DEBUG(dbgs() << " with ");
1559 
1560  unsigned NumInputs = getPHINumInputs(PHI);
1561  unsigned NumNonRegionInputs = NumInputs;
1562  for (unsigned i = 0; i < NumInputs; ++i) {
1563  if (isPHIRegionIndex(PHIRegionIndices, i)) {
1564  NumNonRegionInputs--;
1565  }
1566  }
1567 
1568  if (NumNonRegionInputs == 0) {
1569  auto DestReg = getPHIDestReg(PHI);
1570  replaceRegisterWith(DestReg, CombinedSourceReg);
1571  LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI)
1572  << "\n");
1573  PHI.eraseFromParent();
1574  } else {
1575  LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
1576  MachineBasicBlock *MBB = PHI.getParent();
1577  MachineInstrBuilder MIB =
1578  BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1579  getPHIDestReg(PHI));
1580  MIB.addReg(CombinedSourceReg);
1581  MIB.addMBB(IfMBB);
1582  LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1583  << printMBBReference(*IfMBB));
1584  unsigned NumInputs = getPHINumInputs(PHI);
1585  for (unsigned i = 0; i < NumInputs; ++i) {
1586  if (isPHIRegionIndex(PHIRegionIndices, i)) {
1587  continue;
1588  }
1589  unsigned SourceReg = getPHISourceReg(PHI, i);
1590  MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1591  MIB.addReg(SourceReg);
1592  MIB.addMBB(SourcePred);
1593  LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1594  << printMBBReference(*SourcePred));
1595  }
1596  LLVM_DEBUG(dbgs() << ")\n");
1597  PHI.eraseFromParent();
1598  }
1599 }
1600 
1601 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
1602  MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices,
1603  unsigned CombinedSourceReg, LinearizedRegion *LRegion) {
1604  bool WasLiveOut = false;
1605  for (auto PII : PHIRegionIndices) {
1606  unsigned Reg = getPHISourceReg(PHI, PII);
1607  if (LRegion->isLiveOut(Reg)) {
1608  bool IsDead = true;
1609 
1610  // Check if register is live out of the basic block
1611  MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
1612  for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
1613  if ((*UI).getParent()->getParent() != DefMBB) {
1614  IsDead = false;
1615  }
1616  }
1617 
1618  LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
1619  << (IsDead ? "dead" : "alive")
1620  << " after PHI replace\n");
1621  if (IsDead) {
1622  LRegion->removeLiveOut(Reg);
1623  }
1624  WasLiveOut = true;
1625  }
1626  }
1627 
1628  if (WasLiveOut)
1629  LRegion->addLiveOut(CombinedSourceReg);
1630 }
1631 
1632 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region,
1633  MachineBasicBlock *LastMerge,
1634  MachineInstr &PHI,
1635  LinearizedRegion *LRegion) {
1636  SmallVector<unsigned, 2> PHIRegionIndices;
1637  getPHIRegionIndices(Region, PHI, PHIRegionIndices);
1638  unsigned LinearizedSourceReg =
1639  storePHILinearizationInfo(PHI, &PHIRegionIndices);
1640 
1641  replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices);
1642  replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion);
1643 }
1644 
1645 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region,
1646  MachineBasicBlock *IfMBB,
1647  MachineInstr &PHI) {
1648  SmallVector<unsigned, 2> PHINonRegionIndices;
1649  getPHINonRegionIndices(Region, PHI, PHINonRegionIndices);
1650  unsigned LinearizedSourceReg =
1651  storePHILinearizationInfo(PHI, &PHINonRegionIndices);
1652  replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices);
1653 }
1654 
1657  for (auto &BBI : *MBB) {
1658  if (BBI.isPHI()) {
1659  PHIs.push_back(&BBI);
1660  }
1661  }
1662 }
1663 
1664 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region,
1665  MachineBasicBlock *LastMerge,
1666  LinearizedRegion *LRegion) {
1668  auto Exit = Region->getSucc();
1669  if (Exit == nullptr)
1670  return;
1671 
1672  collectPHIs(Exit, PHIs);
1673 
1674  for (auto PHII : PHIs) {
1675  rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion);
1676  }
1677 }
1678 
1679 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region,
1680  MachineBasicBlock *IfMBB) {
1682  auto Entry = Region->getEntry();
1683 
1684  collectPHIs(Entry, PHIs);
1685 
1686  for (auto PHII : PHIs) {
1687  rewriteRegionEntryPHI(Region, IfMBB, *PHII);
1688  }
1689 }
1690 
1691 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
1692  MachineBasicBlock *Dest,
1693  const DebugLoc &DL) {
1694  LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
1695  << " -> " << Dest->getNumber() << "\n");
1697  bool HasTerminator = Terminator != MBB->instr_end();
1698  if (HasTerminator) {
1699  TII->ReplaceTailWithBranchTo(Terminator, Dest);
1700  }
1702  TII->insertUnconditionalBranch(*MBB, Dest, DL);
1703  }
1704 }
1705 
1707  MachineBasicBlock *result = nullptr;
1708  for (auto &MFI : MF) {
1709  if (MFI.succ_size() == 0) {
1710  if (result == nullptr) {
1711  result = &MFI;
1712  } else {
1713  return nullptr;
1714  }
1715  }
1716  }
1717 
1718  return result;
1719 }
1720 
1722  return getSingleExitNode(MF) != nullptr;
1723 }
1724 
1726 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
1727  auto Exit = Region->getSucc();
1728 
1729  // If the exit is the end of the function, we just use the existing
1730  MachineFunction *MF = Region->getEntry()->getParent();
1731  if (Exit == nullptr && hasOneExitNode(*MF)) {
1732  return &(*(--(Region->getEntry()->getParent()->end())));
1733  }
1734 
1735  MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
1736  if (Exit == nullptr) {
1737  MachineFunction::iterator ExitIter = MF->end();
1738  MF->insert(ExitIter, LastMerge);
1739  } else {
1740  MachineFunction::iterator ExitIter = Exit->getIterator();
1741  MF->insert(ExitIter, LastMerge);
1742  LastMerge->addSuccessor(Exit);
1743  insertUnconditionalBranch(LastMerge, Exit);
1744  LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber()
1745  << "\n");
1746  }
1747  return LastMerge;
1748 }
1749 
1750 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
1751  MachineBasicBlock *CodeBB,
1752  MachineBasicBlock *MergeBB,
1753  unsigned DestRegister,
1754  unsigned IfSourceRegister,
1755  unsigned CodeSourceRegister,
1756  bool IsUndefIfSource) {
1757  // If this is the function exit block, we don't need a phi.
1758  if (MergeBB->succ_begin() == MergeBB->succ_end()) {
1759  return;
1760  }
1761  LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
1762  << "): " << printReg(DestRegister, TRI) << " = PHI("
1763  << printReg(IfSourceRegister, TRI) << ", "
1764  << printMBBReference(*IfBB)
1765  << printReg(CodeSourceRegister, TRI) << ", "
1766  << printMBBReference(*CodeBB) << ")\n");
1767  const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
1768  MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
1769  TII->get(TargetOpcode::PHI), DestRegister);
1770  if (IsUndefIfSource && false) {
1771  MIB.addReg(IfSourceRegister, RegState::Undef);
1772  } else {
1773  MIB.addReg(IfSourceRegister);
1774  }
1775  MIB.addMBB(IfBB);
1776  MIB.addReg(CodeSourceRegister);
1777  MIB.addMBB(CodeBB);
1778 }
1779 
1782  E = MBB->succ_end();
1783  PI != E; ++PI) {
1784  if ((*PI) != MBB) {
1785  (MBB)->removeSuccessor(*PI);
1786  }
1787  }
1788 }
1789 
1791  MachineBasicBlock *EndMBB) {
1792 
1793  // We have to check against the StartMBB successor becasuse a
1794  // structurized region with a loop will have the entry block split,
1795  // and the backedge will go to the entry successor.
1797  unsigned SuccSize = StartMBB->succ_size();
1798  if (SuccSize > 0) {
1799  MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
1800  for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
1801  E = EndMBB->succ_end();
1802  PI != E; ++PI) {
1803  // Either we have a back-edge to the entry block, or a back-edge to the
1804  // successor of the entry block since the block may be split.
1805  if ((*PI) != StartMBB &&
1806  !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
1807  Succs.insert(
1808  std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
1809  }
1810  }
1811  }
1812 
1813  for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
1814  E = StartMBB->pred_end();
1815  PI != E; ++PI) {
1816  if ((*PI) != EndMBB) {
1817  Succs.insert(
1818  std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
1819  }
1820  }
1821 
1822  for (auto SI : Succs) {
1823  std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
1824  LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first)
1825  << " -> " << printMBBReference(*Edge.second) << "\n");
1826  Edge.first->removeSuccessor(Edge.second);
1827  }
1828 }
1829 
1830 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
1831  MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
1832  MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
1833  bool InheritPreds) {
1834  MachineFunction *MF = MergeBB->getParent();
1836 
1837  if (InheritPreds) {
1838  for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
1839  E = CodeBBStart->pred_end();
1840  PI != E; ++PI) {
1841  if ((*PI) != CodeBBEnd) {
1842  MachineBasicBlock *Pred = (*PI);
1843  Pred->addSuccessor(IfBB);
1844  }
1845  }
1846  }
1847 
1848  removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
1849 
1850  auto CodeBBStartI = CodeBBStart->getIterator();
1851  auto CodeBBEndI = CodeBBEnd->getIterator();
1852  auto MergeIter = MergeBB->getIterator();
1853  MF->insert(MergeIter, IfBB);
1854  MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
1855  IfBB->addSuccessor(MergeBB);
1856  IfBB->addSuccessor(CodeBBStart);
1857 
1858  LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
1859  // Ensure that the MergeBB is a successor of the CodeEndBB.
1860  if (!CodeBBEnd->isSuccessor(MergeBB))
1861  CodeBBEnd->addSuccessor(MergeBB);
1862 
1863  LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart)
1864  << " through " << printMBBReference(*CodeBBEnd) << "\n");
1865 
1866  // If we have a single predecessor we can find a reasonable debug location
1867  MachineBasicBlock *SinglePred =
1868  CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
1869  const DebugLoc &DL = SinglePred
1870  ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
1871  : DebugLoc();
1872 
1873  unsigned Reg =
1874  TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
1875  SelectBB->getNumber() /* CodeBBStart->getNumber() */);
1876  if (&(*(IfBB->getParent()->begin())) == IfBB) {
1877  TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
1878  CodeBBStart->getNumber());
1879  }
1880  MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
1881  ArrayRef<MachineOperand> Cond(RegOp);
1882  TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
1883 
1884  return IfBB;
1885 }
1886 
1887 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
1889  if (Cond.size() != 1)
1890  return;
1891  if (!Cond[0].isReg())
1892  return;
1893 
1894  unsigned CondReg = Cond[0].getReg();
1895  for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
1896  (*UI).setIsKill(false);
1897  }
1898 }
1899 
1900 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1901  MachineBasicBlock *MergeBB,
1902  unsigned BBSelectReg) {
1903  MachineBasicBlock *TrueBB = nullptr;
1904  MachineBasicBlock *FalseBB = nullptr;
1906  MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
1907  TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
1908 
1909  const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
1910 
1911  if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
1912  // This is an exit block, hence no successors. We will assign the
1913  // bb select register to the entry block.
1914  TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1915  BBSelectReg,
1916  CodeBB->getParent()->begin()->getNumber());
1917  insertUnconditionalBranch(CodeBB, MergeBB, DL);
1918  return;
1919  }
1920 
1921  if (FalseBB == nullptr && TrueBB == nullptr) {
1922  TrueBB = FallthroughBB;
1923  } else if (TrueBB != nullptr) {
1924  FalseBB =
1925  (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB;
1926  }
1927 
1928  if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
1929  TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1930  BBSelectReg, TrueBB->getNumber());
1931  } else {
1932  const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
1933  unsigned TrueBBReg = MRI->createVirtualRegister(RegClass);
1934  unsigned FalseBBReg = MRI->createVirtualRegister(RegClass);
1935  TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1936  TrueBBReg, TrueBB->getNumber());
1937  TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1938  FalseBBReg, FalseBB->getNumber());
1939  ensureCondIsNotKilled(Cond);
1940  TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
1941  BBSelectReg, Cond, TrueBBReg, FalseBBReg);
1942  }
1943 
1944  insertUnconditionalBranch(CodeBB, MergeBB, DL);
1945 }
1946 
1947 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
1948  if (MRI->def_begin(Reg) == MRI->def_end()) {
1949  LLVM_DEBUG(dbgs() << "Register "
1950  << printReg(Reg, MRI->getTargetRegisterInfo())
1951  << " has NO defs\n");
1952  } else if (!MRI->hasOneDef(Reg)) {
1953  LLVM_DEBUG(dbgs() << "Register "
1954  << printReg(Reg, MRI->getTargetRegisterInfo())
1955  << " has multiple defs\n");
1956  LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n");
1957  for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
1958  LLVM_DEBUG(DI->getParent()->dump());
1959  }
1960  LLVM_DEBUG(dbgs() << "DEFS END\n");
1961  }
1962 
1963  assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1964  return (*(MRI->def_begin(Reg))).getParent();
1965 }
1966 
1967 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
1968  MachineBasicBlock *CodeBB,
1969  MachineBasicBlock *MergeBB,
1970  LinearizedRegion *InnerRegion,
1971  unsigned DestReg,
1972  unsigned SourceReg) {
1973  // In this function we know we are part of a chain already, so we need
1974  // to add the registers to the existing chain, and rename the register
1975  // inside the region.
1976  bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
1977  MachineInstr *DefInstr = getDefInstr(SourceReg);
1978  if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) {
1979  // Handle the case where the def is a PHI-def inside a basic
1980  // block, then we only need to do renaming. Special care needs to
1981  // be taken if the PHI-def is part of an existing chain, or if a
1982  // new one needs to be created.
1983  InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI);
1984 
1985  // We collect all PHI Information, and if we are at the region entry,
1986  // all PHIs will be removed, and then re-introduced if needed.
1987  storePHILinearizationInfoDest(DestReg, *DefInstr);
1988  // We have picked up all the information we need now and can remove
1989  // the PHI
1990  PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1991  DefInstr->eraseFromParent();
1992  } else {
1993  // If this is not a phi-def, or it is a phi-def but from a linearized region
1994  if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) {
1995  // If this is a single BB and the definition is in this block we
1996  // need to replace any uses outside the region.
1997  InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
1998  }
1999  const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
2000  unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
2001  bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
2002  LLVM_DEBUG(dbgs() << "Insert Chained PHI\n");
2003  insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
2004  SourceReg, IsLastDef);
2005 
2006  PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
2007  if (IsLastDef) {
2008  const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
2009  TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
2010  NextDestReg, 0);
2011  PHIInfo.deleteDef(DestReg);
2012  } else {
2013  PHIInfo.replaceDef(DestReg, NextDestReg);
2014  }
2015  }
2016 }
2017 
2018 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
2019  LinearizedRegion *InnerRegion,
2020  unsigned Register) {
2021  return getDefInstr(Register)->getParent() == MBB ||
2022  InnerRegion->contains(getDefInstr(Register)->getParent());
2023 }
2024 
2025 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
2026  MachineBasicBlock *CodeBB,
2027  MachineBasicBlock *MergeBB,
2028  LinearizedRegion *InnerRegion,
2029  LinearizedRegion *LRegion) {
2030  DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
2031  SmallVector<unsigned, 4> OldLiveOuts;
2032  bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
2033  for (auto OLI : *LiveOuts) {
2034  OldLiveOuts.push_back(OLI);
2035  }
2036 
2037  for (auto LI : OldLiveOuts) {
2038  LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
2039  if (!containsDef(CodeBB, InnerRegion, LI) ||
2040  (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
2041  // If the register simly lives through the CodeBB, we don't have
2042  // to rewrite anything since the register is not defined in this
2043  // part of the code.
2044  LLVM_DEBUG(dbgs() << "- through");
2045  continue;
2046  }
2047  LLVM_DEBUG(dbgs() << "\n");
2048  unsigned Reg = LI;
2049  if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
2050  // If the register is live out, we do want to create a phi,
2051  // unless it is from the Exit block, becasuse in that case there
2052  // is already a PHI, and no need to create a new one.
2053 
2054  // If the register is just a live out def and not part of a phi
2055  // chain, we need to create a PHI node to handle the if region,
2056  // and replace all uses outside of the region with the new dest
2057  // register, unless it is the outgoing BB select register. We have
2058  // already creaed phi nodes for these.
2059  const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
2060  unsigned PHIDestReg = MRI->createVirtualRegister(RegClass);
2061  unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
2062  // Create initializer, this value is never used, but is needed
2063  // to satisfy SSA.
2064  LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
2065  TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
2066  IfSourceReg, 0);
2067 
2068  InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
2069  LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
2070  insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
2071  IfSourceReg, Reg, true);
2072  }
2073  }
2074 
2075  // Handle the chained definitions in PHIInfo, checking if this basic block
2076  // is a source block for a definition.
2077  SmallVector<unsigned, 4> Sources;
2078  if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
2079  LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from "
2080  << printMBBReference(*CodeBB) << "\n");
2081  for (auto SI : Sources) {
2082  unsigned DestReg;
2083  PHIInfo.findDest(SI, CodeBB, DestReg);
2084  insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
2085  }
2086  LLVM_DEBUG(dbgs() << "Insertion done.\n");
2087  }
2088 
2089  LLVM_DEBUG(PHIInfo.dump(MRI));
2090 }
2091 
2092 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
2093  LLVM_DEBUG(dbgs() << "Before PHI Prune\n");
2094  LLVM_DEBUG(PHIInfo.dump(MRI));
2096  ElimiatedSources;
2097  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2098  ++DRI) {
2099 
2100  unsigned DestReg = *DRI;
2101  auto SE = PHIInfo.sources_end(DestReg);
2102 
2103  bool MBBContainsPHISource = false;
2104  // Check if there is a PHI source in this MBB
2105  for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2106  unsigned SourceReg = (*SRI).first;
2107  MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2108  if (Def->getParent()->getParent() == MBB) {
2109  MBBContainsPHISource = true;
2110  }
2111  }
2112 
2113  // If so, all other sources are useless since we know this block
2114  // is always executed when the region is executed.
2115  if (MBBContainsPHISource) {
2116  for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2117  PHILinearize::PHISourceT Source = *SRI;
2118  unsigned SourceReg = Source.first;
2119  MachineBasicBlock *SourceMBB = Source.second;
2120  MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2121  if (Def->getParent()->getParent() != MBB) {
2122  ElimiatedSources.push_back(
2123  std::make_tuple(DestReg, SourceReg, SourceMBB));
2124  }
2125  }
2126  }
2127  }
2128 
2129  // Remove the PHI sources that are in the given MBB
2130  for (auto &SourceInfo : ElimiatedSources) {
2131  PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
2132  std::get<2>(SourceInfo));
2133  }
2134  LLVM_DEBUG(dbgs() << "After PHI Prune\n");
2135  LLVM_DEBUG(PHIInfo.dump(MRI));
2136 }
2137 
2138 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
2139  unsigned DestReg) {
2140  MachineBasicBlock *Entry = CurrentRegion->getEntry();
2141  MachineBasicBlock *Exit = CurrentRegion->getExit();
2142 
2143  LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: "
2144  << (*(Entry->pred_begin()))->getNumber() << "\n");
2145 
2146  int NumSources = 0;
2147  auto SE = PHIInfo.sources_end(DestReg);
2148 
2149  for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2150  NumSources++;
2151  }
2152 
2153  if (NumSources == 1) {
2154  auto SRI = PHIInfo.sources_begin(DestReg);
2155  unsigned SourceReg = (*SRI).first;
2156  replaceRegisterWith(DestReg, SourceReg);
2157  } else {
2158  const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
2159  MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
2160  TII->get(TargetOpcode::PHI), DestReg);
2161  LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
2162 
2163  unsigned CurrentBackedgeReg = 0;
2164 
2165  for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2166  unsigned SourceReg = (*SRI).first;
2167 
2168  if (CurrentRegion->contains((*SRI).second)) {
2169  if (CurrentBackedgeReg == 0) {
2170  CurrentBackedgeReg = SourceReg;
2171  } else {
2172  MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
2173  MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
2174  const TargetRegisterClass *RegClass =
2175  MRI->getRegClass(CurrentBackedgeReg);
2176  unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass);
2177  MachineInstrBuilder BackedgePHI =
2178  BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
2179  TII->get(TargetOpcode::PHI), NewBackedgeReg);
2180  BackedgePHI.addReg(CurrentBackedgeReg);
2181  BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0));
2182  BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
2183  BackedgePHI.addMBB((*SRI).second);
2184  CurrentBackedgeReg = NewBackedgeReg;
2185  LLVM_DEBUG(dbgs()
2186  << "Inserting backedge PHI: "
2187  << printReg(NewBackedgeReg, TRI) << " = PHI("
2188  << printReg(CurrentBackedgeReg, TRI) << ", "
2189  << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", "
2190  << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", "
2191  << printMBBReference(*(*SRI).second));
2192  }
2193  } else {
2194  MIB.addReg(SourceReg);
2195  MIB.addMBB((*SRI).second);
2196  LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
2197  << printMBBReference(*(*SRI).second) << ", ");
2198  }
2199  }
2200 
2201  // Add the final backedge register source to the entry phi
2202  if (CurrentBackedgeReg != 0) {
2203  MIB.addReg(CurrentBackedgeReg);
2204  MIB.addMBB(Exit);
2205  LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", "
2206  << printMBBReference(*Exit) << ")\n");
2207  } else {
2208  LLVM_DEBUG(dbgs() << ")\n");
2209  }
2210  }
2211 }
2212 
2213 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
2214  LLVM_DEBUG(PHIInfo.dump(MRI));
2215 
2216  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2217  ++DRI) {
2218 
2219  unsigned DestReg = *DRI;
2220  createEntryPHI(CurrentRegion, DestReg);
2221  }
2222  PHIInfo.clear();
2223 }
2224 
2225 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
2226  unsigned NewRegister) {
2227  assert(Register != NewRegister && "Cannot replace a reg with itself");
2228 
2229  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
2230  E = MRI->reg_end();
2231  I != E;) {
2232  MachineOperand &O = *I;
2233  ++I;
2234  if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
2235  LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
2236  << printReg(NewRegister, MRI->getTargetRegisterInfo())
2237  << "\n");
2238  llvm_unreachable("Cannot substitute physical registers");
2239  // We don't handle physical registers, but if we need to
2240  // in the future This is how we do it:
2241  // O.substPhysReg(NewRegister, *TRI);
2242  } else {
2243  LLVM_DEBUG(dbgs() << "Replacing register: "
2244  << printReg(Register, MRI->getTargetRegisterInfo())
2245  << " with "
2246  << printReg(NewRegister, MRI->getTargetRegisterInfo())
2247  << "\n");
2248  O.setReg(NewRegister);
2249  }
2250  }
2251  PHIInfo.deleteDef(Register);
2252 
2253  getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
2254 
2255  LLVM_DEBUG(PHIInfo.dump(MRI));
2256 }
2257 
2258 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
2259  LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n");
2260  LLVM_DEBUG(PHIInfo.dump(MRI));
2261  for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2262  ++DRI) {
2263  unsigned DestReg = *DRI;
2264  LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n");
2265  auto SRI = PHIInfo.sources_begin(DestReg);
2266  unsigned SourceReg = (*SRI).first;
2267  LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI)
2268  << " SourceReg: " << printReg(SourceReg, TRI) << "\n");
2269 
2270  assert(PHIInfo.sources_end(DestReg) == ++SRI &&
2271  "More than one phi source in entry node");
2272  replaceRegisterWith(DestReg, SourceReg);
2273  }
2274 }
2275 
2277  return ((&(*(MBB->getParent()->begin()))) == MBB);
2278 }
2279 
2280 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2281  MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB,
2282  LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn,
2283  unsigned BBSelectRegOut) {
2284  if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) {
2285  // Handle non-loop function entry block.
2286  // We need to allow loops to the entry block and then
2287  rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2288  resolvePHIInfos(CodeBB);
2290  CodeBB->addSuccessor(MergeBB);
2291  CurrentRegion->addMBB(CodeBB);
2292  return nullptr;
2293  }
2294  if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
2295  // Handle non-loop region entry block.
2296  MachineFunction *MF = MergeBB->getParent();
2297  auto MergeIter = MergeBB->getIterator();
2298  auto CodeBBStartIter = CodeBB->getIterator();
2299  auto CodeBBEndIter = ++(CodeBB->getIterator());
2300  if (CodeBBEndIter != MergeIter) {
2301  MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
2302  }
2303  rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2304  prunePHIInfo(CodeBB);
2305  createEntryPHIs(CurrentRegion);
2307  CodeBB->addSuccessor(MergeBB);
2308  CurrentRegion->addMBB(CodeBB);
2309  return nullptr;
2310  } else {
2311  // Handle internal block.
2312  const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
2313  unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
2314  rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
2315  bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
2316  MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
2317  BBSelectRegIn, IsRegionEntryBB);
2318  CurrentRegion->addMBB(IfBB);
2319  // If this is the entry block we need to make the If block the new
2320  // linearized region entry.
2321  if (IsRegionEntryBB) {
2322  CurrentRegion->setEntry(IfBB);
2323 
2324  if (CurrentRegion->getHasLoop()) {
2325  MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2326  MachineBasicBlock *ETrueBB = nullptr;
2327  MachineBasicBlock *EFalseBB = nullptr;
2329 
2330  const DebugLoc &DL = DebugLoc();
2331  TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2332  TII->removeBranch(*RegionExit);
2333 
2334  // We need to create a backedge if there is a loop
2335  unsigned Reg = TII->insertNE(
2336  RegionExit, RegionExit->instr_end(), DL,
2337  CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2338  CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2339  MachineOperand RegOp =
2340  MachineOperand::CreateReg(Reg, false, false, true);
2341  ArrayRef<MachineOperand> Cond(RegOp);
2342  LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2343  LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2344  LLVM_DEBUG(dbgs() << "\n");
2345  TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2346  Cond, DebugLoc());
2347  RegionExit->addSuccessor(CurrentRegion->getEntry());
2348  }
2349  }
2350  CurrentRegion->addMBB(CodeBB);
2351  LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
2352 
2353  InnerRegion.setParent(CurrentRegion);
2354  LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
2355  insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2356  CodeBBSelectReg);
2357  InnerRegion.addMBB(MergeBB);
2358 
2359  LLVM_DEBUG(InnerRegion.print(dbgs(), TRI));
2360  rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
2361  extractKilledPHIs(CodeBB);
2362  if (IsRegionEntryBB) {
2363  createEntryPHIs(CurrentRegion);
2364  }
2365  return IfBB;
2366  }
2367 }
2368 
2369 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2370  MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
2371  LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
2372  unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
2373  unsigned CodeBBSelectReg =
2374  InnerRegion->getRegionMRT()->getInnerOutputRegister();
2375  MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
2376  MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
2377  MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
2378  SelectBB, BBSelectRegIn, true);
2379  CurrentRegion->addMBB(IfBB);
2380  bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
2381  if (isEntry) {
2382 
2383  if (CurrentRegion->getHasLoop()) {
2384  MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2385  MachineBasicBlock *ETrueBB = nullptr;
2386  MachineBasicBlock *EFalseBB = nullptr;
2388 
2389  const DebugLoc &DL = DebugLoc();
2390  TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2391  TII->removeBranch(*RegionExit);
2392 
2393  // We need to create a backedge if there is a loop
2394  unsigned Reg =
2395  TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
2396  CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2397  CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2398  MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
2399  ArrayRef<MachineOperand> Cond(RegOp);
2400  LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2401  LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2402  LLVM_DEBUG(dbgs() << "\n");
2403  TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2404  Cond, DebugLoc());
2405  RegionExit->addSuccessor(IfBB);
2406  }
2407  }
2408  CurrentRegion->addMBBs(InnerRegion);
2409  LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
2410  insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2411  CodeBBSelectReg);
2412 
2413  rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
2414  CurrentRegion);
2415 
2416  rewriteRegionEntryPHIs(InnerRegion, IfBB);
2417 
2418  if (isEntry) {
2419  CurrentRegion->setEntry(IfBB);
2420  }
2421 
2422  if (isEntry) {
2423  createEntryPHIs(CurrentRegion);
2424  }
2425 
2426  return IfBB;
2427 }
2428 
2429 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
2430  MachineBasicBlock *Entry,
2431  MachineBasicBlock *EntrySucc,
2432  LinearizedRegion *LRegion) {
2433  SmallVector<unsigned, 2> PHIRegionIndices;
2434  getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
2435 
2436  assert(PHIRegionIndices.size() == 1);
2437 
2438  unsigned RegionIndex = PHIRegionIndices[0];
2439  unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
2440  MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
2441  unsigned PHIDest = getPHIDestReg(PHI);
2442  unsigned PHISource = PHIDest;
2443  unsigned ReplaceReg;
2444 
2445  if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
2446  PHISource = ReplaceReg;
2447  }
2448 
2449  const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
2450  unsigned NewDestReg = MRI->createVirtualRegister(RegClass);
2451  LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
2452  MachineInstrBuilder MIB =
2453  BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
2454  TII->get(TargetOpcode::PHI), NewDestReg);
2455  LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI)
2456  << " = PHI(");
2457  MIB.addReg(PHISource);
2458  MIB.addMBB(Entry);
2459  LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
2460  << printMBBReference(*Entry));
2461  MIB.addReg(RegionSourceReg);
2462  MIB.addMBB(RegionSourceMBB);
2463  LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", "
2464  << printMBBReference(*RegionSourceMBB) << ")\n");
2465 }
2466 
2467 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
2468  MachineBasicBlock *EntrySucc,
2469  LinearizedRegion *LRegion) {
2471  collectPHIs(Entry, PHIs);
2472 
2473  for (auto PHII : PHIs) {
2474  splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
2475  }
2476 }
2477 
2478 // Split the exit block so that we can insert a end control flow
2480 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
2481  auto MRTRegion = LRegion->getRegionMRT();
2482  auto Exit = LRegion->getExit();
2483  auto MF = Exit->getParent();
2484  auto Succ = MRTRegion->getSucc();
2485 
2486  auto NewExit = MF->CreateMachineBasicBlock();
2487  auto AfterExitIter = Exit->getIterator();
2488  AfterExitIter++;
2489  MF->insert(AfterExitIter, NewExit);
2490  Exit->removeSuccessor(Succ);
2491  Exit->addSuccessor(NewExit);
2492  NewExit->addSuccessor(Succ);
2493  insertUnconditionalBranch(NewExit, Succ);
2494  LRegion->addMBB(NewExit);
2495  LRegion->setExit(NewExit);
2496 
2497  LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber()
2498  << "\n");
2499 
2500  // Replace any PHI Predecessors in the successor with NewExit
2501  for (auto &II : *Succ) {
2502  MachineInstr &Instr = II;
2503 
2504  // If we are past the PHI instructions we are done
2505  if (!Instr.isPHI())
2506  break;
2507 
2508  int numPreds = getPHINumInputs(Instr);
2509  for (int i = 0; i < numPreds; ++i) {
2510  auto Pred = getPHIPred(Instr, i);
2511  if (Pred == Exit) {
2512  setPhiPred(Instr, i, NewExit);
2513  }
2514  }
2515  }
2516 
2517  return NewExit;
2518 }
2519 
2521  // Create the fall-through block.
2522  MachineBasicBlock *MBB = (*I).getParent();
2523  MachineFunction *MF = MBB->getParent();
2524  MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock();
2525  auto MBBIter = ++(MBB->getIterator());
2526  MF->insert(MBBIter, SuccMBB);
2527  SuccMBB->transferSuccessorsAndUpdatePHIs(MBB);
2528  MBB->addSuccessor(SuccMBB);
2529 
2530  // Splice the code over.
2531  SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
2532 
2533  return SuccMBB;
2534 }
2535 
2536 // Split the entry block separating PHI-nodes and the rest of the code
2537 // This is needed to insert an initializer for the bb select register
2538 // inloop regions.
2539 
2541 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
2542  MachineBasicBlock *Entry = LRegion->getEntry();
2543  MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
2544  MachineBasicBlock *Exit = LRegion->getExit();
2545 
2546  LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to "
2547  << printMBBReference(*Entry) << " -> "
2548  << printMBBReference(*EntrySucc) << "\n");
2549  LRegion->addMBB(EntrySucc);
2550 
2551  // Make the backedge go to Entry Succ
2552  if (Exit->isSuccessor(Entry)) {
2553  Exit->removeSuccessor(Entry);
2554  }
2555  Exit->addSuccessor(EntrySucc);
2556  MachineInstr &Branch = *(Exit->instr_rbegin());
2557  for (auto &UI : Branch.uses()) {
2558  if (UI.isMBB() && UI.getMBB() == Entry) {
2559  UI.setMBB(EntrySucc);
2560  }
2561  }
2562 
2563  splitLoopPHIs(Entry, EntrySucc, LRegion);
2564 
2565  return EntrySucc;
2566 }
2567 
2568 LinearizedRegion *
2569 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
2570  LinearizedRegion *LRegion = Region->getLinearizedRegion();
2571  LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
2572  LRegion->setEntry(Region->getEntry());
2573  return LRegion;
2574 }
2575 
2576 static void removeOldExitPreds(RegionMRT *Region) {
2577  MachineBasicBlock *Exit = Region->getSucc();
2578  if (Exit == nullptr) {
2579  return;
2580  }
2581  for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(),
2582  E = Exit->pred_end();
2583  PI != E; ++PI) {
2584  if (Region->contains(*PI)) {
2585  (*PI)->removeSuccessor(Exit);
2586  }
2587  }
2588 }
2589 
2592  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
2593  if (MBBs.count(*SI) != 0) {
2594  return true;
2595  }
2596  }
2597  return false;
2598 }
2599 
2600 static bool containsNewBackedge(MRT *Tree,
2602  // Need to traverse this in reverse since it is in post order.
2603  if (Tree == nullptr)
2604  return false;
2605 
2606  if (Tree->isMBB()) {
2607  MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
2608  MBBs.insert(MBB);
2609  if (mbbHasBackEdge(MBB, MBBs)) {
2610  return true;
2611  }
2612  } else {
2613  RegionMRT *Region = Tree->getRegionMRT();
2614  SetVector<MRT *> *Children = Region->getChildren();
2615  for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
2616  if (containsNewBackedge(*CI, MBBs))
2617  return true;
2618  }
2619  }
2620  return false;
2621 }
2622 
2623 static bool containsNewBackedge(RegionMRT *Region) {
2625  return containsNewBackedge(Region, MBBs);
2626 }
2627 
2628 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
2629  auto *LRegion = initLinearizedRegion(Region);
2630  LRegion->setHasLoop(containsNewBackedge(Region));
2631  MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
2632  MachineBasicBlock *CurrentMerge = LastMerge;
2633  LRegion->addMBB(LastMerge);
2634  LRegion->setExit(LastMerge);
2635 
2636  rewriteRegionExitPHIs(Region, LastMerge, LRegion);
2637  removeOldExitPreds(Region);
2638 
2639  LLVM_DEBUG(PHIInfo.dump(MRI));
2640 
2641  SetVector<MRT *> *Children = Region->getChildren();
2642  LLVM_DEBUG(dbgs() << "===========If Region Start===============\n");
2643  if (LRegion->getHasLoop()) {
2644  LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n");
2645  } else {
2646  LLVM_DEBUG(dbgs() << "Has Backedge: No\n");
2647  }
2648 
2649  unsigned BBSelectRegIn;
2650  unsigned BBSelectRegOut;
2651  for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
2652  LLVM_DEBUG(dbgs() << "CurrentRegion: \n");
2653  LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2654 
2655  auto CNI = CI;
2656  ++CNI;
2657 
2658  MRT *Child = (*CI);
2659 
2660  if (Child->isRegion()) {
2661 
2662  LinearizedRegion *InnerLRegion =
2663  Child->getRegionMRT()->getLinearizedRegion();
2664  // We found the block is the exit of an inner region, we need
2665  // to put it in the current linearized region.
2666 
2667  LLVM_DEBUG(dbgs() << "Linearizing region: ");
2668  LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI));
2669  LLVM_DEBUG(dbgs() << "\n");
2670 
2671  MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
2672  if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
2673  // Entry has already been linearized, no need to do this region.
2674  unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
2675  unsigned InnerSelectReg =
2676  InnerLRegion->getRegionMRT()->getInnerOutputRegister();
2677  replaceRegisterWith(InnerSelectReg, OuterSelect),
2678  resolvePHIInfos(InnerEntry);
2679  if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
2680  InnerLRegion->getExit()->addSuccessor(CurrentMerge);
2681  continue;
2682  }
2683 
2684  BBSelectRegOut = Child->getBBSelectRegOut();
2685  BBSelectRegIn = Child->getBBSelectRegIn();
2686 
2687  LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2688  << "\n");
2689  LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2690  << "\n");
2691 
2692  MachineBasicBlock *IfEnd = CurrentMerge;
2693  CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
2694  Child->getRegionMRT()->getEntry(),
2695  BBSelectRegIn, BBSelectRegOut);
2696  TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2697  } else {
2698  MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
2699  LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
2700 
2701  if (MBB == getSingleExitNode(*(MBB->getParent()))) {
2702  // If this is the exit block then we need to skip to the next.
2703  // The "in" register will be transferred to "out" in the next
2704  // iteration.
2705  continue;
2706  }
2707 
2708  BBSelectRegOut = Child->getBBSelectRegOut();
2709  BBSelectRegIn = Child->getBBSelectRegIn();
2710 
2711  LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2712  << "\n");
2713  LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2714  << "\n");
2715 
2716  MachineBasicBlock *IfEnd = CurrentMerge;
2717  // This is a basic block that is not part of an inner region, we
2718  // need to put it in the current linearized region.
2719  CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn,
2720  BBSelectRegOut);
2721  if (CurrentMerge) {
2722  TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2723  }
2724 
2725  LLVM_DEBUG(PHIInfo.dump(MRI));
2726  }
2727  }
2728 
2729  LRegion->removeFalseRegisterKills(MRI);
2730 
2731  if (LRegion->getHasLoop()) {
2732  MachineBasicBlock *NewSucc = splitEntry(LRegion);
2733  if (isFunctionEntryBlock(LRegion->getEntry())) {
2734  resolvePHIInfos(LRegion->getEntry());
2735  }
2736  const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
2737  unsigned InReg = LRegion->getBBSelectRegIn();
2738  unsigned InnerSelectReg =
2739  MRI->createVirtualRegister(MRI->getRegClass(InReg));
2740  unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
2741  TII->materializeImmediate(*(LRegion->getEntry()),
2742  LRegion->getEntry()->getFirstTerminator(), DL,
2743  NewInReg, Region->getEntry()->getNumber());
2744  // Need to be careful about updating the registers inside the region.
2745  LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
2746  LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
2747  insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
2748  InnerSelectReg, NewInReg,
2749  LRegion->getRegionMRT()->getInnerOutputRegister());
2750  splitExit(LRegion);
2751  TII->convertNonUniformLoopRegion(NewSucc, LastMerge);
2752  }
2753 
2754  if (Region->isRoot()) {
2755  TII->insertReturn(*LastMerge);
2756  }
2757 
2758  LLVM_DEBUG(Region->getEntry()->getParent()->dump());
2759  LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2760  LLVM_DEBUG(PHIInfo.dump(MRI));
2761 
2762  LLVM_DEBUG(dbgs() << "===========If Region End===============\n");
2763 
2764  Region->setLinearizedRegion(LRegion);
2765  return true;
2766 }
2767 
2768 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) {
2769  if (false && regionIsSimpleIf(Region)) {
2770  transformSimpleIfRegion(Region);
2771  return true;
2772  } else if (regionIsSequence(Region)) {
2773  fixupRegionExits(Region);
2774  return false;
2775  } else {
2776  structurizeComplexRegion(Region);
2777  }
2778  return false;
2779 }
2780 
2781 static int structurize_once = 0;
2782 
2783 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
2784  bool isTopRegion) {
2785  bool Changed = false;
2786 
2787  auto Children = Region->getChildren();
2788  for (auto CI : *Children) {
2789  if (CI->isRegion()) {
2790  Changed |= structurizeRegions(CI->getRegionMRT(), false);
2791  }
2792  }
2793 
2794  if (structurize_once < 2 || true) {
2795  Changed |= structurizeRegion(Region);
2796  structurize_once++;
2797  }
2798  return Changed;
2799 }
2800 
2801 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
2802  LLVM_DEBUG(dbgs() << "Fallthrough Map:\n");
2803  for (auto &MBBI : MF) {
2804  MachineBasicBlock *MBB = MBBI.getFallThrough();
2805  if (MBB != nullptr) {
2806  LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
2807  << MBB->getNumber() << "\n");
2808  }
2809  FallthroughMap[&MBBI] = MBB;
2810  }
2811 }
2812 
2813 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
2814  unsigned SelectOut) {
2815  LinearizedRegion *LRegion = new LinearizedRegion();
2816  if (SelectOut) {
2817  LRegion->addLiveOut(SelectOut);
2818  LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI)
2819  << "\n");
2820  }
2821  LRegion->setRegionMRT(Region);
2822  Region->setLinearizedRegion(LRegion);
2823  LRegion->setParent(Region->getParent()
2824  ? Region->getParent()->getLinearizedRegion()
2825  : nullptr);
2826 }
2827 
2828 unsigned
2829 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut,
2830  MachineRegisterInfo *MRI,
2831  const SIInstrInfo *TII) {
2832  if (MRT->isRegion()) {
2833  RegionMRT *Region = MRT->getRegionMRT();
2834  Region->setBBSelectRegOut(SelectOut);
2835  unsigned InnerSelectOut = createBBSelectReg(TII, MRI);
2836 
2837  // Fixme: Move linearization creation to the original spot
2838  createLinearizedRegion(Region, SelectOut);
2839 
2840  for (auto CI = Region->getChildren()->begin(),
2841  CE = Region->getChildren()->end();
2842  CI != CE; ++CI) {
2843  InnerSelectOut =
2844  initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII);
2845  }
2846  MRT->setBBSelectRegIn(InnerSelectOut);
2847  return InnerSelectOut;
2848  } else {
2849  MRT->setBBSelectRegOut(SelectOut);
2850  unsigned NewSelectIn = createBBSelectReg(TII, MRI);
2851  MRT->setBBSelectRegIn(NewSelectIn);
2852  return NewSelectIn;
2853  }
2854 }
2855 
2857  for (auto &MBBI : MF) {
2858  for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(),
2859  E = MBBI.instr_end();
2860  I != E; ++I) {
2861  MachineInstr &Instr = *I;
2862  if (Instr.isPHI()) {
2863  int numPreds = getPHINumInputs(Instr);
2864  for (int i = 0; i < numPreds; ++i) {
2865  assert(Instr.getOperand(i * 2 + 1).isReg() &&
2866  "PHI Operand not a register");
2867  }
2868  }
2869  }
2870  }
2871 }
2872 
2873 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
2874  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2875  const SIInstrInfo *TII = ST.getInstrInfo();
2876  TRI = ST.getRegisterInfo();
2877  MRI = &(MF.getRegInfo());
2878  initFallthroughMap(MF);
2879 
2881  LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n");
2882  LLVM_DEBUG(MF.dump());
2883 
2884  Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
2885  LLVM_DEBUG(Regions->dump());
2886 
2887  RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
2888  setRegionMRT(RTree);
2889  initializeSelectRegisters(RTree, 0, MRI, TII);
2890  LLVM_DEBUG(RTree->dump(TRI));
2891  bool result = structurizeRegions(RTree, true);
2892  delete RTree;
2893  LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n");
2894  initFallthroughMap(MF);
2895  return result;
2896 }
2897 
2899 
2900 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2901  "AMDGPU Machine CFG Structurizer", false, false)
2903 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2904  "AMDGPU Machine CFG Structurizer", false, false)
2905 
2907  return new AMDGPUMachineCFGStructurizer();
2908 }
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
AMDGPU specific subclass of TargetSubtarget.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool IsDead
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
const T & front() const
Return the first element of the SetVector.
Definition: SetVector.h:123
This class represents lattice values for constants.
Definition: AllocatorList.h:24
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:492
static void fixRegionTerminator(RegionMRT *Region)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
static int structurize_once
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
unsigned insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
unsigned Reg
static bool mbbHasBackEdge(MachineBasicBlock *MBB, SmallPtrSet< MachineBasicBlock *, 8 > &MBBs)
const SIInstrInfo * getInstrInfo() const override
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static void checkRegOnlyPHIInputs(MachineFunction &MF)
bool isPHI() const
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
const T & back() const
Return the last element of the SetVector.
Definition: SetVector.h:129
amdgpu machine cfg structurizer
static void removeExternalCFGSuccessors(MachineBasicBlock *MBB)
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
static MachineBasicBlock * getPHIPred(MachineInstr &PHI, unsigned Index)
return AArch64::GPR64RegClass contains(Reg)
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool regionIsSequence(RegionMRT *Region)
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
static use_iterator use_end()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MachineBasicBlock * getFallThrough()
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
iterator_range< iterator > terminators()
def_iterator def_begin(unsigned RegNo) const
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
reverse_iterator rbegin()
Get an reverse_iterator to the end of the SetVector.
Definition: SetVector.h:103
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2091
void fixupRegionExits(RegionMRT *Region)
RegionT * getTopLevelRegion() const
Definition: RegionInfo.h:869
amdgpu machine cfg AMDGPU Machine CFG Structurizer
static bool isSource(Value *V)
Return true if the given value is a source in the use-def chain, producing a narrow &#39;TypeSize&#39; value...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
static bool isPHIRegionIndex(SmallVector< unsigned, 2 > PHIRegionIndices, unsigned Index)
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
static void removeOldExitPreds(RegionMRT *Region)
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
static MachineBasicBlock * getSingleExitNode(MachineFunction &MF)
static bool hasOneExitNode(MachineFunction &MF)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
#define P(N)
const TargetRegisterInfo * getTargetRegisterInfo() const
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
unsigned const MachineRegisterInfo * MRI
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const
void setMBB(MachineBasicBlock *MBB)
static bool containsNewBackedge(MRT *Tree, SmallPtrSet< MachineBasicBlock *, 8 > &MBBs)
void insertReturn(MachineBasicBlock &MBB) const
Represent the analysis usage information of a pass.
static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index)
unsigned insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned SrcReg, int Value) const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
std::vector< MachineBasicBlock * >::const_iterator const_succ_iterator
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
iterator_range< po_iterator< T > > post_order(const T &G)
static unsigned createBBSelectReg(const SIInstrInfo *TII, MachineRegisterInfo *MRI)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
void materializeImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, int64_t Value) const
std::vector< MachineBasicBlock * >::iterator pred_iterator
static void fixMBBTerminator(MachineBasicBlock *MBB)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
static bool isFunctionEntryBlock(MachineBasicBlock *MBB)
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setIsKill(bool Val=true)
static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, MachineBasicBlock *EndMBB)
SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
Definition: SmallPtrSet.h:267
Iterator for intrusive lists based on ilist_node.
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry &)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void splice(iterator InsertPt, iterator MBBI)
iterator_range< use_iterator > use_operands(unsigned Reg) const
MachineOperand class - Representation of each machine instruction operand.
static MachineBasicBlock * split(MachineBasicBlock::iterator I)
Promote Memory to Register
Definition: Mem2Reg.cpp:110
static unsigned getPHINumInputs(MachineInstr &PHI)
reg_iterator reg_begin(unsigned RegNo) const
unsigned pred_size() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Target - Wrapper for Target specific information.
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:212
static void collectPHIs(MachineBasicBlock *MBB, SmallVector< MachineInstr *, 2 > &PHIs)
unsigned succ_size() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static unsigned getPHIDestReg(MachineInstr &PHI)
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1969
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Interface definition for SIInstrInfo.
Flatten the CFG
reverse_iterator rend()
Get a reverse_iterator to the beginning of the SetVector.
Definition: SetVector.h:113
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
void setReg(unsigned Reg)
Change the register this operand corresponds to.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
#define I(x, y, z)
Definition: MD5.cpp:58
char & AMDGPUMachineCFGStructurizerID
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
bool isReg() const
isReg - Tests if this is a MO_Register operand.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:366
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
static def_iterator def_end()
LLVM Value Representation.
Definition: Value.h:73
static void setPhiPred(MachineInstr &PHI, unsigned Index, MachineBasicBlock *NewPred)
A vector that has set insertion semantics.
Definition: SetVector.h:41
static const Function * getParent(const Value *V)
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", "AMDGPU Machine CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
#define LLVM_DEBUG(X)
Definition: Debug.h:123
static reg_iterator reg_end()
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, MachineBasicBlock *IfEnd) const
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const SIRegisterInfo * getRegisterInfo() const override