LLVM 8.0.1
AMDGPUTargetMachine.cpp
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
#include <memory>

using namespace llvm;

static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(false));

static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(true),
  cl::Hidden);

// Option to control global loads scalarization
static cl::opt<bool> ScalarizeGlobal(
  "amdgpu-scalarize-global-loads",
  cl::desc("Enable global load scalarization"),
  cl::init(true),
  cl::Hidden);

// Option to run internalize pass.
static cl::opt<bool> InternalizeSymbols(
  "amdgpu-internalize-symbols",
  cl::desc("Enable elimination of non-kernel functions and unused globals"),
  cl::init(false),
  cl::Hidden);

// Option to inline all early.
static cl::opt<bool> EarlyInlineAll(
  "amdgpu-early-inline-all",
  cl::desc("Inline all functions early"),
  cl::init(false),
  cl::Hidden);

static cl::opt<bool> EnableSDWAPeephole(
  "amdgpu-sdwa-peephole",
  cl::desc("Enable SDWA peepholer"),
  cl::init(true));

static cl::opt<bool> EnableDPPCombine(
  "amdgpu-dpp-combine",
  cl::desc("Enable DPP combiner"),
  cl::init(false));

// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
  cl::desc("Enable AMDGPU Alias Analysis"),
  cl::init(true));

// Option to run late CFG structurizer
static cl::opt<bool, true> LateCFGStructurize(
  "amdgpu-late-structurize",
  cl::desc("Enable late CFG structurization"),
  cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
  cl::Hidden);

static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
  "amdgpu-function-calls",
  cl::desc("Enable AMDGPU function call support"),
  cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
  cl::init(false),
  cl::Hidden);

// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
  "amdgpu-simplify-libcall",
  cl::desc("Enable amdgpu library simplifications"),
  cl::init(true),
  cl::Hidden);

static cl::opt<bool> EnableLowerKernelArguments(
  "amdgpu-ir-lower-kernel-arguments",
  cl::desc("Lower kernel argument loads in IR pass"),
  cl::init(true),
  cl::Hidden);

// Enable atomic optimization
static cl::opt<bool> EnableAtomicOptimizations(
  "amdgpu-atomic-optimizations",
  cl::desc("Enable atomic optimizations"),
  cl::init(false),
  cl::Hidden);

// Enable Mode register optimization
static cl::opt<bool> EnableSIModeRegisterPass(
  "amdgpu-mode-register",
  cl::desc("Enable mode register pass"),
  cl::init(true),
  cl::Hidden);

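// Illustrative note: each cl::opt above is an ordinary LLVM command-line
// flag, so its default can be overridden from any tool that links in this
// backend, for example:
//
//   llc -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole=0 kernel.ll
//
// Options marked cl::Hidden or cl::ReallyHidden do not show up in -help
// output but are still accepted.
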
extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeR600ClauseMergePassPass(*PR);
  initializeR600ControlFlowFinalizerPass(*PR);
  initializeR600ExpandSpecialInstrsPassPass(*PR);
  initializeR600VectorRegMergerPass(*PR);
  initializeR600PacketizerPass(*PR);
  initializeGlobalISel(*PR);
  initializeAMDGPUDAGToDAGISelPass(*PR);
  initializeGCNDPPCombinePass(*PR);
  initializeSILowerI1CopiesPass(*PR);
  initializeSIFixSGPRCopiesPass(*PR);
  initializeSIFixVGPRCopiesPass(*PR);
  initializeSIFixupVectorISelPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIPeepholeSDWAPass(*PR);
  initializeSIShrinkInstructionsPass(*PR);
  initializeSIOptimizeExecMaskingPreRAPass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
  initializeAMDGPUFixFunctionBitcastsPass(*PR);
  initializeAMDGPUAlwaysInlinePass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesPass(*PR);
  initializeAMDGPUArgumentUsageInfoPass(*PR);
  initializeAMDGPUAtomicOptimizerPass(*PR);
  initializeAMDGPULowerKernelArgumentsPass(*PR);
  initializeAMDGPULowerKernelAttributesPass(*PR);
  initializeAMDGPULowerIntrinsicsPass(*PR);
  initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeAMDGPURewriteOutArgumentsPass(*PR);
  initializeAMDGPUUnifyMetadataPass(*PR);
  initializeSIAnnotateControlFlowPass(*PR);
  initializeSIInsertWaitcntsPass(*PR);
  initializeSIModeRegisterPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIInsertSkipsPass(*PR);
  initializeSIMemoryLegalizerPass(*PR);
  initializeSIDebuggerInsertNopsPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
  initializeSIFixWWMLivenessPass(*PR);
  initializeSIFormMemoryClausesPass(*PR);
  initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
  initializeAMDGPUAAWrapperPassPass(*PR);
  initializeAMDGPUExternalAAWrapperPass(*PR);
  initializeAMDGPUUseNativeCallsPass(*PR);
  initializeAMDGPUSimplifyLibCallsPass(*PR);
  initializeAMDGPUInlinerPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return llvm::make_unique<AMDGPUTargetObjectFile>();
}

static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
}

static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
    new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  return DAG;
}

static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  auto DAG = new GCNIterativeScheduler(C,
    GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
  return new GCNIterativeScheduler(C,
    GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
}

static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) {
  auto DAG = new GCNIterativeScheduler(C,
    GCNIterativeScheduler::SCHEDULE_ILP);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  return DAG;
}

static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
  "Run GCN scheduler to maximize occupancy (experimental)",
  createIterativeGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
GCNMinRegSchedRegistry("gcn-minreg",
  "Run GCN iterative scheduler for minimal register usage (experimental)",
  createMinRegScheduler);

static MachineSchedRegistry
GCNILPSchedRegistry("gcn-ilp",
  "Run GCN iterative scheduler for ILP scheduling (experimental)",
  createIterativeILPMachineScheduler);

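// Note (illustrative): the MachineSchedRegistry entries above make these
// schedulers selectable by name through the generic machine-scheduler flag,
// e.g. "llc -march=amdgcn -misched=gcn-ilp ...".
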
static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat.
  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
}
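
// Reading the data-layout strings above (summary, per the LLVM data-layout
// reference): "e" selects little-endian; each "pN:S:A" entry gives the size
// and ABI alignment in bits for pointers in address space N; the "vN:..."
// entries give vector alignments; "n32:64" lists the native integer widths;
// "S32" is the natural stack alignment in bits; and "A5" declares that
// allocas live in address space 5, the AMDGPU private (scratch) space.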

LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  if (TT.getArch() == Triple::amdgcn)
    return "generic";

  return "r600";
}

static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         Optional<CodeModel::Model> CM,
                                         CodeGenOpt::Level OptLevel)
  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                      FS, Options, getEffectiveRelocModel(RM),
                      getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
    TLOF(createTLOF(getTargetTriple())) {
  initAsmInfo();
}

bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;

StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  return GPUAttr.hasAttribute(Attribute::None) ?
    getTargetCPU() : GPUAttr.getValueAsString();
}

StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FSAttr = F.getFnAttribute("target-features");

  return FSAttr.hasAttribute(Attribute::None) ?
    getTargetFeatureString() :
    FSAttr.getValueAsString();
}

/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
  if (const Function *F = dyn_cast<Function>(&GV))
    return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());

  return !GV.use_empty();
}

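// Concretely (illustrative): under -amdgpu-internalize-symbols the extension
// registered below runs createInternalizePass(mustPreserveGV), so kernels
// (entry functions) and declarations keep their linkage, while a device-side
// helper that is only reachable from kernels is internalized and can then be
// removed by the following GlobalDCE pass once all of its uses are inlined.
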
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
  Builder.DivergentTarget = true;

  bool EnableOpt = getOptLevel() > CodeGenOpt::None;
  bool Internalize = InternalizeSymbols;
  bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
  bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
  bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;

  if (EnableAMDGPUFunctionCalls) {
    delete Builder.Inliner;
    Builder.Inliner = createAMDGPUFunctionInliningPass();
  }

  Builder.addExtension(
    PassManagerBuilder::EP_ModuleOptimizerEarly,
    [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
                                         legacy::PassManagerBase &PM) {
      if (AMDGPUAA) {
        PM.add(createAMDGPUAAWrapperPass());
        PM.add(createAMDGPUExternalAAWrapperPass());
      }

      if (Internalize) {
        PM.add(createInternalizePass(mustPreserveGV));
        PM.add(createGlobalDCEPass());
      }
      if (EarlyInline)
        PM.add(createAMDGPUAlwaysInlinePass(false));
  });

  const auto &Opt = Options;
  Builder.addExtension(
    PassManagerBuilder::EP_EarlyAsPossible,
    [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
                                      legacy::PassManagerBase &PM) {
      if (AMDGPUAA) {
        PM.add(createAMDGPUAAWrapperPass());
        PM.add(createAMDGPUExternalAAWrapperPass());
      }
      PM.add(llvm::createAMDGPUUseNativeCallsPass());
      if (LibCallSimplify)
        PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
  });

  Builder.addExtension(
    PassManagerBuilder::EP_CGSCCOptimizerLate,
    [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
      // Add infer address spaces pass to the opt pipeline after inlining
      // but before SROA to increase SROA opportunities.
      PM.add(createInferAddressSpacesPass());

      // This should run after inlining to have any chance of doing anything,
      // and before other cleanup optimizations.
      PM.add(createAMDGPULowerKernelAttributesPass());
  });
}

//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     Optional<CodeModel::Model> CM,
                                     CodeGenOpt::Level OL, bool JIT)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);
}

const R600Subtarget *R600TargetMachine::getSubtargetImpl(
  const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
  }

  return I.get();
}

TargetTransformInfo
R600TargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(R600TTIImpl(this, F));
}

//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   Optional<CodeModel::Model> CM,
                                   CodeGenOpt::Level OL, bool JIT)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
  }

  I->setScalarizeGlobalBehavior(ScalarizeGlobal);

  return I.get();
}
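
// Illustrative example: subtargets are created per function from the
// "target-cpu" and "target-features" IR attributes read by getGPUName and
// getFeatureString above, so in a module such as
//
//   define amdgpu_kernel void @k() #0 { ... }
//   attributes #0 = { "target-cpu"="gfx900" }
//
// @k gets (and shares, via SubtargetMap) a GCNSubtarget keyed by that CPU and
// feature string, while functions without the attributes fall back to the
// TargetMachine-level CPU and feature string.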

TargetTransformInfo
GCNTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(GCNTTIImpl(this, F));
}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {
    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {}

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  bool addInstSelector() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {
    // It is necessary to know the register usage of the entire call graph. We
    // allow calls without EnableAMDGPUFunctionCalls if they are marked
    // noinline, so this is always required.
    setRequiresCodeGenSCCOrder(true);
  }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // end anonymous namespace

void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createGVNPass());
  else
    addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createLICMPass());
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
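
// Rough illustration of why the ordering above matters: after
// SeparateConstOffsetFromGEP, two addresses such as
//   p + (i + 1) * 4   and   p + (i + 2) * 4
// are split so the variable part (p + i * 4) becomes a common subexpression
// with only differing constant offsets, which the following EarlyCSE/GVN run
// can deduplicate; SLSR and NaryReassociate expose similar sharing for
// strength-reduced and reassociated index arithmetic.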

void AMDGPUPassConfig::addIRPasses() {
  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();

  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  addPass(createAtomicExpandPass());

  // This must occur before inlining, as the inliner will not look through
  // bitcast calls.
  addPass(createAMDGPUFixFunctionBitcastsPass());

  addPass(createAMDGPULowerIntrinsicsPass());

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfigs passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
    // TODO: May want to move later or split into an early and late one.

    addPass(createAMDGPUCodeGenPreparePass());
  }

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  if (TM.getTargetTriple().getArch() == Triple::r600)
    addPass(createR600OpenCLImageTypeLoweringPass());

  // Replace OpenCL enqueued block function pointers with global variables.
  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());

  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createInferAddressSpacesPass());
    addPass(createAMDGPUPromoteAlloca());

    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();

    if (EnableAMDGPUAliasAnalysis) {
      addPass(createAMDGPUAAWrapperPass());
      addPass(createExternalAAWrapperPass([](Pass &P, Function &,
                                             AAResults &AAR) {
        if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
          AAR.addAAResult(WrapperPass->getResult());
        }));
    }
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  // %0 = add %a, %b
  // %1 = add %b, %a
  //
  // and
  //
  // %0 = shl nsw %a, 2
  // %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}

void AMDGPUPassConfig::addCodeGenPrepare() {
  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
      EnableLowerKernelArguments)
    addPass(createAMDGPULowerKernelArgumentsPass());

  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}

bool AMDGPUPassConfig::addPreISel() {
  addPass(createLowerSwitchPass());
  addPass(createFlattenCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}

bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}

bool R600PassConfig::addInstSelector() {
  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger());
}

void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(), false);
  addPass(createR600ControlFlowFinalizer(), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(*this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//

ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
  MachineSchedContext *C) const {
  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
  if (ST.enableSIScheduler())
    return createSIMachineScheduler(C);
  return createGCNMaxOccupancyMachineScheduler(C);
}

bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableAtomicOptimizations) {
    addPass(createAMDGPUAtomicOptimizerPass());
  }

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(createAMDGPUAnnotateKernelFeaturesPass());

  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
  // regions formed by them.
  addPass(&AMDGPUUnifyDivergentExitNodesID);
  if (!LateCFGStructurize) {
    addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  }
  addPass(createSinkingPass());
  addPass(createAMDGPUAnnotateUniformValues());
  if (!LateCFGStructurize) {
    addPass(createSIAnnotateControlFlowPass());
  }

  return false;
}

void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  if (EnableDPPCombine)
    addPass(&GCNDPPCombineID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
  if (EnableSDWAPeephole) {
    addPass(&SIPeepholeSDWAID);
    addPass(&EarlyMachineLICMID);
    addPass(&MachineCSEID);
    addPass(&SIFoldOperandsID);
    addPass(&DeadMachineInstructionElimID);
  }
  addPass(createSIShrinkInstructionsPass());
}

bool GCNPassConfig::addILPOpts() {
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);

  TargetPassConfig::addILPOpts();
  return false;
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(&SIFixSGPRCopiesID);
  addPass(createSILowerI1CopiesPass());
  addPass(createSIFixupVectorISelPass());
  addPass(createSIAddIMGInitPass());
  return false;
}

bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}

void GCNPassConfig::addPreRegAlloc() {
  if (LateCFGStructurize) {
    addPass(createAMDGPUMachineCFGStructurizerPass());
  }
  addPass(createSIWholeQuadModePass());
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  // This must be run after SILowerControlFlow, since it needs to use the
  // machine-level CFG, but before register allocation.
  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);

  insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  // This must be run after SILowerControlFlow, since it needs to use the
  // machine-level CFG, but before register allocation.
  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIFixVGPRCopiesID);
  if (getOptLevel() > CodeGenOpt::None)
    addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  addPass(createSIMemoryLegalizerPass());
  addPass(createSIInsertWaitcntsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIModeRegisterPass());

  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  //
  // FIXME: This stand-alone pass will emit individual S_NOP 0, as needed. It
  // would be better for it to emit S_NOP <N> when possible.
  addPass(&PostRAHazardRecognizerID);

  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(*this, PM);
}
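
// Illustrative entry points: this target machine is selected whenever code is
// generated for an AMDGPU triple, e.g. (hypothetical invocations)
//
//   llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 kernel.ll -o kernel.s
//   llc -mtriple=r600-- -mcpu=cypress kernel.ll -o kernel.s
//
// The first goes through GCNTargetMachine/GCNPassConfig, the second through
// R600TargetMachine/R600PassConfig.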