LLVM  8.0.1
AMDGPUSubtarget.h
Go to the documentation of this file.
1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU specific subclass of TargetSubtarget.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUCallLowering.h"
20 #include "R600FrameLowering.h"
21 #include "R600ISelLowering.h"
22 #include "R600InstrInfo.h"
23 #include "SIFrameLowering.h"
24 #include "SIISelLowering.h"
25 #include "SIInstrInfo.h"
26 #include "Utils/AMDGPUBaseInfo.h"
27 #include "llvm/ADT/Triple.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <memory>
38 #include <utility>
39 
40 #define GET_SUBTARGETINFO_HEADER
41 #include "AMDGPUGenSubtargetInfo.inc"
42 #define GET_SUBTARGETINFO_HEADER
43 #include "R600GenSubtargetInfo.inc"
44 
45 namespace llvm {
46 
47 class StringRef;
48 
50 public:
51  enum Generation {
52  R600 = 0,
53  R700 = 1,
54  EVERGREEN = 2,
59  GFX9 = 7
60  };
61 
62 private:
63  Triple TargetTriple;
64 
65 protected:
70  bool HasSDWA;
72  bool HasMulI24;
73  bool HasMulU24;
79  unsigned WavefrontSize;
80 
81 public:
82  AMDGPUSubtarget(const Triple &TT);
83 
84  static const AMDGPUSubtarget &get(const MachineFunction &MF);
85  static const AMDGPUSubtarget &get(const TargetMachine &TM,
86  const Function &F);
87 
88  /// \returns Default range flat work group size for a calling convention.
89  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
90 
91  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
92  /// for function \p F, or minimum/maximum flat work group sizes explicitly
93  /// requested using "amdgpu-flat-work-group-size" attribute attached to
94  /// function \p F.
95  ///
96  /// \returns Subtarget's default values if explicitly requested values cannot
97  /// be converted to integer, or violate subtarget's specifications.
98  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
99 
100  /// \returns Subtarget's default pair of minimum/maximum number of waves per
101  /// execution unit for function \p F, or minimum/maximum number of waves per
102  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
103  /// attached to function \p F.
104  ///
105  /// \returns Subtarget's default values if explicitly requested values cannot
106  /// be converted to integer, violate subtarget's specifications, or are not
107  /// compatible with minimum/maximum number of waves limited by flat work group
108  /// size, register usage, and/or lds usage.
109  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110 
111  /// Return the amount of LDS that can be used that will not restrict the
112  /// occupancy lower than WaveCount.
113  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
114  const Function &) const;
115 
116  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
117  /// the given LDS memory size is the only constraint.
118  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
119 
120  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
121 
122  bool isAmdHsaOS() const {
123  return TargetTriple.getOS() == Triple::AMDHSA;
124  }
125 
126  bool isAmdPalOS() const {
127  return TargetTriple.getOS() == Triple::AMDPAL;
128  }
129 
130  bool isMesa3DOS() const {
131  return TargetTriple.getOS() == Triple::Mesa3D;
132  }
133 
134  bool isMesaKernel(const Function &F) const {
135  return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
136  }
137 
138  bool isAmdHsaOrMesa(const Function &F) const {
139  return isAmdHsaOS() || isMesaKernel(F);
140  }
141 
142  bool has16BitInsts() const {
143  return Has16BitInsts;
144  }
145 
146  bool hasMadMixInsts() const {
147  return HasMadMixInsts;
148  }
149 
150  bool hasFP32Denormals() const {
151  return FP32Denormals;
152  }
153 
154  bool hasFPExceptions() const {
155  return FPExceptions;
156  }
157 
158  bool hasSDWA() const {
159  return HasSDWA;
160  }
161 
162  bool hasVOP3PInsts() const {
163  return HasVOP3PInsts;
164  }
165 
166  bool hasMulI24() const {
167  return HasMulI24;
168  }
169 
170  bool hasMulU24() const {
171  return HasMulU24;
172  }
173 
174  bool hasInv2PiInlineImm() const {
175  return HasInv2PiInlineImm;
176  }
177 
178  bool hasFminFmaxLegacy() const {
179  return HasFminFmaxLegacy;
180  }
181 
182  bool hasTrigReducedRange() const {
183  return HasTrigReducedRange;
184  }
185 
186  bool isPromoteAllocaEnabled() const {
187  return EnablePromoteAlloca;
188  }
189 
190  unsigned getWavefrontSize() const {
191  return WavefrontSize;
192  }
193 
194  int getLocalMemorySize() const {
195  return LocalMemorySize;
196  }
197 
198  unsigned getAlignmentForImplicitArgPtr() const {
199  return isAmdHsaOS() ? 8 : 4;
200  }
201 
202  /// Returns the offset in bytes from the start of the input buffer
203  /// of the first explicit kernel argument.
204  unsigned getExplicitKernelArgOffset(const Function &F) const {
205  return isAmdHsaOrMesa(F) ? 0 : 36;
206  }
207 
208  /// \returns Maximum number of work groups per compute unit supported by the
209  /// subtarget and limited by given \p FlatWorkGroupSize.
210  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
211 
212  /// \returns Minimum flat work group size supported by the subtarget.
213  virtual unsigned getMinFlatWorkGroupSize() const = 0;
214 
215  /// \returns Maximum flat work group size supported by the subtarget.
216  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
217 
218  /// \returns Maximum number of waves per execution unit supported by the
219  /// subtarget and limited by given \p FlatWorkGroupSize.
220  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
221 
222  /// \returns Minimum number of waves per execution unit supported by the
223  /// subtarget.
224  virtual unsigned getMinWavesPerEU() const = 0;
225 
226  unsigned getMaxWavesPerEU() const { return 10; }
227 
228  /// Creates value range metadata on a workitemid.* intrinsic call or load.
229  bool makeLIDRangeMetadata(Instruction *I) const;
230 
231  /// \returns Number of bytes of arguments that are passed to a shader or
232  /// kernel in addition to the explicit ones declared for the function.
233  unsigned getImplicitArgNumBytes(const Function &F) const {
234  if (isMesaKernel(F))
235  return 16;
236  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
237  }
238  uint64_t getExplicitKernArgSize(const Function &F,
239  unsigned &MaxAlign) const;
240  unsigned getKernArgSegmentSize(const Function &F,
241  unsigned &MaxAlign) const;
242 
243  virtual ~AMDGPUSubtarget() {}
244 };
245 
247  public AMDGPUSubtarget {
248 public:
249  enum {
267  };
268 
270  TrapHandlerAbiNone = 0,
271  TrapHandlerAbiHsa = 1
272  };
273 
274  enum TrapID {
275  TrapIDHardwareReserved = 0,
276  TrapIDHSADebugTrap = 1,
277  TrapIDLLVMTrap = 2,
278  TrapIDLLVMDebugTrap = 3,
279  TrapIDDebugBreakpoint = 7,
280  TrapIDDebugReserved8 = 8,
281  TrapIDDebugReservedFE = 0xfe,
282  TrapIDDebugReservedFF = 0xff
283  };
284 
286  LLVMTrapHandlerRegValue = 1
287  };
288 
289 private:
290  /// GlobalISel related APIs.
291  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
292  std::unique_ptr<InstructionSelector> InstSelector;
293  std::unique_ptr<LegalizerInfo> Legalizer;
294  std::unique_ptr<RegisterBankInfo> RegBankInfo;
295 
296 protected:
297  // Basic subtarget description.
299  unsigned Gen;
300  unsigned IsaVersion;
304 
305  // Possibly statically set by tablegen, but may want to be overridden.
308 
309  // Dynamically set bits that enable features.
311  bool DX10Clamp;
322 
323  // Used as options.
330  bool DumpCode;
331 
332  // Subtarget properties statically set by tablegen
333  bool FP64;
334  bool FMA;
335  bool MIMG_R128;
336  bool IsGCN;
338  bool CIInsts;
339  bool VIInsts;
340  bool GFX9Insts;
345  bool HasMovrel;
354  bool HasDPP;
366  bool CaymanISA;
367  bool CFALUBug;
371 
372  // Dummy feature to use for assembler in tablegen.
374 
376 private:
377  SIInstrInfo InstrInfo;
378  SITargetLowering TLInfo;
379  SIFrameLowering FrameLowering;
380 
381 public:
382  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
383  const GCNTargetMachine &TM);
384  ~GCNSubtarget() override;
385 
386  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
387  StringRef GPU, StringRef FS);
388 
389  const SIInstrInfo *getInstrInfo() const override {
390  return &InstrInfo;
391  }
392 
393  const SIFrameLowering *getFrameLowering() const override {
394  return &FrameLowering;
395  }
396 
397  const SITargetLowering *getTargetLowering() const override {
398  return &TLInfo;
399  }
400 
401  const SIRegisterInfo *getRegisterInfo() const override {
402  return &InstrInfo.getRegisterInfo();
403  }
404 
405  const CallLowering *getCallLowering() const override {
406  return CallLoweringInfo.get();
407  }
408 
409  const InstructionSelector *getInstructionSelector() const override {
410  return InstSelector.get();
411  }
412 
413  const LegalizerInfo *getLegalizerInfo() const override {
414  return Legalizer.get();
415  }
416 
417  const RegisterBankInfo *getRegBankInfo() const override {
418  return RegBankInfo.get();
419  }
420 
421  // Nothing implemented, just prevent crashes on use.
422  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
423  return &TSInfo;
424  }
425 
426  const InstrItineraryData *getInstrItineraryData() const override {
427  return &InstrItins;
428  }
429 
430  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
431 
433  return (Generation)Gen;
434  }
435 
436  unsigned getWavefrontSizeLog2() const {
437  return Log2_32(WavefrontSize);
438  }
439 
440  int getLDSBankCount() const {
441  return LDSBankCount;
442  }
443 
444  unsigned getMaxPrivateElementSize() const {
445  return MaxPrivateElementSize;
446  }
447 
448  bool hasIntClamp() const {
449  return HasIntClamp;
450  }
451 
452  bool hasFP64() const {
453  return FP64;
454  }
455 
456  bool hasMIMG_R128() const {
457  return MIMG_R128;
458  }
459 
460  bool hasHWFP64() const {
461  return FP64;
462  }
463 
464  bool hasFastFMAF32() const {
465  return FastFMAF32;
466  }
467 
468  bool hasHalfRate64Ops() const {
469  return HalfRate64Ops;
470  }
471 
472  bool hasAddr64() const {
473  return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
474  }
475 
476  bool hasBFE() const {
477  return true;
478  }
479 
480  bool hasBFI() const {
481  return true;
482  }
483 
484  bool hasBFM() const {
485  return hasBFE();
486  }
487 
488  bool hasBCNT(unsigned Size) const {
489  return true;
490  }
491 
492  bool hasFFBL() const {
493  return true;
494  }
495 
496  bool hasFFBH() const {
497  return true;
498  }
499 
500  bool hasMed3_16() const {
501  return getGeneration() >= AMDGPUSubtarget::GFX9;
502  }
503 
504  bool hasMin3Max3_16() const {
505  return getGeneration() >= AMDGPUSubtarget::GFX9;
506  }
507 
508  bool hasFmaMixInsts() const {
509  return HasFmaMixInsts;
510  }
511 
512  bool hasCARRY() const {
513  return true;
514  }
515 
516  bool hasFMA() const {
517  return FMA;
518  }
519 
520  bool hasSwap() const {
521  return GFX9Insts;
522  }
523 
525  return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
526  }
527 
528  bool enableHugePrivateBuffer() const {
529  return EnableHugePrivateBuffer;
530  }
531 
533  return EnableUnsafeDSOffsetFolding;
534  }
535 
536  bool dumpCode() const {
537  return DumpCode;
538  }
539 
540  /// Return the amount of LDS that can be used that will not restrict the
541  /// occupancy lower than WaveCount.
542  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
543  const Function &) const;
544 
545  bool hasFP16Denormals() const {
546  return FP64FP16Denormals;
547  }
548 
549  bool hasFP64Denormals() const {
550  return FP64FP16Denormals;
551  }
552 
554  return getGeneration() >= AMDGPUSubtarget::GFX9;
555  }
556 
557  bool enableDX10Clamp() const {
558  return DX10Clamp;
559  }
560 
561  bool enableIEEEBit(const MachineFunction &MF) const {
563  }
564 
565  bool useFlatForGlobal() const {
566  return FlatForGlobal;
567  }
568 
569  /// \returns If target supports ds_read/write_b128 and user enables generation
570  /// of ds_read/write_b128.
571  bool useDS128() const {
572  return CIInsts && EnableDS128;
573  }
574 
575  /// \returns If MUBUF instructions always perform range checking, even for
576  /// buffer resources used for private memory access.
578  return getGeneration() < AMDGPUSubtarget::GFX9;
579  }
580 
581  /// \returns If target requires PRT Struct NULL support (zero result registers
582  /// for sparse texture support).
583  bool usePRTStrictNull() const {
584  return EnablePRTStrictNull;
585  }
586 
588  return AutoWaitcntBeforeBarrier;
589  }
590 
591  bool hasCodeObjectV3() const {
592  // FIXME: Need to add code object v3 support for mesa and pal.
593  return isAmdHsaOS() ? CodeObjectV3 : false;
594  }
595 
597  return UnalignedBufferAccess;
598  }
599 
601  return UnalignedScratchAccess;
602  }
603 
604  bool hasApertureRegs() const {
605  return HasApertureRegs;
606  }
607 
608  bool isTrapHandlerEnabled() const {
609  return TrapHandler;
610  }
611 
612  bool isXNACKEnabled() const {
613  return EnableXNACK;
614  }
615 
616  bool hasFlatAddressSpace() const {
617  return FlatAddressSpace;
618  }
619 
620  bool hasFlatInstOffsets() const {
621  return FlatInstOffsets;
622  }
623 
624  bool hasFlatGlobalInsts() const {
625  return FlatGlobalInsts;
626  }
627 
628  bool hasFlatScratchInsts() const {
629  return FlatScratchInsts;
630  }
631 
633  return getGeneration() > GFX9;
634  }
635 
636  bool hasD16LoadStore() const {
637  return getGeneration() >= GFX9;
638  }
639 
640  /// Return if most LDS instructions have an m0 use that require m0 to be
641  /// iniitalized.
642  bool ldsRequiresM0Init() const {
643  return getGeneration() < GFX9;
644  }
645 
646  bool hasAddNoCarry() const {
647  return AddNoCarryInsts;
648  }
649 
650  bool hasUnpackedD16VMem() const {
651  return HasUnpackedD16VMem;
652  }
653 
654  // Covers VS/PS/CS graphics shaders
655  bool isMesaGfxShader(const Function &F) const {
656  return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
657  }
658 
659  bool hasMad64_32() const {
660  return getGeneration() >= SEA_ISLANDS;
661  }
662 
663  bool hasSDWAOmod() const {
664  return HasSDWAOmod;
665  }
666 
667  bool hasSDWAScalar() const {
668  return HasSDWAScalar;
669  }
670 
671  bool hasSDWASdst() const {
672  return HasSDWASdst;
673  }
674 
675  bool hasSDWAMac() const {
676  return HasSDWAMac;
677  }
678 
679  bool hasSDWAOutModsVOPC() const {
680  return HasSDWAOutModsVOPC;
681  }
682 
684  return getGeneration() < SEA_ISLANDS;
685  }
686 
687  bool hasDLInsts() const {
688  return HasDLInsts;
689  }
690 
691  bool hasDotInsts() const {
692  return HasDotInsts;
693  }
694 
695  bool isSRAMECCEnabled() const {
696  return EnableSRAMECC;
697  }
698 
699  // Scratch is allocated in 256 dword per wave blocks for the entire
700  // wavefront. When viewed from the perspecive of an arbitrary workitem, this
701  // is 4-byte aligned.
702  //
703  // Only 4-byte alignment is really needed to access anything. Transformations
704  // on the pointer value itself may rely on the alignment / known low bits of
705  // the pointer. Set this to something above the minimum to avoid needing
706  // dynamic realignment in common cases.
707  unsigned getStackAlignment() const {
708  return 16;
709  }
710 
711  bool enableMachineScheduler() const override {
712  return true;
713  }
714 
715  bool enableSubRegLiveness() const override {
716  return true;
717  }
718 
719  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
721 
722  /// \returns Number of execution units per compute unit supported by the
723  /// subtarget.
724  unsigned getEUsPerCU() const {
725  return AMDGPU::IsaInfo::getEUsPerCU(this);
726  }
727 
728  /// \returns Maximum number of waves per compute unit supported by the
729  /// subtarget without any kind of limitation.
730  unsigned getMaxWavesPerCU() const {
732  }
733 
734  /// \returns Maximum number of waves per compute unit supported by the
735  /// subtarget and limited by given \p FlatWorkGroupSize.
736  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
737  return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
738  }
739 
740  /// \returns Maximum number of waves per execution unit supported by the
741  /// subtarget without any kind of limitation.
742  unsigned getMaxWavesPerEU() const {
744  }
745 
746  /// \returns Number of waves per work group supported by the subtarget and
747  /// limited by given \p FlatWorkGroupSize.
748  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
749  return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
750  }
751 
752  // static wrappers
753  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
754 
755  // XXX - Why is this here if it isn't in the default pass set?
756  bool enableEarlyIfConversion() const override {
757  return true;
758  }
759 
760  void overrideSchedPolicy(MachineSchedPolicy &Policy,
761  unsigned NumRegionInstrs) const override;
762 
763  unsigned getMaxNumUserSGPRs() const {
764  return 16;
765  }
766 
767  bool hasSMemRealTime() const {
768  return HasSMemRealTime;
769  }
770 
771  bool hasMovrel() const {
772  return HasMovrel;
773  }
774 
775  bool hasVGPRIndexMode() const {
776  return HasVGPRIndexMode;
777  }
778 
779  bool useVGPRIndexMode(bool UserEnable) const {
780  return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
781  }
782 
783  bool hasScalarCompareEq64() const {
784  return getGeneration() >= VOLCANIC_ISLANDS;
785  }
786 
787  bool hasScalarStores() const {
788  return HasScalarStores;
789  }
790 
791  bool hasScalarAtomics() const {
792  return HasScalarAtomics;
793  }
794 
795 
796  bool hasDPP() const {
797  return HasDPP;
798  }
799 
800  bool hasR128A16() const {
801  return HasR128A16;
802  }
803 
804  bool enableSIScheduler() const {
805  return EnableSIScheduler;
806  }
807 
808  bool debuggerSupported() const {
809  return debuggerInsertNops() && debuggerEmitPrologue();
810  }
811 
812  bool debuggerInsertNops() const {
813  return DebuggerInsertNops;
814  }
815 
816  bool debuggerEmitPrologue() const {
817  return DebuggerEmitPrologue;
818  }
819 
820  bool loadStoreOptEnabled() const {
821  return EnableLoadStoreOpt;
822  }
823 
824  bool hasSGPRInitBug() const {
825  return SGPRInitBug;
826  }
827 
828  bool has12DWordStoreHazard() const {
829  return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
830  }
831 
832  // \returns true if the subtarget supports DWORDX3 load/store instructions.
833  bool hasDwordx3LoadStores() const {
834  return CIInsts;
835  }
836 
837  bool hasSMovFedHazard() const {
838  return getGeneration() >= AMDGPUSubtarget::GFX9;
839  }
840 
842  return getGeneration() >= AMDGPUSubtarget::GFX9;
843  }
844 
845  bool hasReadM0SendMsgHazard() const {
846  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
847  }
848 
849  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
850  /// SGPRs
851  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
852 
853  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
854  /// VGPRs
855  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
856 
857  /// \returns true if the flat_scratch register should be initialized with the
858  /// pointer to the wave's scratch memory rather than a size and offset.
859  bool flatScratchIsPointer() const {
860  return getGeneration() >= AMDGPUSubtarget::GFX9;
861  }
862 
863  /// \returns true if the machine has merged shaders in which s0-s7 are
864  /// reserved by the hardware and user SGPRs start at s8
865  bool hasMergedShaders() const {
866  return getGeneration() >= GFX9;
867  }
868 
869  /// \returns SGPR allocation granularity supported by the subtarget.
870  unsigned getSGPRAllocGranule() const {
872  }
873 
874  /// \returns SGPR encoding granularity supported by the subtarget.
875  unsigned getSGPREncodingGranule() const {
877  }
878 
879  /// \returns Total number of SGPRs supported by the subtarget.
880  unsigned getTotalNumSGPRs() const {
882  }
883 
884  /// \returns Addressable number of SGPRs supported by the subtarget.
885  unsigned getAddressableNumSGPRs() const {
887  }
888 
889  /// \returns Minimum number of SGPRs that meets the given number of waves per
890  /// execution unit requirement supported by the subtarget.
891  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
892  return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
893  }
894 
895  /// \returns Maximum number of SGPRs that meets the given number of waves per
896  /// execution unit requirement supported by the subtarget.
897  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
898  return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
899  }
900 
901  /// \returns Reserved number of SGPRs for given function \p MF.
902  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
903 
904  /// \returns Maximum number of SGPRs that meets number of waves per execution
905  /// unit requirement for function \p MF, or number of SGPRs explicitly
906  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
907  ///
908  /// \returns Value that meets number of waves per execution unit requirement
909  /// if explicitly requested value cannot be converted to integer, violates
910  /// subtarget's specifications, or does not meet number of waves per execution
911  /// unit requirement.
912  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
913 
914  /// \returns VGPR allocation granularity supported by the subtarget.
915  unsigned getVGPRAllocGranule() const {
917  }
918 
919  /// \returns VGPR encoding granularity supported by the subtarget.
920  unsigned getVGPREncodingGranule() const {
922  }
923 
924  /// \returns Total number of VGPRs supported by the subtarget.
925  unsigned getTotalNumVGPRs() const {
927  }
928 
929  /// \returns Addressable number of VGPRs supported by the subtarget.
930  unsigned getAddressableNumVGPRs() const {
932  }
933 
934  /// \returns Minimum number of VGPRs that meets given number of waves per
935  /// execution unit requirement supported by the subtarget.
936  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
937  return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
938  }
939 
940  /// \returns Maximum number of VGPRs that meets given number of waves per
941  /// execution unit requirement supported by the subtarget.
942  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
943  return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
944  }
945 
946  /// \returns Maximum number of VGPRs that meets number of waves per execution
947  /// unit requirement for function \p MF, or number of VGPRs explicitly
948  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
949  ///
950  /// \returns Value that meets number of waves per execution unit requirement
951  /// if explicitly requested value cannot be converted to integer, violates
952  /// subtarget's specifications, or does not meet number of waves per execution
953  /// unit requirement.
954  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
955 
956  void getPostRAMutations(
957  std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
958  const override;
959 
960  /// \returns Maximum number of work groups per compute unit supported by the
961  /// subtarget and limited by given \p FlatWorkGroupSize.
962  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
963  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
964  }
965 
966  /// \returns Minimum flat work group size supported by the subtarget.
967  unsigned getMinFlatWorkGroupSize() const override {
969  }
970 
971  /// \returns Maximum flat work group size supported by the subtarget.
972  unsigned getMaxFlatWorkGroupSize() const override {
974  }
975 
976  /// \returns Maximum number of waves per execution unit supported by the
977  /// subtarget and limited by given \p FlatWorkGroupSize.
978  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
979  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
980  }
981 
982  /// \returns Minimum number of waves per execution unit supported by the
983  /// subtarget.
984  unsigned getMinWavesPerEU() const override {
986  }
987 };
988 
989 class R600Subtarget final : public R600GenSubtargetInfo,
990  public AMDGPUSubtarget {
991 private:
992  R600InstrInfo InstrInfo;
993  R600FrameLowering FrameLowering;
994  bool FMA;
995  bool CaymanISA;
996  bool CFALUBug;
997  bool DX10Clamp;
998  bool HasVertexCache;
999  bool R600ALUInst;
1000  bool FP64;
1001  short TexVTXClauseSize;
1002  Generation Gen;
1003  R600TargetLowering TLInfo;
1004  InstrItineraryData InstrItins;
1005  SelectionDAGTargetInfo TSInfo;
1006 
1007 public:
1008  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
1009  const TargetMachine &TM);
1010 
1011  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
1012 
1013  const R600FrameLowering *getFrameLowering() const override {
1014  return &FrameLowering;
1015  }
1016 
1017  const R600TargetLowering *getTargetLowering() const override {
1018  return &TLInfo;
1019  }
1020 
1021  const R600RegisterInfo *getRegisterInfo() const override {
1022  return &InstrInfo.getRegisterInfo();
1023  }
1024 
1025  const InstrItineraryData *getInstrItineraryData() const override {
1026  return &InstrItins;
1027  }
1028 
1029  // Nothing implemented, just prevent crashes on use.
1031  return &TSInfo;
1032  }
1033 
1034  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1035 
1037  return Gen;
1038  }
1039 
1040  unsigned getStackAlignment() const {
1041  return 4;
1042  }
1043 
1044  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1045  StringRef GPU, StringRef FS);
1046 
1047  bool hasBFE() const {
1048  return (getGeneration() >= EVERGREEN);
1049  }
1050 
1051  bool hasBFI() const {
1052  return (getGeneration() >= EVERGREEN);
1053  }
1054 
1055  bool hasBCNT(unsigned Size) const {
1056  if (Size == 32)
1057  return (getGeneration() >= EVERGREEN);
1058 
1059  return false;
1060  }
1061 
1062  bool hasBORROW() const {
1063  return (getGeneration() >= EVERGREEN);
1064  }
1065 
1066  bool hasCARRY() const {
1067  return (getGeneration() >= EVERGREEN);
1068  }
1069 
1070  bool hasCaymanISA() const {
1071  return CaymanISA;
1072  }
1073 
1074  bool hasFFBL() const {
1075  return (getGeneration() >= EVERGREEN);
1076  }
1077 
1078  bool hasFFBH() const {
1079  return (getGeneration() >= EVERGREEN);
1080  }
1081 
1082  bool hasFMA() const { return FMA; }
1083 
1084  bool hasCFAluBug() const { return CFALUBug; }
1085 
1086  bool hasVertexCache() const { return HasVertexCache; }
1087 
1088  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1089 
1090  bool enableMachineScheduler() const override {
1091  return true;
1092  }
1093 
1094  bool enableSubRegLiveness() const override {
1095  return true;
1096  }
1097 
1098  /// \returns Maximum number of work groups per compute unit supported by the
1099  /// subtarget and limited by given \p FlatWorkGroupSize.
1100  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1101  return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1102  }
1103 
1104  /// \returns Minimum flat work group size supported by the subtarget.
1105  unsigned getMinFlatWorkGroupSize() const override {
1107  }
1108 
1109  /// \returns Maximum flat work group size supported by the subtarget.
1110  unsigned getMaxFlatWorkGroupSize() const override {
1112  }
1113 
1114  /// \returns Maximum number of waves per execution unit supported by the
1115  /// subtarget and limited by given \p FlatWorkGroupSize.
1116  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
1117  return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
1118  }
1119 
1120  /// \returns Minimum number of waves per execution unit supported by the
1121  /// subtarget.
1122  unsigned getMinWavesPerEU() const override {
1123  return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1124  }
1125 };
1126 
1127 } // end namespace llvm
1128 
1129 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
bool hasBCNT(unsigned Size) const
bool enableIEEEBit(const MachineFunction &MF) const
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
bool enableEarlyIfConversion() const override
bool hasSDWAOmod() const
bool hasSDWAMac() const
bool privateMemoryResourceIsRangeChecked() const
bool hasApertureRegs() const
bool debuggerSupported() const
bool useDS128() const
bool hasScalarStores() const
bool enableMachineScheduler() const override
bool isMesaKernel(const Function &F) const
unsigned getMinFlatWorkGroupSize() const override
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Interface definition for R600InstrInfo.
bool hasReadM0MovRelInterpHazard() const
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:299
bool isPromoteAllocaEnabled() const
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool hasFlatGlobalInsts() const
bool supportsMinMaxDenormModes() const
This file describes how to lower LLVM calls to machine code calls.
bool hasFmaMixInsts() const
unsigned getSGPRAllocGranule() const
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const
bool hasTrigReducedRange() const
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
const SIInstrInfo * getInstrInfo() const override
bool hasMergedShaders() const
virtual unsigned getMinWavesPerEU() const =0
F(f)
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
bool hasFastFMAF32() const
Generation getGeneration() const
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:165
bool hasMad64_32() const
const RegisterBankInfo * getRegBankInfo() const override
bool hasVOP3PInsts() const
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
bool hasFP64Denormals() const
Holds all the information related to register banks.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
bool useVGPRIndexMode(bool UserEnable) const
bool isMesaGfxShader(const Function &F) const
bool hasDwordx3LoadStores() const
bool hasIntClamp() const
int getLocalMemorySize() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool enableDX10Clamp() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool debuggerInsertNops() const
bool hasSMovFedHazard() const
bool hasSDWAOutModsVOPC() const
bool vmemWriteNeedsExpWaitcnt() const
bool isTrapHandlerEnabled() const
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
bool hasSMemRealTime() const
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
bool hasDotInsts() const
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasHalfRate64Ops() const
bool useFlatForGlobal() const
unsigned getAddressableNumSGPRs() const
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinWavesPerEU() const override
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0
Itinerary data supplied by a subtarget to be used by a target.
bool hasAddNoCarry() const
const CallLowering * getCallLowering() const override
virtual unsigned getMinFlatWorkGroupSize() const =0
bool dumpCode() const
bool debuggerEmitPrologue() const
bool isSRAMECCEnabled() const
bool hasUnalignedBufferAccess() const
const R600FrameLowering * getFrameLowering() const override
const InstrItineraryData * getInstrItineraryData() const override
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
bool hasFP32Denormals() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasScalarCompareEq64() const
unsigned getSGPREncodingGranule() const
bool isCompute(CallingConv::ID cc)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMaxWavesPerCU() const
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
bool hasCFAluBug() const
unsigned getStackAlignment() const
bool hasFminFmaxLegacy() const
bool hasDLInsts() const
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override
bool hasFPExceptions() const
bool enableMachineScheduler() const override
bool has16BitInsts() const
bool hasSwap() const
bool hasMovrel() const
unsigned MaxPrivateElementSize
bool usePRTStrictNull() const
SI DAG Lowering interface definition.
const SIFrameLowering * getFrameLowering() const override
const R600InstrInfo * getInstrInfo() const override
Generation getGeneration() const
const R600RegisterInfo & getRegisterInfo() const
Definition: R600InstrInfo.h:72
bool hasSDWASdst() const
bool hasMIMG_R128() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
unsigned getVGPREncodingGranule() const
bool hasUnalignedScratchAccess() const
bool enableSubRegLiveness() const override
TrapHandlerAbi getTrapHandlerAbi() const
bool hasScalarAtomics() const
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
bool hasFlatScratchInsts() const
bool hasVertexCache() const
unsigned getVGPRAllocGranule() const
bool hasUnpackedD16VMem() const
bool getScalarizeGlobalBehavior() const
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
bool hasAddr64() const
const R600RegisterInfo * getRegisterInfo() const override
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI)
bool enableHugePrivateBuffer() const
bool enableSIScheduler() const
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:213
bool hasMadMixInsts() const
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
bool hasFP64() const
bool hasFFBL() const
bool hasD16LoadStore() const
bool hasMin3Max3_16() const
bool hasVGPRIndexMode() const
bool hasCaymanISA() const
bool hasSGPRInitBug() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool hasAutoWaitcntBeforeBarrier() const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
bool hasFFBH() const
unsigned getEUsPerCU() const
bool isShader(CallingConv::ID cc)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
bool hasMed3_16() const
int getLDSBankCount() const
bool hasBCNT(unsigned Size) const
unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI)
const InstructionSelector * getInstructionSelector() const override
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
TargetSubtargetInfo - Generic base class for all target subtargets.
bool flatScratchIsPointer() const
unsigned getMaxWavesPerEU() const
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool enableSubRegLiveness() const override
SelectionDAGTargetInfo TSInfo
bool hasInv2PiInlineImm() const
Interface definition for SIInstrInfo.
short getTexVTXClauseSize() const
bool loadStoreOptEnabled() const
bool has12DWordStoreHazard() const
R600 DAG Lowering interface definition.
virtual unsigned getMaxFlatWorkGroupSize() const =0
AMDGPUSubtarget(const Triple &TT)
unsigned getTotalNumVGPRs() const
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool isXNACKEnabled() const
#define I(x, y, z)
Definition: MD5.cpp:58
bool hasFlatInstOffsets() const
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:47
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getStackAlignment() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool hasSDWAScalar() const
const InstrItineraryData * getInstrItineraryData() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
unsigned getMaxNumUserSGPRs() const
bool ldsRequiresM0Init() const
Return if most LDS instructions have an m0 use that requires m0 to be initialized. ...
bool hasFlatLgkmVMemCountInOrder() const
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
const LegalizerInfo * getLegalizerInfo() const override
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const
bool hasCARRY() const
const R600TargetLowering * getTargetLowering() const override
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
void setScalarizeGlobalBehavior(bool b)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool unsafeDSOffsetFoldingEnabled() const
unsigned getAddressableNumVGPRs() const
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
unsigned getTotalNumSGPRs() const
bool hasReadM0SendMsgHazard() const
unsigned getMaxPrivateElementSize() const
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
bool hasHWFP64() const
unsigned getWavefrontSizeLog2() const
bool hasR128A16() const
bool hasCodeObjectV3() const
bool hasFP16Denormals() const
const SIRegisterInfo * getRegisterInfo() const override