33 #define DEBUG_TYPE "amdgpu-subtarget" 35 #define GET_SUBTARGETINFO_TARGET_DESC 36 #define GET_SUBTARGETINFO_CTOR 37 #define AMDGPUSubtarget GCNSubtarget 38 #include "AMDGPUGenSubtargetInfo.inc" 39 #define GET_SUBTARGETINFO_TARGET_DESC 40 #define GET_SUBTARGETINFO_CTOR 41 #undef AMDGPUSubtarget 42 #include "R600GenSubtargetInfo.inc" 84 FullFS +=
"+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
90 FullFS +=
"+fp64-fp16-denormals,";
92 FullFS +=
"-fp32-denormals,";
95 FullFS +=
"+enable-prt-strict-null,";
107 if (!hasAddr64() && !FS.
contains(
"flat-for-global")) {
108 FlatForGlobal =
true;
112 if (MaxPrivateElementSize == 0)
113 MaxPrivateElementSize = 4;
115 if (LDSBankCount == 0)
123 if (!HasMovrel && !HasVGPRIndexMode)
156 IsaVersion(ISAVersion0_0_0),
157 InstrItins(getInstrItineraryForCPU(GPU)),
159 MaxPrivateElementSize(0),
162 HalfRate64Ops(
false),
164 FP64FP16Denormals(
false),
166 FlatForGlobal(
false),
167 AutoWaitcntBeforeBarrier(
false),
169 UnalignedScratchAccess(
false),
170 UnalignedBufferAccess(
false),
172 HasApertureRegs(
false),
175 DebuggerInsertNops(
false),
176 DebuggerEmitPrologue(
false),
178 EnableHugePrivateBuffer(
false),
180 EnableUnsafeDSOffsetFolding(
false),
181 EnableSIScheduler(
false),
183 EnablePRTStrictNull(
false),
192 HasSMemRealTime(
false),
194 HasFmaMixInsts(
false),
196 HasVGPRIndexMode(
false),
197 HasScalarStores(
false),
198 HasScalarAtomics(
false),
200 HasSDWAScalar(
false),
203 HasSDWAOutModsVOPC(
false),
208 EnableSRAMECC(
false),
209 FlatAddressSpace(
false),
210 FlatInstOffsets(
false),
211 FlatGlobalInsts(
false),
212 FlatScratchInsts(
false),
213 AddNoCarryInsts(
false),
214 HasUnpackedD16VMem(
false),
218 FeatureDisable(
false),
219 InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
226 *
this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
245 unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
246 NumWaves = std::min(NumWaves, MaxWaves);
257 std::pair<unsigned, unsigned>
280 std::pair<unsigned, unsigned>
Default =
286 F,
"amdgpu-max-work-group-size", Default.second);
287 Default.first = std::min(Default.first, Default.second);
291 F,
"amdgpu-flat-work-group-size", Default);
294 if (Requested.first > Requested.second)
318 unsigned MinImpliedByFlatWorkGroupSize =
320 bool RequestedFlatWorkGroupSize =
false;
326 Default.first = MinImpliedByFlatWorkGroupSize;
327 RequestedFlatWorkGroupSize =
true;
332 F,
"amdgpu-waves-per-eu", Default,
true);
335 if (Requested.second && Requested.first > Requested.second)
347 if (RequestedFlatWorkGroupSize &&
348 Requested.first < MinImpliedByFlatWorkGroupSize)
356 unsigned MinSize = 0;
358 bool IdQuery =
false;
361 if (
auto *CI = dyn_cast<CallInst>(I)) {
362 const Function *
F = CI->getCalledFunction();
364 unsigned Dim = UINT_MAX;
391 if (
auto Node = Kernel->
getMetadata(
"reqd_work_group_size"))
392 if (Node->getNumOperands() == 3)
393 MinSize = MaxSize = mdconst::extract<ConstantInt>(
394 Node->getOperand(Dim))->getZExtValue();
410 MDNode *MaxWorkGroupSizeRange = MDB.createRange(
APInt(32, MinSize),
417 unsigned &MaxAlign)
const {
422 uint64_t ExplicitArgBytes = 0;
428 unsigned Align = DL.getABITypeAlignment(ArgTy);
429 uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
430 ExplicitArgBytes =
alignTo(ExplicitArgBytes, Align) + AllocSize;
431 MaxAlign =
std::max(MaxAlign, Align);
434 return ExplicitArgBytes;
438 unsigned &MaxAlign)
const {
443 uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
445 if (ImplicitBytes != 0) {
447 TotalSize =
alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
473 unsigned NumRegionInstrs)
const {
485 if (!enableSIScheduler())
543 if (isXNACKEnabled())
554 std::pair<unsigned, unsigned> WavesPerEU = MFI.
getWavesPerEU();
556 unsigned MaxAddressableNumSGPRs =
getMaxNumSGPRs(WavesPerEU.first,
true);
562 F,
"amdgpu-num-sgpr", MaxNumSGPRs);
565 if (Requested && (Requested <= getReservedNumSGPRs(MF)))
576 if (Requested && Requested < InputNumSGPRs)
577 Requested = InputNumSGPRs;
581 if (Requested && Requested >
getMaxNumSGPRs(WavesPerEU.first,
false))
583 if (WavesPerEU.second &&
588 MaxNumSGPRs = Requested;
591 if (hasSGPRInitBug())
594 return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
595 MaxAddressableNumSGPRs);
604 std::pair<unsigned, unsigned> WavesPerEU = MFI.
getWavesPerEU();
611 F,
"amdgpu-num-vgpr", MaxNumVGPRs);
617 if (WavesPerEU.second &&
622 MaxNumVGPRs = Requested;
637 SUnit *SUa =
nullptr;
657 (TII->
isDS(MI1) && TII->
isDS(MI2))) {
665 if (&SU != &DAG->
ExitSU) {
680 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const {
681 Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* inrinsic call or load.
A parsed version of the target data layout string in and methods for querying it. ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
This class represents an incoming formal argument to a Function.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
unsigned getImplicitArgNumBytes(const Function &F) const
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
This file describes how to lower LLVM calls to machine code calls.
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
LLVMContext & getContext() const
All values hold a context through their type.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Mutate the DAG as a postpass after normal DAG building.
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
block Block Frequency true
InstrItineraryData InstrItins
unsigned getMaxWavesPerEU() const
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Generation getGeneration() const
static bool isSMRD(const MachineInstr &MI)
SmallVector< SDep, 4 > Preds
All sunit predecessors.
std::pair< int, int > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< int, int > Default, bool OnlyFirstRequired)
bool hasFlatScratchInit() const
static bool isDS(const MachineInstr &MI)
Calling convention used for Mesa/AMDPAL geometry shaders.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Calling convention used for Mesa/AMDPAL compute shaders.
static bool isFLAT(const MachineInstr &MI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
const HexagonInstrInfo * TII
int getLocalMemorySize() const
SPIR_KERNEL - Calling convention for SPIR kernel functions.
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation >> &Mutations) const override
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)
Type * getType() const
All values are typed, get the type of this value.
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
uint64_t getExplicitKernArgSize(const Function &F, unsigned &MaxAlign) const
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
unsigned getStackAlignment() const
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM)
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg...
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const Triple & getTargetTriple() const
std::pair< unsigned, unsigned > getWavesPerEU() const
The AMDGPU TargetMachine interface definition for hw codgen targets.
Calling convention used for Mesa/AMDPAL pixel shaders.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Triple - Helper class for working with autoconf configuration names.
unsigned getKernArgSegmentSize(const Function &F, unsigned &MaxAlign) const
unsigned getWavefrontSize() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
unsigned getExplicitKernelArgOffset(const Function &F) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
Information about stack frame layout on the target.
bool hasCaymanISA() const
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
unsigned getAlignmentForImplicitArgPtr() const
This class provides the information for the target register banks.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
Class for arbitrary precision integers.
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
This file declares the targeting of the Machinelegalizer class for AMDGPU.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
amdgpu Simplify well known AMD library false Value Value * Arg
Provides AMDGPU specific target descriptions.
A ScheduleDAG for scheduling lists of MachineInstr.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Representation of each machine instruction.
SUnit ExitSU
Special node for the region exit.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
AMDGPUSubtarget(const Triple &TT)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
const Function * getParent() const
Return the enclosing method, or null if none.
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
static bool isVMEM(const MachineInstr &MI)
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
unsigned getMaxFlatWorkGroupSize() const override
unsigned getMinFlatWorkGroupSize() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Module * getParent()
Get the module that this global value is contained inside of...
SmallVector< SDep, 4 > Succs
All sunit successors.
unsigned getNumPreloadedSGPRs() const
Arbitrary strong DAG edge (no real dependence).
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Primary interface to the complete machine description for the target machine.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
StringRef - Represent a constant reference to a string, i.e.
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
std::vector< SUnit > SUnits
The scheduling units.
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
unsigned getMinWavesPerEU() const override
const SITargetLowering * getTargetLowering() const override
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
R600Subtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Calling convention for AMDGPU code object kernels.
iterator_range< arg_iterator > args()
Scheduling unit. This is a node in the scheduling DAG.
const BasicBlock * getParent() const
const SIRegisterInfo * getRegisterInfo() const override