#include "Target/AMDGPU/AMDGPUSubtarget.h"

Inheritance diagram for llvm::AMDGPUSubtarget:

Collaboration diagram for llvm::AMDGPUSubtarget:

Public Types
enum	Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3, SOUTHERN_ISLANDS = 4, SEA_ISLANDS = 5, VOLCANIC_ISLANDS = 6, GFX9 = 7 }

Public Member Functions
	AMDGPUSubtarget (const Triple &TT)

std::pair< unsigned, unsigned >	getDefaultFlatWorkGroupSize (CallingConv::ID CC) const

std::pair< unsigned, unsigned >	getFlatWorkGroupSizes (const Function &F) const

std::pair< unsigned, unsigned >	getWavesPerEU (const Function &F) const

unsigned	getMaxLocalMemSizeWithWaveCount (unsigned WaveCount, const Function &) const
	Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount. More...

unsigned	getOccupancyWithLocalMemSize (uint32_t Bytes, const Function &) const
	Inverse of getMaxLocalMemSizeWithWaveCount. More...

unsigned	getOccupancyWithLocalMemSize (const MachineFunction &MF) const

bool	isAmdHsaOS () const

bool	isAmdPalOS () const

bool	isMesa3DOS () const

bool	isMesaKernel (const Function &F) const

bool	isAmdHsaOrMesa (const Function &F) const

bool	has16BitInsts () const

bool	hasMadMixInsts () const

bool	hasFP32Denormals () const

bool	hasFPExceptions () const

bool	hasSDWA () const

bool	hasVOP3PInsts () const

bool	hasMulI24 () const

bool	hasMulU24 () const

bool	hasInv2PiInlineImm () const

bool	hasFminFmaxLegacy () const

bool	hasTrigReducedRange () const

bool	isPromoteAllocaEnabled () const

unsigned	getWavefrontSize () const

int	getLocalMemorySize () const

unsigned	getAlignmentForImplicitArgPtr () const

unsigned	getExplicitKernelArgOffset (const Function &F) const
	Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument. More...

virtual unsigned	getMaxWorkGroupsPerCU (unsigned FlatWorkGroupSize) const =0

virtual unsigned	getMinFlatWorkGroupSize () const =0

virtual unsigned	getMaxFlatWorkGroupSize () const =0

virtual unsigned	getMaxWavesPerEU (unsigned FlatWorkGroupSize) const =0

virtual unsigned	getMinWavesPerEU () const =0

unsigned	getMaxWavesPerEU () const

bool	makeLIDRangeMetadata (Instruction *I) const
	Creates value range metadata on a workitemid.* intrinsic call or load. More...

unsigned	getImplicitArgNumBytes (const Function &F) const

uint64_t	getExplicitKernArgSize (const Function &F, unsigned &MaxAlign) const

unsigned	getKernArgSegmentSize (const Function &F, unsigned &MaxAlign) const

virtual	~AMDGPUSubtarget ()

Static Public Member Functions
static const AMDGPUSubtarget &	get (const MachineFunction &MF)

static const AMDGPUSubtarget &	get (const TargetMachine &TM, const Function &F)

Protected Attributes
bool	Has16BitInsts

bool	HasMadMixInsts

bool	FP32Denormals

bool	FPExceptions

bool	HasSDWA

bool	HasVOP3PInsts

bool	HasMulI24

bool	HasMulU24

bool	HasInv2PiInlineImm

bool	HasFminFmaxLegacy

bool	EnablePromoteAlloca

bool	HasTrigReducedRange

int	LocalMemorySize

unsigned	WavefrontSize

Detailed Description

Definition at line 49 of file AMDGPUSubtarget.h.

Member Enumeration Documentation

◆ Generation

enum llvm::AMDGPUSubtarget::Generation

Enumerator
R600
R700
EVERGREEN
NORTHERN_ISLANDS
SOUTHERN_ISLANDS
SEA_ISLANDS
VOLCANIC_ISLANDS
GFX9

Definition at line 51 of file AMDGPUSubtarget.h.

Constructor & Destructor Documentation

◆ AMDGPUSubtarget()

AMDGPUSubtarget::AMDGPUSubtarget ( const Triple & TT )

Definition at line 132 of file AMDGPUSubtarget.cpp.

◆ ~AMDGPUSubtarget()

virtual llvm::AMDGPUSubtarget::~AMDGPUSubtarget ( )

inline virtual

Definition at line 243 of file AMDGPUSubtarget.h.

Member Function Documentation

◆ get() [1/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get ( const MachineFunction & MF )

static

Definition at line 684 of file AMDGPUSubtarget.cpp.

References llvm::Triple::amdgcn, llvm::Triple::getArch(), llvm::MachineFunction::getSubtarget(), llvm::MachineFunction::getTarget(), and llvm::TargetMachine::getTargetTriple().

Referenced by llvm::AMDGPUMachineFunction::AMDGPUMachineFunction(), llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute(), llvm::createSIWholeQuadModePass(), llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), getReassignedChan(), INITIALIZE_PASS(), isCallPromotable(), and readsVCCZ().

◆ get() [2/2]

const AMDGPUSubtarget & AMDGPUSubtarget::get	(	const TargetMachine &	TM,
		const Function &	F
	)

static

Definition at line 691 of file AMDGPUSubtarget.cpp.

References llvm::Triple::amdgcn, F(), llvm::Triple::getArch(), llvm::TargetMachine::getSubtarget(), and llvm::TargetMachine::getTargetTriple().

◆ getAlignmentForImplicitArgPtr()

unsigned llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr ( ) const

inline

Definition at line 198 of file AMDGPUSubtarget.h.

References isAmdHsaOS().

Referenced by getKernArgSegmentSize(), and llvm::SIMachineFunctionInfo::SIMachineFunctionInfo().

◆ getDefaultFlatWorkGroupSize()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getDefaultFlatWorkGroupSize ( CallingConv::ID CC ) const

Returns: Default flat work group size range for a calling convention.

Definition at line 258 of file AMDGPUSubtarget.cpp.

References llvm::CallingConv::AMDGPU_CS, llvm::CallingConv::AMDGPU_ES, llvm::CallingConv::AMDGPU_GS, llvm::CallingConv::AMDGPU_HS, llvm::CallingConv::AMDGPU_KERNEL, llvm::CallingConv::AMDGPU_LS, llvm::CallingConv::AMDGPU_PS, llvm::CallingConv::AMDGPU_VS, getWavefrontSize(), and llvm::CallingConv::SPIR_KERNEL.

Referenced by getFlatWorkGroupSizes().

◆ getExplicitKernArgSize()

uint64_t AMDGPUSubtarget::getExplicitKernArgSize	(	const Function &	F,
		unsigned &	MaxAlign
	)		const

Definition at line 416 of file AMDGPUSubtarget.cpp.

References llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align, llvm::alignTo(), llvm::CallingConv::AMDGPU_KERNEL, Arg, llvm::Function::args(), assert(), llvm::Function::getCallingConv(), llvm::Module::getDataLayout(), llvm::GlobalValue::getParent(), llvm::Value::getType(), llvm::max(), and llvm::CallingConv::SPIR_KERNEL.

Referenced by llvm::AMDGPUMachineFunction::AMDGPUMachineFunction(), getImplicitArgNumBytes(), and getKernArgSegmentSize().

◆ getExplicitKernelArgOffset()

unsigned llvm::AMDGPUSubtarget::getExplicitKernelArgOffset ( const Function & F ) const

inline

Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.

Definition at line 204 of file AMDGPUSubtarget.h.

References getMaxFlatWorkGroupSize(), getMaxWavesPerEU(), getMaxWorkGroupsPerCU(), getMinFlatWorkGroupSize(), getMinWavesPerEU(), and isAmdHsaOrMesa().

Referenced by llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute(), llvm::AMDGPUTargetLowering::getImplicitParameterOffset(), getKernArgSegmentSize(), and llvm::AMDGPUCallLowering::lowerFormalArguments().

◆ getFlatWorkGroupSizes()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getFlatWorkGroupSizes ( const Function & F ) const

Returns: Subtarget's default pair of minimum/maximum flat work group sizes for function F, or minimum/maximum flat work group sizes explicitly requested using the "amdgpu-flat-work-group-size" attribute attached to function F; Subtarget's default values if explicitly requested values cannot be converted to integer, or violate subtarget's specifications.

Definition at line 276 of file AMDGPUSubtarget.cpp.

References llvm::Default, llvm::Function::getCallingConv(), getDefaultFlatWorkGroupSize(), llvm::AMDGPU::getIntegerAttribute(), llvm::AMDGPU::getIntegerPairAttribute(), llvm::GCNSubtarget::getMaxFlatWorkGroupSize(), and llvm::GCNSubtarget::getMinFlatWorkGroupSize().

Referenced by getMaxLocalMemSizeWithWaveCount(), getOccupancyWithLocalMemSize(), getWavesPerEU(), isCallPromotable(), makeLIDRangeMetadata(), and llvm::SIMachineFunctionInfo::SIMachineFunctionInfo().

◆ getImplicitArgNumBytes()

unsigned llvm::AMDGPUSubtarget::getImplicitArgNumBytes ( const Function & F ) const

inline

Returns: Number of bytes of arguments that are passed to a shader or kernel in addition to the explicit ones declared for the function.

Definition at line 233 of file AMDGPUSubtarget.h.

References F(), getExplicitKernArgSize(), llvm::AMDGPU::getIntegerAttribute(), getKernArgSegmentSize(), and isMesaKernel().

Referenced by getKernArgSegmentSize().

◆ getKernArgSegmentSize()

unsigned AMDGPUSubtarget::getKernArgSegmentSize	(	const Function &	F,
		unsigned &	MaxAlign
	)		const

Definition at line 437 of file AMDGPUSubtarget.cpp.

References llvm::alignTo(), getAlignmentForImplicitArgPtr(), getExplicitKernArgSize(), getExplicitKernelArgOffset(), and getImplicitArgNumBytes().

Referenced by getImplicitArgNumBytes().

◆ getLocalMemorySize()

int llvm::AMDGPUSubtarget::getLocalMemorySize ( ) const

inline

Definition at line 194 of file AMDGPUSubtarget.h.

References LocalMemorySize.

Referenced by getMaxLocalMemSizeWithWaveCount(), getOccupancyWithLocalMemSize(), hasAnyNonFlatUseOfReg(), and isCallPromotable().

◆ getMaxFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize ( ) const

pure virtual

Returns: Maximum flat work group size supported by the subtarget.

Implemented in llvm::R600Subtarget, and llvm::GCNSubtarget.

Referenced by getExplicitKernelArgOffset().

◆ getMaxLocalMemSizeWithWaveCount()

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount	(	unsigned	WaveCount,
		const Function &	F
	)		const

Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

Definition at line 229 of file AMDGPUSubtarget.cpp.

References getFlatWorkGroupSizes(), getLocalMemorySize(), llvm::GCNSubtarget::getMaxWavesPerEU(), and llvm::GCNSubtarget::getMaxWorkGroupsPerCU().

Referenced by llvm::GCNSubtarget::dumpCode(), and isCallPromotable().

◆ getMaxWavesPerEU() [1/2]

virtual unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( unsigned FlatWorkGroupSize ) const

pure virtual

Returns: Maximum number of waves per execution unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implemented in llvm::R600Subtarget, and llvm::GCNSubtarget.

Referenced by isCallPromotable().

◆ getMaxWavesPerEU() [2/2]

unsigned llvm::AMDGPUSubtarget::getMaxWavesPerEU ( ) const

inline

Definition at line 226 of file AMDGPUSubtarget.h.

References I, and makeLIDRangeMetadata().

Referenced by getExplicitKernelArgOffset().

◆ getMaxWorkGroupsPerCU()

virtual unsigned llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU ( unsigned FlatWorkGroupSize ) const

pure virtual

Returns: Maximum number of work groups per compute unit supported by the subtarget and limited by given FlatWorkGroupSize.

Implemented in llvm::R600Subtarget, and llvm::GCNSubtarget.

Referenced by getExplicitKernelArgOffset().

◆ getMinFlatWorkGroupSize()

virtual unsigned llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize ( ) const

pure virtual

Returns: Minimum flat work group size supported by the subtarget.

Implemented in llvm::R600Subtarget, and llvm::GCNSubtarget.

Referenced by getExplicitKernelArgOffset().

◆ getMinWavesPerEU()

virtual unsigned llvm::AMDGPUSubtarget::getMinWavesPerEU ( ) const

pure virtual

Returns: Minimum number of waves per execution unit supported by the subtarget.

Implemented in llvm::R600Subtarget, and llvm::GCNSubtarget.

Referenced by getExplicitKernelArgOffset().

◆ getOccupancyWithLocalMemSize() [1/2]

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize	(	uint32_t	Bytes,
		const Function &	F
	)		const

Inverse of getMaxLocalMemSizeWithWaveCount.

Return the maximum wavecount if the given LDS memory size is the only constraint.

Definition at line 239 of file AMDGPUSubtarget.cpp.

References getFlatWorkGroupSizes(), getLocalMemorySize(), llvm::GCNSubtarget::getMaxWavesPerEU(), llvm::GCNSubtarget::getMaxWorkGroupsPerCU(), and llvm::max().

Referenced by getOccupancyWithLocalMemSize(), llvm::SIRegisterInfo::getRegPressureLimit(), isCallPromotable(), and llvm::SIMachineFunctionInfo::limitOccupancy().

◆ getOccupancyWithLocalMemSize() [2/2]

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize ( const MachineFunction & MF ) const

Definition at line 252 of file AMDGPUSubtarget.cpp.

References llvm::MachineFunction::getFunction(), llvm::MachineFunction::getInfo(), and getOccupancyWithLocalMemSize().

◆ getWavefrontSize()

unsigned llvm::AMDGPUSubtarget::getWavefrontSize ( ) const

inline

Definition at line 190 of file AMDGPUSubtarget.h.

References WavefrontSize.

Referenced by llvm::SIMachineFunctionInfo::allocateSGPRSpillToVGPR(), buildMUBUFOffsetLoadStore(), llvm::SIFrameLowering::eliminateCallFramePseudoInstr(), llvm::SIRegisterInfo::eliminateFrameIndex(), llvm::SIFrameLowering::emitEntryFunctionPrologue(), llvm::SIFrameLowering::emitEpilogue(), llvm::SIFrameLowering::emitPrologue(), getDefaultFlatWorkGroupSize(), hasAnyNonFlatUseOfReg(), llvm::SIRegisterInfo::restoreSGPR(), and llvm::SIRegisterInfo::spillSGPR().

◆ getWavesPerEU()

std::pair< unsigned, unsigned > AMDGPUSubtarget::getWavesPerEU ( const Function & F ) const

Returns: Subtarget's default pair of minimum/maximum number of waves per execution unit for function F, or minimum/maximum number of waves per execution unit explicitly requested using the "amdgpu-waves-per-eu" attribute attached to function F; Subtarget's default values if explicitly requested values cannot be converted to integer, violate subtarget's specifications, or are not compatible with minimum/maximum number of waves limited by flat work group size, register usage, and/or LDS usage.

Definition at line 306 of file AMDGPUSubtarget.cpp.

References llvm::Default, getFlatWorkGroupSizes(), llvm::AMDGPU::getIntegerPairAttribute(), llvm::GCNSubtarget::getMaxWavesPerEU(), llvm::GCNSubtarget::getMinWavesPerEU(), and llvm::Function::hasFnAttribute().

Referenced by isCallPromotable(), and llvm::SIMachineFunctionInfo::SIMachineFunctionInfo().