89 std::unique_ptr<MCStreamer> &&Streamer) {
101 std::unique_ptr<MCStreamer> Streamer)
110 return "AMDGPU Assembly Printer";
125 std::string ExpectedTarget;
137 HSAMetadataStream->begin(M);
162 std::string ISAVersionString;
170 HSAMetadataStream->end();
173 assert(Success &&
"Malformed HSA Metadata");
182 for (
auto i : PALMetadataMap) {
183 PALMetadataVector.push_back(i.first);
184 PALMetadataVector.push_back(i.second);
216 getAmdKernelCode(KernelCode, CurrentProgramInfo, *
MF);
221 HSAMetadataStream->emitKernel(*
MF, CurrentProgramInfo);
233 auto &
Context = Streamer.getContext();
234 auto &ObjectFileInfo = *
Context.getObjectFileInfo();
235 auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
237 Streamer.PushSection();
238 Streamer.SwitchSection(&ReadOnlySection);
242 Streamer.EmitValueToAlignment(64, 0, 1, 0);
243 if (ReadOnlySection.getAlignment() < 64)
244 ReadOnlySection.setAlignment(64);
249 *
getSTI(), KernelName, getAmdhsaKernelDescriptor(*
MF, CurrentProgramInfo),
258 Streamer.PopSection();
310 CallGraphResourceInfo.clear();
319 void AMDGPUAsmPrinter::readPALMetadata(
Module &M) {
321 if (!NamedMD || !NamedMD->getNumOperands())
326 for (
unsigned I = 0,
E = Tuple->getNumOperands() & -2;
I !=
E;
I += 2) {
327 auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(
I));
328 auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(
I + 1));
331 PALMetadataMap[
Key->getZExtValue()] = Val->getZExtValue();
336 void AMDGPUAsmPrinter::emitCommonFunctionComments(
339 uint64_t ScratchSize,
350 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
353 uint16_t KernelCodeProperties = 0;
356 KernelCodeProperties |=
357 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
360 KernelCodeProperties |=
361 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
364 KernelCodeProperties |=
365 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
368 KernelCodeProperties |=
369 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
372 KernelCodeProperties |=
373 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
376 KernelCodeProperties |=
377 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
380 return KernelCodeProperties;
387 memset(&KernelDescriptor, 0x0,
sizeof(KernelDescriptor));
399 return KernelDescriptor;
423 getSIProgramInfo(CurrentProgramInfo, MF);
425 auto I = CallGraphResourceInfo.insert(
426 std::make_pair(&MF.
getFunction(), SIFunctionResourceInfo()));
427 SIFunctionResourceInfo &
Info =
I.first->second;
428 assert(
I.second &&
"should only be called once per function");
429 Info = analyzeResourceUsage(MF);
433 EmitPALMetadata(MF, CurrentProgramInfo);
435 EmitProgramInfoSI(MF, CurrentProgramInfo);
450 OutStreamer->emitRawComment(
" Function info:",
false);
451 SIFunctionResourceInfo &
Info = CallGraphResourceInfo[&MF.
getFunction()];
452 emitCommonFunctionComments(
455 Info.PrivateSegmentSize,
456 getFunctionCodeSize(MF), MFI);
460 OutStreamer->emitRawComment(
" Kernel info:",
false);
461 emitCommonFunctionComments(CurrentProgramInfo.
NumVGPR,
464 getFunctionCodeSize(MF), MFI);
472 " bytes/workgroup (compile time only)",
false);
480 " NumSGPRsForWavesPerEU: " +
483 " NumVGPRsForWavesPerEU: " +
491 " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
494 " DebuggerPrivateSegmentBufferSGPR: s" +
499 " COMPUTE_PGM_RSRC2:USER_SGPR: " +
502 " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
505 " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
508 " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
511 " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
514 " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
525 std::string Comment =
"\n";
528 Comment +=
" ; " +
HexLines[i] +
"\n";
539 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(
const MachineFunction &MF)
const {
543 uint64_t CodeSize = 0;
550 if (
MI.isDebugInstr())
564 if (!UseOp.isImplicit() || !TII.
isFLAT(*UseOp.getParent()))
574 UsesVCC, UsesFlatScratch);
577 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
579 SIFunctionResourceInfo
Info;
588 Info.UsesFlatScratch = MRI.
isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
601 Info.UsesFlatScratch =
false;
604 Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
605 Info.PrivateSegmentSize = FrameInfo.getStackSize();
607 Info.PrivateSegmentSize += FrameInfo.getMaxAlignment();
616 if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
617 MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
620 HighestVGPRReg =
Reg;
625 MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
628 HighestSGPRReg =
Reg;
635 Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
637 Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
643 int32_t MaxVGPR = -1;
644 int32_t MaxSGPR = -1;
645 uint64_t CalleeFrameSize = 0;
657 unsigned Reg = MO.getReg();
660 case AMDGPU::EXEC_LO:
661 case AMDGPU::EXEC_HI:
664 case AMDGPU::SRC_SHARED_BASE:
665 case AMDGPU::SRC_SHARED_LIMIT:
666 case AMDGPU::SRC_PRIVATE_BASE:
667 case AMDGPU::SRC_PRIVATE_LIMIT:
670 case AMDGPU::NoRegister:
680 case AMDGPU::FLAT_SCR:
681 case AMDGPU::FLAT_SCR_LO:
682 case AMDGPU::FLAT_SCR_HI:
685 case AMDGPU::XNACK_MASK:
686 case AMDGPU::XNACK_MASK_LO:
687 case AMDGPU::XNACK_MASK_HI:
702 if (AMDGPU::SReg_32RegClass.
contains(Reg)) {
704 "trap handler registers should not be used");
707 }
else if (AMDGPU::VGPR_32RegClass.
contains(Reg)) {
710 }
else if (AMDGPU::SReg_64RegClass.
contains(Reg)) {
712 "trap handler registers should not be used");
715 }
else if (AMDGPU::VReg_64RegClass.
contains(Reg)) {
718 }
else if (AMDGPU::VReg_96RegClass.
contains(Reg)) {
721 }
else if (AMDGPU::SReg_128RegClass.
contains(Reg)) {
723 "trap handler registers should not be used");
726 }
else if (AMDGPU::VReg_128RegClass.
contains(Reg)) {
729 }
else if (AMDGPU::SReg_256RegClass.
contains(Reg)) {
731 "trap handler registers should not be used");
734 }
else if (AMDGPU::VReg_256RegClass.
contains(Reg)) {
737 }
else if (AMDGPU::SReg_512RegClass.
contains(Reg)) {
739 "trap handler registers should not be used");
742 }
else if (AMDGPU::VReg_512RegClass.
contains(Reg)) {
749 int MaxUsed = HWReg + Width - 1;
751 MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
753 MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
771 ST.hasFlatAddressSpace());
772 MaxSGPR =
std::max(MaxSGPR, MaxSGPRGuess);
775 CalleeFrameSize =
std::max(CalleeFrameSize, UINT64_C(16384));
777 Info.UsesFlatScratch = ST.hasFlatAddressSpace();
778 Info.HasDynamicallySizedStack =
true;
782 auto I = CallGraphResourceInfo.find(Callee);
783 assert(
I != CallGraphResourceInfo.end() &&
784 "callee should have been handled before caller");
786 MaxSGPR =
std::max(
I->second.NumExplicitSGPR - 1, MaxSGPR);
787 MaxVGPR =
std::max(
I->second.NumVGPR - 1, MaxVGPR);
789 =
std::max(
I->second.PrivateSegmentSize, CalleeFrameSize);
790 Info.UsesVCC |=
I->second.UsesVCC;
791 Info.UsesFlatScratch |=
I->second.UsesFlatScratch;
792 Info.HasDynamicallySizedStack |=
I->second.HasDynamicallySizedStack;
793 Info.HasRecursion |=
I->second.HasRecursion;
797 Info.HasRecursion =
true;
802 Info.NumExplicitSGPR = MaxSGPR + 1;
803 Info.NumVGPR = MaxVGPR + 1;
804 Info.PrivateSegmentSize += CalleeFrameSize;
809 void AMDGPUAsmPrinter::getSIProgramInfo(
SIProgramInfo &ProgInfo,
811 SIFunctionResourceInfo
Info = analyzeResourceUsage(MF);
813 ProgInfo.
NumVGPR = Info.NumVGPR;
814 ProgInfo.
NumSGPR = Info.NumExplicitSGPR;
816 ProgInfo.
VCCUsed = Info.UsesVCC;
817 ProgInfo.
FlatUsed = Info.UsesFlatScratch;
818 ProgInfo.
DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
841 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
845 "addressable scalar registers",
848 MaxAddressableNumSGPRs);
850 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs - 1;
855 ProgInfo.
NumSGPR += ExtraSGPRs;
859 unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
863 WaveDispatchNumSGPR += NumRegs;
865 WaveDispatchNumVGPR += NumRegs;
880 if (ProgInfo.
NumSGPR > MaxAddressableNumSGPRs) {
888 MaxAddressableNumSGPRs);
890 ProgInfo.
NumSGPR = MaxAddressableNumSGPRs;
940 unsigned LDSAlignShift;
949 unsigned LDSSpillSize =
950 MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
952 ProgInfo.
LDSSize = MFI->getLDSSize() + LDSSpillSize;
954 alignTo(ProgInfo.
LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
957 unsigned ScratchAlignShift = 10;
963 1ULL << ScratchAlignShift) >>
977 unsigned TIDIGCompCnt = 0;
978 if (MFI->hasWorkItemIDZ())
980 else if (MFI->hasWorkItemIDY())
1069 unsigned Rsrc2Reg = Rsrc1Reg + 1;
1094 unsigned NumUsedVgprsKey = ScratchSizeKey +
1096 unsigned NumUsedSgprsKey = ScratchSizeKey +
1104 PALMetadataMap[ScratchSizeKey] |=
1112 PALMetadataMap[ScratchSizeKey] |=
1116 PALMetadataMap[Rsrc2Reg] |=
1161 if (MFI->hasPrivateSegmentBuffer()) {
1166 if (MFI->hasDispatchPtr())
1169 if (MFI->hasQueuePtr())
1172 if (MFI->hasKernargSegmentPtr())
1175 if (MFI->hasDispatchID())
1178 if (MFI->hasFlatScratchInit())
1181 if (MFI->hasDispatchPtr())
1184 if (STM.debuggerSupported())
1187 if (STM.isXNACKEnabled())
1190 unsigned MaxKernArgAlign;
1202 if (STM.debuggerEmitPrologue()) {
1211 unsigned AsmVariant,
1217 if (ExtraCode && ExtraCode[0]) {
1218 if (ExtraCode[1] != 0)
1221 switch (ExtraCode[0]) {
bool enableIEEEBit(const MachineFunction &MF) const
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
bool hasDispatchPtr() const
uint16_t DebuggerPrivateSegmentBufferSGPR
constexpr bool isUInt< 32 >(uint64_t x)
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Interface definition for SIRegisterInfo.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool hasPrivateSegmentBuffer() const
#define S_00B848_VGPRS(x)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
#define FP_DENORM_MODE_SP(x)
This class represents lattice values for constants.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define G_00B84C_USER_SGPR(x)
A Module instance is used to store all the information related to an LLVM module. ...
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
void EmitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
unsigned getReg() const
getReg - Returns the register number.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK)=0
Instruction set architecture version.
#define S_00B84C_SCRATCH_EN(x)
const SIInstrInfo * getInstrInfo() const override
#define S_00B84C_TG_SIZE_EN(x)
#define S_00B848_DX10_CLAMP(x)
unsigned getPSInputAddr() const
uint32_t NumSGPRsForWavesPerEU
unsigned const TargetRegisterInfo * TRI
MachineFunction * MF
The current machine function.
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
#define G_00B84C_TGID_Y_EN(x)
const MCSubtargetInfo * getSTI() const
Interface definition for R600RegisterInfo.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define R_0286CC_SPI_PS_INPUT_ENA
bool hasDispatchID() const
#define S_00B028_SGPRS(x)
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Track resource usage for kernels / entry functions.
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
const SIRegisterInfo & getRegisterInfo() const
return AArch64::GPR64RegClass contains(Reg)
bool hasFP64Denormals() const
bool hasFlatScratchInit() const
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define FP_DENORM_FLUSH_NONE
Calling convention used for Mesa/AMDPAL geometry shaders.
uint32_t code_properties
Code properties.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Calling convention used for Mesa/AMDPAL compute shaders.
static bool isFLAT(const MachineInstr &MI)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
int getLocalMemorySize() const
AMD Kernel Code Object (amd_kernel_code_t).
bool isEntryFunction() const
SPIR_KERNEL - Calling convention for SPIR kernel functions.
bool enableDX10Clamp() const
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
#define G_00B84C_TRAP_HANDLER(x)
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isTrapHandlerEnabled() const
uint32_t compute_pgm_rsrc2
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR
virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata)=0
#define FP_ROUND_MODE_SP(x)
Diagnostic information for stack size etc.
#define S_00B84C_TGID_Y_EN(x)
uint32_t private_segment_fixed_size
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
Context object for machine code objects.
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
#define S_00B848_FLOAT_MODE(x)
#define R_00B848_COMPUTE_PGM_RSRC1
uint32_t group_segment_fixed_size
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
void EmitFunctionBody()
This method emits the body and trailer for a function.
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Type * getType() const
All values are typed, get the type of this value.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getNumSpilledSGPRs() const
unsigned getAddressableNumSGPRs() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isGroupSegment(const GlobalValue *GV)
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
#define S_00B84C_TRAP_HANDLER(x)
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
uint16_t kernel_code_properties
bool isVerbose() const
Return true if assembly output should contain comments.
amdgpu Simplify well known AMD library false Value * Callee
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Analysis containing CSE Info
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool debuggerEmitPrologue() const
uint32_t NumVGPRsForWavesPerEU
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool hasFP32Denormals() const
#define S_00B848_IEEE_MODE(x)
bool isCompute(CallingConv::ID cc)
#define S_00B028_VGPRS(x)
static uint32_t getFPMode(const MachineFunction &F)
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define G_00B84C_TGID_X_EN(x)
#define S_00B84C_TIDIG_COMP_CNT(x)
#define FP_ROUND_MODE_DP(x)
const GlobalValue * getGlobal() const
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
This class is intended to be used as a driving class for all asm writers.
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
#define S_00B84C_EXCP_EN_MSB(x)
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Generation getGeneration() const
MCContext & getContext() const
const Triple & getTargetTriple() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
#define S_00B84C_TGID_Z_EN(x)
The AMDGPU TargetMachine interface definition for hw codgen targets.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define S_00B84C_LDS_SIZE(x)
Calling convention used for Mesa/AMDPAL pixel shaders.
void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override
Targets can override this to emit stuff at the start of a basic block.
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
#define FP_ROUND_ROUND_TO_NEAREST
bool doesNotRecurse() const
Determine if the function is known not to recurse, directly or indirectly.
#define S_00B84C_EXCP_EN(x)
void LLVMInitializeAMDGPUAsmPrinter()
IsaVersion getIsaVersion(StringRef GPU)
unsigned getWavefrontSize() const
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
AMDGPUTargetStreamer * getTargetStreamer() const
unsigned getFunctionNumber() const
Return a unique ID for the current function.
#define G_00B84C_TIDIG_COMP_CNT(x)
MCStreamer & getStreamer()
std::vector< std::string > HexLines
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
bool hasKernargSegmentPtr() const
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
R600 Assembly printer class.
MachineOperand class - Representation of each machine instruction operand.
bool isStackRealigned() const
uint16_t debug_wavefront_private_segment_offset_sgpr
If is_debug_supported is 0 then must be 0.
const MCSubtargetInfo * getMCSubtargetInfo() const
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
bool hasSGPRInitBug() const
#define S_00B848_DEBUG_MODE(x)
#define FP_DENORM_MODE_DP(x)
#define S_0286E8_WAVESIZE(x)
const Function & getFunction() const
Return the LLVM function that this machine code represents.
#define S_00B84C_TGID_X_EN(x)
unsigned getNumSpilledVGPRs() const
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
#define AMD_HSA_BITS_SET(dst, mask, val)
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
bool isMemoryBound() const
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
amdgpu Simplify well known AMD library false Value Value * Arg
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getPSInputEnable() const
uint32_t compute_pgm_rsrc1
virtual bool EmitISAVersion(StringRef IsaVersionString)=0
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
virtual void EmitDirectiveAMDGCNTarget(StringRef Target)=0
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool doFinalization(Module &M) override
Shut down the asmprinter.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
#define S_00B848_SGPRS(x)
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const
Targets can override this to emit stuff at the start of a basic block.
bool hasXNACK(const MCSubtargetInfo &STI)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
#define S_00B84C_USER_SGPR(x)
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Generic base class for all target subtargets.
bool isAmdHsaOrMesa(const Function &F) const
This represents a section on linux, lots of unix variants and some bare metal systems.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
static unsigned getRsrcReg(CallingConv::ID CallConv)
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
unsigned getMaxNumUserSGPRs() const
#define S_00B860_WAVESIZE(x)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool needsWaveLimiter() const
#define R_00B84C_COMPUTE_PGM_RSRC2
#define S_00B848_PRIORITY(x)
A raw_ostream that writes to an std::string.
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
#define R_0286E8_SPI_TMPRING_SIZE
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
unsigned getHWRegIndex(unsigned Reg) const
#define S_00B02C_EXTRA_LDS_SIZE(x)
This class implements an extremely fast bulk output stream that can only output to a stream...
Primary interface to the complete machine description for the target machine.
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
StringRef - Represent a constant reference to a string, i.e.
#define R_0286D0_SPI_PS_INPUT_ADDR
uint16_t debug_private_segment_buffer_sgpr
If is_debug_supported is 0 then must be 0.
const MachineOperand & getOperand(unsigned i) const
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
Calling convention for AMDGPU code object kernels.
std::vector< uint32_t > Metadata
PAL metadata represented as a vector.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
iterator_range< arg_iterator > args()
bool hasCodeObjectV3() const