LLVM  8.0.1
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 ///
12 /// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13 /// code. When passed an MCAsmStreamer it prints assembly and when passed
14 /// an MCObjectStreamer it outputs binary code.
15 //
16 //===----------------------------------------------------------------------===//
17 //
18 
19 #include "AMDGPUAsmPrinter.h"
20 #include "AMDGPU.h"
21 #include "AMDGPUSubtarget.h"
22 #include "AMDGPUTargetMachine.h"
26 #include "R600AsmPrinter.h"
27 #include "R600Defines.h"
29 #include "R600RegisterInfo.h"
30 #include "SIDefines.h"
31 #include "SIInstrInfo.h"
32 #include "SIMachineFunctionInfo.h"
33 #include "SIRegisterInfo.h"
34 #include "Utils/AMDGPUBaseInfo.h"
35 #include "llvm/BinaryFormat/ELF.h"
37 #include "llvm/IR/DiagnosticInfo.h"
38 #include "llvm/MC/MCContext.h"
39 #include "llvm/MC/MCSectionELF.h"
40 #include "llvm/MC/MCStreamer.h"
46 
47 using namespace llvm;
48 using namespace llvm::AMDGPU;
49 using namespace llvm::AMDGPU::HSAMD;
50 
51 // TODO: This should get the default rounding mode from the kernel. We just set
52 // the default here, but this could change if the OpenCL rounding mode pragmas
53 // are used.
54 //
55 // The denormal mode here should match what is reported by the OpenCL runtime
56 // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
57 // can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
58 //
59 // AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
60 // precision, and leaves single precision to flush all and does not report
61 // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
62 // CL_FP_DENORM for both.
63 //
64 // FIXME: It seems some instructions do not support single precision denormals
65 // regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
66 // and sin_f32, cos_f32 on most parts).
67 
68 // We want to use these instructions, and using fp32 denormals also causes
69 // instructions to run at the double precision rate for the device so it's
70 // probably best to just report no single precision denormals.
73  // TODO: Is there any real use for the flush in only / flush out only modes?
74 
75  uint32_t FP32Denormals =
77 
78  uint32_t FP64Denormals =
80 
83  FP_DENORM_MODE_SP(FP32Denormals) |
84  FP_DENORM_MODE_DP(FP64Denormals);
85 }
86 
87 static AsmPrinter *
89  std::unique_ptr<MCStreamer> &&Streamer) {
90  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
91 }
92 
93 extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
98 }
99 
101  std::unique_ptr<MCStreamer> Streamer)
102  : AsmPrinter(TM, std::move(Streamer)) {
104  HSAMetadataStream.reset(new MetadataStreamerV3());
105  else
106  HSAMetadataStream.reset(new MetadataStreamerV2());
107 }
108 
110  return "AMDGPU Assembly Printer";
111 }
112 
114  return TM.getMCSubtargetInfo();
115 }
116 
118  if (!OutStreamer)
119  return nullptr;
120  return static_cast<AMDGPUTargetStreamer*>(OutStreamer->getTargetStreamer());
121 }
122 
125  std::string ExpectedTarget;
126  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
127  IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
128 
129  getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
130  }
131 
132  if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
134  return;
135 
137  HSAMetadataStream->begin(M);
138 
140  readPALMetadata(M);
141 
143  return;
144 
145  // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
148 
149  // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
152  Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
153 }
154 
156  // Following code requires TargetStreamer to be present.
157  if (!getTargetStreamer())
158  return;
159 
161  // Emit ISA Version (NT_AMD_AMDGPU_ISA).
162  std::string ISAVersionString;
163  raw_string_ostream ISAVersionStream(ISAVersionString);
164  IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
165  getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
166  }
167 
168  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
169  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
170  HSAMetadataStream->end();
171  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
172  (void)Success;
173  assert(Success && "Malformed HSA Metadata");
174  }
175 
177  // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
178  if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
179  // Copy the PAL metadata from the map where we collected it into a vector,
180  // then write it as a .note.
181  PALMD::Metadata PALMetadataVector;
182  for (auto i : PALMetadataMap) {
183  PALMetadataVector.push_back(i.first);
184  PALMetadataVector.push_back(i.second);
185  }
186  getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
187  }
188  }
189 }
190 
192  const MachineBasicBlock *MBB) const {
194  return false;
195 
196  if (MBB->empty())
197  return true;
198 
199  // If this is a block implementing a long branch, an expression relative to
200  // the start of the block is needed.
201  // XXX - Is there a smarter way to check this?
202  return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
203 }
204 
207  if (!MFI.isEntryFunction())
208  return;
209 
210  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
211  const Function &F = MF->getFunction();
212  if (!STM.hasCodeObjectV3() && STM.isAmdHsaOrMesa(F) &&
213  (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
214  F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
215  amd_kernel_code_t KernelCode;
216  getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
217  getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
218  }
219 
220  if (STM.isAmdHsaOS())
221  HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
222 }
223 
226  if (!MFI.isEntryFunction())
227  return;
230  return;
231 
232  auto &Streamer = getTargetStreamer()->getStreamer();
233  auto &Context = Streamer.getContext();
234  auto &ObjectFileInfo = *Context.getObjectFileInfo();
235  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
236 
237  Streamer.PushSection();
238  Streamer.SwitchSection(&ReadOnlySection);
239 
240  // CP microcode requires the kernel descriptor to be allocated on 64 byte
241  // alignment.
242  Streamer.EmitValueToAlignment(64, 0, 1, 0);
243  if (ReadOnlySection.getAlignment() < 64)
244  ReadOnlySection.setAlignment(64);
245 
246  SmallString<128> KernelName;
247  getNameWithPrefix(KernelName, &MF->getFunction());
249  *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
250  CurrentProgramInfo.NumVGPRsForWavesPerEU,
251  CurrentProgramInfo.NumSGPRsForWavesPerEU -
253  CurrentProgramInfo.VCCUsed,
254  CurrentProgramInfo.FlatUsed),
255  CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
256  hasXNACK(*getSTI()));
257 
258  Streamer.PopSection();
259 }
260 
265  return;
266  }
267 
269  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
270  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
272  getNameWithPrefix(SymbolName, &MF->getFunction()),
274  SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
275  }
276  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
277  if (STI.dumpCode()) {
278  // Disassemble function name label to text.
279  DisasmLines.push_back(MF->getName().str() + ":");
281  HexLines.push_back("");
282  }
283 
285 }
286 
288  const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
289  if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
290  // Write a line for the basic block label if it is not only fallthrough.
291  DisasmLines.push_back(
292  (Twine("BB") + Twine(getFunctionNumber())
293  + "_" + Twine(MBB.getNumber()) + ":").str());
295  HexLines.push_back("");
296  }
298 }
299 
301 
302  // Group segment variables aren't emitted in HSA.
303  if (AMDGPU::isGroupSegment(GV))
304  return;
305 
307 }
308 
310  CallGraphResourceInfo.clear();
311  return AsmPrinter::doFinalization(M);
312 }
313 
314 // For the amdpal OS type, read the amdgpu.pal.metadata supplied by the
315 // frontend into our PALMetadataMap, ready for per-function modification. It
316 // is a NamedMD containing an MDTuple containing a number of MDNodes each of
317 // which is an integer value, and each two integer values forms a key=value
318 // pair that we store as PALMetadataMap[key]=value in the map.
319 void AMDGPUAsmPrinter::readPALMetadata(Module &M) {
320  auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
321  if (!NamedMD || !NamedMD->getNumOperands())
322  return;
323  auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
324  if (!Tuple)
325  return;
326  for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
327  auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
328  auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
329  if (!Key || !Val)
330  continue;
331  PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue();
332  }
333 }
334 
335 // Print comments that apply to both callable functions and entry points.
336 void AMDGPUAsmPrinter::emitCommonFunctionComments(
337  uint32_t NumVGPR,
338  uint32_t NumSGPR,
339  uint64_t ScratchSize,
340  uint64_t CodeSize,
341  const AMDGPUMachineFunction *MFI) {
342  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
343  OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
344  OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
345  OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
346  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
347  false);
348 }
349 
350 uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
351  const MachineFunction &MF) const {
353  uint16_t KernelCodeProperties = 0;
354 
355  if (MFI.hasPrivateSegmentBuffer()) {
356  KernelCodeProperties |=
357  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
358  }
359  if (MFI.hasDispatchPtr()) {
360  KernelCodeProperties |=
361  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
362  }
363  if (MFI.hasQueuePtr()) {
364  KernelCodeProperties |=
365  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
366  }
367  if (MFI.hasKernargSegmentPtr()) {
368  KernelCodeProperties |=
369  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
370  }
371  if (MFI.hasDispatchID()) {
372  KernelCodeProperties |=
373  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
374  }
375  if (MFI.hasFlatScratchInit()) {
376  KernelCodeProperties |=
377  amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
378  }
379 
380  return KernelCodeProperties;
381 }
382 
383 amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
384  const MachineFunction &MF,
385  const SIProgramInfo &PI) const {
386  amdhsa::kernel_descriptor_t KernelDescriptor;
387  memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
388 
392 
393  KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
394  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
395  KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1;
396  KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
397  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
398 
399  return KernelDescriptor;
400 }
401 
403  CurrentProgramInfo = SIProgramInfo();
404 
406 
407  // The starting address of all shader programs must be 256 bytes aligned.
408  // Regular functions just need the basic required instruction alignment.
409  MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
410 
412 
413  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
415  // FIXME: This should be an explicit check for Mesa.
416  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
417  MCSectionELF *ConfigSection =
418  Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
419  OutStreamer->SwitchSection(ConfigSection);
420  }
421 
422  if (MFI->isEntryFunction()) {
423  getSIProgramInfo(CurrentProgramInfo, MF);
424  } else {
425  auto I = CallGraphResourceInfo.insert(
426  std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
427  SIFunctionResourceInfo &Info = I.first->second;
428  assert(I.second && "should only be called once per function");
429  Info = analyzeResourceUsage(MF);
430  }
431 
432  if (STM.isAmdPalOS())
433  EmitPALMetadata(MF, CurrentProgramInfo);
434  else if (!STM.isAmdHsaOS()) {
435  EmitProgramInfoSI(MF, CurrentProgramInfo);
436  }
437 
438  DisasmLines.clear();
439  HexLines.clear();
440  DisasmLineMaxLen = 0;
441 
443 
444  if (isVerbose()) {
445  MCSectionELF *CommentSection =
446  Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
447  OutStreamer->SwitchSection(CommentSection);
448 
449  if (!MFI->isEntryFunction()) {
450  OutStreamer->emitRawComment(" Function info:", false);
451  SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
452  emitCommonFunctionComments(
453  Info.NumVGPR,
454  Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
455  Info.PrivateSegmentSize,
456  getFunctionCodeSize(MF), MFI);
457  return false;
458  }
459 
460  OutStreamer->emitRawComment(" Kernel info:", false);
461  emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
462  CurrentProgramInfo.NumSGPR,
463  CurrentProgramInfo.ScratchSize,
464  getFunctionCodeSize(MF), MFI);
465 
466  OutStreamer->emitRawComment(
467  " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
468  OutStreamer->emitRawComment(
469  " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
470  OutStreamer->emitRawComment(
471  " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
472  " bytes/workgroup (compile time only)", false);
473 
474  OutStreamer->emitRawComment(
475  " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
476  OutStreamer->emitRawComment(
477  " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
478 
479  OutStreamer->emitRawComment(
480  " NumSGPRsForWavesPerEU: " +
481  Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
482  OutStreamer->emitRawComment(
483  " NumVGPRsForWavesPerEU: " +
484  Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
485 
486  OutStreamer->emitRawComment(
487  " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
488 
490  OutStreamer->emitRawComment(
491  " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
492  Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
493  OutStreamer->emitRawComment(
494  " DebuggerPrivateSegmentBufferSGPR: s" +
495  Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
496  }
497 
498  OutStreamer->emitRawComment(
499  " COMPUTE_PGM_RSRC2:USER_SGPR: " +
500  Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
501  OutStreamer->emitRawComment(
502  " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
503  Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
504  OutStreamer->emitRawComment(
505  " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
506  Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
507  OutStreamer->emitRawComment(
508  " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
509  Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
510  OutStreamer->emitRawComment(
511  " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
512  Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
513  OutStreamer->emitRawComment(
514  " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
515  Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
516  false);
517  }
518 
519  if (STM.dumpCode()) {
520 
521  OutStreamer->SwitchSection(
522  Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
523 
524  for (size_t i = 0; i < DisasmLines.size(); ++i) {
525  std::string Comment = "\n";
526  if (!HexLines[i].empty()) {
527  Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
528  Comment += " ; " + HexLines[i] + "\n";
529  }
530 
531  OutStreamer->EmitBytes(StringRef(DisasmLines[i]));
532  OutStreamer->EmitBytes(StringRef(Comment));
533  }
534  }
535 
536  return false;
537 }
538 
539 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
540  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
541  const SIInstrInfo *TII = STM.getInstrInfo();
542 
543  uint64_t CodeSize = 0;
544 
545  for (const MachineBasicBlock &MBB : MF) {
546  for (const MachineInstr &MI : MBB) {
547  // TODO: CodeSize should account for multiple functions.
548 
549  // TODO: Should we count size of debug info?
550  if (MI.isDebugInstr())
551  continue;
552 
553  CodeSize += TII->getInstSizeInBytes(MI);
554  }
555  }
556 
557  return CodeSize;
558 }
559 
561  const SIInstrInfo &TII,
562  unsigned Reg) {
563  for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) {
564  if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent()))
565  return true;
566  }
567 
568  return false;
569 }
570 
572  const GCNSubtarget &ST) const {
573  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
574  UsesVCC, UsesFlatScratch);
575 }
576 
577 AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
578  const MachineFunction &MF) const {
579  SIFunctionResourceInfo Info;
580 
582  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
583  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
584  const MachineRegisterInfo &MRI = MF.getRegInfo();
585  const SIInstrInfo *TII = ST.getInstrInfo();
586  const SIRegisterInfo &TRI = TII->getRegisterInfo();
587 
588  Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
589  MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI);
590 
591  // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
592  // instructions aren't used to access the scratch buffer. Inline assembly may
593  // need it though.
594  //
595  // If we only have implicit uses of flat_scr on flat instructions, it is not
596  // really needed.
597  if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
598  (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
599  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
600  !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {
601  Info.UsesFlatScratch = false;
602  }
603 
604  Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
605  Info.PrivateSegmentSize = FrameInfo.getStackSize();
606  if (MFI->isStackRealigned())
607  Info.PrivateSegmentSize += FrameInfo.getMaxAlignment();
608 
609 
610  Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
611  MRI.isPhysRegUsed(AMDGPU::VCC_HI);
612 
613  // If there are no calls, MachineRegisterInfo can tell us the used register
614  // count easily.
615  // A tail call isn't considered a call for MachineFrameInfo's purposes.
616  if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
617  MCPhysReg HighestVGPRReg = AMDGPU::NoRegister;
618  for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) {
619  if (MRI.isPhysRegUsed(Reg)) {
620  HighestVGPRReg = Reg;
621  break;
622  }
623  }
624 
625  MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
626  for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) {
627  if (MRI.isPhysRegUsed(Reg)) {
628  HighestSGPRReg = Reg;
629  break;
630  }
631  }
632 
633  // We found the maximum register index. They start at 0, so add one to get the
634  // number of registers.
635  Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 :
636  TRI.getHWRegIndex(HighestVGPRReg) + 1;
637  Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 :
638  TRI.getHWRegIndex(HighestSGPRReg) + 1;
639 
640  return Info;
641  }
642 
643  int32_t MaxVGPR = -1;
644  int32_t MaxSGPR = -1;
645  uint64_t CalleeFrameSize = 0;
646 
647  for (const MachineBasicBlock &MBB : MF) {
648  for (const MachineInstr &MI : MBB) {
649  // TODO: Check regmasks? Do they occur anywhere except calls?
650  for (const MachineOperand &MO : MI.operands()) {
651  unsigned Width = 0;
652  bool IsSGPR = false;
653 
654  if (!MO.isReg())
655  continue;
656 
657  unsigned Reg = MO.getReg();
658  switch (Reg) {
659  case AMDGPU::EXEC:
660  case AMDGPU::EXEC_LO:
661  case AMDGPU::EXEC_HI:
662  case AMDGPU::SCC:
663  case AMDGPU::M0:
664  case AMDGPU::SRC_SHARED_BASE:
665  case AMDGPU::SRC_SHARED_LIMIT:
666  case AMDGPU::SRC_PRIVATE_BASE:
667  case AMDGPU::SRC_PRIVATE_LIMIT:
668  continue;
669 
670  case AMDGPU::NoRegister:
671  assert(MI.isDebugInstr());
672  continue;
673 
674  case AMDGPU::VCC:
675  case AMDGPU::VCC_LO:
676  case AMDGPU::VCC_HI:
677  Info.UsesVCC = true;
678  continue;
679 
680  case AMDGPU::FLAT_SCR:
681  case AMDGPU::FLAT_SCR_LO:
682  case AMDGPU::FLAT_SCR_HI:
683  continue;
684 
685  case AMDGPU::XNACK_MASK:
686  case AMDGPU::XNACK_MASK_LO:
687  case AMDGPU::XNACK_MASK_HI:
688  llvm_unreachable("xnack_mask registers should not be used");
689 
690  case AMDGPU::TBA:
691  case AMDGPU::TBA_LO:
692  case AMDGPU::TBA_HI:
693  case AMDGPU::TMA:
694  case AMDGPU::TMA_LO:
695  case AMDGPU::TMA_HI:
696  llvm_unreachable("trap handler registers should not be used");
697 
698  default:
699  break;
700  }
701 
702  if (AMDGPU::SReg_32RegClass.contains(Reg)) {
703  assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
704  "trap handler registers should not be used");
705  IsSGPR = true;
706  Width = 1;
707  } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
708  IsSGPR = false;
709  Width = 1;
710  } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
711  assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
712  "trap handler registers should not be used");
713  IsSGPR = true;
714  Width = 2;
715  } else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
716  IsSGPR = false;
717  Width = 2;
718  } else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
719  IsSGPR = false;
720  Width = 3;
721  } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
722  assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
723  "trap handler registers should not be used");
724  IsSGPR = true;
725  Width = 4;
726  } else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
727  IsSGPR = false;
728  Width = 4;
729  } else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
730  assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
731  "trap handler registers should not be used");
732  IsSGPR = true;
733  Width = 8;
734  } else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
735  IsSGPR = false;
736  Width = 8;
737  } else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
738  assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
739  "trap handler registers should not be used");
740  IsSGPR = true;
741  Width = 16;
742  } else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
743  IsSGPR = false;
744  Width = 16;
745  } else {
746  llvm_unreachable("Unknown register class");
747  }
748  unsigned HWReg = TRI.getHWRegIndex(Reg);
749  int MaxUsed = HWReg + Width - 1;
750  if (IsSGPR) {
751  MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR;
752  } else {
753  MaxVGPR = MaxUsed > MaxVGPR ? MaxUsed : MaxVGPR;
754  }
755  }
756 
757  if (MI.isCall()) {
758  // Pseudo used just to encode the underlying global. Is there a better
759  // way to track this?
760 
761  const MachineOperand *CalleeOp
762  = TII->getNamedOperand(MI, AMDGPU::OpName::callee);
763  const Function *Callee = cast<Function>(CalleeOp->getGlobal());
764  if (Callee->isDeclaration()) {
765  // If this is a call to an external function, we can't do much. Make
766  // conservative guesses.
767 
768  // 48 SGPRs - vcc, - flat_scr, -xnack
769  int MaxSGPRGuess =
770  47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
771  ST.hasFlatAddressSpace());
772  MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
773  MaxVGPR = std::max(MaxVGPR, 23);
774 
775  CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384));
776  Info.UsesVCC = true;
777  Info.UsesFlatScratch = ST.hasFlatAddressSpace();
778  Info.HasDynamicallySizedStack = true;
779  } else {
780  // We force CodeGen to run in SCC order, so the callee's register
781  // usage etc. should be the cumulative usage of all callees.
782  auto I = CallGraphResourceInfo.find(Callee);
783  assert(I != CallGraphResourceInfo.end() &&
784  "callee should have been handled before caller");
785 
786  MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
787  MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
788  CalleeFrameSize
789  = std::max(I->second.PrivateSegmentSize, CalleeFrameSize);
790  Info.UsesVCC |= I->second.UsesVCC;
791  Info.UsesFlatScratch |= I->second.UsesFlatScratch;
792  Info.HasDynamicallySizedStack |= I->second.HasDynamicallySizedStack;
793  Info.HasRecursion |= I->second.HasRecursion;
794  }
795 
796  if (!Callee->doesNotRecurse())
797  Info.HasRecursion = true;
798  }
799  }
800  }
801 
802  Info.NumExplicitSGPR = MaxSGPR + 1;
803  Info.NumVGPR = MaxVGPR + 1;
804  Info.PrivateSegmentSize += CalleeFrameSize;
805 
806  return Info;
807 }
808 
// Fill \p ProgInfo with the program information for the machine function
// \p MF.
//
// The function starts from the result of analyzeResourceUsage(MF), then
// applies addressable-SGPR limits (emitting diagnostics when exceeded),
// adds extra SGPRs, raises the register counts to cover the function's
// arguments and the waves-per-EU minimums, checks user SGPR and local memory
// limits, and finally derives the float mode, LDS/scratch block counts, and
// the COMPUTE_PGM_RSRC1 / COMPUTE_PGM_RSRC2 values.
//
// NOTE(review): this listing has lost a number of source lines inside this
// function (gaps in the embedded line numbers). The affected places are
// marked below; the code itself is reproduced unchanged.
809 void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
810  const MachineFunction &MF) {
811  SIFunctionResourceInfo Info = analyzeResourceUsage(MF);
812 
 // Seed the program info from the per-function resource analysis.
813  ProgInfo.NumVGPR = Info.NumVGPR;
814  ProgInfo.NumSGPR = Info.NumExplicitSGPR;
815  ProgInfo.ScratchSize = Info.PrivateSegmentSize;
816  ProgInfo.VCCUsed = Info.UsesVCC;
817  ProgInfo.FlatUsed = Info.UsesFlatScratch;
818  ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
819 
 // Report an error if the scratch size does not fit in 32 bits. Processing
 // continues after the diagnostic is issued.
820  if (!isUInt<32>(ProgInfo.ScratchSize)) {
821  DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
822  ProgInfo.ScratchSize, DS_Error);
823  MF.getFunction().getContext().diagnose(DiagStackSize);
824  }
825 
826  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
 // NOTE(review): source line 827 is missing here; MFI, which is used further
 // down, is presumably declared on it -- confirm against the original file.
828  const SIInstrInfo *TII = STM.getInstrInfo();
829  const SIRegisterInfo *RI = &TII->getRegisterInfo();
830 
831  // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
832  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
833  // unified.
834  unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
835  getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
836 
837  // Check the addressable register limit before we add ExtraSGPRs.
 // NOTE(review): source line 838 (the start of this condition) is missing.
839  !STM.hasSGPRInitBug()) {
840  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
841  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
842  // This can happen due to a compiler bug or when using inline asm.
843  LLVMContext &Ctx = MF.getFunction().getContext();
 // NOTE(review): source lines 844 and 847 (parts of the declaration of
 // Diag) are missing.
845  "addressable scalar registers",
846  ProgInfo.NumSGPR, DS_Error,
848  MaxAddressableNumSGPRs);
849  Ctx.diagnose(Diag);
 // After diagnosing, the count is set to one below the addressable limit.
850  ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
851  }
852  }
853 
854  // Account for the extra SGPRs computed above from the VCC and flat-scratch
 // usage flags.
855  ProgInfo.NumSGPR += ExtraSGPRs;
856 
857  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
858  // dispatch registers are function args.
 // Each argument counts as ceil(size-in-bits / 32) registers; arguments with
 // the InReg attribute are counted as SGPRs, all others as VGPRs.
859  unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
860  for (auto &Arg : MF.getFunction().args()) {
861  unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
862  if (Arg.hasAttribute(Attribute::InReg))
863  WaveDispatchNumSGPR += NumRegs;
864  else
865  WaveDispatchNumVGPR += NumRegs;
866  }
867  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
868  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
869 
870  // Adjust number of registers used to meet default/requested minimum/maximum
871  // number of waves per execution unit request.
 // The resulting counts are never below 1.
872  ProgInfo.NumSGPRsForWavesPerEU = std::max(
873  std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
874  ProgInfo.NumVGPRsForWavesPerEU = std::max(
875  std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
876 
 // NOTE(review): source line 877 (the start of this condition) is missing.
878  STM.hasSGPRInitBug()) {
879  unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
880  if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
881  // This can happen due to a compiler bug or when using inline asm to use
882  // the registers which are usually reserved for vcc etc.
883  LLVMContext &Ctx = MF.getFunction().getContext();
 // NOTE(review): source lines 884 and 887 (parts of the declaration of
 // Diag) are missing.
885  "scalar registers",
886  ProgInfo.NumSGPR, DS_Error,
888  MaxAddressableNumSGPRs);
889  Ctx.diagnose(Diag);
 // After diagnosing, both counts are set to the addressable limit.
890  ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
891  ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
892  }
893  }
894 
895  if (STM.hasSGPRInitBug()) {
 // NOTE(review): source lines 897 and 899 (the values assigned here) are
 // missing.
896  ProgInfo.NumSGPR =
898  ProgInfo.NumSGPRsForWavesPerEU =
900  }
901 
 // Diagnose use of more user SGPRs than the subtarget allows.
902  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
903  LLVMContext &Ctx = MF.getFunction().getContext();
904  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
905  MFI->getNumUserSGPRs(), DS_Error);
906  Ctx.diagnose(Diag);
907  }
908 
 // Diagnose use of more LDS than the subtarget's local memory size.
909  if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
910  LLVMContext &Ctx = MF.getFunction().getContext();
911  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
912  MFI->getLDSSize(), DS_Error);
913  Ctx.diagnose(Diag);
914  }
915 
 // NOTE(review): source lines 916 and 918 are missing; presumably they assign
 // ProgInfo.SGPRBlocks and ProgInfo.VGPRBlocks (both are read below) --
 // confirm against the original file.
917  &STM, ProgInfo.NumSGPRsForWavesPerEU);
919  &STM, ProgInfo.NumVGPRsForWavesPerEU);
920 
921  // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
922  // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
923  // attribute was requested.
924  if (STM.debuggerEmitPrologue()) {
 // NOTE(review): source lines 925 and 927 (the assignment targets) are
 // missing.
926  RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
928  RI->getHWRegIndex(MFI->getScratchRSrcReg());
929  }
930 
931  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
932  // register.
933  ProgInfo.FloatMode = getFPMode(MF);
934 
935  ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
936 
937  // Make the clamp modifier return 0 on NaN input.
938  ProgInfo.DX10Clamp = STM.enableDX10Clamp();
939 
940  unsigned LDSAlignShift;
 // NOTE(review): source line 941 (the condition selecting between the two
 // LDS allocation granularities) is missing.
942  // LDS is allocated in 64 dword blocks.
943  LDSAlignShift = 8;
944  } else {
945  // LDS is allocated in 128 dword blocks.
946  LDSAlignShift = 9;
947  }
948 
949  unsigned LDSSpillSize =
950  MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize();
951 
 // Total LDS is the function's own LDS plus the spill area; the block count
 // is that size rounded up to the allocation granularity.
952  ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
953  ProgInfo.LDSBlocks =
954  alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
955 
956  // Scratch is allocated in 256 dword blocks.
957  unsigned ScratchAlignShift = 10;
958  // We need to program the hardware with the amount of scratch memory that
959  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
960  // scratch memory used per thread.
961  ProgInfo.ScratchBlocks =
962  alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
963  1ULL << ScratchAlignShift) >>
964  ScratchAlignShift;
965 
 // Pack the COMPUTE_PGM_RSRC1 fields.
966  ProgInfo.ComputePGMRSrc1 =
967  S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
968  S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
969  S_00B848_PRIORITY(ProgInfo.Priority) |
970  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
971  S_00B848_PRIV(ProgInfo.Priv) |
972  S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
973  S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
974  S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
975 
976  // 0 = X, 1 = XY, 2 = XYZ
977  unsigned TIDIGCompCnt = 0;
978  if (MFI->hasWorkItemIDZ())
979  TIDIGCompCnt = 2;
980  else if (MFI->hasWorkItemIDY())
981  TIDIGCompCnt = 1;
982 
 // Pack the COMPUTE_PGM_RSRC2 fields.
 // NOTE(review): source lines 987 and 993 (two terms of this expression) are
 // missing.
983  ProgInfo.ComputePGMRSrc2 =
984  S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
985  S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
986  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
988  S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
989  S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
990  S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
991  S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
992  S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
994  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
995  S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
996  S_00B84C_EXCP_EN(0);
997 }
998 
// Maps a calling convention to a resource-register constant. Within this file
// the result is emitted as a 4-byte value by EmitProgramInfoSI and divided by
// 4 to obtain a register number in EmitPALMetadata.
// NOTE(review): listing lines 1002-1008 (the case labels and their return
// statements) are absent from this extract; restore them from the upstream
// file before relying on this definition.
999 static unsigned getRsrcReg(CallingConv::ID CallConv) {
1000  switch (CallConv) {
// Unrecognised conventions fall through to the first explicit case, which is
// not visible in this extract.
1001  default: LLVM_FALLTHROUGH;
1009  }
1010 }
1011 
// Streams the program resource settings for MF through OutStreamer as 4-byte
// values, written as (register, value) pairs, and finishes with the spilled
// SGPR/VGPR counts.
//
// \param MF                  function whose calling convention selects RsrcReg.
// \param CurrentProgramInfo  precomputed resource words and block counts.
//
// NOTE(review): listing lines 1014, 1017 and 1039-1040 are absent from this
// extract. They presumably hold the definition of MFI and the conditions that
// open the branches closed at listing lines 1030/1037 and 1046 -- confirm
// against the upstream file.
1012 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
1013  const SIProgramInfo &CurrentProgramInfo) {
1015  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
1016 
// First branch: the COMPUTE_PGM_RSRC1/RSRC2 words are taken verbatim from
// CurrentProgramInfo, followed by COMPUTE_TMPRING_SIZE built from ScratchBlocks.
1018  OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
1019 
1020  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4);
1021 
1022  OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
1023  OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4);
1024 
1025  OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
1026  OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1027 
1028  // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
1029  // 0" comment but I don't see a corresponding field in the register spec.
1030  } else {
// Second branch: the register chosen by getRsrcReg receives only the VGPR and
// SGPR block counts, followed by SPI_TMPRING_SIZE built from ScratchBlocks.
1031  OutStreamer->EmitIntValue(RsrcReg, 4);
1032  OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1033  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
1034  OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
1035  OutStreamer->EmitIntValue(
1036  S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
1037  }
1038 
// Conditional section (its guard and the register word that precedes the
// EXTRA_LDS_SIZE value are on the missing lines 1039-1040): extra LDS size
// plus the SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR values read from MFI.
1041  OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
1042  OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
1043  OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
1044  OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
1045  OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
1046  }
1047 
// Always emitted: spill statistics under the R_SPILLED_SGPRS / R_SPILLED_VGPRS
// keys.
1048  OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
1049  OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
1050  OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
1051  OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4);
1052 }
1053 
1054 // This is the equivalent of EmitProgramInfoSI above, but for when the OS type
1055 // is AMDPAL. It stores each compute/SPI register setting and other PAL
1056 // metadata items into the PALMetadataMap, combining with any provided by the
1057 // frontend as LLVM metadata. Once all functions are written, PALMetadataMap is
1058 // then written as a single block in the .note section.
//
// \param MF                  function whose calling convention selects the
//                            register numbers and metadata keys.
// \param CurrentProgramInfo  precomputed resource words, block counts and
//                            scratch size.
//
// NOTE(review): this extract is missing listing lines 1061, 1075, 1078, 1081,
// 1084, 1087, 1090, 1095, 1097, 1100 and 1115. They presumably hold the MFI
// definition, the case labels of the switch, the constant offsets added to
// ScratchSizeKey, and the conditions opening the branches below -- confirm
// against the upstream file.
1059 void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
1060  const SIProgramInfo &CurrentProgramInfo) {
1062  // Given the calling convention, calculate the register number for rsrc1. In
1063  // principle the register number could change in future hardware, but we know
1064  // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
1065  // we can use the same fixed value that .AMDGPU.config has for Mesa. Note
1066  // that we use a register number rather than a byte offset, so we need to
1067  // divide by 4.
1068  unsigned Rsrc1Reg = getRsrcReg(MF.getFunction().getCallingConv()) / 4;
// rsrc2 is taken to be the register immediately after rsrc1.
1069  unsigned Rsrc2Reg = Rsrc1Reg + 1;
1070  // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
1071  // with a constant offset to access any non-register shader-specific PAL
1072  // metadata key.
// CS_SCRATCH_SIZE is the starting value; the switch overrides it per shader
// stage (the case labels themselves are not visible here).
1073  unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE;
1074  switch (MF.getFunction().getCallingConv()) {
1076  ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE;
1077  break;
1079  ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE;
1080  break;
1082  ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE;
1083  break;
1085  ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE;
1086  break;
1088  ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE;
1089  break;
1091  ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE;
1092  break;
1093  }
// The used-VGPR / used-SGPR keys are derived from ScratchSizeKey plus an
// offset; the offset expressions are on the missing lines 1095 and 1097.
1094  unsigned NumUsedVgprsKey = ScratchSizeKey +
1096  unsigned NumUsedSgprsKey = ScratchSizeKey +
// Register counts are assigned outright; the register and scratch entries
// below use |= so that bits already present in the map are kept.
1098  PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
1099  PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
1101  PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
1102  PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
1103  // ScratchSize is in bytes, 16 aligned.
1104  PALMetadataMap[ScratchSizeKey] |=
1105  alignTo(CurrentProgramInfo.ScratchSize, 16);
1106  } else {
// Other branch: rsrc1 gets only the VGPR/SGPR block counts, and rsrc2 gets
// SCRATCH_EN only when scratch is actually used.
1107  PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
1108  S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
1109  if (CurrentProgramInfo.ScratchBlocks > 0)
1110  PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
1111  // ScratchSize is in bytes, 16 aligned.
1112  PALMetadataMap[ScratchSizeKey] |=
1113  alignTo(CurrentProgramInfo.ScratchSize, 16);
1114  }
// Conditional section (guard on the missing line 1115): extra LDS size and
// the SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR values, keyed by register number
// (byte offset / 4).
1116  PALMetadataMap[Rsrc2Reg] |=
1117  S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
1118  PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
1119  PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
1120  }
1121 }
1122 
// Translates a private element size of 4, 8 or 16 into the matching
// AMD_ELEMENT_*_BYTES enumerator. Any other size is treated as a programming
// error and reaches llvm_unreachable.
// NOTE(review): the signature (listing line 1124) is absent from this extract;
// the cross-reference index further down this page gives it as
// `static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)`.
1123 // This is supposed to be log2(Size)
1125  switch (Size) {
1126  case 4:
1127  return AMD_ELEMENT_4_BYTES;
1128  case 8:
1129  return AMD_ELEMENT_8_BYTES;
1130  case 16:
1131  return AMD_ELEMENT_16_BYTES;
1132  default:
1133  llvm_unreachable("invalid private_element_size");
1134  }
1135 }
1136 
// Fills the amd_kernel_code_t header Out for MF from CurrentProgramInfo, the
// per-function info MFI and the GCN subtarget STM.
//
// \param Out                 header structure being populated.
// \param CurrentProgramInfo  register counts, scratch/LDS sizes and resource
//                            words for the kernel.
// \param MF                  the kernel's machine function.
//
// NOTE(review): this extract drops many listing lines (1141-1142, 1144, 1147,
// 1149, 1152, 1155, 1157-1158, 1163, 1167, 1170, 1173, 1176, 1179, 1182, 1185,
// 1188, 1203, 1205). As a result the MFI definition, the statement guarded by
// each `if` below, and the left-hand side of several assignments are not
// visible -- consult the upstream file before editing.
1137 void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
1138  const SIProgramInfo &CurrentProgramInfo,
1139  const MachineFunction &MF) const {
1140  const Function &F = MF.getFunction();
1143 
1145  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
1146 
1148 
// Packs the two resource words into one 64-bit value: ComputePGMRSrc1 in the
// low 32 bits and ComputePGMRSrc2 in the high 32 bits. The destination is on
// the missing line 1149 (presumably Out.compute_pgm_resource_registers --
// confirm).
1150  CurrentProgramInfo.ComputePGMRSrc1 |
1151  (CurrentProgramInfo.ComputePGMRSrc2 << 32);
1153 
1154  if (CurrentProgramInfo.DynamicCallStack)
1156 
1159  getElementByteSizeValue(STM.getMaxPrivateElementSize()));
1160 
// Each test below guards a statement that is not visible in this extract;
// the one visible fragment updates Out.code_properties.
1161  if (MFI->hasPrivateSegmentBuffer()) {
1162  Out.code_properties |=
1164  }
1165 
1166  if (MFI->hasDispatchPtr())
1168 
1169  if (MFI->hasQueuePtr())
1171 
1172  if (MFI->hasKernargSegmentPtr())
1174 
1175  if (MFI->hasDispatchID())
1177 
1178  if (MFI->hasFlatScratchInit())
1180 
// NOTE(review): hasDispatchPtr() is tested a second time here (first at
// listing line 1166); check upstream whether the guarded statement is a
// redundant duplicate.
1181  if (MFI->hasDispatchPtr())
1183 
1184  if (STM.debuggerSupported())
1186 
1187  if (STM.isXNACKEnabled())
1189 
// MaxKernArgAlign is passed to getKernArgSegmentSize, which presumably sets it
// as an output argument (it is otherwise uninitialised here) -- confirm.
1190  unsigned MaxKernArgAlign;
1191  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
1192  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
1193  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
1194  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
1195  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
1196 
1197  // These alignment values are specified in powers of two, so alignment =
1198  // 2^n. The minimum alignment is 2^4 = 16.
// countTrailingZeros yields the exponent n (assuming MaxKernArgAlign is a
// power of two -- TODO confirm); std::max enforces the floor of 4.
1199  Out.kernarg_segment_alignment = std::max((size_t)4,
1200  countTrailingZeros(MaxKernArgAlign));
1201 
// With the debugger prologue enabled, two SGPR indices from CurrentProgramInfo
// are copied out; the destination fields are on the missing lines 1203/1205.
1202  if (STM.debuggerEmitPrologue()) {
1204  CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
1206  CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
1207  }
1208 }
1209 
// Prints operand OpNo of the inline-asm instruction MI to O.
//
// Returns false when the operand was printed (either by the generic
// AsmPrinter implementation or by the register path below) and true when it
// could not be handled: an unknown or multi-character modifier, or a
// non-register operand.
//
// NOTE(review): the first signature line (listing line 1210) and the call on
// listing line 1232 are absent from this extract; the cross-reference index
// further down this page lists the override as
// `bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, ...)`.
1211  unsigned AsmVariant,
1212  const char *ExtraCode, raw_ostream &O) {
1213  // First try the generic code, which knows about modifiers like 'c' and 'n'.
1214  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O))
1215  return false;
1216 
1217  if (ExtraCode && ExtraCode[0]) {
// Only single-character modifiers are accepted.
1218  if (ExtraCode[1] != 0)
1219  return true; // Unknown modifier.
1220 
// 'r' is the only modifier accepted here; it selects no alternate behaviour
// in the visible code and simply falls through to the operand printing below.
1221  switch (ExtraCode[0]) {
1222  case 'r':
1223  break;
1224  default:
1225  return true;
1226  }
1227  }
1228 
1229  // TODO: Should be able to support other operand types like globals.
1230  const MachineOperand &MO = MI->getOperand(OpNo);
1231  if (MO.isReg()) {
// The call that prints the register begins on the missing line 1232; only its
// final argument, the subtarget's register info, is visible.
1233  *MF->getSubtarget().getRegisterInfo());
1234  return false;
1235  }
1236 
// Non-register operands are reported as unsupported.
1237  return true;
1238 }
bool enableIEEEBit(const MachineFunction &MF) const
virtual void EmitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition: AsmPrinter.cpp:435
uint16_t DebuggerPrivateSegmentBufferSGPR
Definition: SIProgramInfo.h:62
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:349
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition: AsmPrinter.cpp:212
void EmitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Interface definition for SIRegisterInfo.
Target & getTheGCNTarget()
The target for GCN GPUs.
#define S_00B848_VGPRS(x)
Definition: SIDefines.h:484
#define S_00B848_PRIV(x)
Definition: SIDefines.h:496
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:228
LLVMContext & Context
AMDGPU specific subclass of TargetSubtarget.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:94
#define FP_DENORM_MODE_SP(x)
Definition: SIDefines.h:529
This class represents lattice values for constants.
Definition: AllocatorList.h:24
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition: SIDefines.h:435
#define G_00B84C_USER_SGPR(x)
Definition: SIDefines.h:448
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:299
void EmitFunctionBodyEnd() override
Targets can override this to emit stuff after the last basic block in the function.
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
#define G_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:460
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition: SIDefines.h:431
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK)=0
Instruction set architecture version.
Definition: TargetParser.h:132
#define S_00B84C_SCRATCH_EN(x)
Definition: SIDefines.h:444
const SIInstrInfo * getInstrInfo() const override
#define S_00B84C_TG_SIZE_EN(x)
Definition: SIDefines.h:462
#define S_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:499
uint32_t NumSGPRsForWavesPerEU
Definition: SIProgramInfo.h:48
unsigned const TargetRegisterInfo * TRI
F(f)
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:97
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
#define G_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:457
const MCSubtargetInfo * getSTI() const
Interface definition for R600RegisterInfo.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
#define R_0286CC_SPI_PS_INPUT_ENA
Definition: SIDefines.h:480
#define S_00B028_SGPRS(x)
Definition: SIDefines.h:441
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Track resource usage for kernels / entry functions.
Definition: SIProgramInfo.h:22
Tuple of metadata.
Definition: Metadata.h:1106
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)=0
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:165
return AArch64::GPR64RegClass contains(Reg)
bool hasFP64Denormals() const
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
#define FP_DENORM_FLUSH_NONE
Definition: SIDefines.h:524
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
uint32_t code_properties
Code properties.
Definition: BitVector.h:938
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:471
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
const HexagonInstrInfo * TII
int getLocalMemorySize() const
AMD Kernel Code Object (amd_kernel_code_t).
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
bool enableDX10Clamp() const
static void printRegOperand(unsigned RegNo, raw_ostream &O, const MCRegisterInfo &MRI)
#define G_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:451
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
bool isTrapHandlerEnabled() const
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR
Definition: SIProgramInfo.h:56
virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata)=0
#define FP_ROUND_MODE_SP(x)
Definition: SIDefines.h:518
Diagnostic information for stack size etc.
#define S_00B84C_TGID_Y_EN(x)
Definition: SIDefines.h:456
bool hasCodeObjectV3(const MCSubtargetInfo *STI)
Context object for machine code objects.
Definition: MCContext.h:63
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
#define S_00B848_FLOAT_MODE(x)
Definition: SIDefines.h:493
#define R_00B848_COMPUTE_PGM_RSRC1
Definition: SIDefines.h:483
Key
PAL metadata keys.
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
void EmitFunctionBody()
This method emits the body and trailer for a function.
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getAddressableNumSGPRs() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, unsigned OperandName) const
Returns the operand named Op.
bool isGroupSegment(const GlobalValue *GV)
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:252
#define S_00B84C_TRAP_HANDLER(x)
Definition: SIDefines.h:450
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
uint64_t ComputePGMRSrc2
Definition: SIProgramInfo.h:40
bool isVerbose() const
Return true if assembly output should contain comments.
Definition: AsmPrinter.h:203
amdgpu Simplify well known AMD library false Value * Callee
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool dumpCode() const
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool debuggerEmitPrologue() const
uint32_t NumVGPRsForWavesPerEU
Definition: SIProgramInfo.h:51
bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const override
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool hasFP32Denormals() const
#define S_00B848_IEEE_MODE(x)
Definition: SIDefines.h:505
bool isCompute(CallingConv::ID cc)
#define S_00B028_VGPRS(x)
Definition: SIDefines.h:440
static uint32_t getFPMode(const MachineFunction &F)
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned const MachineRegisterInfo * MRI
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define G_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:454
#define S_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:465
#define FP_ROUND_MODE_DP(x)
Definition: SIDefines.h:519
const GlobalValue * getGlobal() const
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:82
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:79
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition: AsmPrinter.cpp:425
void EmitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
#define S_00B84C_EXCP_EN_MSB(x)
Definition: SIDefines.h:469
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Generation getGeneration() const
const Triple & getTargetTriple() const
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition: SIDefines.h:432
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
Definition: SIDefines.h:521
#define S_00B84C_TGID_Z_EN(x)
Definition: SIDefines.h:459
The AMDGPU TargetMachine interface definition for hw codegen targets.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition: SIDefines.h:436
#define S_00B84C_LDS_SIZE(x)
Definition: SIDefines.h:473
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override
Targets can override this to emit stuff at the start of a basic block.
#define R_SPILLED_SGPRS
Definition: SIDefines.h:538
virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor, uint32_t Stepping, StringRef VendorName, StringRef ArchName)=0
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:499
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:210
#define FP_ROUND_ROUND_TO_NEAREST
Definition: SIDefines.h:511
bool doesNotRecurse() const
Determine if the function is known not to recurse, directly or indirectly.
Definition: Function.h:556
#define S_00B84C_EXCP_EN(x)
Definition: SIDefines.h:476
void LLVMInitializeAMDGPUAsmPrinter()
IsaVersion getIsaVersion(StringRef GPU)
unsigned getWavefrontSize() const
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition: SIDefines.h:437
AMDGPUTargetStreamer * getTargetStreamer() const
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:208
#define G_00B84C_TIDIG_COMP_CNT(x)
Definition: SIDefines.h:466
MCStreamer & getStreamer()
Definition: MCStreamer.h:92
std::vector< std::string > HexLines
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition: SIDefines.h:434
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:213
R600 Assembly printer class.
MachineOperand class - Representation of each machine instruction operand.
uint16_t debug_wavefront_private_segment_offset_sgpr
If is_debug_supported is 0 then must be 0.
const MCSubtargetInfo * getMCSubtargetInfo() const
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header)=0
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
bool hasSGPRInitBug() const
#define S_00B848_DEBUG_MODE(x)
Definition: SIDefines.h:502
#define FP_DENORM_MODE_DP(x)
Definition: SIDefines.h:530
#define S_0286E8_WAVESIZE(x)
Definition: SIDefines.h:536
const Function & getFunction() const
Return the LLVM function that this machine code represents.
#define S_00B84C_TGID_X_EN(x)
Definition: SIDefines.h:453
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
bool isPhysRegUsed(unsigned PhysReg) const
Return true if the specified register is modified or read in this function.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
#define AMD_HSA_BITS_SET(dst, mask, val)
std::vector< std::string > DisasmLines
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor)=0
void EmitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
amdgpu Simplify well known AMD library false Value Value * Arg
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
#define Success
virtual bool EmitISAVersion(StringRef IsaVersionString)=0
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
void EmitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
virtual void EmitDirectiveAMDGCNTarget(StringRef Target)=0
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool doFinalization(Module &M) override
Shut down the asmprinter.
This class keeps track of the SPI_PS_INPUT_ADDR config register, which tells the hardware which inter...
Interface definition for SIInstrInfo.
void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
#define S_00B848_SGPRS(x)
Definition: SIDefines.h:487
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const
Targets can override this to emit stuff at the start of a basic block.
bool hasXNACK(const MCSubtargetInfo &STI)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
#define S_00B84C_USER_SGPR(x)
Definition: SIDefines.h:447
#define I(x, y, z)
Definition: MD5.cpp:58
AMDGPU Assembly printer class.
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition: SIDefines.h:532
Generic base class for all target subtargets.
bool isAmdHsaOrMesa(const Function &F) const
This represents a section on linux, lots of unix variants and some bare metal systems.
Definition: MCSectionELF.h:28
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
uint32_t Size
Definition: Profile.cpp:47
#define R_SPILLED_VGPRS
Definition: SIDefines.h:539
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
virtual void EmitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition: AsmPrinter.cpp:725
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
static unsigned getRsrcReg(CallingConv::ID CallConv)
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
unsigned getMaxNumUserSGPRs() const
#define S_00B860_WAVESIZE(x)
Definition: SIDefines.h:533
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition: SIDefines.h:443
#define S_00B848_PRIORITY(x)
Definition: SIDefines.h:490
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void EmitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
uint64_t ComputePGMRSrc1
Definition: SIProgramInfo.h:34
#define R_0286E8_SPI_TMPRING_SIZE
Definition: SIDefines.h:535
void setAlignment(unsigned A)
setAlignment - Set the alignment (log2, not bytes) of the function.
unsigned getHWRegIndex(unsigned Reg) const
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition: SIDefines.h:433
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
IRTranslator LLVM IR MI
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition: SIDefines.h:438
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
AMDGPU metadata definitions and in-memory representations.
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition: SIDefines.h:481
uint16_t debug_private_segment_buffer_sgpr
If is_debug_supported is 0 then must be 0.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
const uint64_t Version
Definition: InstrProf.h:895
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
std::vector< uint32_t > Metadata
PAL metadata represented as a vector.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream)
Streams isa version string for given subtarget STI into Stream.
iterator_range< arg_iterator > args()
Definition: Function.h:689
bool hasCodeObjectV3() const