//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
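
// Illustrative example: with Shift = 4 and Width = 3, getBitMask(4, 3) is
// 0x70; packBits(5, 0, 4, 3) produces 0x50, and unpackBits(0x50, 4, 3)
// recovers 5, so packing and unpacking round-trip a single bit field.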

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
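
// Taken together, these helpers describe the s_waitcnt field layout used
// below: vmcnt occupies bits [3:0] (plus bits [15:14] on gfx9 and later),
// expcnt occupies bits [6:4], and lgkmcnt occupies bits [11:8].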

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t dwords;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
  return Info ? Info->Opcode : -1;
}

int getMUBUFDwords(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->dwords : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto Version = getIsaVersion(STI->getCPU());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << Version.Major
         << Version.Minor
         << Version.Stepping;

  if (hasXNACK(*STI))
    Stream << "+xnack";
  if (hasSRAMECC(*STI))
    Stream << "+sram-ecc";

  Stream.flush();
}
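
// For example, an amdgcn-amd-amdhsa target at gfx900 with XNACK enabled
// streams "amdgcn-amd-amdhsa--gfx900+xnack" (the empty environment
// component yields the double dash).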

bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
  return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
         STI->getFeatureBits().test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  if (!STI->getFeatureBits().test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}
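
// Illustrative example: on a wave64 GCN target, FlatWorkGroupSize = 256
// gives 4 waves per work group, so N = 40 / 4 = 10 work groups per CU
// (the result is clamped to at most 16).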

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
  return getMaxWavesPerEU() * getEUsPerCU(STI);
}

unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU() {
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
                 getEUsPerCU(STI)) / getEUsPerCU(STI);
}
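
// Illustrative example: a 256-thread work group on a wave64 target yields
// 4 waves per CU, so alignTo(4, 4) / 4 = 1 wave per EU for that group size.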

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
         getWavefrontSize(STI);
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
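
// Illustrative example: on gfx8 (800 SGPRs total, alloc granule 16),
// WavesPerEU = 8 gives 800 / 9 = 88 and alignDown(88, 16) + 1 = 81, so 81
// is the smallest SGPR count that limits occupancy to 8 waves per EU
// (ignoring the trap-handler reservation).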

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
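
// Illustrative example: on gfx8 with WavesPerEU = 10, 800 / 10 = 80 and
// alignDown(80, 16) = 80, so at most 80 SGPRs may be used while still
// sustaining 10 waves per EU (again ignoring the trap handler).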

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}
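
// Note the assignments overwrite rather than accumulate: each count is the
// running total of reserved registers. E.g. on gfx8 a kernel using VCC,
// XNACK and flat scratch reserves 6 extra SGPRs in total, not 2 + 4 + 6.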

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
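
// Illustrative example: NumSGPRs = 20 with an encoding granule of 8 rounds
// up to 24, so the encoded block count is 24 / 8 - 1 = 2.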

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
  return 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
  return getVGPRAllocGranule(STI);
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  return 256;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  return getTotalNumVGPRs(STI);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU())
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
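
// Illustrative example: WavesPerEU = 8 gives alignDown(256 / 8, 4) = 32,
// so a kernel may use at most 32 VGPRs while sustaining 8 waves per EU.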

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
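
// Illustrative example: NumVGPRs = 10 with an encoding granule of 4 rounds
// up to 12, so the encoded block count is 12 / 4 - 1 = 2.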

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
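
// Typical usage (a sketch): an attribute value such as
// "amdgpu-flat-work-group-size"="128,256" parses to the pair {128, 256};
// malformed values fall back to Default and emit an error.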

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
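
// Illustrative example: on a pre-gfx9 target, encodeWaitcnt(V, 0, 0, 0)
// returns 0 (wait for all counters to drain), since each encode step clears
// one all-ones field of getWaitcntBitMask(V); decodeWaitcnt inverts this.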

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
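
// Illustrative example: an operand constrained to VReg_64 reports
// getRegBitWidth(AMDGPU::VReg_64RegClassID) = 64 bits, so
// getRegOperandSize returns 64 / 8 = 8 bytes.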

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
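
// Illustrative example: the packed literal 0x3C003C00 splits into two
// identical halves of 0x3C00 (1.0 in half precision), so it is inlinable
// as a v2f16 operand; 0x3C004000 is not, since its halves (2.0 and 1.0)
// differ.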

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
      isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
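
// Illustrative example: on a pre-GCN3 target the offset is encoded in
// dwords, so ByteOffset = 1020 encodes as 255 and fits the 8-bit field,
// while ByteOffset = 1024 encodes as 256 and does not.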

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, uint32_t Align) {
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
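
// Illustrative example (Align = 4): Imm = 4100 exceeds MaxImm = 4092 by 8,
// so it splits into ImmOffset = 4092 and SOffset = 8 (an inline constant);
// Imm = 5000 takes the general path and splits into ImmOffset = 908 and
// SOffset = 4092, with 908 + 4092 == 5000.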

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}
} // namespace AMDGPU
} // namespace llvm