LLVM  8.0.1
AMDGPULegalizerInfo.cpp
Go to the documentation of this file.
1 //===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This file implements the targeting of the MachineLegalizer class for
11 /// AMDGPU.
12 /// \todo This should be generated by TableGen.
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPULegalizerInfo.h"
17 #include "AMDGPUTargetMachine.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/Type.h"
22 #include "llvm/Support/Debug.h"
23 
24 using namespace llvm;
25 using namespace LegalizeActions;
26 
28  const GCNTargetMachine &TM) {
 // Constructor body: registers a legalization action or rule set for each
 // generic opcode / type combination handled below, then finalizes with
 // computeTables() and verify().
 // NOTE(review): the embedded numbering of this listing skips 27, so the first
 // line of the signature is not visible here; `ST`, used further down, is
 // presumably the other constructor parameter -- confirm against the full file.
29  using namespace TargetOpcode;
30 
 // Helper: builds the pointer LLT for address space AS, taking the pointer
 // width from the target machine.
31  auto GetAddrSpacePtr = [&TM](unsigned AS) {
32  return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
33  };
34 
 // Scalar types referenced by the rules below.
35  const LLT S1 = LLT::scalar(1);
36  const LLT S32 = LLT::scalar(32);
37  const LLT S64 = LLT::scalar(64);
38  const LLT S512 = LLT::scalar(512);
39 
 // Vectors of 16-bit elements (2, 4 and 8 elements).
40  const LLT V2S16 = LLT::vector(2, 16);
41  const LLT V4S16 = LLT::vector(4, 16);
42  const LLT V8S16 = LLT::vector(8, 16);
43 
 // Vectors of 32-bit elements, every element count from 2 through 16.
44  const LLT V2S32 = LLT::vector(2, 32);
45  const LLT V3S32 = LLT::vector(3, 32);
46  const LLT V4S32 = LLT::vector(4, 32);
47  const LLT V5S32 = LLT::vector(5, 32);
48  const LLT V6S32 = LLT::vector(6, 32);
49  const LLT V7S32 = LLT::vector(7, 32);
50  const LLT V8S32 = LLT::vector(8, 32);
51  const LLT V9S32 = LLT::vector(9, 32);
52  const LLT V10S32 = LLT::vector(10, 32);
53  const LLT V11S32 = LLT::vector(11, 32);
54  const LLT V12S32 = LLT::vector(12, 32);
55  const LLT V13S32 = LLT::vector(13, 32);
56  const LLT V14S32 = LLT::vector(14, 32);
57  const LLT V15S32 = LLT::vector(15, 32);
58  const LLT V16S32 = LLT::vector(16, 32);
59 
 // Vectors of 64-bit elements, every element count from 2 through 8.
60  const LLT V2S64 = LLT::vector(2, 64);
61  const LLT V3S64 = LLT::vector(3, 64);
62  const LLT V4S64 = LLT::vector(4, 64);
63  const LLT V5S64 = LLT::vector(5, 64);
64  const LLT V6S64 = LLT::vector(6, 64);
65  const LLT V7S64 = LLT::vector(7, 64);
66  const LLT V8S64 = LLT::vector(8, 64);
67 
 // Type lists consumed by the G_BUILD_VECTOR rule further down.
68  std::initializer_list<LLT> AllS32Vectors =
69  {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
70  V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
71  std::initializer_list<LLT> AllS64Vectors =
72  {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
73 
 // One pointer type per AMDGPU address space, sized via GetAddrSpacePtr.
74  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
75  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
76  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
77  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
78  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
79 
 // Code pointers (used for G_BLOCK_ADDR below) reuse the flat pointer type.
80  const LLT CodePtr = FlatPtr;
81 
 // Pointer types iterated over when registering the G_GEP actions.
82  const LLT AddrSpaces[] = {
83  GlobalPtr,
84  ConstantPtr,
85  LocalPtr,
86  FlatPtr,
87  PrivatePtr
88  };
89 
 // NOTE(review): setAction is called with either {opcode, type} or
 // {opcode, type-index, type}; the two-element form presumably refers to type
 // index 0 -- confirm against the LegalizerInfo API.
90  setAction({G_BRCOND, S1}, Legal);
91 
 // 32-bit integer arithmetic is marked Legal.
92  setAction({G_ADD, S32}, Legal);
93  setAction({G_ASHR, S32}, Legal);
94  setAction({G_SUB, S32}, Legal);
95  setAction({G_MUL, S32}, Legal);
96 
97  // FIXME: 64-bit ones only legal for scalar
98  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
99  .legalFor({S32, S1, S64, V2S32});
100 
 // Overflow / carry arithmetic: legal only for the {S32, S1} type pair.
101  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
102  G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
103  .legalFor({{S32, S1}});
104 
 // Bitcast: V2S16 and S32 are each marked Legal in both the two-element form
 // and at type index 1.
105  setAction({G_BITCAST, V2S16}, Legal);
106  setAction({G_BITCAST, 1, S32}, Legal);
107 
108  setAction({G_BITCAST, S32}, Legal);
109  setAction({G_BITCAST, 1, V2S16}, Legal);
110 
111  getActionDefinitionsBuilder(G_FCONSTANT)
112  .legalFor({S32, S64});
113 
114  // G_IMPLICIT_DEF is a no-op so we can make it legal for any value type that
115  // can fit in a register.
116  // FIXME: We need to legalize several more operations before we can add
117  // a test case for size > 512.
 // Legal when type 0 is at most 512 bits; the clampScalar rule with bounds S1
 // and S512 follows the legalIf rule in the chain.
118  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
119  .legalIf([=](const LegalityQuery &Query) {
120  return Query.Types[0].getSizeInBits() <= 512;
121  })
122  .clampScalar(0, S1, S512);
123 
124  getActionDefinitionsBuilder(G_CONSTANT)
125  .legalFor({S1, S32, S64});
126 
127  // FIXME: i1 operands to intrinsics should always be legal, but other i1
128  // values may not be legal. We need to figure out how to distinguish
129  // between these two scenarios.
 // NOTE(review): S1 is also listed in the G_CONSTANT legalFor rule just above;
 // confirm whether both registrations are needed.
130  setAction({G_CONSTANT, S1}, Legal);
131 
132  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
133 
 // Floating-point arithmetic: legal for S32 and S64.
134  getActionDefinitionsBuilder(
135  { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
136  .legalFor({S32, S64});
137 
 // FP truncation: legal only for the {S32, S64} type pair.
138  getActionDefinitionsBuilder(G_FPTRUNC)
139  .legalFor({{S32, S64}});
140 
141  // Use actual fsub instruction
142  setAction({G_FSUB, S32}, Legal);
143 
144  // Must use fadd + fneg
145  setAction({G_FSUB, S64}, Lower);
146 
 // FP compare: S1 in the two-element form; S32 and S64 at type index 1.
147  setAction({G_FCMP, S1}, Legal);
148  setAction({G_FCMP, 1, S32}, Legal);
149  setAction({G_FCMP, 1, S64}, Legal);
150 
 // Integer extensions: S64 in the two-element form, S32 at type index 1.
151  setAction({G_ZEXT, S64}, Legal);
152  setAction({G_ZEXT, 1, S32}, Legal);
153 
154  setAction({G_SEXT, S64}, Legal);
155  setAction({G_SEXT, 1, S32}, Legal);
156 
157  setAction({G_ANYEXT, S64}, Legal);
158  setAction({G_ANYEXT, 1, S32}, Legal);
159 
 // Int <-> FP conversions: S32 in the two-element form and at type index 1.
160  setAction({G_FPTOSI, S32}, Legal);
161  setAction({G_FPTOSI, 1, S32}, Legal);
162 
163  setAction({G_SITOFP, S32}, Legal);
164  setAction({G_SITOFP, 1, S32}, Legal);
165 
166  setAction({G_UITOFP, S32}, Legal);
167  setAction({G_UITOFP, 1, S32}, Legal);
168 
169  setAction({G_FPTOUI, S32}, Legal);
170  setAction({G_FPTOUI, 1, S32}, Legal);
171 
172  setAction({G_FPOW, S32}, Legal);
173  setAction({G_FEXP2, S32}, Legal);
174  setAction({G_FLOG2, S32}, Legal);
175 
176  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
177  .legalFor({S32, S64});
178 
 // G_GEP: for each address-space pointer type, mark the pointer type Legal and,
 // at type index 1, a scalar of the same bit width as the pointer.
179  for (LLT PtrTy : AddrSpaces) {
180  LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
181  setAction({G_GEP, PtrTy}, Legal);
182  setAction({G_GEP, 1, IdxTy}, Legal);
183  }
184 
185  setAction({G_BLOCK_ADDR, CodePtr}, Legal);
186 
 // Integer compare: S1 in the two-element form, S32 at type index 1.
187  setAction({G_ICMP, S1}, Legal);
188  setAction({G_ICMP, 1, S32}, Legal);
189 
 // 32-bit bit-manipulation operations are marked Legal.
190  setAction({G_CTLZ, S32}, Legal);
191  setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
192  setAction({G_CTTZ, S32}, Legal);
193  setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
194  setAction({G_BSWAP, S32}, Legal);
195  setAction({G_CTPOP, S32}, Legal);
196 
 // The predicate always returns true, so every G_INTTOPTR query is legal.
197  getActionDefinitionsBuilder(G_INTTOPTR)
198  .legalIf([](const LegalityQuery &Query) {
199  return true;
200  });
201 
 // Same for G_PTRTOINT: the predicate accepts every query.
202  getActionDefinitionsBuilder(G_PTRTOINT)
203  .legalIf([](const LegalityQuery &Query) {
204  return true;
205  });
206 
 // Loads/stores: legality is decided purely from the bit size of type 0.
 // The lambda captures ST by reference and everything else by copy.
207  getActionDefinitionsBuilder({G_LOAD, G_STORE})
208  .legalIf([=, &ST](const LegalityQuery &Query) {
209  const LLT &Ty0 = Query.Types[0];
210 
211  // TODO: Decompose private loads into 4-byte components.
212  // TODO: Illegal flat loads on SI
213  switch (Ty0.getSizeInBits()) {
214  case 32:
215  case 64:
216  case 128:
217  return true;
218 
219  case 96:
220  // XXX hasLoadX3
 // NOTE(review): the embedded numbering of this listing skips 221 here, so a
 // statement appears to be missing from this extract. As shown, case 96 falls
 // through to `return false` and the captured ST is unused -- confirm against
 // the full file.
222 
223  case 256:
224  case 512:
225  // TODO: constant loads
226  default:
227  return false;
228  }
229  });
230 
231 
 // Atomic RMW / cmpxchg: legal for S32 and S64 paired with global or local
 // pointers; flat pointers are added only when the subtarget reports a flat
 // address space.
232  auto &Atomics = getActionDefinitionsBuilder(
233  {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
234  G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
235  G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
236  G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
237  .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
238  {S64, GlobalPtr}, {S64, LocalPtr}});
239  if (ST.hasFlatAddressSpace()) {
240  Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
241  }
242 
 // Select: S32 in the two-element form, S1 at type index 1.
243  setAction({G_SELECT, S32}, Legal);
244  setAction({G_SELECT, 1, S1}, Legal);
245 
246  setAction({G_SHL, S32}, Legal);
247 
248 
249  // FIXME: When RegBankSelect inserts copies, it will only create new
250  // registers with scalar types. This means we can end up with
251  // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer
252  // operands. In assert builds, the instruction selector will assert
253  // if it sees a generic instruction which isn't legal, so we need to
254  // tell it that scalar types are legal for pointer operands
255  setAction({G_GEP, S64}, Legal);
256 
 // Vector element extract/insert: legal when the type at index 1 has a size
 // that is a multiple of 32 bits and at most 512 bits, and the type at index 2
 // is exactly 32 bits wide.
257  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
258  getActionDefinitionsBuilder(Op)
259  .legalIf([=](const LegalityQuery &Query) {
260  const LLT &VecTy = Query.Types[1];
261  const LLT &IdxTy = Query.Types[2];
262  return VecTy.getSizeInBits() % 32 == 0 &&
263  VecTy.getSizeInBits() <= 512 &&
264  IdxTy.getSizeInBits() == 32;
265  });
266  }
267 
268  // FIXME: Doesn't handle extract of illegal sizes.
 // Legal when both type 0 and type 1 have sizes that are multiples of 32 bits.
269  getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
270  .legalIf([=](const LegalityQuery &Query) {
271  const LLT &Ty0 = Query.Types[0];
272  const LLT &Ty1 = Query.Types[1];
273  return (Ty0.getSizeInBits() % 32 == 0) &&
274  (Ty1.getSizeInBits() % 32 == 0);
275  });
276 
 // Build-vector: legal for every listed S32 vector built from S32 and every
 // listed S64 vector built from S64.
 // NOTE(review): the first clampNumElements passes V16S32 as both bounds,
 // whereas the second uses V2S64..V8S64 -- confirm the lower bound is intended.
277  getActionDefinitionsBuilder(G_BUILD_VECTOR)
278  .legalForCartesianProduct(AllS32Vectors, {S32})
279  .legalForCartesianProduct(AllS64Vectors, {S64})
280  .clampNumElements(0, V16S32, V16S32)
281  .clampNumElements(0, V2S64, V8S64)
282  .minScalarSameAs(1, 0);
283 
284  // TODO: Support any combination of v2s32
285  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
286  .legalFor({{V4S32, V2S32},
287  {V8S32, V2S32},
288  {V8S32, V4S32},
289  {V4S64, V2S64},
290  {V4S16, V2S16},
291  {V8S16, V2S16},
292  {V8S16, V4S16}});
293 
294  // Merge/Unmerge
295  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
 // The "big" type is at index 0 for merge and index 1 for unmerge; the
 // "little" type is at the other index.
296  unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
297  unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
298 
 // Legal when both sizes are multiples of 32 bits and the big type is at most
 // 512 bits.
299  getActionDefinitionsBuilder(Op)
300  .legalIf([=](const LegalityQuery &Query) {
301  const LLT &BigTy = Query.Types[BigTyIdx];
302  const LLT &LitTy = Query.Types[LitTyIdx];
303  return BigTy.getSizeInBits() % 32 == 0 &&
304  LitTy.getSizeInBits() % 32 == 0 &&
305  BigTy.getSizeInBits() <= 512;
306  })
307  // Any vectors left are the wrong size. Scalarize them.
 // Both fewerElementsIf predicates always return true; the mutations return
 // the element type of type 0 and of type 1 respectively.
308  .fewerElementsIf([](const LegalityQuery &Query) { return true; },
309  [](const LegalityQuery &Query) {
310  return std::make_pair(
311  0, Query.Types[0].getElementType());
312  })
313  .fewerElementsIf([](const LegalityQuery &Query) { return true; },
314  [](const LegalityQuery &Query) {
315  return std::make_pair(
316  1, Query.Types[1].getElementType());
317  });
318 
319  }
320 
 // Finalize the registered actions, then run verify() with the subtarget's
 // instruction info.
321  computeTables();
322  verify(*ST.getInstrInfo());
323 }
This class represents lattice values for constants.
Definition: AllocatorList.h:24
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Address space for private memory.
Definition: AMDGPU.h:261
const SIInstrInfo * getInstrInfo() const override
Address space for constant memory (VTX2)
Definition: AMDGPU.h:259
unsigned getPointerSizeInBits(unsigned AS) const
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
The operation itself must be expressed in terms of simpler actions on this target.
Definition: LegalizerInfo.h:73
Address space for flat memory.
Definition: AMDGPU.h:255
Address space for local memory.
Definition: AMDGPU.h:260
Generation getGeneration() const
AMDGPULegalizerInfo(const GCNSubtarget &ST, const GCNTargetMachine &TM)
The AMDGPU TargetMachine interface definition for hw codegen targets.
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:256
bool verify(const TargetRegisterInfo &TRI) const
Check that the information held by this instance makes sense for the given TRI.
bool hasFlatAddressSpace() const
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
ArrayRef< LLT > Types
static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, bool &Write, bool &Effects, bool &StackPointer)
static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space (defaulting to 0).
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:48