LLVM  8.0.1
NVPTXISelLowering.cpp
//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
14 
15 #include "NVPTXISelLowering.h"
17 #include "NVPTX.h"
18 #include "NVPTXSubtarget.h"
19 #include "NVPTXTargetMachine.h"
20 #include "NVPTXTargetObjectFile.h"
21 #include "NVPTXUtilities.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/CodeGen/Analysis.h"
33 #include "llvm/IR/Argument.h"
34 #include "llvm/IR/Attributes.h"
35 #include "llvm/IR/CallSite.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalValue.h"
41 #include "llvm/IR/Instruction.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/CodeGen.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <iterator>
59 #include <sstream>
60 #include <string>
61 #include <utility>
62 #include <vector>

#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"),
    cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
                             " 1: do it, 2: do it aggressively)"),
                    cl::init(2));

static cl::opt<int> UsePrecDivF32(
    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE-compliant F32 div.rnd if available."),
    cl::init(2));

static cl::opt<bool> UsePrecSqrtF32(
    "nvptx-prec-sqrtf32", cl::Hidden,
    cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
    cl::init(true));

static cl::opt<bool> FtzEnabled(
    "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
    cl::init(false));

int NVPTXTargetLowering::getDivF32Level() const {
  if (UsePrecDivF32.getNumOccurrences() > 0) {
    // If nvptx-prec-divf32=N is used on the command line, always honor it.
    return UsePrecDivF32;
  } else {
    // Otherwise, use div.approx if fast math is enabled.
    if (getTargetMachine().Options.UnsafeFPMath)
      return 0;
    else
      return 2;
  }
}
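
// Illustrative note (not in the original source): an explicit
// -nvptx-prec-divf32=N always wins; otherwise unsafe-fp-math selects
// div.approx (level 0) and precise compilation selects IEEE-compliant
// div.rnd (level 2).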

bool NVPTXTargetLowering::usePrecSqrtF32() const {
  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
    // If nvptx-prec-sqrtf32 is used on the command line, always honor it.
    return UsePrecSqrtF32;
  } else {
    // Otherwise, use sqrt.approx if fast math is enabled.
    return !getTargetMachine().Options.UnsafeFPMath;
  }
}

bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
  // TODO: Get rid of this flag; there can be only one way to do this.
  if (FtzEnabled.getNumOccurrences() > 0) {
    // If nvptx-f32ftz is used on the command line, always honor it.
    return FtzEnabled;
  } else {
    const Function &F = MF.getFunction();
    // Otherwise, check for an nvptx-f32ftz attribute on the function.
    if (F.hasFnAttribute("nvptx-f32ftz"))
      return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
    else
      return false;
  }
}
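
// Illustrative example (not from the original source): a function carrying
// the IR attribute "nvptx-f32ftz"="true" selects the .ftz variants of f32
// instructions below (e.g. sqrt.approx.ftz.f32 rather than sqrt.approx.f32
// in getSqrtEstimate).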

static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f16:
  case MVT::v4f16:
  case MVT::v8f16: // <4 x f16x2>
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  // Special case for i128 - decompose to (i64, i64).
  if (Ty->isIntegerTy(128)) {
    ValueVTs.push_back(EVT(MVT::i64));
    ValueVTs.push_back(EVT(MVT::i64));

    if (Offsets) {
      Offsets->push_back(StartingOffset + 0);
      Offsets->push_back(StartingOffset + 8);
    }

    return;
  }

  // Given a struct type, recursively traverse the elements with custom
  // ComputePTXValueVTs.
  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    auto const *SL = DL.getStructLayout(STy);
    auto ElementNum = 0;
    for (auto *EI : STy->elements()) {
      ComputePTXValueVTs(TLI, DL, EI, ValueVTs, Offsets,
                         StartingOffset + SL->getElementOffset(ElementNum));
      ++ElementNum;
    }
    return;
  }

  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    // Split vectors into individual elements, except for v2f16, which
    // we will pass as a single scalar.
    if (VT.isVector()) {
      unsigned NumElts = VT.getVectorNumElements();
      EVT EltVT = VT.getVectorElementType();
      // Vectors with an even number of f16 elements will be passed to
      // us as an array of v2f16 elements. We must match this so we
      // stay in sync with Ins/Outs.
      if (EltVT == MVT::f16 && NumElts % 2 == 0) {
        EltVT = MVT::v2f16;
        NumElts /= 2;
      }
      for (unsigned j = 0; j != NumElts; ++j) {
        ValueVTs.push_back(EltVT);
        if (Offsets)
          Offsets->push_back(Off + j * EltVT.getStoreSize());
      }
    } else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}
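
// Illustrative example (not from the original source): for the IR type
// { i128, <4 x half> } this produces ValueVTs = [i64, i64, v2f16, v2f16]
// with Offsets = [0, 8, 16, 20]: the i128 is decomposed into two i64 halves,
// and the four f16 elements are kept as two v2f16 scalars 4 bytes apart.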

// Check whether we can merge loads/stores of some of the pieces of a
// flattened function parameter or return value into a single vector
// load/store.
//
// The flattened parameter is represented as a list of EVTs and
// offsets, and the whole structure is aligned to ParamAlignment. This
// function determines whether we can load/store pieces of the
// parameter starting at index Idx using a single vectorized op of
// size AccessSize. If so, it returns the number of param pieces
// covered by the vector op. Otherwise, it returns 1.
static unsigned CanMergeParamLoadStoresStartingAt(
    unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
    const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
  assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");

  // Can't vectorize if param alignment is not sufficient.
  if (AccessSize > ParamAlignment)
    return 1;
  // Can't vectorize if offset is not aligned.
  if (Offsets[Idx] & (AccessSize - 1))
    return 1;

  EVT EltVT = ValueVTs[Idx];
  unsigned EltSize = EltVT.getStoreSize();

  // Element is too large to vectorize.
  if (EltSize >= AccessSize)
    return 1;

  unsigned NumElts = AccessSize / EltSize;
  // Can't vectorize if AccessSize is not a multiple of EltSize.
  if (AccessSize != EltSize * NumElts)
    return 1;

  // We don't have enough elements to vectorize.
  if (Idx + NumElts > ValueVTs.size())
    return 1;

  // PTX ISA can only deal with 2- and 4-element vector ops.
  if (NumElts != 4 && NumElts != 2)
    return 1;

  for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
    // Types do not match.
    if (ValueVTs[j] != EltVT)
      return 1;

    // Elements are not contiguous.
    if (Offsets[j] - Offsets[j - 1] != EltSize)
      return 1;
  }
  // OK. We can vectorize ValueVTs[Idx..Idx+NumElts).
  return NumElts;
}
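
// Illustrative example (not from the original source): with
// ValueVTs = [f32, f32, f32, f32], Offsets = [0, 4, 8, 12] and
// ParamAlignment = 16, a query at Idx = 0 with AccessSize = 16 returns 4,
// since the four contiguous, identically typed f32 pieces can be covered by
// one 128-bit v4 access; AccessSize = 8 would return 2.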

// Flags for tracking per-element vectorization state of loads/stores
// of a flattened function parameter or return value.
enum ParamVectorizationFlags {
  PVF_INNER = 0x0, // Middle elements of a vector.
  PVF_FIRST = 0x1, // First element of the vector.
  PVF_LAST = 0x2,  // Last element of the vector.
  // Scalar is effectively a 1-element vector.
  PVF_SCALAR = PVF_FIRST | PVF_LAST
};

// Computes whether and how we can vectorize the loads/stores of a
// flattened function parameter or return value.
//
// The flattened parameter is represented as the list of ValueVTs and
// Offsets, and is aligned to ParamAlignment bytes. We return a vector
// of the same size as ValueVTs indicating how each piece should be
// loaded/stored (i.e. as a scalar, or as part of a vector
// load/store).
static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
                     const SmallVectorImpl<uint64_t> &Offsets,
                     unsigned ParamAlignment) {
  // Set vector size to match ValueVTs and mark all elements as
  // scalars by default.
  SmallVector<ParamVectorizationFlags, 16> VectorInfo;
  VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);

  // Check what we can vectorize using 128/64/32-bit accesses.
  for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
    // Skip elements we've already processed.
    assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
    for (unsigned AccessSize : {16, 8, 4, 2}) {
      unsigned NumElts = CanMergeParamLoadStoresStartingAt(
          I, AccessSize, ValueVTs, Offsets, ParamAlignment);
      // Mark vectorized elements.
      switch (NumElts) {
      default:
        llvm_unreachable("Unexpected return value");
      case 1:
        // Can't vectorize using this size, try next smaller size.
        continue;
      case 2:
        assert(I + 1 < E && "Not enough elements.");
        VectorInfo[I] = PVF_FIRST;
        VectorInfo[I + 1] = PVF_LAST;
        I += 1;
        break;
      case 4:
        assert(I + 3 < E && "Not enough elements.");
        VectorInfo[I] = PVF_FIRST;
        VectorInfo[I + 1] = PVF_INNER;
        VectorInfo[I + 2] = PVF_INNER;
        VectorInfo[I + 3] = PVF_LAST;
        I += 3;
        break;
      }
      // Break out of the inner loop because we've already succeeded
      // using the largest possible AccessSize.
      break;
    }
  }
  return VectorInfo;
}
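
// Illustrative example (not from the original source): continuing the
// four-f32 case above, the result is [PVF_FIRST, PVF_INNER, PVF_INNER,
// PVF_LAST], i.e. a single v4 load/store; a lone trailing element that
// cannot be merged keeps its default PVF_SCALAR marking.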

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {
  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or
  // memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // Wide divides are _very_ slow. Try to reduce the width of the divide if
  // possible.
  addBypassSlowDiv(64, 32);

  // By default, use the Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
                                    LegalizeAction NoF16Action) {
    setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
  };

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
  addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
  addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);

  // Conversion to/from FP16/FP16x2 is always legal.
  setOperationAction(ISD::FP16_TO_FP, MVT::f16, Legal);
  setOperationAction(ISD::FP_TO_FP16, MVT::f16, Legal);
  setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);

  setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
  setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);

  // Operations not directly supported by NVPTX.
  for (MVT VT : {MVT::f16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8,
                 MVT::i16, MVT::i32, MVT::i64}) {
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::BR_CC, VT, Expand);
  }

  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // TODO: we may consider expanding ROTL/ROTR on older GPUs. Currently on GPUs
  // that don't have h/w rotation we lower them to multi-instruction assembly.
  // See ROT*_sw in NVPTXIntrInfo.td
  setOperationAction(ISD::ROTL, MVT::i64, Legal);
  setOperationAction(ISD::ROTR, MVT::i64, Legal);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i32, Legal);

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fpextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  // Turn FP truncstore into trunc + store.
  // FIXME: vector types should also be expanded.
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f16, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Register custom handling for vector loads/stores.
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
    setOperationAction(ISD::ABS,  Ty, Legal);
    setOperationAction(ISD::SMIN, Ty, Legal);
    setOperationAction(ISD::SMAX, Ty, Legal);
    setOperationAction(ISD::UMIN, Ty, Legal);
    setOperationAction(ISD::UMAX, Ty, Legal);

    setOperationAction(ISD::CTPOP, Ty, Legal);
    setOperationAction(ISD::CTLZ, Ty, Legal);
  }

  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  // PTX does not directly support SELP of i1, so promote to i32 first.
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // PTX cannot multiply two i64s in a single instruction.
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);

  // We have some custom DAG combine patterns for these nodes.
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SREM);
  setTargetDAGCombine(ISD::UREM);

  // setcc for f16x2 needs special handling to prevent the legalizer's
  // attempt to scalarize it due to v2i1 not being legal.
  if (STI.allowFP16Math())
    setTargetDAGCombine(ISD::SETCC);

  // Promote fp16 arithmetic if fp16 hardware isn't available or the
  // user passed --nvptx-no-fp16-math. The flag is useful because,
  // although sm_53+ GPUs have some sort of FP16 support in
  // hardware, only sm_53 and sm_60 have full implementations. Others
  // have only a token amount of hardware and are likely to run faster
  // by using fp32 units instead.
  for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
    setFP16OperationAction(Op, MVT::f16, Legal, Promote);
    setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
  }

  // There's no neg.f16 instruction. Expand to (0-x).
  setOperationAction(ISD::FNEG, MVT::f16, Expand);
  setOperationAction(ISD::FNEG, MVT::v2f16, Expand);

  // (would be) Library functions.

  // These map to conversion instructions for scalar FP types.
  for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
                         ISD::FROUND, ISD::FTRUNC}) {
    setOperationAction(Op, MVT::f16, Legal);
    setOperationAction(Op, MVT::f32, Legal);
    setOperationAction(Op, MVT::f64, Legal);
    setOperationAction(Op, MVT::v2f16, Expand);
  }

  // 'Expand' implements FCOPYSIGN without calling an external library.
  setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);

  // These map to corresponding instructions for f32/f64. f16 must be
  // promoted to f32. v2f16 is expanded to f16, which is then promoted
  // to f32.
  for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
                         ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) {
    setOperationAction(Op, MVT::f16, Promote);
    setOperationAction(Op, MVT::f32, Legal);
    setOperationAction(Op, MVT::f64, Legal);
    setOperationAction(Op, MVT::v2f16, Expand);
  }

  // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
  // No FPOW or FREM in PTX.

  // Now deduce the information based on the above-mentioned
  // actions.
  computeRegisterProperties(STI.getRegisterInfo());
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((NVPTXISD::NodeType)Opcode) {
  case NVPTXISD::FIRST_NUMBER:
    break;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::LOAD_PARAM:
    return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet:
    return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintConvergentCall:
    return "NVPTXISD::PrintConvergentCall";
  case NVPTXISD::PrintCallUni:
    return "NVPTXISD::PrintCallUni";
  case NVPTXISD::PrintConvergentCallUni:
    return "NVPTXISD::PrintConvergentCallUni";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::ProxyReg:
    return "NVPTXISD::ProxyReg";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::SETP_F16X2:
    return "NVPTXISD::SETP_F16X2";
  case NVPTXISD::Dummy:
    return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
    return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float:
    return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float:
    return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float:
    return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32:
    return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat:
    return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32:
    return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float:
    return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel:
    return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad:
    return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32:
    return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float:
    return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel:
    return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad:
    return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32:
    return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32:
    return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float:
    return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32:
    return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float:
    return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32:
    return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat:
    return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32:
    return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float:
    return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel:
    return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad:
    return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32:
    return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float:
    return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel:
    return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad:
    return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32:
    return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32:
    return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float:
    return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32:
    return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float:
    return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32:
    return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat:
    return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32:
    return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float:
    return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel:
    return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad:
    return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32:
    return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float:
    return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel:
    return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad:
    return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float:
    return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float:
    return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
  }
  return nullptr;
}

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;
  if (VT == MVT::v2f16)
    return TypeLegal;
  return TargetLoweringBase::getPreferredVectorAction(VT);
}

SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                             int Enabled, int &ExtraSteps,
                                             bool &UseOneConst,
                                             bool Reciprocal) const {
  if (!(Enabled == ReciprocalEstimate::Enabled ||
        (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
    return SDValue();

  if (ExtraSteps == ReciprocalEstimate::Unspecified)
    ExtraSteps = 0;

  SDLoc DL(Operand);
  EVT VT = Operand.getValueType();
  bool Ftz = useF32FTZ(DAG.getMachineFunction());

  auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                       DAG.getConstant(IID, DL, MVT::i32), Operand);
  };

  // The sqrt and rsqrt refinement processes assume we always start out with an
  // approximation of the rsqrt. Therefore, if we're going to do any refinement
  // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing
  // any refinement, we must return a regular sqrt.
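  // (Illustrative note, not in the original source: each refinement step that
  // the generic DAG combiner then applies to the returned rsqrt estimate is a
  // Newton-Raphson iteration of the form X' = X * (1.5 - 0.5 * A * X * X).)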
  if (Reciprocal || ExtraSteps > 0) {
    if (VT == MVT::f32)
      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
                                   : Intrinsic::nvvm_rsqrt_approx_f);
    else if (VT == MVT::f64)
      return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
    else
      return SDValue();
  } else {
    if (VT == MVT::f32)
      return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
                                   : Intrinsic::nvvm_sqrt_approx_f);
    else {
      // There's no sqrt.approx.f64 instruction, so we emit
      // reciprocal(rsqrt(x)). This is faster than
      // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
      // x * rsqrt(x).)
      return DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, VT,
          DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
          MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
    }
  }
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalAddressSDNode *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace());
  Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}

std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
    ImmutableCallSite CS) const {
  auto PtrVT = getPointerTy(DL);

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() ||
        (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) {
      unsigned size = 0;
      if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }
      // PTX ABI requires all scalar return values to be at least 32
      // bits in size. fp16 normally uses .b16 as its storage type in
      // PTX, so its size must be adjusted here, too.
      if (size < 32)
        size = 32;

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";
    } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
               retTy->isIntegerTy(128)) {
      auto &DL = CS.getCalledFunction()->getParent()->getDataLayout();
      O << ".param .align " << retAlignment << " .b8 _["
        << DL.getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS.getInstruction());
        // +1 because index 0 is reserved for the return type alignment.
        if (!getAlign(*CallI, i + 1, align))
          align = DL.getABITypeAlignment(Ty);
        unsigned sz = DL.getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // Update the index for Outs.
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, DL, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG.
      assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // Scalar type.
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty)) {
        sz = PtrVT.getSizeInBits();
      } else if (Ty->isHalfTy())
        // PTX ABI requires all scalar parameters to be at least 32
        // bits in size. fp16 normally uses .b16 as its storage type
        // in PTX, so its size must be adjusted here, too.
        sz = 32;
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    auto *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = DL.getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}
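
// Illustrative example (not from the original source): for an indirect call
// to a function of type float(float, i32), the string built above looks like
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b32 _);
// with the return slot widened to 32 bits per the PTX ABI rule above.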

unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                                   ImmutableCallSite CS,
                                                   Type *Ty, unsigned Idx,
                                                   const DataLayout &DL) const {
  if (!CS) {
    // CallSite is zero, fall back to ABI type alignment.
    return DL.getABITypeAlignment(Ty);
  }

  unsigned Align = 0;
  const Value *DirectCallee = CS.getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS.getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call.
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata.
      if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions.
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast.
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts. Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function.
  if (DirectCallee)
    if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available; fall back to
  // the ABI type alignment.
  return DL.getABITypeAlignment(Ty);
}
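
// Illustrative example (not from the original source): for a call through a
// constant bitcast such as
//   call void bitcast (void (i32)* @f to void (i64)*)(i64 %v)
// the called value is a ConstantExpr cast; the loop above strips the casts,
// recovers @f, and uses @f's alignment annotation if one exists.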

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *RetTy = CLI.RetTy;
  ImmutableCallSite CS = CLI.CS;
  const DataLayout &DL = DAG.getDataLayout();

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain, uniqueCallSite, 0, dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See the similar issue in LowerFormalArguments.
  unsigned OIdx = 0;
  // Declare the .param or .reg variables needed to pass values
  // to the function.
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      SmallVector<EVT, 16> VTs;
      SmallVector<uint64_t, 16> Offsets;
      ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
      unsigned ArgAlign =
          getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
      unsigned AllocSize = DL.getTypeAllocSize(Ty);
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      bool NeedAlign; // Does argument declaration specify alignment?
      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
        // declare .param .align <align> .b8 .param<n>[<size>];
        SDValue DeclareParamOps[] = {
            Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
            DAG.getConstant(paramCount, dl, MVT::i32),
            DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        NeedAlign = true;
      } else {
        // declare .param .b<size> .param<n>;
        if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) {
          // PTX ABI requires integral types to be at least 32 bits in
          // size. FP16 is loaded/stored using i16, so it's handled
          // here as well.
          AllocSize = 4;
        }
        SDValue DeclareScalarParamOps[] = {
            Chain, DAG.getConstant(paramCount, dl, MVT::i32),
            DAG.getConstant(AllocSize * 8, dl, MVT::i32),
            DAG.getConstant(0, dl, MVT::i32), InFlag};
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareScalarParamOps);
        NeedAlign = false;
      }
      InFlag = Chain.getValue(1);

      // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
      // than 32 bits are sign extended or zero extended, depending on
      // whether they are signed or unsigned types. This case applies
      // only to scalar parameters and not to aggregate values.
      bool ExtendIntegerParam =
          Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
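
      // Illustrative example (not from the original source): an i8 argument
      // is therefore declared as .param .b32 and its value is zero- or
      // sign-extended to 32 bits before being stored with StoreParam.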
1500 
1501  auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
1502  SmallVector<SDValue, 6> StoreOperands;
1503  for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
1504  // New store.
1505  if (VectorInfo[j] & PVF_FIRST) {
1506  assert(StoreOperands.empty() && "Unfinished preceeding store.");
1507  StoreOperands.push_back(Chain);
1508  StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
1509  StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
1510  }
1511 
1512  EVT EltVT = VTs[j];
1513  SDValue StVal = OutVals[OIdx];
1514  if (ExtendIntegerParam) {
1515  assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
1516  // zext/sext to i32
1517  StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
1518  : ISD::ZERO_EXTEND,
1519  dl, MVT::i32, StVal);
1520  } else if (EltVT.getSizeInBits() < 16) {
1521  // Use 16-bit registers for small stores as it's the
1522  // smallest general purpose register size supported by NVPTX.
1523  StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
1524  }
1525 
1526  // Record the value to store.
1527  StoreOperands.push_back(StVal);
1528 
1529  if (VectorInfo[j] & PVF_LAST) {
1530  unsigned NumElts = StoreOperands.size() - 3;
1532  switch (NumElts) {
1533  case 1:
1534  Op = NVPTXISD::StoreParam;
1535  break;
1536  case 2:
1538  break;
1539  case 4:
1541  break;
1542  default:
1543  llvm_unreachable("Invalid vector info.");
1544  }
1545 
1546  StoreOperands.push_back(InFlag);
1547 
1548  // Adjust type of the store op if we've extended the scalar
1549  // return value.
1550  EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
1551  unsigned EltAlign =
1552  NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
1553 
1554  Chain = DAG.getMemIntrinsicNode(
1555  Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
1556  TheStoreType, MachinePointerInfo(), EltAlign,
1558  InFlag = Chain.getValue(1);
1559 
1560  // Cleanup.
1561  StoreOperands.clear();
1562  }
1563  ++OIdx;
1564  }
1565  assert(StoreOperands.empty() && "Unfinished parameter store.");
1566  if (VTs.size() > 0)
1567  --OIdx;
1568  ++paramCount;
1569  continue;
1570  }
1571 
1572  // ByVal arguments
1575  auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
1576  assert(PTy && "Type of a byval parameter should be pointer");
1577  ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);
1578 
1579  // declare .param .align <align> .b8 .param<n>[<size>];
1580  unsigned sz = Outs[OIdx].Flags.getByValSize();
1581  SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1582  unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
1583  // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
1584  // so we don't need to worry about natural alignment or not.
1585  // See TargetLowering::LowerCallTo().
1586 
1587  // Enforce minumum alignment of 4 to work around ptxas miscompile
1588  // for sm_50+. See corresponding alignment adjustment in
1589  // emitFunctionParamList() for details.
1590  if (ArgAlign < 4)
1591  ArgAlign = 4;
1592  SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
1593  DAG.getConstant(paramCount, dl, MVT::i32),
1594  DAG.getConstant(sz, dl, MVT::i32), InFlag};
1595  Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
1596  DeclareParamOps);
1597  InFlag = Chain.getValue(1);
1598  for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
1599  EVT elemtype = VTs[j];
1600  int curOffset = Offsets[j];
1601  unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
1602  auto PtrVT = getPointerTy(DL);
1603  SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
1604  DAG.getConstant(curOffset, dl, PtrVT));
1605  SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
1606  MachinePointerInfo(), PartAlign);
1607  if (elemtype.getSizeInBits() < 16) {
1608  theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
1609  }
1610  SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1611  SDValue CopyParamOps[] = { Chain,
1612  DAG.getConstant(paramCount, dl, MVT::i32),
1613  DAG.getConstant(curOffset, dl, MVT::i32),
1614  theVal, InFlag };
1615  Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
1616  CopyParamOps, elemtype,
1617  MachinePointerInfo(), /* Align */ 0,
1619 
1620  InFlag = Chain.getValue(1);
1621  }
1622  ++paramCount;
1623  }
1624 
1626  unsigned retAlignment = 0;
1627 
1628  // Handle Result
1629  if (Ins.size() > 0) {
1630  SmallVector<EVT, 16> resvtparts;
1631  ComputeValueVTs(*this, DL, RetTy, resvtparts);
1632 
1633  // Declare
1634  // .param .align 16 .b8 retval0[<size-in-bytes>], or
1635  // .param .b<size-in-bits> retval0
1636  unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
1637  // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
1638  // these three types to match the logic in
1639  // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
1640  // Plus, this behavior is consistent with nvcc's.
1641  if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||
1642  (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {
1643  // Scalar needs to be at least 32bit wide
1644  if (resultsz < 32)
1645  resultsz = 32;
1646  SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1647  SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
1648  DAG.getConstant(resultsz, dl, MVT::i32),
1649  DAG.getConstant(0, dl, MVT::i32), InFlag };
1650  Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
1651  DeclareRetOps);
1652  InFlag = Chain.getValue(1);
1653  } else {
1654  retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
1655  SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1656  SDValue DeclareRetOps[] = { Chain,
1657  DAG.getConstant(retAlignment, dl, MVT::i32),
1658  DAG.getConstant(resultsz / 8, dl, MVT::i32),
1659  DAG.getConstant(0, dl, MVT::i32), InFlag };
1660  Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
1661  DeclareRetOps);
1662  InFlag = Chain.getValue(1);
1663  }
1664  }
1665 
1666  // Both indirect calls and libcalls have nullptr Func. In order to distinguish
1667  // between them we must rely on the call site value which is valid for
1668  // indirect calls but is always null for libcalls.
1669  bool isIndirectCall = !Func && CS;
1670 
1671  if (isa<ExternalSymbolSDNode>(Callee)) {
1672  Function* CalleeFunc = nullptr;
1673 
1674  // Try to find the callee in the current module.
1675  Callee = DAG.getSymbolFunctionGlobalAddress(Callee, &CalleeFunc);
1676  assert(CalleeFunc != nullptr && "Libcall callee must be set.");
1677 
1678  // Set the "libcall callee" attribute to indicate that the function
1679  // must always have a declaration.
1680  CalleeFunc->addFnAttr("nvptx-libcall-callee", "true");
1681  }
1682 
1683  if (isIndirectCall) {
1684  // This is the indirect function call case: PTX requires a prototype of
1685  // the form
1686  // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
1687  // to be emitted, and the label has to be used as the last arg of the
1688  // call instruction.
1689  // The prototype is embedded in a string and put as the operand for a
1690  // CallPrototype SDNode which will print out to the value of the string.
1691  SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1692  std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
1693  const char *ProtoStr =
1694  nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
1695  SDValue ProtoOps[] = {
1696  Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
1697  };
1698  Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
1699  InFlag = Chain.getValue(1);
1700  }
1701  // Op to just print "call"
1702  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1703  SDValue PrintCallOps[] = {
1704  Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
1705  };
1706  // We model convergent calls as separate opcodes.
1707  unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
1708  if (CLI.IsConvergent)
1709  Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
1710  : NVPTXISD::PrintConvergentCall;
1711  Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
1712  InFlag = Chain.getValue(1);
1713 
1714  // Ops to print out the function name
1715  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1716  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
1717  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
1718  InFlag = Chain.getValue(1);
1719 
1720  // Ops to print out the param list
1721  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1722  SDValue CallArgBeginOps[] = { Chain, InFlag };
1723  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
1724  CallArgBeginOps);
1725  InFlag = Chain.getValue(1);
1726 
1727  for (unsigned i = 0, e = paramCount; i != e; ++i) {
1728  unsigned opcode;
1729  if (i == (e - 1))
1730  opcode = NVPTXISD::LastCallArg;
1731  else
1732  opcode = NVPTXISD::CallArg;
1733  SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1734  SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
1735  DAG.getConstant(i, dl, MVT::i32), InFlag };
1736  Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
1737  InFlag = Chain.getValue(1);
1738  }
1739  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1740  SDValue CallArgEndOps[] = { Chain,
1741  DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
1742  InFlag };
1743  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
1744  InFlag = Chain.getValue(1);
1745 
1746  if (isIndirectCall) {
1747  SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1748  SDValue PrototypeOps[] = { Chain,
1749  DAG.getConstant(uniqueCallSite, dl, MVT::i32),
1750  InFlag };
1751  Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
1752  InFlag = Chain.getValue(1);
1753  }
1754 
1755  SmallVector<SDValue, 16> ProxyRegOps;
1756  SmallVector<Optional<MVT>, 16> ProxyRegTruncates;
1757 
1758  // Generate loads from param memory/moves from registers for result
1759  if (Ins.size() > 0) {
1760  SmallVector<EVT, 16> VTs;
1761  SmallVector<uint64_t, 16> Offsets;
1762  ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
1763  assert(VTs.size() == Ins.size() && "Bad value decomposition");
1764 
1765  unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
1766  auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
1767 
1768  SmallVector<EVT, 6> LoadVTs;
1769  int VecIdx = -1; // Index of the first element of the vector.
1770 
1771  // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
1772  // 32-bits are sign extended or zero extended, depending on whether
1773  // they are signed or unsigned types.
1774  bool ExtendIntegerRetVal =
1775  RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
1776 
1777  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
1778  bool needTruncate = false;
1779  EVT TheLoadType = VTs[i];
1780  EVT EltType = Ins[i].VT;
1781  unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
1782  if (ExtendIntegerRetVal) {
1783  TheLoadType = MVT::i32;
1784  EltType = MVT::i32;
1785  needTruncate = true;
1786  } else if (TheLoadType.getSizeInBits() < 16) {
1787  if (VTs[i].isInteger())
1788  needTruncate = true;
1789  EltType = MVT::i16;
1790  }
1791 
1792  // Record index of the very first element of the vector.
1793  if (VectorInfo[i] & PVF_FIRST) {
1794  assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
1795  VecIdx = i;
1796  }
1797 
1798  LoadVTs.push_back(EltType);
1799 
1800  if (VectorInfo[i] & PVF_LAST) {
1801  unsigned NumElts = LoadVTs.size();
1802  LoadVTs.push_back(MVT::Other);
1803  LoadVTs.push_back(MVT::Glue);
1804  NVPTXISD::NodeType Op;
1805  switch (NumElts) {
1806  case 1:
1807  Op = NVPTXISD::LoadParam;
1808  break;
1809  case 2:
1810  Op = NVPTXISD::LoadParamV2;
1811  break;
1812  case 4:
1813  Op = NVPTXISD::LoadParamV4;
1814  break;
1815  default:
1816  llvm_unreachable("Invalid vector info.");
1817  }
1818 
1819  SDValue LoadOperands[] = {
1820  Chain, DAG.getConstant(1, dl, MVT::i32),
1821  DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
1822  SDValue RetVal = DAG.getMemIntrinsicNode(
1823  Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
1824  MachinePointerInfo(), EltAlign,
1825  MachineMemOperand::MOLoad);
1826 
1827  for (unsigned j = 0; j < NumElts; ++j) {
1828  ProxyRegOps.push_back(RetVal.getValue(j));
1829 
1830  if (needTruncate)
1831  ProxyRegTruncates.push_back(Optional<MVT>(Ins[VecIdx + j].VT));
1832  else
1833  ProxyRegTruncates.push_back(Optional<MVT>());
1834  }
1835 
1836  Chain = RetVal.getValue(NumElts);
1837  InFlag = RetVal.getValue(NumElts + 1);
1838 
1839  // Cleanup
1840  VecIdx = -1;
1841  LoadVTs.clear();
1842  }
1843  }
1844  }
1845 
1846  Chain = DAG.getCALLSEQ_END(Chain,
1847  DAG.getIntPtrConstant(uniqueCallSite, dl, true),
1848  DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
1849  true),
1850  InFlag, dl);
1851  InFlag = Chain.getValue(1);
1852  uniqueCallSite++;
1853 
1854  // Append ProxyReg instructions to the chain to make sure that `callseq_end`
1855  // will not get lost. Otherwise, during libcalls expansion, the nodes can become
1856  // dangling.
1857  for (unsigned i = 0; i < ProxyRegOps.size(); ++i) {
1858  SDValue Ret = DAG.getNode(
1859  NVPTXISD::ProxyReg, dl,
1860  DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
1861  { Chain, ProxyRegOps[i], InFlag }
1862  );
1863 
1864  Chain = Ret.getValue(1);
1865  InFlag = Ret.getValue(2);
1866 
1867  if (ProxyRegTruncates[i].hasValue()) {
1868  Ret = DAG.getNode(ISD::TRUNCATE, dl, ProxyRegTruncates[i].getValue(), Ret);
1869  }
1870 
1871  InVals.push_back(Ret);
1872  }
1873 
1874  // set isTailCall to false for now, until we figure out how to express
1875  // tail call optimization in PTX
1876  isTailCall = false;
1877  return Chain;
1878 }
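// Illustrative sketch, not part of the original file: for a callee such as
// "int f(int, int)", the DeclareParam/StoreParam/PrintCall/LoadParam nodes
// built above print as a PTX call site of roughly this shape (register and
// symbol names here are hypothetical):
//
//   .param .b32 param0;
//   st.param.b32 [param0+0], %r1;
//   .param .b32 param1;
//   st.param.b32 [param1+0], %r2;
//   .param .b32 retval0;
//   call.uni (retval0), f, (param0, param1);
//   ld.param.b32 %r3, [retval0+0];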
1879 
1880 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
1881 // (see LegalizeDAG.cpp). This is slow and uses local memory.
1882 // We use extract/insert/build vector just as LegalizeOp() did in LLVM 2.5.
1883 SDValue
1884 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
1885  SDNode *Node = Op.getNode();
1886  SDLoc dl(Node);
1887  SmallVector<SDValue, 8> Ops;
1888  unsigned NumOperands = Node->getNumOperands();
1889  for (unsigned i = 0; i < NumOperands; ++i) {
1890  SDValue SubOp = Node->getOperand(i);
1891  EVT VVT = SubOp.getNode()->getValueType(0);
1892  EVT EltVT = VVT.getVectorElementType();
1893  unsigned NumSubElem = VVT.getVectorNumElements();
1894  for (unsigned j = 0; j < NumSubElem; ++j) {
1895  Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
1896  DAG.getIntPtrConstant(j, dl)));
1897  }
1898  }
1899  return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
1900 }
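// As an illustration (hypothetical input, not from this file), concatenating
// two <2 x float> values A and B yields:
//
//   t0 = extractelt A, 0    t1 = extractelt A, 1
//   t2 = extractelt B, 0    t3 = extractelt B, 1
//   result = buildvector t0, t1, t2, t3   // <4 x float>
//
// which avoids the stack round trip of ExpandVectorBuildThroughStack().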
1901 
1902 // We can initialize a constant f16x2 with a single .b32 move. Normally it
1903 // would get lowered as two constant loads and a vector-packing move.
1904 // mov.b16 %h1, 0x4000;
1905 // mov.b16 %h2, 0x3C00;
1906 // mov.b32 %hh2, {%h2, %h1};
1907 // Instead we want just a constant move:
1908 // mov.b32 %hh2, 0x40003C00
1909 //
1910 // This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
1911 // generates good SASS in both cases.
1912 SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
1913  SelectionDAG &DAG) const {
1915  if (!(Op->getValueType(0) == MVT::v2f16 &&
1916  isa<ConstantFPSDNode>(Op->getOperand(0)) &&
1917  isa<ConstantFPSDNode>(Op->getOperand(1))))
1918  return Op;
1919 
1920  APInt E0 =
1921  cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
1922  APInt E1 =
1923  cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
1924  SDValue Const =
1925  DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
1926  return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
1927 }
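// A minimal standalone sketch of the packing arithmetic above (illustration
// only; the helper name is ours). With E0 = 1.0 (f16 bits 0x3C00) and
// E1 = 2.0 (f16 bits 0x4000) the packed word is 0x40003C00, matching the
// mov.b32 example in the comment above:
//
//   uint32_t packF16x2(uint16_t E0Bits, uint16_t E1Bits) {
//     // E1 occupies the high half-word, E0 the low one.
//     return (uint32_t(E1Bits) << 16) | uint32_t(E0Bits);
//   }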
1928 
1929 SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
1930  SelectionDAG &DAG) const {
1931  SDValue Index = Op->getOperand(1);
1932  // Constant index will be matched by tablegen.
1933  if (isa<ConstantSDNode>(Index.getNode()))
1934  return Op;
1935 
1936  // Extract individual elements and select one of them.
1937  SDValue Vector = Op->getOperand(0);
1938  EVT VectorVT = Vector.getValueType();
1939  assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
1940  EVT EltVT = VectorVT.getVectorElementType();
1941 
1942  SDLoc dl(Op.getNode());
1943  SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
1944  DAG.getIntPtrConstant(0, dl));
1945  SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
1946  DAG.getIntPtrConstant(1, dl));
1947  return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
1948  ISD::CondCode::SETEQ);
1949 }
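// Scalar model of the lowering above (a sketch; float stands in for the f16
// element type):
//
//   float extractDynamic(float E0, float E1, int Index) {
//     return Index == 0 ? E0 : E1; // the SETEQ select emitted above
//   }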
1950 
1951 /// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which
1952 /// 1) return two i32 values and take a 2 x i32 value to shift plus a shift
1953 /// amount, or
1954 /// 2) return two i64 values and take a 2 x i64 value to shift plus a shift
1955 /// amount.
1956 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
1957  SelectionDAG &DAG) const {
1958  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
1959  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
1960 
1961  EVT VT = Op.getValueType();
1962  unsigned VTBits = VT.getSizeInBits();
1963  SDLoc dl(Op);
1964  SDValue ShOpLo = Op.getOperand(0);
1965  SDValue ShOpHi = Op.getOperand(1);
1966  SDValue ShAmt = Op.getOperand(2);
1967  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
1968 
1969  if (VTBits == 32 && STI.getSmVersion() >= 35) {
1970  // For 32-bit and sm_35 or later, we can use the funnel shift 'shf' instruction.
1971  // {dHi, dLo} = {aHi, aLo} >> Amt
1972  // dHi = aHi >> Amt
1973  // dLo = shf.r.clamp aLo, aHi, Amt
1974 
1975  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
1976  SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
1977  ShAmt);
1978 
1979  SDValue Ops[2] = { Lo, Hi };
1980  return DAG.getMergeValues(Ops, dl);
1981  }
1982  else {
1983  // {dHi, dLo} = {aHi, aLo} >> Amt
1984  // - if (Amt>=size) then
1985  // dLo = aHi >> (Amt-size)
1986  // dHi = aHi >> Amt (this is either all 0 or all 1)
1987  // else
1988  // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
1989  // dHi = aHi >> Amt
1990 
1991  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1992  DAG.getConstant(VTBits, dl, MVT::i32),
1993  ShAmt);
1994  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
1995  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1996  DAG.getConstant(VTBits, dl, MVT::i32));
1997  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
1998  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
1999  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2000 
2001  SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
2002  DAG.getConstant(VTBits, dl, MVT::i32),
2003  ISD::SETGE);
2004  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2005  SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
2006 
2007  SDValue Ops[2] = { Lo, Hi };
2008  return DAG.getMergeValues(Ops, dl);
2009  }
2010 }
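// Worked example for the general (non-shf) path above, with size = 32, a
// logical shift, and {aHi, aLo} = {0x00000012, 0x34567890}:
//   Amt = 8  (< size):  dLo = (aLo >> 8) | (aHi << 24) = 0x12345678
//                       dHi = aHi >> 8                 = 0x00000000
//   Amt = 40 (>= size): dLo = aHi >> (40 - 32)         = 0x00000000
//                       dHi = aHi >> 40, i.e. all zero bits (or all sign
//                             bits for an arithmetic shift)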
2011 
2012 /// LowerShiftLeftParts - Lower SHL_PARTS, which
2013 /// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
2014 /// amount, or
2015 /// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
2016 /// amount.
2017 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
2018  SelectionDAG &DAG) const {
2019  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2020  assert(Op.getOpcode() == ISD::SHL_PARTS);
2021 
2022  EVT VT = Op.getValueType();
2023  unsigned VTBits = VT.getSizeInBits();
2024  SDLoc dl(Op);
2025  SDValue ShOpLo = Op.getOperand(0);
2026  SDValue ShOpHi = Op.getOperand(1);
2027  SDValue ShAmt = Op.getOperand(2);
2028 
2029  if (VTBits == 32 && STI.getSmVersion() >= 35) {
2030  // For 32-bit and sm_35 or later, we can use the funnel shift 'shf' instruction.
2031  // {dHi, dLo} = {aHi, aLo} << Amt
2032  // dHi = shf.l.clamp aLo, aHi, Amt
2033  // dLo = aLo << Amt
2034 
2035  SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
2036  ShAmt);
2037  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2038 
2039  SDValue Ops[2] = { Lo, Hi };
2040  return DAG.getMergeValues(Ops, dl);
2041  }
2042  else {
2043  // {dHi, dLo} = {aHi, aLo} << Amt
2044  // - if (Amt>=size) then
2045  // dLo = aLo << Amt (all 0)
2046  // dHi = aLo << (Amt-size)
2047  // else
2048  // dLo = aLo << Amt
2049  // dHi = (aHi << Amt) | (aLo >> (size-Amt))
2050 
2051  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2052  DAG.getConstant(VTBits, dl, MVT::i32),
2053  ShAmt);
2054  SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2055  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2056  DAG.getConstant(VTBits, dl, MVT::i32));
2057  SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2058  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2059  SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2060 
2061  SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
2062  DAG.getConstant(VTBits, dl, MVT::i32),
2063  ISD::SETGE);
2064  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2065  SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
2066 
2067  SDValue Ops[2] = { Lo, Hi };
2068  return DAG.getMergeValues(Ops, dl);
2069  }
2070 }
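// The mirror-image check for the left shift, same inputs as the example
// after LowerShiftRightParts ({aHi, aLo} = {0x00000012, 0x34567890}):
//   Amt = 8  (< size):  dHi = (aHi << 8) | (aLo >> 24) = 0x00001234
//                       dLo = aLo << 8                 = 0x56789000
//   Amt = 40 (>= size): dHi = aLo << (40 - 32)         = 0x56789000
//                       dLo = aLo << 40, i.e. all bits shifted out, zero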
2071 
2072 SDValue
2073 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
2074  switch (Op.getOpcode()) {
2075  case ISD::RETURNADDR:
2076  return SDValue();
2077  case ISD::FRAMEADDR:
2078  return SDValue();
2079  case ISD::GlobalAddress:
2080  return LowerGlobalAddress(Op, DAG);
2081  case ISD::INTRINSIC_W_CHAIN:
2082  return Op;
2083  case ISD::BUILD_VECTOR:
2084  return LowerBUILD_VECTOR(Op, DAG);
2085  case ISD::EXTRACT_SUBVECTOR:
2086  return Op;
2087  case ISD::EXTRACT_VECTOR_ELT:
2088  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2089  case ISD::CONCAT_VECTORS:
2090  return LowerCONCAT_VECTORS(Op, DAG);
2091  case ISD::STORE:
2092  return LowerSTORE(Op, DAG);
2093  case ISD::LOAD:
2094  return LowerLOAD(Op, DAG);
2095  case ISD::SHL_PARTS:
2096  return LowerShiftLeftParts(Op, DAG);
2097  case ISD::SRA_PARTS:
2098  case ISD::SRL_PARTS:
2099  return LowerShiftRightParts(Op, DAG);
2100  case ISD::SELECT:
2101  return LowerSelect(Op, DAG);
2102  default:
2103  llvm_unreachable("Custom lowering not defined for operation");
2104  }
2105 }
2106 
2107 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
2108  SDValue Op0 = Op->getOperand(0);
2109  SDValue Op1 = Op->getOperand(1);
2110  SDValue Op2 = Op->getOperand(2);
2111  SDLoc DL(Op.getNode());
2112 
2113  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
2114 
2115  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
2116  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
2117  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
2118  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
2119 
2120  return Trunc;
2121 }
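// Scalar equivalent of the i1 select lowering above (a sketch, not code from
// this file): widen so the generic 32-bit select pattern applies, then
// truncate back to one bit.
//
//   bool selectI1(bool Cond, bool A, bool B) {
//     int WideA = A, WideB = B;          // ANY_EXTEND to i32
//     int Wide = Cond ? WideA : WideB;   // 32-bit SELECT
//     return Wide & 1;                   // TRUNCATE back to i1
//   }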
2122 
2123 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
2124  if (Op.getValueType() == MVT::i1)
2125  return LowerLOADi1(Op, DAG);
2126 
2127  // v2f16 is legal, so we can't rely on the legalizer to handle unaligned
2128  // loads; we have to handle them here.
2129  if (Op.getValueType() == MVT::v2f16) {
2130  LoadSDNode *Load = cast<LoadSDNode>(Op);
2131  EVT MemVT = Load->getMemoryVT();
2132  if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
2133  Load->getAddressSpace(), Load->getAlignment())) {
2134  SDValue Ops[2];
2135  std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
2136  return DAG.getMergeValues(Ops, SDLoc(Op));
2137  }
2138  }
2139 
2140  return SDValue();
2141 }
2142 
2143 // v = ld i1* addr
2144 // =>
2145 // v1 = ld i8* addr (-> i16)
2146 // v = trunc i16 to i1
2147 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
2148  SDNode *Node = Op.getNode();
2149  LoadSDNode *LD = cast<LoadSDNode>(Node);
2150  SDLoc dl(Node);
2151  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
2152  assert(Node->getValueType(0) == MVT::i1 &&
2153  "Custom lowering for i1 load only");
2154  SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
2155  LD->getPointerInfo(), LD->getAlignment(),
2156  LD->getMemOperand()->getFlags());
2157  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
2158  // The legalizer (the caller) is expecting two values from the legalized
2159  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
2160  // in LegalizeDAG.cpp which also uses MergeValues.
2161  SDValue Ops[] = { result, LD->getChain() };
2162  return DAG.getMergeValues(Ops, dl);
2163 }
2164 
2165 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
2166  StoreSDNode *Store = cast<StoreSDNode>(Op);
2167  EVT VT = Store->getMemoryVT();
2168 
2169  if (VT == MVT::i1)
2170  return LowerSTOREi1(Op, DAG);
2171 
2172  // v2f16 is legal, so we can't rely on the legalizer to handle unaligned
2173  // stores; we have to handle them here.
2174  if (VT == MVT::v2f16 &&
2175  !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
2176  Store->getAddressSpace(), Store->getAlignment()))
2177  return expandUnalignedStore(Store, DAG);
2178 
2179  if (VT.isVector())
2180  return LowerSTOREVector(Op, DAG);
2181 
2182  return SDValue();
2183 }
2184 
2185 SDValue
2186 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
2187  SDNode *N = Op.getNode();
2188  SDValue Val = N->getOperand(1);
2189  SDLoc DL(N);
2190  EVT ValVT = Val.getValueType();
2191 
2192  if (ValVT.isVector()) {
2193  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
2194  // legal. We can (and should) split that into 2 stores of <2 x double> here
2195  // but I'm leaving that as a TODO for now.
2196  if (!ValVT.isSimple())
2197  return SDValue();
2198  switch (ValVT.getSimpleVT().SimpleTy) {
2199  default:
2200  return SDValue();
2201  case MVT::v2i8:
2202  case MVT::v2i16:
2203  case MVT::v2i32:
2204  case MVT::v2i64:
2205  case MVT::v2f16:
2206  case MVT::v2f32:
2207  case MVT::v2f64:
2208  case MVT::v4i8:
2209  case MVT::v4i16:
2210  case MVT::v4i32:
2211  case MVT::v4f16:
2212  case MVT::v4f32:
2213  case MVT::v8f16: // <4 x f16x2>
2214  // This is a "native" vector type
2215  break;
2216  }
2217 
2218  MemSDNode *MemSD = cast<MemSDNode>(N);
2219  const DataLayout &TD = DAG.getDataLayout();
2220 
2221  unsigned Align = MemSD->getAlignment();
2222  unsigned PrefAlign =
2223  TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
2224  if (Align < PrefAlign) {
2225  // This store is not sufficiently aligned, so bail out and let this vector
2226  // store be scalarized. Note that we may still be able to emit smaller
2227  // vector stores. For example, if we are storing a <4 x float> with an
2228  // alignment of 8, this check will fail but the legalizer will try again
2229  // with 2 x <2 x float>, which will succeed with an alignment of 8.
2230  return SDValue();
2231  }
2232 
2233  unsigned Opcode = 0;
2234  EVT EltVT = ValVT.getVectorElementType();
2235  unsigned NumElts = ValVT.getVectorNumElements();
2236 
2237  // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
2238  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
2239  // stored type to i16 and propagate the "real" type as the memory type.
2240  bool NeedExt = false;
2241  if (EltVT.getSizeInBits() < 16)
2242  NeedExt = true;
2243 
2244  bool StoreF16x2 = false;
2245  switch (NumElts) {
2246  default:
2247  return SDValue();
2248  case 2:
2249  Opcode = NVPTXISD::StoreV2;
2250  break;
2251  case 4:
2252  Opcode = NVPTXISD::StoreV4;
2253  break;
2254  case 8:
2255  // v8f16 is a special case. PTX doesn't have st.v8.f16
2256  // instruction. Instead, we split the vector into v2f16 chunks and
2257  // store them with st.v4.b32.
2258  assert(EltVT == MVT::f16 && "Wrong type for the vector.");
2259  Opcode = NVPTXISD::StoreV4;
2260  StoreF16x2 = true;
2261  break;
2262  }
2263 
2264  SmallVector<SDValue, 8> Ops;
2265 
2266  // First is the chain
2267  Ops.push_back(N->getOperand(0));
2268 
2269  if (StoreF16x2) {
2270  // Combine f16,f16 -> v2f16
2271  NumElts /= 2;
2272  for (unsigned i = 0; i < NumElts; ++i) {
2273  SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
2274  DAG.getIntPtrConstant(i * 2, DL));
2275  SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
2276  DAG.getIntPtrConstant(i * 2 + 1, DL));
2277  SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
2278  Ops.push_back(V2);
2279  }
2280  } else {
2281  // Then the split values
2282  for (unsigned i = 0; i < NumElts; ++i) {
2283  SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
2284  DAG.getIntPtrConstant(i, DL));
2285  if (NeedExt)
2286  ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
2287  Ops.push_back(ExtVal);
2288  }
2289  }
2290 
2291  // Then any remaining arguments
2292  Ops.append(N->op_begin() + 2, N->op_end());
2293 
2294  SDValue NewSt =
2295  DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
2296  MemSD->getMemoryVT(), MemSD->getMemOperand());
2297 
2298  // return DCI.CombineTo(N, NewSt, true);
2299  return NewSt;
2300  }
2301 
2302  return SDValue();
2303 }
2304 
2305 // st i1 v, addr
2306 // =>
2307 // v1 = zxt v to i16
2308 // st.u8 i16, addr
2309 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
2310  SDNode *Node = Op.getNode();
2311  SDLoc dl(Node);
2312  StoreSDNode *ST = cast<StoreSDNode>(Node);
2313  SDValue Tmp1 = ST->getChain();
2314  SDValue Tmp2 = ST->getBasePtr();
2315  SDValue Tmp3 = ST->getValue();
2316  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
2317  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
2318  SDValue Result =
2319  DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8,
2320  ST->getAlignment(), ST->getMemOperand()->getFlags());
2321  return Result;
2322 }
2323 
2324 SDValue
2325 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
2326  std::string ParamSym;
2327  raw_string_ostream ParamStr(ParamSym);
2328 
2329  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
2330  ParamStr.flush();
2331 
2332  std::string *SavedStr =
2333  nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
2334  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
2335 }
2336 
2337 // Check to see if the kernel argument is image*_t or sampler_t
2338 
2339 static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
2340  static const char *const specialTypes[] = { "struct._image2d_t",
2341  "struct._image3d_t",
2342  "struct._sampler_t" };
2343 
2344  Type *Ty = arg->getType();
2345  auto *PTy = dyn_cast<PointerType>(Ty);
2346 
2347  if (!PTy)
2348  return false;
2349 
2350  if (!context)
2351  return false;
2352 
2353  auto *STy = dyn_cast<StructType>(PTy->getElementType());
2354  if (!STy || STy->isLiteral())
2355  return false;
2356 
2357  return std::find(std::begin(specialTypes), std::end(specialTypes),
2358  STy->getName()) != std::end(specialTypes);
2359 }
2360 
2361 SDValue NVPTXTargetLowering::LowerFormalArguments(
2362  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2363  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2364  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2365  MachineFunction &MF = DAG.getMachineFunction();
2366  const DataLayout &DL = DAG.getDataLayout();
2367  auto PtrVT = getPointerTy(DAG.getDataLayout());
2368 
2369  const Function *F = &MF.getFunction();
2370  const AttributeList &PAL = F->getAttributes();
2371  const TargetLowering *TLI = STI.getTargetLowering();
2372 
2373  SDValue Root = DAG.getRoot();
2374  std::vector<SDValue> OutChains;
2375 
2376  bool isABI = (STI.getSmVersion() >= 20);
2377  assert(isABI && "Non-ABI compilation is not supported");
2378  if (!isABI)
2379  return Chain;
2380 
2381  std::vector<Type *> argTypes;
2382  std::vector<const Argument *> theArgs;
2383  for (const Argument &I : F->args()) {
2384  theArgs.push_back(&I);
2385  argTypes.push_back(I.getType());
2386  }
2387  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
2388  // Ins.size() will be larger
2389  // * if there is an aggregate argument with multiple fields (each field
2390  // showing up separately in Ins)
2391  // * if there is a vector argument with more than typical vector-length
2392  // elements (generally if more than 4) where each vector element is
2393  // individually present in Ins.
2394  // So a different index should be used for indexing into Ins.
2395  // See similar issue in LowerCall.
2396  unsigned InsIdx = 0;
2397 
2398  int idx = 0;
2399  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
2400  Type *Ty = argTypes[i];
2401 
2402  // If the kernel argument is image*_t or sampler_t, convert it to
2403  // an i32 constant holding the parameter position. This can later be
2404  // matched in the AsmPrinter to output the correct mangled name.
2405  if (isImageOrSamplerVal(
2406  theArgs[i],
2407  (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
2408  : nullptr))) {
2409  assert(isKernelFunction(*F) &&
2410  "Only kernels can have image/sampler params");
2411  InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
2412  continue;
2413  }
2414 
2415  if (theArgs[i]->use_empty()) {
2416  // argument is dead
2417  if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {
2418  SmallVector<EVT, 16> vtparts;
2419 
2420  ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
2421  assert(vtparts.size() > 0 && "empty aggregate type not expected");
2422  for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
2423  ++parti) {
2424  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2425  ++InsIdx;
2426  }
2427  if (vtparts.size() > 0)
2428  --InsIdx;
2429  continue;
2430  }
2431  if (Ty->isVectorTy()) {
2432  EVT ObjectVT = getValueType(DL, Ty);
2433  unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
2434  for (unsigned parti = 0; parti < NumRegs; ++parti) {
2435  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2436  ++InsIdx;
2437  }
2438  if (NumRegs > 0)
2439  --InsIdx;
2440  continue;
2441  }
2442  InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
2443  continue;
2444  }
2445 
2446  // In the following cases, assign a node order of "idx+1"
2447  // to newly created nodes. The SDNodes for params have to
2448  // appear in the same order as their order of appearance
2449  // in the original function. "idx+1" holds that order.
2450  if (!PAL.hasParamAttribute(i, Attribute::ByVal)) {
2451  bool aggregateIsPacked = false;
2452  if (StructType *STy = dyn_cast<StructType>(Ty))
2453  aggregateIsPacked = STy->isPacked();
2454 
2455  SmallVector<EVT, 16> VTs;
2456  SmallVector<uint64_t, 16> Offsets;
2457  ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
2458  assert(VTs.size() > 0 && "Unexpected empty type.");
2459  auto VectorInfo =
2460  VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));
2461 
2462  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2463  int VecIdx = -1; // Index of the first element of the current vector.
2464  for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
2465  if (VectorInfo[parti] & PVF_FIRST) {
2466  assert(VecIdx == -1 && "Orphaned vector.");
2467  VecIdx = parti;
2468  }
2469 
2470  // That's the last element of this store op.
2471  if (VectorInfo[parti] & PVF_LAST) {
2472  unsigned NumElts = parti - VecIdx + 1;
2473  EVT EltVT = VTs[parti];
2474  // i1 is loaded/stored as i8.
2475  EVT LoadVT = EltVT;
2476  if (EltVT == MVT::i1)
2477  LoadVT = MVT::i8;
2478  else if (EltVT == MVT::v2f16)
2479  // getLoad needs a vector type, but it can't handle
2480  // vectors which contain v2f16 elements. So we must load
2481  // using i32 here and then bitcast back.
2482  LoadVT = MVT::i32;
2483 
2484  EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
2485  SDValue VecAddr =
2486  DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
2487  DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
2488  Value *srcValue = Constant::getNullValue(PointerType::get(
2489  EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
2490  SDValue P =
2491  DAG.getLoad(VecVT, dl, Root, VecAddr,
2492  MachinePointerInfo(srcValue), aggregateIsPacked,
2493  MachineMemOperand::MODereferenceable |
2494  MachineMemOperand::MOInvariant);
2495  if (P.getNode())
2496  P.getNode()->setIROrder(idx + 1);
2497  for (unsigned j = 0; j < NumElts; ++j) {
2498  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
2499  DAG.getIntPtrConstant(j, dl));
2500  // We've loaded i1 as an i8 and now must truncate it back to i1
2501  if (EltVT == MVT::i1)
2502  Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
2503  // v2f16 was loaded as an i32. Now we must bitcast it back.
2504  else if (EltVT == MVT::v2f16)
2505  Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
2506  // Extend the element if necessary (e.g. an i8 is loaded
2507  // into an i16 register)
2508  if (Ins[InsIdx].VT.isInteger() &&
2509  Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
2510  unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
2511  : ISD::ZERO_EXTEND;
2512  Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
2513  }
2514  InVals.push_back(Elt);
2515  }
2516 
2517  // Reset vector tracking state.
2518  VecIdx = -1;
2519  }
2520  ++InsIdx;
2521  }
2522  if (VTs.size() > 0)
2523  --InsIdx;
2524  continue;
2525  }
2526 
2527  // Param has ByVal attribute
2528  // Return MoveParam(param symbol).
2529  // Ideally, the param symbol could be returned directly,
2530  // but when the SDNode builder decides to use it in a CopyToReg(),
2531  // the machine instruction fails because TargetExternalSymbol
2532  // (not lowered) is target dependent, and CopyToReg assumes
2533  // the source is lowered.
2534  EVT ObjectVT = getValueType(DL, Ty);
2535  assert(ObjectVT == Ins[InsIdx].VT &&
2536  "Ins type did not match function type");
2537  SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
2538  SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
2539  if (p.getNode())
2540  p.getNode()->setIROrder(idx + 1);
2541  InVals.push_back(p);
2542  }
2543 
2544  // Clang will check explicit varargs and issue an error if any are
2545  // present. However, Clang will let code with an implicit vararg
2546  // declaration like f() pass. See bug 617733.
2547  // We treat this case as if the arg list is empty.
2548  // if (F.isVarArg()) {
2549  // assert(0 && "VarArg not supported yet!");
2550  //}
2551 
2552  if (!OutChains.empty())
2553  DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
2554 
2555  return Chain;
2556 }
2557 
2558 SDValue
2559 NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2560  bool isVarArg,
2561  const SmallVectorImpl<ISD::OutputArg> &Outs,
2562  const SmallVectorImpl<SDValue> &OutVals,
2563  const SDLoc &dl, SelectionDAG &DAG) const {
2564  MachineFunction &MF = DAG.getMachineFunction();
2565  Type *RetTy = MF.getFunction().getReturnType();
2566 
2567  bool isABI = (STI.getSmVersion() >= 20);
2568  assert(isABI && "Non-ABI compilation is not supported");
2569  if (!isABI)
2570  return Chain;
2571 
2572  const DataLayout DL = DAG.getDataLayout();
2573  SmallVector<EVT, 16> VTs;
2574  SmallVector<uint64_t, 16> Offsets;
2575  ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
2576  assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
2577 
2578  auto VectorInfo = VectorizePTXValueVTs(
2579  VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
2580 
2581  // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
2582  // 32-bits are sign extended or zero extended, depending on whether
2583  // they are signed or unsigned types.
2584  bool ExtendIntegerRetVal =
2585  RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
2586 
2587  SmallVector<SDValue, 6> StoreOperands;
2588  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
2589  // New load/store. Record chain and offset operands.
2590  if (VectorInfo[i] & PVF_FIRST) {
2591  assert(StoreOperands.empty() && "Orphaned operand list.");
2592  StoreOperands.push_back(Chain);
2593  StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
2594  }
2595 
2596  SDValue RetVal = OutVals[i];
2597  if (ExtendIntegerRetVal) {
2598  RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
2599  : ISD::ZERO_EXTEND,
2600  dl, MVT::i32, RetVal);
2601  } else if (RetVal.getValueSizeInBits() < 16) {
2602  // Use 16-bit registers for small load-stores as it's the
2603  // smallest general purpose register size supported by NVPTX.
2604  RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
2605  }
2606 
2607  // Record the value to return.
2608  StoreOperands.push_back(RetVal);
2609 
2610  // That's the last element of this store op.
2611  if (VectorInfo[i] & PVF_LAST) {
2612  NVPTXISD::NodeType Op;
2613  unsigned NumElts = StoreOperands.size() - 2;
2614  switch (NumElts) {
2615  case 1:
2616  Op = NVPTXISD::StoreRetval;
2617  break;
2618  case 2:
2619  Op = NVPTXISD::StoreRetvalV2;
2620  break;
2621  case 4:
2622  Op = NVPTXISD::StoreRetvalV4;
2623  break;
2624  default:
2625  llvm_unreachable("Invalid vector info.");
2626  }
2627 
2628  // Adjust type of load/store op if we've extended the scalar
2629  // return value.
2630  EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
2631  Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
2632  StoreOperands, TheStoreType,
2633  MachinePointerInfo(), /* Align */ 1,
2634  MachineMemOperand::MOStore);
2635  // Cleanup vector state.
2636  StoreOperands.clear();
2637  }
2638  }
2639 
2640  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
2641 }
2642 
2643 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
2644  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
2645  SelectionDAG &DAG) const {
2646  if (Constraint.length() > 1)
2647  return;
2648  else
2649  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2650 }
2651 
2652 static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
2653  switch (Intrinsic) {
2654  default:
2655  return 0;
2656 
2658  return NVPTXISD::Tex1DFloatS32;
2666  return NVPTXISD::Tex1DS32S32;
2668  return NVPTXISD::Tex1DS32Float;
2674  return NVPTXISD::Tex1DU32S32;
2676  return NVPTXISD::Tex1DU32Float;
2681 
2706 
2708  return NVPTXISD::Tex2DFloatS32;
2716  return NVPTXISD::Tex2DS32S32;
2718  return NVPTXISD::Tex2DS32Float;
2724  return NVPTXISD::Tex2DU32S32;
2726  return NVPTXISD::Tex2DU32Float;
2731 
2756 
2758  return NVPTXISD::Tex3DFloatS32;
2766  return NVPTXISD::Tex3DS32S32;
2768  return NVPTXISD::Tex3DS32Float;
2774  return NVPTXISD::Tex3DU32S32;
2776  return NVPTXISD::Tex3DU32Float;
2781 
2794 
2807 
2832 
2857 
2882 
2907 
2932 
2957 
2970 
2983 
3008  }
3009 }
3010 
3011 static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
3012  switch (Intrinsic) {
3013  default:
3014  return 0;
3016  return NVPTXISD::Suld1DI8Clamp;
3018  return NVPTXISD::Suld1DI16Clamp;
3020  return NVPTXISD::Suld1DI32Clamp;
3022  return NVPTXISD::Suld1DI64Clamp;
3060  return NVPTXISD::Suld2DI8Clamp;
3062  return NVPTXISD::Suld2DI16Clamp;
3064  return NVPTXISD::Suld2DI32Clamp;
3066  return NVPTXISD::Suld2DI64Clamp;
3104  return NVPTXISD::Suld3DI8Clamp;
3106  return NVPTXISD::Suld3DI16Clamp;
3108  return NVPTXISD::Suld3DI32Clamp;
3110  return NVPTXISD::Suld3DI64Clamp;
3126  return NVPTXISD::Suld1DI8Trap;
3128  return NVPTXISD::Suld1DI16Trap;
3130  return NVPTXISD::Suld1DI32Trap;
3132  return NVPTXISD::Suld1DI64Trap;
3134  return NVPTXISD::Suld1DV2I8Trap;
3142  return NVPTXISD::Suld1DV4I8Trap;
3170  return NVPTXISD::Suld2DI8Trap;
3172  return NVPTXISD::Suld2DI16Trap;
3174  return NVPTXISD::Suld2DI32Trap;
3176  return NVPTXISD::Suld2DI64Trap;
3178  return NVPTXISD::Suld2DV2I8Trap;
3186  return NVPTXISD::Suld2DV4I8Trap;
3214  return NVPTXISD::Suld3DI8Trap;
3216  return NVPTXISD::Suld3DI16Trap;
3218  return NVPTXISD::Suld3DI32Trap;
3220  return NVPTXISD::Suld3DI64Trap;
3222  return NVPTXISD::Suld3DV2I8Trap;
3230  return NVPTXISD::Suld3DV4I8Trap;
3236  return NVPTXISD::Suld1DI8Zero;
3238  return NVPTXISD::Suld1DI16Zero;
3240  return NVPTXISD::Suld1DI32Zero;
3242  return NVPTXISD::Suld1DI64Zero;
3244  return NVPTXISD::Suld1DV2I8Zero;
3252  return NVPTXISD::Suld1DV4I8Zero;
3280  return NVPTXISD::Suld2DI8Zero;
3282  return NVPTXISD::Suld2DI16Zero;
3284  return NVPTXISD::Suld2DI32Zero;
3286  return NVPTXISD::Suld2DI64Zero;
3288  return NVPTXISD::Suld2DV2I8Zero;
3296  return NVPTXISD::Suld2DV4I8Zero;
3324  return NVPTXISD::Suld3DI8Zero;
3326  return NVPTXISD::Suld3DI16Zero;
3328  return NVPTXISD::Suld3DI32Zero;
3330  return NVPTXISD::Suld3DI64Zero;
3332  return NVPTXISD::Suld3DV2I8Zero;
3340  return NVPTXISD::Suld3DV4I8Zero;
3345  }
3346 }
3347 
3348 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled
3349 // as TgtMemIntrinsic because we need the information that is only
3350 // available in the "Value" type of the destination pointer.
3351 // In particular, the address space information.
3352 
3353 bool NVPTXTargetLowering::getTgtMemIntrinsic(
3354  IntrinsicInfo &Info, const CallInst &I,
3355  MachineFunction &MF, unsigned Intrinsic) const {
3356  switch (Intrinsic) {
3357  default:
3358  return false;
3359  case Intrinsic::nvvm_match_all_sync_i32p:
3360  case Intrinsic::nvvm_match_all_sync_i64p:
3361  Info.opc = ISD::INTRINSIC_W_CHAIN;
3362  // memVT is bogus. These intrinsics have IntrInaccessibleMemOnly attribute
3363  // in order to model data exchange with other threads, but perform no real
3364  // memory accesses.
3365  Info.memVT = MVT::i1;
3366 
3367  // Our result depends on both our and other thread's arguments.
3368  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
3369  return true;
3394  Info.opc = ISD::INTRINSIC_W_CHAIN;
3395  Info.memVT = MVT::v8f16;
3396  Info.ptrVal = I.getArgOperand(0);
3397  Info.offset = 0;
3398  Info.flags = MachineMemOperand::MOLoad;
3399  Info.align = 16;
3400  return true;
3401  }
3402 
3415  Info.opc = ISD::INTRINSIC_W_CHAIN;
3416  Info.memVT = MVT::v4f16;
3417  Info.ptrVal = I.getArgOperand(0);
3418  Info.offset = 0;
3419  Info.flags = MachineMemOperand::MOLoad;
3420  Info.align = 16;
3421  return true;
3422  }
3423 
3436  Info.opc = ISD::INTRINSIC_W_CHAIN;
3437  Info.memVT = MVT::v8f32;
3438  Info.ptrVal = I.getArgOperand(0);
3439  Info.offset = 0;
3440  Info.flags = MachineMemOperand::MOLoad;
3441  Info.align = 16;
3442  return true;
3443  }
3444 
3457  Info.opc = ISD::INTRINSIC_VOID;
3458  Info.memVT = MVT::v4f16;
3459  Info.ptrVal = I.getArgOperand(0);
3460  Info.offset = 0;
3461  Info.flags = MachineMemOperand::MOStore;
3462  Info.align = 16;
3463  return true;
3464  }
3465 
3478  Info.opc = ISD::INTRINSIC_VOID;
3479  Info.memVT = MVT::v8f32;
3480  Info.ptrVal = I.getArgOperand(0);
3481  Info.offset = 0;
3482  Info.flags = MachineMemOperand::MOStore;
3483  Info.align = 16;
3484  return true;
3485  }
3486 
3491 
3514  auto &DL = I.getModule()->getDataLayout();
3515  Info.opc = ISD::INTRINSIC_W_CHAIN;
3516  Info.memVT = getValueType(DL, I.getType());
3517  Info.ptrVal = I.getArgOperand(0);
3518  Info.offset = 0;
3519  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
3520  Info.align = 0;
3521  return true;
3522  }
3523 
3524  case Intrinsic::nvvm_ldu_global_i:
3525  case Intrinsic::nvvm_ldu_global_f:
3526  case Intrinsic::nvvm_ldu_global_p: {
3527  auto &DL = I.getModule()->getDataLayout();
3528  Info.opc = ISD::INTRINSIC_W_CHAIN;
3529  if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
3530  Info.memVT = getValueType(DL, I.getType());
3531  else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
3532  Info.memVT = getPointerTy(DL);
3533  else
3534  Info.memVT = getValueType(DL, I.getType());
3535  Info.ptrVal = I.getArgOperand(0);
3536  Info.offset = 0;
3537  Info.flags = MachineMemOperand::MOLoad;
3538  Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3539 
3540  return true;
3541  }
3542  case Intrinsic::nvvm_ldg_global_i:
3543  case Intrinsic::nvvm_ldg_global_f:
3544  case Intrinsic::nvvm_ldg_global_p: {
3545  auto &DL = I.getModule()->getDataLayout();
3546 
3547  Info.opc = ISD::INTRINSIC_W_CHAIN;
3548  if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
3549  Info.memVT = getValueType(DL, I.getType());
3550  else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
3551  Info.memVT = getPointerTy(DL);
3552  else
3553  Info.memVT = getValueType(DL, I.getType());
3554  Info.ptrVal = I.getArgOperand(0);
3555  Info.offset = 0;
3556  Info.flags = MachineMemOperand::MOLoad;
3557  Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3558 
3559  return true;
3560  }
3561 
3618  Info.opc = getOpcForTextureInstr(Intrinsic);
3619  Info.memVT = MVT::v4f32;
3620  Info.ptrVal = nullptr;
3621  Info.offset = 0;
3622  Info.flags = MachineMemOperand::MOLoad;
3623  Info.align = 16;
3624  return true;
3625 
3738  Info.opc = getOpcForTextureInstr(Intrinsic);
3739  Info.memVT = MVT::v4i32;
3740  Info.ptrVal = nullptr;
3741  Info.offset = 0;
3742  Info.flags = MachineMemOperand::MOLoad;
3743  Info.align = 16;
3744  return true;
3745 
3791  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3792  Info.memVT = MVT::i8;
3793  Info.ptrVal = nullptr;
3794  Info.offset = 0;
3795  Info.flags = MachineMemOperand::MOLoad;
3796  Info.align = 16;
3797  return true;
3798 
3844  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3845  Info.memVT = MVT::i16;
3846  Info.ptrVal = nullptr;
3847  Info.offset = 0;
3848  Info.flags = MachineMemOperand::MOLoad;
3849  Info.align = 16;
3850  return true;
3851 
3897  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3898  Info.memVT = MVT::i32;
3899  Info.ptrVal = nullptr;
3900  Info.offset = 0;
3901  Info.flags = MachineMemOperand::MOLoad;
3902  Info.align = 16;
3903  return true;
3904 
3935  Info.opc = getOpcForSurfaceInstr(Intrinsic);
3936  Info.memVT = MVT::i64;
3937  Info.ptrVal = nullptr;
3938  Info.offset = 0;
3939  Info.flags = MachineMemOperand::MOLoad;
3940  Info.align = 16;
3941  return true;
3942  }
3943  return false;
3944 }
3945 
3946 /// isLegalAddressingMode - Return true if the addressing mode represented
3947 /// by AM is legal for this target, for a load/store of the specified type.
3948 /// Used to guide target specific optimizations, like loop strength reduction
3949 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
3950 /// (CodeGenPrepare.cpp)
3952  const AddrMode &AM, Type *Ty,
3953  unsigned AS, Instruction *I) const {
3954  // AddrMode - This represents an addressing mode of:
3955  // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
3956  //
3957  // The legal address modes are
3958  // - [avar]
3959  // - [areg]
3960  // - [areg+immoff]
3961  // - [immAddr]
3962 
3963  if (AM.BaseGV) {
3964  return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
3965  }
3966 
3967  switch (AM.Scale) {
3968  case 0: // "r", "r+i" or "i" is allowed
3969  break;
3970  case 1:
3971  if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
3972  return false;
3973  // Otherwise we have r+i.
3974  break;
3975  default:
3976  // No scale > 1 is allowed
3977  return false;
3978  }
3979  return true;
3980 }
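// The accepted forms, written as PTX with illustrative operands:
//   ld.global.f32 %f0, [gvar];       // [avar]:        BaseGV only
//   ld.global.f32 %f0, [%rd1];       // [areg]:        base register only
//   ld.global.f32 %f0, [%rd1+16];    // [areg+immoff]: base plus immediate
//   ld.global.f32 %f0, [4096];       // [immAddr]:     immediate address
// A scaled index such as base + 4*index has no PTX addressing form, which is
// why Scale > 1, and Scale == 1 combined with a base register, are rejected
// above.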
3981 
3982 //===----------------------------------------------------------------------===//
3983 // NVPTX Inline Assembly Support
3984 //===----------------------------------------------------------------------===//
3985 
3986 /// getConstraintType - Given a constraint letter, return the type of
3987 /// constraint it is for this target.
3988 NVPTXTargetLowering::ConstraintType
3989 NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
3990  if (Constraint.size() == 1) {
3991  switch (Constraint[0]) {
3992  default:
3993  break;
3994  case 'b':
3995  case 'r':
3996  case 'h':
3997  case 'c':
3998  case 'l':
3999  case 'f':
4000  case 'd':
4001  case '0':
4002  case 'N':
4003  return C_RegisterClass;
4004  }
4005  }
4006  return TargetLowering::getConstraintType(Constraint);
4007 }
4008 
4009 std::pair<unsigned, const TargetRegisterClass *>
4010 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4011  StringRef Constraint,
4012  MVT VT) const {
4013  if (Constraint.size() == 1) {
4014  switch (Constraint[0]) {
4015  case 'b':
4016  return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
4017  case 'c':
4018  return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
4019  case 'h':
4020  return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
4021  case 'r':
4022  return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
4023  case 'l':
4024  case 'N':
4025  return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
4026  case 'f':
4027  return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
4028  case 'd':
4029  return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
4030  }
4031  }
4032  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4033 }
4034 
4035 //===----------------------------------------------------------------------===//
4036 // NVPTX DAG Combining
4037 //===----------------------------------------------------------------------===//
4038 
4039 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
4040  CodeGenOpt::Level OptLevel) const {
4041  // Always honor command-line argument
4042  if (FMAContractLevelOpt.getNumOccurrences() > 0)
4043  return FMAContractLevelOpt > 0;
4044 
4045  // Do not contract if we're not optimizing the code.
4046  if (OptLevel == 0)
4047  return false;
4048 
4049  // Honor TargetOptions flags that explicitly say fusion is okay.
4050  if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
4051  return true;
4052 
4053  return allowUnsafeFPMath(MF);
4054 }
4055 
4056 bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
4057  // Honor TargetOptions flags that explicitly say unsafe math is okay.
4058  if (MF.getTarget().Options.UnsafeFPMath)
4059  return true;
4060 
4061  // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
4062  const Function &F = MF.getFunction();
4063  if (F.hasFnAttribute("unsafe-fp-math")) {
4064  Attribute Attr = F.getFnAttribute("unsafe-fp-math");
4065  StringRef Val = Attr.getValueAsString();
4066  if (Val == "true")
4067  return true;
4068  }
4069 
4070  return false;
4071 }
4072 
4073 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
4074 /// operands N0 and N1. This is a helper for PerformADDCombine that is
4075 /// called with the default operands, and if that fails, with commuted
4076 /// operands.
4077 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
4078  TargetLowering::DAGCombinerInfo &DCI,
4079  const NVPTXSubtarget &Subtarget,
4080  CodeGenOpt::Level OptLevel) {
4081  SelectionDAG &DAG = DCI.DAG;
4082  // Skip non-integer, non-scalar case
4083  EVT VT=N0.getValueType();
4084  if (VT.isVector())
4085  return SDValue();
4086 
4087  // fold (add (mul a, b), c) -> (mad a, b, c)
4088  //
4089  if (N0.getOpcode() == ISD::MUL) {
4090  assert (VT.isInteger());
4091  // For integer:
4092  // Since integer multiply-add costs the same as integer multiply
4093  // but is more costly than integer add, do the fusion only when
4094  // the mul is only used in the add.
4095  if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
4096  !N0.getNode()->hasOneUse())
4097  return SDValue();
4098 
4099  // Do the folding
4100  return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
4101  N0.getOperand(0), N0.getOperand(1), N1);
4102  }
4103  else if (N0.getOpcode() == ISD::FMUL) {
4104  if (VT == MVT::f32 || VT == MVT::f64) {
4105  const auto *TLI = static_cast<const NVPTXTargetLowering *>(
4106  &DAG.getTargetLoweringInfo());
4107  if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
4108  return SDValue();
4109 
4110  // For floating point:
4111  // Do the fusion only when the mul has fewer than 5 uses and all of
4112  // them are adds.
4113  // The heuristic is that if a use is not an add, then that use
4114  // cannot be fused into an fma, so the mul is still needed anyway.
4115  // If there are more than 4 uses, even if they are all adds, fusing
4116  // them will increase register pressure.
4117  //
4118  int numUses = 0;
4119  int nonAddCount = 0;
4120  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
4121  UE = N0.getNode()->use_end();
4122  UI != UE; ++UI) {
4123  numUses++;
4124  SDNode *User = *UI;
4125  if (User->getOpcode() != ISD::FADD)
4126  ++nonAddCount;
4127  }
4128  if (numUses >= 5)
4129  return SDValue();
4130  if (nonAddCount) {
4131  int orderNo = N->getIROrder();
4132  int orderNo2 = N0.getNode()->getIROrder();
4133  // Simple heuristic for estimating potential register pressure:
4134  // the IR-order difference is used to measure the distance between
4135  // the def and this use, and the longer the distance, the more
4136  // likely the fusion is to increase register pressure.
4137  if (orderNo - orderNo2 < 500)
4138  return SDValue();
4139 
4140  // Now, check if at least one of the FMUL's operands is live beyond the node N,
4141  // which guarantees that the FMA will not increase register pressure at node N.
4142  bool opIsLive = false;
4143  const SDNode *left = N0.getOperand(0).getNode();
4144  const SDNode *right = N0.getOperand(1).getNode();
4145 
4146  if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
4147  opIsLive = true;
4148 
4149  if (!opIsLive)
4150  for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
4151  SDNode *User = *UI;
4152  int orderNo3 = User->getIROrder();
4153  if (orderNo3 > orderNo) {
4154  opIsLive = true;
4155  break;
4156  }
4157  }
4158 
4159  if (!opIsLive)
4160  for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
4161  SDNode *User = *UI;
4162  int orderNo3 = User->getIROrder();
4163  if (orderNo3 > orderNo) {
4164  opIsLive = true;
4165  break;
4166  }
4167  }
4168 
4169  if (!opIsLive)
4170  return SDValue();
4171  }
4172 
4173  return DAG.getNode(ISD::FMA, SDLoc(N), VT,
4174  N0.getOperand(0), N0.getOperand(1), N1);
4175  }
4176  }
4177 
4178  return SDValue();
4179 }
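// Illustrative PTX effect of the integer fold above (hypothetical registers):
//   mul.lo.s32 %r3, %r1, %r2;
//   add.s32    %r4, %r3, %r0;   ==>   mad.lo.s32 %r4, %r1, %r2, %r0;
// The single-use restriction matters because a mul with other users must be
// kept alive anyway, making the extra mad a net loss.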
4180 
4181 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
4182 ///
4183 static SDValue PerformADDCombine(SDNode *N,
4184  TargetLowering::DAGCombinerInfo &DCI,
4185  const NVPTXSubtarget &Subtarget,
4186  CodeGenOpt::Level OptLevel) {
4187  SDValue N0 = N->getOperand(0);
4188  SDValue N1 = N->getOperand(1);
4189 
4190  // First try with the default operand order.
4191  if (SDValue Result =
4192  PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel))
4193  return Result;
4194 
4195  // If that didn't work, try again with the operands commuted.
4196  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
4197 }
4198 
4199 static SDValue PerformANDCombine(SDNode *N,
4200  TargetLowering::DAGCombinerInfo &DCI) {
4201  // The type legalizer turns a vector load of i8 values into a zextload to i16
4202  // registers, optionally ANY_EXTENDs it (if target type is integer),
4203  // and ANDs off the high 8 bits. Since we turn this load into a
4204  // target-specific DAG node, the DAG combiner fails to eliminate these AND
4205  // nodes. Do that here.
4206  SDValue Val = N->getOperand(0);
4207  SDValue Mask = N->getOperand(1);
4208 
4209  if (isa<ConstantSDNode>(Val)) {
4210  std::swap(Val, Mask);
4211  }
4212 
4213  SDValue AExt;
4214  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
4215  if (Val.getOpcode() == ISD::ANY_EXTEND) {
4216  AExt = Val;
4217  Val = Val->getOperand(0);
4218  }
4219 
4220  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
4221  Val = Val->getOperand(0);
4222  }
4223 
4224  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
4225  Val->getOpcode() == NVPTXISD::LoadV4) {
4226  ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
4227  if (!MaskCnst) {
4228  // Not an AND with a constant
4229  return SDValue();
4230  }
4231 
4232  uint64_t MaskVal = MaskCnst->getZExtValue();
4233  if (MaskVal != 0xff) {
4234  // Not an AND that chops off top 8 bits
4235  return SDValue();
4236  }
4237 
4238  MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
4239  if (!Mem) {
4240  // Not a MemSDNode?!?
4241  return SDValue();
4242  }
4243 
4244  EVT MemVT = Mem->getMemoryVT();
4245  if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
4246  // We only handle the i8 case
4247  return SDValue();
4248  }
4249 
4250  unsigned ExtType =
4251  cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
4252  getZExtValue();
4253  if (ExtType == ISD::SEXTLOAD) {
4254  // If for some reason the load is a sextload, the and is needed to zero
4255  // out the high 8 bits
4256  return SDValue();
4257  }
4258 
4259  bool AddTo = false;
4260  if (AExt.getNode() != nullptr) {
4261  // Re-insert the ext as a zext.
4262  Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4263  AExt.getValueType(), Val);
4264  AddTo = true;
4265  }
4266 
4267  // If we get here, the AND is unnecessary. Just replace it with the load
4268  DCI.CombineTo(N, Val, AddTo);
4269  }
4270 
4271  return SDValue();
4272 }
4273 
4274 static SDValue PerformREMCombine(SDNode *N,
4275  TargetLowering::DAGCombinerInfo &DCI,
4276  CodeGenOpt::Level OptLevel) {
4277  assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
4278 
4279  // Don't do anything at less than -O2.
4280  if (OptLevel < CodeGenOpt::Default)
4281  return SDValue();
4282 
4283  SelectionDAG &DAG = DCI.DAG;
4284  SDLoc DL(N);
4285  EVT VT = N->getValueType(0);
4286  bool IsSigned = N->getOpcode() == ISD::SREM;
4287  unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
4288 
4289  const SDValue &Num = N->getOperand(0);
4290  const SDValue &Den = N->getOperand(1);
4291 
4292  for (const SDNode *U : Num->uses()) {
4293  if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
4294  U->getOperand(1) == Den) {
4295  // Num % Den -> Num - (Num / Den) * Den
4296  return DAG.getNode(ISD::SUB, DL, VT, Num,
4297  DAG.getNode(ISD::MUL, DL, VT,
4298  DAG.getNode(DivOpc, DL, VT, Num, Den),
4299  Den));
4300  }
4301  }
4302  return SDValue();
4303 }
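// The rewrite relies on the exact identity n % d == n - (n / d) * d for the
// matching signedness. For example, with n = 23 and d = 5 the existing
// quotient 23 / 5 = 4 gives 23 - 4 * 5 = 3 == 23 % 5, so the remainder costs
// one mul and one sub instead of a second full division.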
4304 
4305 enum OperandSignedness {
4306  Signed = 0,
4307  Unsigned,
4308  Unknown
4309 };
4310 
4311 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
4312 /// that can be demoted to \p OptSize bits without loss of information. The
4313 /// signedness of the operand, if determinable, is placed in \p S.
4314 static bool IsMulWideOperandDemotable(SDValue Op,
4315  unsigned OptSize,
4316  OperandSignedness &S) {
4317  S = Unknown;
4318 
4319  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
4320  Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4321  EVT OrigVT = Op.getOperand(0).getValueType();
4322  if (OrigVT.getSizeInBits() <= OptSize) {
4323  S = Signed;
4324  return true;
4325  }
4326  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
4327  EVT OrigVT = Op.getOperand(0).getValueType();
4328  if (OrigVT.getSizeInBits() <= OptSize) {
4329  S = Unsigned;
4330  return true;
4331  }
4332  }
4333 
4334  return false;
4335 }
4336 
4337 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
4338 /// be demoted to \p OptSize bits without loss of information. If the operands
4339 /// contain a constant, it should appear as the RHS operand. The signedness of
4340 /// the operands is placed in \p IsSigned.
4341 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
4342  unsigned OptSize,
4343  bool &IsSigned) {
4344  OperandSignedness LHSSign;
4345 
4346  // The LHS operand must be a demotable op
4347  if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
4348  return false;
4349 
4350  // We should have been able to determine the signedness from the LHS
4351  if (LHSSign == Unknown)
4352  return false;
4353 
4354  IsSigned = (LHSSign == Signed);
4355 
4356  // The RHS can be a demotable op or a constant
4357  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
4358  const APInt &Val = CI->getAPIntValue();
4359  if (LHSSign == Unsigned) {
4360  return Val.isIntN(OptSize);
4361  } else {
4362  return Val.isSignedIntN(OptSize);
4363  }
4364  } else {
4365  OperandSignedness RHSSign;
4366  if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
4367  return false;
4368 
4369  return LHSSign == RHSSign;
4370  }
4371 }
4372 
4373 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
4374 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
4375 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
4376 /// amount.
4377 static SDValue TryMULWIDECombine(SDNode *N,
4378  TargetLowering::DAGCombinerInfo &DCI) {
4379  EVT MulType = N->getValueType(0);
4380  if (MulType != MVT::i32 && MulType != MVT::i64) {
4381  return SDValue();
4382  }
4383 
4384  SDLoc DL(N);
4385  unsigned OptSize = MulType.getSizeInBits() >> 1;
4386  SDValue LHS = N->getOperand(0);
4387  SDValue RHS = N->getOperand(1);
4388 
4389  // Canonicalize the multiply so the constant (if any) is on the right
4390  if (N->getOpcode() == ISD::MUL) {
4391  if (isa<ConstantSDNode>(LHS)) {
4392  std::swap(LHS, RHS);
4393  }
4394  }
4395 
4396  // If we have a SHL, determine the actual multiply amount
4397  if (N->getOpcode() == ISD::SHL) {
4398  ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
4399  if (!ShlRHS) {
4400  return SDValue();
4401  }
4402 
4403  APInt ShiftAmt = ShlRHS->getAPIntValue();
4404  unsigned BitWidth = MulType.getSizeInBits();
4405  if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
4406  APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
4407  RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
4408  } else {
4409  return SDValue();
4410  }
4411  }
4412 
4413  bool Signed;
4414  // Verify that our operands are demotable
4415  if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
4416  return SDValue();
4417  }
4418 
4419  EVT DemotedVT;
4420  if (MulType == MVT::i32) {
4421  DemotedVT = MVT::i16;
4422  } else {
4423  DemotedVT = MVT::i32;
4424  }
4425 
4426  // Truncate the operands to the correct size. Note that these are just for
4427  // type consistency and will (likely) be eliminated in later phases.
4428  SDValue TruncLHS =
4429  DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
4430  SDValue TruncRHS =
4431  DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
4432 
4433  unsigned Opc;
4434  if (Signed) {
4435  Opc = NVPTXISD::MUL_WIDE_SIGNED;
4436  } else {
4437  Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
4438  }
4439 
4440  return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
4441 }
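// Illustrative sketch, not part of the original file: the SHL handling above
// rewrites x << c as x * (1 << c) so that a single mul.wide pattern covers
// both node kinds. A hypothetical 32-bit scalar analogue, assuming c < 32:
static inline uint32_t shlAsMul(uint32_t X, unsigned C) {
  uint32_t MulVal = 1u << C; // mirrors APInt(BitWidth, 1) << ShiftAmt above
  return X * MulVal;         // equal to X << C modulo 2^32
}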
4442 
4443 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
4444 static SDValue PerformMULCombine(SDNode *N,
4445  TargetLowering::DAGCombinerInfo &DCI,
4446  CodeGenOpt::Level OptLevel) {
4447  if (OptLevel > 0) {
4448  // Try mul.wide combining at OptLevel > 0
4449  if (SDValue Ret = TryMULWIDECombine(N, DCI))
4450  return Ret;
4451  }
4452 
4453  return SDValue();
4454 }
4455 
4456 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
4457 static SDValue PerformSHLCombine(SDNode *N,
4458  TargetLowering::DAGCombinerInfo &DCI,
4459  CodeGenOpt::Level OptLevel) {
4460  if (OptLevel > 0) {
4461  // Try mul.wide combining at OptLevel > 0
4462  if (SDValue Ret = TryMULWIDECombine(N, DCI))
4463  return Ret;
4464  }
4465 
4466  return SDValue();
4467 }
4468 
4469 static SDValue PerformSETCCCombine(SDNode *N,
4470  TargetLowering::DAGCombinerInfo &DCI) {
4471  EVT CCType = N->getValueType(0);
4472  SDValue A = N->getOperand(0);
4473  SDValue B = N->getOperand(1);
4474 
4475  if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
4476  return SDValue();
4477 
4478  SDLoc DL(N);
4479  // setp.f16x2 returns two scalar predicates, which we need to
4480  // convert back to v2i1. The returned result will be scalarized by
4481  // the legalizer, but the comparison will remain a single vector
4482  // instruction.
4483  SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
4484  DCI.DAG.getVTList(MVT::i1, MVT::i1),
4485  {A, B, N->getOperand(2)});
4486  return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
4487  CCNode.getValue(1));
4488 }
4489 
4490 SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
4491  DAGCombinerInfo &DCI) const {
4492  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
4493  switch (N->getOpcode()) {
4494  default: break;
4495  case ISD::ADD:
4496  case ISD::FADD:
4497  return PerformADDCombine(N, DCI, STI, OptLevel);
4498  case ISD::MUL:
4499  return PerformMULCombine(N, DCI, OptLevel);
4500  case ISD::SHL:
4501  return PerformSHLCombine(N, DCI, OptLevel);
4502  case ISD::AND:
4503  return PerformANDCombine(N, DCI);
4504  case ISD::UREM:
4505  case ISD::SREM:
4506  return PerformREMCombine(N, DCI, OptLevel);
4507  case ISD::SETCC:
4508  return PerformSETCCCombine(N, DCI);
4509  }
4510  return SDValue();
4511 }
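// Note, not part of the original file: PerformDAGCombine is only consulted
// for opcodes the target has registered, which NVPTX does in its
// TargetLowering constructor along these lines:
//
//   setTargetDAGCombine(ISD::ADD);
//   setTargetDAGCombine(ISD::MUL);
//   setTargetDAGCombine(ISD::SHL);
//   setTargetDAGCombine(ISD::SETCC);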
4512 
4513 /// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
4514 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
4515  SmallVectorImpl<SDValue> &Results) {
4516  EVT ResVT = N->getValueType(0);
4517  SDLoc DL(N);
4518 
4519  assert(ResVT.isVector() && "Vector load must have vector type");
4520 
4521  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
4522  // legal. We can (and should) split that into 2 loads of <2 x double> here
4523  // but I'm leaving that as a TODO for now.
4524  assert(ResVT.isSimple() && "Can only handle simple types");
4525  switch (ResVT.getSimpleVT().SimpleTy) {
4526  default:
4527  return;
4528  case MVT::v2i8:
4529  case MVT::v2i16:
4530  case MVT::v2i32:
4531  case MVT::v2i64:
4532  case MVT::v2f16:
4533  case MVT::v2f32:
4534  case MVT::v2f64:
4535  case MVT::v4i8:
4536  case MVT::v4i16:
4537  case MVT::v4i32:
4538  case MVT::v4f16:
4539  case MVT::v4f32:
4540  case MVT::v8f16: // <4 x f16x2>
4541  // This is a "native" vector type
4542  break;
4543  }
4544 
4545  LoadSDNode *LD = cast<LoadSDNode>(N);
4546 
4547  unsigned Align = LD->getAlignment();
4548  auto &TD = DAG.getDataLayout();
4549  unsigned PrefAlign =
4550  TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
4551  if (Align < PrefAlign) {
4552  // This load is not sufficiently aligned, so bail out and let this vector
4553  // load be scalarized. Note that we may still be able to emit smaller
4554  // vector loads. For example, if we are loading a <4 x float> with an
4555  // alignment of 8, this check will fail but the legalizer will try again
4556  // with 2 x <2 x float>, which will succeed with an alignment of 8.
4557  return;
4558  }
4559 
4560  EVT EltVT = ResVT.getVectorElementType();
4561  unsigned NumElts = ResVT.getVectorNumElements();
4562 
4563  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
4564  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
4565  // loaded type to i16 and propagate the "real" type as the memory type.
4566  bool NeedTrunc = false;
4567  if (EltVT.getSizeInBits() < 16) {
4568  EltVT = MVT::i16;
4569  NeedTrunc = true;
4570  }
4571 
4572  unsigned Opcode = 0;
4573  SDVTList LdResVTs;
4574  bool LoadF16x2 = false;
4575 
4576  switch (NumElts) {
4577  default:
4578  return;
4579  case 2:
4580  Opcode = NVPTXISD::LoadV2;
4581  LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4582  break;
4583  case 4: {
4584  Opcode = NVPTXISD::LoadV4;
4585  EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4586  LdResVTs = DAG.getVTList(ListVTs);
4587  break;
4588  }
4589  case 8: {
4590  // v8f16 is a special case. PTX doesn't have an ld.v8.f16
4591  // instruction. Instead, we split the vector into v2f16 chunks and
4592  // load them with ld.v4.b32.
4593  assert(EltVT == MVT::f16 && "Unsupported v8 vector type.");
4594  LoadF16x2 = true;
4595  Opcode = NVPTXISD::LoadV4;
4596  EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16,
4597  MVT::Other};
4598  LdResVTs = DAG.getVTList(ListVTs);
4599  break;
4600  }
4601  }
4602 
4603  // Copy regular operands
4604  SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
4605 
4606  // The select routine does not have access to the LoadSDNode instance, so
4607  // pass along the extension information
4608  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
4609 
4610  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4611  LD->getMemoryVT(),
4612  LD->getMemOperand());
4613 
4614  SmallVector<SDValue, 8> ScalarRes;
4615  if (LoadF16x2) {
4616  // Split v2f16 subvectors back into individual elements.
4617  NumElts /= 2;
4618  for (unsigned i = 0; i < NumElts; ++i) {
4619  SDValue SubVector = NewLD.getValue(i);
4620  SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
4621  DAG.getIntPtrConstant(0, DL));
4622  SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
4623  DAG.getIntPtrConstant(1, DL));
4624  ScalarRes.push_back(E0);
4625  ScalarRes.push_back(E1);
4626  }
4627  } else {
4628  for (unsigned i = 0; i < NumElts; ++i) {
4629  SDValue Res = NewLD.getValue(i);
4630  if (NeedTrunc)
4631  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4632  ScalarRes.push_back(Res);
4633  }
4634  }
4635 
4636  SDValue LoadChain = NewLD.getValue(NumElts);
4637 
4638  SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);
4639 
4640  Results.push_back(BuildVec);
4641  Results.push_back(LoadChain);
4642 }
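// Illustrative sketch, not part of the original file: the v8f16 path above
// produces four v2f16 values that are split back into eight scalars. A
// hypothetical plain-C++ analogue of that reassembly, with uint16_t standing
// in for an f16 payload:
struct F16x2Chunk { uint16_t Lo, Hi; }; // stands in for one MVT::v2f16 value
static inline void splitF16x2Chunks(const F16x2Chunk In[4], uint16_t Out[8]) {
  for (unsigned I = 0; I != 4; ++I) {
    Out[2 * I + 0] = In[I].Lo; // EXTRACT_VECTOR_ELT, index 0
    Out[2 * I + 1] = In[I].Hi; // EXTRACT_VECTOR_ELT, index 1
  }
}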
4643 
4644 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
4645  SmallVectorImpl<SDValue> &Results) {
4646  SDValue Chain = N->getOperand(0);
4647  SDValue Intrin = N->getOperand(1);
4648  SDLoc DL(N);
4649 
4650  // Get the intrinsic ID
4651  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
4652  switch (IntrinNo) {
4653  default:
4654  return;
4655  case Intrinsic::nvvm_ldg_global_i:
4656  case Intrinsic::nvvm_ldg_global_f:
4657  case Intrinsic::nvvm_ldg_global_p:
4658  case Intrinsic::nvvm_ldu_global_i:
4659  case Intrinsic::nvvm_ldu_global_f:
4660  case Intrinsic::nvvm_ldu_global_p: {
4661  EVT ResVT = N->getValueType(0);
4662 
4663  if (ResVT.isVector()) {
4664  // Vector LDG/LDU
4665 
4666  unsigned NumElts = ResVT.getVectorNumElements();
4667  EVT EltVT = ResVT.getVectorElementType();
4668 
4669  // Since LDU/LDG are target nodes, we cannot rely on DAG type
4670  // legalization.
4671  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
4672  // loaded type to i16 and propagate the "real" type as the memory type.
4673  bool NeedTrunc = false;
4674  if (EltVT.getSizeInBits() < 16) {
4675  EltVT = MVT::i16;
4676  NeedTrunc = true;
4677  }
4678 
4679  unsigned Opcode = 0;
4680  SDVTList LdResVTs;
4681 
4682  switch (NumElts) {
4683  default:
4684  return;
4685  case 2:
4686  switch (IntrinNo) {
4687  default:
4688  return;
4689  case Intrinsic::nvvm_ldg_global_i:
4690  case Intrinsic::nvvm_ldg_global_f:
4691  case Intrinsic::nvvm_ldg_global_p:
4692  Opcode = NVPTXISD::LDGV2;
4693  break;
4694  case Intrinsic::nvvm_ldu_global_i:
4695  case Intrinsic::nvvm_ldu_global_f:
4696  case Intrinsic::nvvm_ldu_global_p:
4697  Opcode = NVPTXISD::LDUV2;
4698  break;
4699  }
4700  LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
4701  break;
4702  case 4: {
4703  switch (IntrinNo) {
4704  default:
4705  return;
4706  case Intrinsic::nvvm_ldg_global_i:
4707  case Intrinsic::nvvm_ldg_global_f:
4708  case Intrinsic::nvvm_ldg_global_p:
4709  Opcode = NVPTXISD::LDGV4;
4710  break;
4711  case Intrinsic::nvvm_ldu_global_i:
4712  case Intrinsic::nvvm_ldu_global_f:
4713  case Intrinsic::nvvm_ldu_global_p:
4714  Opcode = NVPTXISD::LDUV4;
4715  break;
4716  }
4717  EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
4718  LdResVTs = DAG.getVTList(ListVTs);
4719  break;
4720  }
4721  }
4722 
4723  SmallVector<SDValue, 8> OtherOps;
4724 
4725  // Copy regular operands
4726 
4727  OtherOps.push_back(Chain); // Chain
4728  // Skip operand 1 (intrinsic ID)
4729  // Others
4730  OtherOps.append(N->op_begin() + 2, N->op_end());
4731 
4732  MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4733 
4734  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
4735  MemSD->getMemoryVT(),
4736  MemSD->getMemOperand());
4737 
4738  SmallVector<SDValue, 4> ScalarRes;
4739 
4740  for (unsigned i = 0; i < NumElts; ++i) {
4741  SDValue Res = NewLD.getValue(i);
4742  if (NeedTrunc)
4743  Res =
4744  DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
4745  ScalarRes.push_back(Res);
4746  }
4747 
4748  SDValue LoadChain = NewLD.getValue(NumElts);
4749 
4750  SDValue BuildVec =
4751  DAG.getBuildVector(ResVT, DL, ScalarRes);
4752 
4753  Results.push_back(BuildVec);
4754  Results.push_back(LoadChain);
4755  } else {
4756  // i8 LDG/LDU
4757  assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
4758  "Custom handling of non-i8 ldu/ldg?");
4759 
4760  // Just copy all operands as-is
4761  SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
4762 
4763  // Force output to i16
4764  SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
4765 
4766  MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
4767 
4768  // We make sure the memory type is i8, which will be used during isel
4769  // to select the proper instruction.
4770  SDValue NewLD =
4771  DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
4772  MVT::i8, MemSD->getMemOperand());
4773 
4774  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
4775  NewLD.getValue(0)));
4776  Results.push_back(NewLD.getValue(1));
4777  }
4778  }
4779  }
4780 }
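// Illustrative sketch, not part of the original file: both branches above
// legalize sub-16-bit element types by loading as i16 and truncating back,
// while the memory type stays i8 so isel still emits a byte-wide load:
//
//   uint16_t Wide = ...;                      // value of the i16-typed load
//   uint8_t Res = static_cast<uint8_t>(Wide); // ISD::TRUNCATE back to i8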
4781 
4782 void NVPTXTargetLowering::ReplaceNodeResults(
4783  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4784  switch (N->getOpcode()) {
4785  default:
4786  report_fatal_error("Unhandled custom legalization");
4787  case ISD::LOAD:
4788  ReplaceLoadVector(N, DAG, Results);
4789  return;
4790  case ISD::INTRINSIC_W_CHAIN:
4791  ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
4792  return;
4793  }
4794 }
4795 
4796 // Pin NVPTXTargetObjectFile's vtables to this file.
4797 NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {}
4798 
4799 MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
4800  const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
4801  return getDataSection();
4802 }