LLVM  8.0.1
NVPTXTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "NVPTXUtilities.h"
12 #include "llvm/Analysis/LoopInfo.h"
16 #include "llvm/CodeGen/CostTable.h"
18 #include "llvm/Support/Debug.h"
19 using namespace llvm;
20 
21 #define DEBUG_TYPE "NVPTXtti"
22 
23 // Whether the given intrinsic reads threadIdx.x/y/z.
24 static bool readsThreadIndex(const IntrinsicInst *II) {
25  switch (II->getIntrinsicID()) {
26  default: return false;
30  return true;
31  }
32 }
33 
34 static bool readsLaneId(const IntrinsicInst *II) {
36 }
37 
38 // Whether the given intrinsic is an atomic instruction in PTX.
39 static bool isNVVMAtomic(const IntrinsicInst *II) {
40  switch (II->getIntrinsicID()) {
41  default: return false;
45 
68  return true;
69  }
70 }
71 
73  // Without inter-procedural analysis, we conservatively assume that arguments
74  // to __device__ functions are divergent.
75  if (const Argument *Arg = dyn_cast<Argument>(V))
76  return !isKernelFunction(*Arg->getParent());
77 
78  if (const Instruction *I = dyn_cast<Instruction>(V)) {
79  // Without pointer analysis, we conservatively assume values loaded from
80  // generic or local address space are divergent.
81  if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
82  unsigned AS = LI->getPointerAddressSpace();
83  return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
84  }
85  // Atomic instructions may cause divergence. Atomic instructions are
86  // executed sequentially across all threads in a warp. Therefore, an earlier
87  // executed thread may see different memory inputs than a later executed
88  // thread. For example, suppose *a = 0 initially.
89  //
90  // atom.global.add.s32 d, [a], 1
91  //
92  // returns 0 for the first thread that enters the critical region, and 1 for
93  // the second thread.
94  if (I->isAtomic())
95  return true;
96  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
97  // Instructions that read threadIdx are obviously divergent.
98  if (readsThreadIndex(II) || readsLaneId(II))
99  return true;
100  // Handle the NVPTX atomic instrinsics that cannot be represented as an
101  // atomic IR instruction.
102  if (isNVVMAtomic(II))
103  return true;
104  }
105  // Conservatively consider the return value of function calls as divergent.
106  // We could analyze callees with bodies more precisely using
107  // inter-procedural analysis.
108  if (isa<CallInst>(I))
109  return true;
110  }
111 
112  return false;
113 }
114 
116  unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
117  TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
119  // Legalize the type.
120  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
121 
122  int ISD = TLI->InstructionOpcodeToISD(Opcode);
123 
124  switch (ISD) {
125  default:
126  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
127  Opd1PropInfo, Opd2PropInfo);
128  case ISD::ADD:
129  case ISD::MUL:
130  case ISD::XOR:
131  case ISD::OR:
132  case ISD::AND:
133  // The machine code (SASS) simulates an i64 with two i32. Therefore, we
134  // estimate that arithmetic operations on i64 are twice as expensive as
135  // those on types that can fit into one machine register.
136  if (LT.second.SimpleTy == MVT::i64)
137  return 2 * LT.first;
138  // Delegate other cases to the basic TTI.
139  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
140  Opd1PropInfo, Opd2PropInfo);
141  }
142 }
143 
147 
148  // Enable partial unrolling and runtime unrolling, but reduce the
149  // threshold. This partially unrolls small loops which are often
150  // unrolled by the PTX to SASS compiler and unrolling earlier can be
151  // beneficial.
152  UP.Partial = UP.Runtime = true;
153  UP.PartialThreshold = UP.Threshold / 4;
154 }
static bool readsThreadIndex(const IntrinsicInst *II)
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:568
This class represents an incoming formal argument to a Function.
Definition: Argument.h:30
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Cost tables and simple lookup functions.
The main scalar evolution driver.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
static bool readsLaneId(const IntrinsicInst *II)
An instruction for reading from memory.
Definition: Instructions.h:168
bool isKernelFunction(const Function &F)
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:424
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
This file a TargetTransformInfo::Concept conforming object specific to the NVPTX target machine...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool isSourceOfDivergence(const Value *V)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
OperandValueProperties
Additional properties of an operand&#39;s values.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
amdgpu Simplify well known AMD library false Value Value * Arg
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
unsigned Threshold
The cost threshold for the unrolled loop.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
Parameters that control the generic loop unrolling transformation.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isNVVMAtomic(const IntrinsicInst *II)
LLVM Value Representation.
Definition: Value.h:73
OperandValueKind
Additional information about an operand&#39;s possible values.
This pass exposes codegen information to IR-level passes.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
This file describes how to lower LLVM code to machine code.