LLVM  8.0.1
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This pass adds target attributes to functions which use intrinsics
11 /// which will impact calling convention lowering.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Triple.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Use.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Casting.h"
39 
40 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
41 
42 using namespace llvm;
43 
44 namespace {
45 
46 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
47 private:
48  const TargetMachine *TM = nullptr;
49 
50  bool addFeatureAttributes(Function &F);
51 
52 public:
53  static char ID;
54 
55  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
56 
57  bool doInitialization(CallGraph &CG) override;
58  bool runOnSCC(CallGraphSCC &SCC) override;
59 
60  StringRef getPassName() const override {
61  return "AMDGPU Annotate Kernel Features";
62  }
63 
64  void getAnalysisUsage(AnalysisUsage &AU) const override {
65  AU.setPreservesAll();
67  }
68 
69  static bool visitConstantExpr(const ConstantExpr *CE);
70  static bool visitConstantExprsRecursively(
71  const Constant *EntryC,
72  SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
73 };
74 
75 } // end anonymous namespace
76 
78 
80 
81 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
82  "Add AMDGPU function attributes", false, false)
83 
84 
85 // The queue ptr is only needed when casting to flat, not from it.
86 static bool castRequiresQueuePtr(unsigned SrcAS) {
87  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
88 }
89 
90 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
92 }
93 
94 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
95  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
96  unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
97  return castRequiresQueuePtr(SrcAS);
98  }
99 
100  return false;
101 }
102 
103 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
104  const Constant *EntryC,
105  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
106 
107  if (!ConstantExprVisited.insert(EntryC).second)
108  return false;
109 
111  Stack.push_back(EntryC);
112 
113  while (!Stack.empty()) {
114  const Constant *C = Stack.pop_back_val();
115 
116  // Check this constant expression.
117  if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
118  if (visitConstantExpr(CE))
119  return true;
120  }
121 
122  // Visit all sub-expressions.
123  for (const Use &U : C->operands()) {
124  const auto *OpC = dyn_cast<Constant>(U);
125  if (!OpC)
126  continue;
127 
128  if (!ConstantExprVisited.insert(OpC).second)
129  continue;
130 
131  Stack.push_back(OpC);
132  }
133  }
134 
135  return false;
136 }
137 
138 // We do not need to note the x workitem or workgroup id because they are always
139 // initialized.
140 //
141 // TODO: We should not add the attributes if the known compile time workgroup
142 // size is 1 for y/z.
144  bool &NonKernelOnly,
145  bool &IsQueuePtr) {
146  switch (ID) {
148  NonKernelOnly = true;
149  return "amdgpu-work-item-id-x";
151  NonKernelOnly = true;
152  return "amdgpu-work-group-id-x";
155  return "amdgpu-work-item-id-y";
158  return "amdgpu-work-item-id-z";
161  return "amdgpu-work-group-id-y";
164  return "amdgpu-work-group-id-z";
166  return "amdgpu-dispatch-ptr";
168  return "amdgpu-dispatch-id";
170  return "amdgpu-kernarg-segment-ptr";
172  return "amdgpu-implicitarg-ptr";
174  case Intrinsic::trap:
176  IsQueuePtr = true;
177  return "amdgpu-queue-ptr";
178  default:
179  return "";
180  }
181 }
182 
183 static bool handleAttr(Function &Parent, const Function &Callee,
184  StringRef Name) {
185  if (Callee.hasFnAttribute(Name)) {
186  Parent.addFnAttr(Name);
187  return true;
188  }
189 
190  return false;
191 }
192 
193 static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
194  bool &NeedQueuePtr) {
195  // X ids unnecessarily propagated to kernels.
196  static const StringRef AttrNames[] = {
197  { "amdgpu-work-item-id-x" },
198  { "amdgpu-work-item-id-y" },
199  { "amdgpu-work-item-id-z" },
200  { "amdgpu-work-group-id-x" },
201  { "amdgpu-work-group-id-y" },
202  { "amdgpu-work-group-id-z" },
203  { "amdgpu-dispatch-ptr" },
204  { "amdgpu-dispatch-id" },
205  { "amdgpu-kernarg-segment-ptr" },
206  { "amdgpu-implicitarg-ptr" }
207  };
208 
209  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
210  NeedQueuePtr = true;
211 
212  for (StringRef AttrName : AttrNames)
213  handleAttr(Parent, Callee, AttrName);
214 }
215 
216 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
217  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
218  bool HasFlat = ST.hasFlatAddressSpace();
219  bool HasApertureRegs = ST.hasApertureRegs();
220  SmallPtrSet<const Constant *, 8> ConstantExprVisited;
221 
222  bool Changed = false;
223  bool NeedQueuePtr = false;
224  bool HaveCall = false;
225  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
226 
227  for (BasicBlock &BB : F) {
228  for (Instruction &I : BB) {
229  CallSite CS(&I);
230  if (CS) {
232 
233  // TODO: Do something with indirect calls.
234  if (!Callee) {
235  if (!CS.isInlineAsm())
236  HaveCall = true;
237  continue;
238  }
239 
240  Intrinsic::ID IID = Callee->getIntrinsicID();
241  if (IID == Intrinsic::not_intrinsic) {
242  HaveCall = true;
243  copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
244  Changed = true;
245  } else {
246  bool NonKernelOnly = false;
247  StringRef AttrName = intrinsicToAttrName(IID,
248  NonKernelOnly, NeedQueuePtr);
249  if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
250  F.addFnAttr(AttrName);
251  Changed = true;
252  }
253  }
254  }
255 
256  if (NeedQueuePtr || HasApertureRegs)
257  continue;
258 
259  if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
260  if (castRequiresQueuePtr(ASC)) {
261  NeedQueuePtr = true;
262  continue;
263  }
264  }
265 
266  for (const Use &U : I.operands()) {
267  const auto *OpC = dyn_cast<Constant>(U);
268  if (!OpC)
269  continue;
270 
271  if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
272  NeedQueuePtr = true;
273  break;
274  }
275  }
276  }
277  }
278 
279  if (NeedQueuePtr) {
280  F.addFnAttr("amdgpu-queue-ptr");
281  Changed = true;
282  }
283 
284  // TODO: We could refine this to captured pointers that could possibly be
285  // accessed by flat instructions. For now this is mostly a poor way of
286  // estimating whether there are calls before argument lowering.
287  if (HasFlat && !IsFunc && HaveCall) {
288  F.addFnAttr("amdgpu-flat-scratch");
289  Changed = true;
290  }
291 
292  return Changed;
293 }
294 
295 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
296  Module &M = SCC.getCallGraph().getModule();
297  Triple TT(M.getTargetTriple());
298 
299  bool Changed = false;
300  for (CallGraphNode *I : SCC) {
301  Function *F = I->getFunction();
302  if (!F || F->isDeclaration())
303  continue;
304 
305  Changed |= addFeatureAttributes(*F);
306  }
307 
308  return Changed;
309 }
310 
311 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
312  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
313  if (!TPC)
314  report_fatal_error("TargetMachine is required");
315 
316  TM = &TPC->getTM<TargetMachine>();
317  return false;
318 }
319 
321  return new AMDGPUAnnotateKernelFeatures();
322 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
uint64_t CallInst * C
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:240
unsigned getSrcAddressSpace() const
Returns the address space of the pointer operand.
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1210
bool isInlineAsm() const
Definition: CallSite.h:305
bool hasApertureRegs() const
AMDGPU specific subclass of TargetSubtarget.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
Address space for private memory.
Definition: AMDGPU.h:261
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
This defines the Use class.
A node in the call graph for a module.
Definition: CallGraph.h:165
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph...
Module & getModule() const
Returns the module the call graph corresponds to.
Definition: CallGraph.h:114
This class represents a conversion between pointers from one address space to another.
amdgpu Simplify well known AMD library false Value Value const Twine & Name
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
Pass * createAMDGPUAnnotateKernelFeaturesPass()
const CallGraph & getCallGraph()
static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC)
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:889
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
amdgpu Simplify well known AMD library false Value * Callee
Value * getOperand(unsigned i) const
Definition: User.h:170
static void copyFeaturesToFunction(Function &Parent, const Function &Callee, bool &NeedQueuePtr)
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool isEntryFunctionCC(CallingConv::ID CC)
char & AMDGPUAnnotateKernelFeaturesID
Represent the analysis usage information of a pass.
Address space for local memory.
Definition: AMDGPU.h:260
op_range operands()
Definition: User.h:238
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool hasFlatAddressSpace() const
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) static bool castRequiresQueuePtr(unsigned SrcAS)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:194
void setPreservesAll()
Set by analyses that do not transform their input at all.
static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name)
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:74
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
#define DEBUG_TYPE
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:230
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr)