LLVM  8.0.1
AMDGPUPromoteAlloca.cpp
Go to the documentation of this file.
1 //===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass eliminates allocas by either converting them into vectors or
11 // by migrating them to local address space.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/Triple.h"
23 #include "llvm/ADT/Twine.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constant.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/IR/Metadata.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/User.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/Debug.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <map>
58 #include <tuple>
59 #include <utility>
60 #include <vector>
61 
62 #define DEBUG_TYPE "amdgpu-promote-alloca"
63 
64 using namespace llvm;
65 
66 namespace {
67 
// Debug/triage escape hatches: each flag independently disables one of the
// two promotion strategies implemented by this pass (alloca -> vector,
// alloca -> LDS/local memory). Both default to off (promotion enabled).
68 static cl::opt<bool> DisablePromoteAllocaToVector(
69  "disable-promote-alloca-to-vector",
70  cl::desc("Disable promote alloca to vector"),
71  cl::init(false));
72 
73 static cl::opt<bool> DisablePromoteAllocaToLDS(
74  "disable-promote-alloca-to-lds",
75  cl::desc("Disable promote alloca to LDS"),
76  cl::init(false));
77 
78 // FIXME: This can create globals so should be a module pass.
79 class AMDGPUPromoteAlloca : public FunctionPass {
80 private:
81  const TargetMachine *TM;
82  Module *Mod = nullptr;
83  const DataLayout *DL = nullptr;
84 
85  // FIXME: This should be per-kernel.
86  uint32_t LocalMemLimit = 0;
87  uint32_t CurrentLocalMemUsage = 0;
88 
89  bool IsAMDGCN = false;
90  bool IsAMDHSA = false;
91 
92  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
93  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
94 
95  /// BaseAlloca is the alloca root the search started from.
96  /// Val may be that alloca or a recursive user of it.
97  bool collectUsesWithPtrTypes(Value *BaseAlloca,
98  Value *Val,
99  std::vector<Value*> &WorkList) const;
100 
101  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
102  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
103  /// Returns true if both operands are derived from the same alloca. Val should
104  /// be the same value as one of the input operands of UseInst.
105  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
106  Instruction *UseInst,
107  int OpIdx0, int OpIdx1) const;
108 
109  /// Check whether we have enough local memory for promotion.
110  bool hasSufficientLocalMem(const Function &F);
111 
112 public:
113  static char ID;
114 
115  AMDGPUPromoteAlloca() : FunctionPass(ID) {}
116 
117  bool doInitialization(Module &M) override;
118  bool runOnFunction(Function &F) override;
119 
120  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
121 
122  bool handleAlloca(AllocaInst &I, bool SufficientLDS);
123 
124  void getAnalysisUsage(AnalysisUsage &AU) const override {
125  AU.setPreservesCFG();
127  }
128 };
129 
130 } // end anonymous namespace
131 
// Pass identity token (address of ID is what uniquely identifies the pass)
// and legacy pass-manager registration under DEBUG_TYPE
// ("amdgpu-promote-alloca"). The two trailing 'false' flags mark the pass
// as neither CFG-only nor analysis-only.
132 char AMDGPUPromoteAlloca::ID = 0;
133 
134 INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
135  "AMDGPU promote alloca to vector or LDS", false, false)
136 
138 
139 bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
140  Mod = &M;
141  DL = &Mod->getDataLayout();
142 
143  return false;
144 }
145 
// NOTE(review): the extracted text dropped several source lines in this
// function — the `bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {`
// signature (inner line 146), the subtarget query before the
// isPromoteAllocaEnabled() check (inner line 159), and the
// `AllocaInst *AI = dyn_cast<...>(I)` declaration inside the loop (inner
// line 167). The comments below document the visible logic only.
// Bail out when optimizations are disabled for this function.
147  if (skipFunction(F))
148  return false;
149 
// The target machine is required; without a TargetPassConfig we cannot
// query subtarget limits, so do nothing.
150  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
151  TM = &TPC->getTM<TargetMachine>();
152  else
153  return false;
154 
// Cache triple-derived flags used when choosing intrinsics later.
155  const Triple &TT = TM->getTargetTriple();
156  IsAMDGCN = TT.getArch() == Triple::amdgcn;
157  IsAMDHSA = TT.getOS() == Triple::AMDHSA;
158 
160  if (!ST.isPromoteAllocaEnabled())
161  return false;
162 
// LDS budget is computed once per function; handleAlloca consumes it
// incrementally via CurrentLocalMemUsage.
163  bool SufficientLDS = hasSufficientLocalMem(F);
164  bool Changed = false;
// Only entry-block allocas are considered (static allocas live there).
// The iterator is advanced before handleAlloca, which may erase the
// instruction it is given.
165  BasicBlock &EntryBB = *F.begin();
166  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
168 
169  ++I;
170  if (AI)
171  Changed |= handleAlloca(*AI, SufficientLDS);
172  }
173 
174  return Changed;
175 }
176 
// Returns IR values for the workgroup size in the Y and Z dimensions.
// Non-HSA: emits calls to the r600 local-size intrinsics. HSA: loads the
// 16-bit workgroup_size_y/z fields out of the dispatch packet.
// NOTE(review): extraction dropped several lines here (inner lines 180,
// 184, 186, 232, 235-236, 239, 255-256) — including the subtarget query,
// the Intrinsic::getDeclaration calls initializing LocalSizeYFn/ZFn and
// DispatchPtrFn, the dereferenceable/align attributes on DispatchPtr, and
// the !range/!invariant.load metadata applied to LoadXY.
177 std::pair<Value *, Value *>
178 AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
179  const Function &F = *Builder.GetInsertBlock()->getParent();
181 
182  if (!IsAMDHSA) {
183  Function *LocalSizeYFn
185  Function *LocalSizeZFn
187 
188  CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
189  CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
190 
// Attach !range metadata so later passes know the size bounds.
191  ST.makeLIDRangeMetadata(LocalSizeY);
192  ST.makeLIDRangeMetadata(LocalSizeZ);
193 
194  return std::make_pair(LocalSizeY, LocalSizeZ);
195  }
196 
197  // We must read the size out of the dispatch pointer.
198  assert(IsAMDGCN);
199 
200  // We are indexing into this struct, and want to extract the workgroup_size_*
201  // fields.
202  //
203  // typedef struct hsa_kernel_dispatch_packet_s {
204  // uint16_t header;
205  // uint16_t setup;
206  // uint16_t workgroup_size_x ;
207  // uint16_t workgroup_size_y;
208  // uint16_t workgroup_size_z;
209  // uint16_t reserved0;
210  // uint32_t grid_size_x ;
211  // uint32_t grid_size_y ;
212  // uint32_t grid_size_z;
213  //
214  // uint32_t private_segment_size;
215  // uint32_t group_segment_size;
216  // uint64_t kernel_object;
217  //
218  // #ifdef HSA_LARGE_MODEL
219  // void *kernarg_address;
220  // #elif defined HSA_LITTLE_ENDIAN
221  // void *kernarg_address;
222  // uint32_t reserved1;
223  // #else
224  // uint32_t reserved1;
225  // void *kernarg_address;
226  // #endif
227  // uint64_t reserved2;
228  // hsa_signal_t completion_signal; // uint64_t wrapper
229  // } hsa_kernel_dispatch_packet_t
230  //
231  Function *DispatchPtrFn
233 
234  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
237 
238  // Size of the dispatch packet struct.
240 
// View the packet as i32s in the constant address space: word 1 holds
// {workgroup_size_x, workgroup_size_y}, word 2 holds
// {workgroup_size_z, reserved0}.
241  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
242  Value *CastDispatchPtr = Builder.CreateBitCast(
243  DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
244 
245  // We could do a single 64-bit load here, but it's likely that the basic
246  // 32-bit and extract sequence is already present, and it is probably easier
247  // to CSE this. The loads should be mergable later anyway.
248  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
249  LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);
250 
251  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
252  LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
253 
254  MDNode *MD = MDNode::get(Mod->getContext(), None);
257  ST.makeLIDRangeMetadata(LoadZU);
258 
259  // Extract y component. Upper half of LoadZU should be zero already.
260  Value *Y = Builder.CreateLShr(LoadXY, 16);
261 
262  return std::make_pair(Y, LoadZU);
263 }
264 
// Emits a workitem-id intrinsic call for dimension N (0=x, 1=y, 2=z) and
// attaches !range metadata. NOTE(review): extraction dropped lines here
// (inner 267-268, 273, 277, 282) — the subtarget query, the
// `Intrinsic::ID IntrID` declaration, and the r600 (non-amdgcn) arms of
// each ternary selecting the intrinsic.
265 Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
266  const AMDGPUSubtarget &ST =
269 
270  switch (N) {
271  case 0:
272  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
274  break;
275  case 1:
276  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
278  break;
279 
280  case 2:
281  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
283  break;
284  default:
285  llvm_unreachable("invalid dimension");
286  }
287 
288  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
289  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
// Bound the result range so later passes can fold comparisons against it.
290  ST.makeLIDRangeMetadata(CI);
291 
292  return CI;
293 }
294 
// NOTE(review): extraction dropped the signature line (inner line 295,
// presumably `static VectorType *arrayTypeToVecType(ArrayType *ArrayTy) {`).
// The visible tail builds a vector type with the array's element type and
// element count.
296  return VectorType::get(ArrayTy->getElementType(),
297  ArrayTy->getNumElements());
298 }
299 
// Looks up the precomputed vector index for a GEP-derived pointer.
// Returns null when Ptr's GEP is not in the GEPIdx map.
// NOTE(review): extraction dropped the first parameter line of the
// signature (inner line 301, `calculateVectorIndex(Value *Ptr,`).
300 static Value *
302  const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
// Caller guarantees Ptr is a GEP; cast<> asserts that invariant.
303  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
304 
305  auto I = GEPIdx.find(GEP);
306  return I == GEPIdx.end() ? nullptr : I->second;
308 
// NOTE(review): extraction dropped the signature line (inner line 309,
// presumably `static Value *GEPToVectorIndex(GetElementPtrInst *GEP) {`).
// The visible body accepts only the pattern `gep T, ptr, 0, idx` and
// returns the variable index operand, or null for anything else.
310  // FIXME we only support simple cases
311  if (GEP->getNumOperands() != 3)
312  return nullptr;
313 
// First index must be a literal zero (no striding past the alloca).
314  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
315  if (!I0 || !I0->isZero())
316  return nullptr;
317 
318  return GEP->getOperand(2);
319 }
320 
321 // Not an instruction handled below to turn into a vector.
322 //
323 // TODO: Check isTriviallyVectorizable for calls and handle other
324 // instructions.
325 static bool canVectorizeInst(Instruction *Inst, User *User) {
326  switch (Inst->getOpcode()) {
327  case Instruction::Load: {
328  // Currently only handle the case where the Pointer Operand is a GEP.
329  // Also we could not vectorize volatile or atomic loads.
330  LoadInst *LI = cast<LoadInst>(Inst);
331  if (isa<AllocaInst>(User) &&
332  LI->getPointerOperandType() == User->getType() &&
333  isa<VectorType>(LI->getType()))
334  return true;
335  return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
336  }
337  case Instruction::BitCast:
338  return true;
339  case Instruction::Store: {
340  // Must be the stored pointer operand, not a stored value, plus
341  // since it should be canonical form, the User should be a GEP.
342  // Also we could not vectorize volatile or atomic stores.
343  StoreInst *SI = cast<StoreInst>(Inst);
344  if (isa<AllocaInst>(User) &&
345  SI->getPointerOperandType() == User->getType() &&
346  isa<VectorType>(SI->getValueOperand()->getType()))
347  return true;
348  return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
349  }
350  default:
351  return false;
352  }
353 }
354 
// Attempts to rewrite a small array/vector alloca into SSA vector values:
// every use must be a recognized load/store/bitcast (checked by
// canVectorizeInst) or a GEP with a computable constant-base index; loads
// become load-vector + extractelement, stores become load-vector +
// insertelement + store-vector.
// NOTE(review): extraction dropped two lines (inner 374 — the final
// element-type validity condition of the big if — and inner 382 — the
// `auto *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);` declaration).
355 static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
356 
357  if (DisablePromoteAllocaToVector) {
358  LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
359  return false;
360  }
361 
362  Type *AT = Alloca->getAllocatedType();
363  SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
364 
365  LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
366 
367  // FIXME: There is no reason why we can't support larger arrays, we
368  // are just being conservative for now.
369  // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
370  // could also be promoted but we don't currently handle this case
371  if (!AllocaTy ||
372  AllocaTy->getNumElements() > 16 ||
373  AllocaTy->getNumElements() < 2 ||
375  LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
376  return false;
377  }
378 
// First pass: validate every user and precompute GEP -> index mapping.
379  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
380  std::vector<Value*> WorkList;
381  for (User *AllocaUser : Alloca->users()) {
383  if (!GEP) {
384  if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
385  return false;
386 
387  WorkList.push_back(AllocaUser);
388  continue;
389  }
390 
391  Value *Index = GEPToVectorIndex(GEP);
392 
393  // If we can't compute a vector index from this GEP, then we can't
394  // promote this alloca to vector.
395  if (!Index) {
396  LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
397  << '\n');
398  return false;
399  }
400 
401  GEPVectorIdx[GEP] = Index;
402  for (User *GEPUser : AllocaUser->users()) {
403  if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
404  return false;
405 
406  WorkList.push_back(GEPUser);
407  }
408  }
409 
// Arrays are mapped to a vector of the same element type and count.
410  VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
411  if (!VectorTy)
412  VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
413 
414  LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
415  << *VectorTy << '\n');
416 
// Second pass: rewrite each collected access in place.
417  for (Value *V : WorkList) {
418  Instruction *Inst = cast<Instruction>(V);
419  IRBuilder<> Builder(Inst);
420  switch (Inst->getOpcode()) {
421  case Instruction::Load: {
// Whole-aggregate loads are already the right type; leave untouched.
422  if (Inst->getType() == AT)
423  break;
424 
425  Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
426  Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
427  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
428 
429  Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
430  Value *VecValue = Builder.CreateLoad(BitCast);
431  Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
432  Inst->replaceAllUsesWith(ExtractElement);
433  Inst->eraseFromParent();
434  break;
435  }
436  case Instruction::Store: {
437  StoreInst *SI = cast<StoreInst>(Inst);
438  if (SI->getValueOperand()->getType() == AT)
439  break;
440 
// Element store becomes read-modify-write of the whole vector.
441  Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
442  Value *Ptr = SI->getPointerOperand();
443  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
444  Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
445  Value *VecValue = Builder.CreateLoad(BitCast);
446  Value *NewVecValue = Builder.CreateInsertElement(VecValue,
447  SI->getValueOperand(),
448  Index);
449  Builder.CreateStore(NewVecValue, BitCast);
450  Inst->eraseFromParent();
451  break;
452  }
453  case Instruction::BitCast:
454  case Instruction::AddrSpaceCast:
455  break;
456 
457  default:
458  llvm_unreachable("Inconsistency in instructions promotable to vector");
459  }
460  }
461  return true;
462 }
463 
// Only calls to a whitelist of intrinsics can survive promotion to LDS
// (handleAlloca later rewrites or erases them). Any other call forces the
// alloca to stay in private memory.
// NOTE(review): extraction dropped lines here — inner line 465 (the
// `IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);` declaration) and
// inner lines 473-479 (additional whitelisted cases between memset and
// the `return true`, e.g. lifetime/invariant/objectsize intrinsics,
// which handleAlloca's switch below clearly handles).
464 static bool isCallPromotable(CallInst *CI) {
466  if (!II)
467  return false;
468 
469  switch (II->getIntrinsicID()) {
470  case Intrinsic::memcpy:
471  case Intrinsic::memmove:
472  case Intrinsic::memset:
480  return true;
481  default:
482  return false;
483  }
484 }
485 
486 bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
487  Value *Val,
488  Instruction *Inst,
489  int OpIdx0,
490  int OpIdx1) const {
491  // Figure out which operand is the one we might not be promoting.
492  Value *OtherOp = Inst->getOperand(OpIdx0);
493  if (Val == OtherOp)
494  OtherOp = Inst->getOperand(OpIdx1);
495 
496  if (isa<ConstantPointerNull>(OtherOp))
497  return true;
498 
499  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
500  if (!isa<AllocaInst>(OtherObj))
501  return false;
502 
503  // TODO: We should be able to replace undefs with the right pointer type.
504 
505  // TODO: If we know the other base object is another promotable
506  // alloca, not necessarily this alloca, we can do this. The
507  // important part is both must have the same address space at
508  // the end.
509  if (OtherObj != BaseAlloca) {
510  LLVM_DEBUG(
511  dbgs() << "Found a binary instruction with another alloca object\n");
512  return false;
513  }
514 
515  return true;
516 }
517 
// Recursively walks the use graph of Val (initially the alloca itself) and
// collects into WorkList every value whose pointer type must be mutated
// when the alloca moves to LDS. Returns false as soon as any use is found
// that cannot be rewritten safely (escape, volatile access, unhandled
// instruction), in which case promotion is abandoned.
518 bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
519  Value *BaseAlloca,
520  Value *Val,
521  std::vector<Value*> &WorkList) const {
522 
523  for (User *User : Val->users()) {
// Already visited via another path through the use graph.
524  if (is_contained(WorkList, User))
525  continue;
526 
527  if (CallInst *CI = dyn_cast<CallInst>(User)) {
528  if (!isCallPromotable(CI))
529  return false;
530 
531  WorkList.push_back(User);
532  continue;
533  }
534 
// A ptrtoint lets the address escape into integer arithmetic we can't
// track — give up.
535  Instruction *UseInst = cast<Instruction>(User);
536  if (UseInst->getOpcode() == Instruction::PtrToInt)
537  return false;
538 
// Loads don't produce a pointer derived from the alloca, so they are
// not pushed onto the WorkList and need no further recursion.
539  if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
540  if (LI->isVolatile())
541  return false;
542 
543  continue;
544  }
545 
546  if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
547  if (SI->isVolatile())
548  return false;
549 
550  // Reject if the stored value is not the pointer operand.
551  if (SI->getPointerOperand() != Val)
552  return false;
553  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
554  if (RMW->isVolatile())
555  return false;
556  } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
557  if (CAS->isVolatile())
558  return false;
559  }
560 
561  // Only promote a select if we know that the other select operand
562  // is from another pointer that will also be promoted.
563  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
564  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
565  return false;
566 
567  // May need to rewrite constant operands.
568  WorkList.push_back(ICmp);
569  }
570 
571  if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
572  // Give up if the pointer may be captured.
573  if (PointerMayBeCaptured(UseInst, true, true))
574  return false;
575  // Don't collect the users of this.
576  WorkList.push_back(User);
577  continue;
578  }
579 
// Non-pointer results (e.g. the i1 from an icmp handled above) cannot
// produce further derived pointers — stop recursing here.
580  if (!User->getType()->isPointerTy())
581  continue;
582 
583  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
584  // Be conservative if an address could be computed outside the bounds of
585  // the alloca.
586  if (!GEP->isInBounds())
587  return false;
588  }
589 
590  // Only promote a select if we know that the other select operand is from
591  // another pointer that will also be promoted.
592  if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
593  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
594  return false;
595  }
596 
597  // Repeat for phis.
598  if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
599  // TODO: Handle more complex cases. We should be able to replace loops
600  // over arrays.
601  switch (Phi->getNumIncomingValues()) {
602  case 1:
603  break;
604  case 2:
605  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
606  return false;
607  break;
608  default:
609  return false;
610  }
611  }
612 
// This use produces a derived pointer: record it and recurse into its
// own users. WorkList doubles as the visited set (checked at loop top).
613  WorkList.push_back(User);
614  if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
615  return false;
616  }
617 
618  return true;
619 }
620 
// Computes the per-function LDS budget: tallies LDS globals already used by
// F, estimates a target occupancy, and sets LocalMemLimit /
// CurrentLocalMemUsage accordingly. Returns true when promotion to LDS is
// worth attempting at all.
// NOTE(review): extraction dropped inner line 624 here — presumably the
// `const AMDGPUSubtarget &ST = ...` query that the later ST.* calls use.
621 bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
622 
623  FunctionType *FTy = F.getFunctionType();
625 
626  // If the function has any arguments in the local address space, then it's
627  // possible these arguments require the entire local memory space, so
628  // we cannot use local memory in the pass.
629  for (Type *ParamTy : FTy->params()) {
630  PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
631  if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
632  LocalMemLimit = 0;
633  LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
634  "local memory disabled.\n");
635  return false;
636  }
637  }
638 
639  LocalMemLimit = ST.getLocalMemorySize();
640  if (LocalMemLimit == 0)
641  return false;
642 
643  const DataLayout &DL = Mod->getDataLayout();
644 
645  // Check how much local memory is being used by global objects
646  CurrentLocalMemUsage = 0;
647  for (GlobalVariable &GV : Mod->globals()) {
648  if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
649  continue;
650 
// Only count LDS globals actually referenced from this function; a
// single use is enough, hence the break below.
651  for (const User *U : GV.users()) {
652  const Instruction *Use = dyn_cast<Instruction>(U);
653  if (!Use)
654  continue;
655 
656  if (Use->getParent()->getParent() == &F) {
657  unsigned Align = GV.getAlignment();
658  if (Align == 0)
659  Align = DL.getABITypeAlignment(GV.getValueType());
660 
661  // FIXME: Try to account for padding here. The padding is currently
662  // determined from the inverse order of uses in the function. I'm not
663  // sure if the use list order is in any way connected to this, so the
664  // total reported size is likely incorrect.
665  uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
666  CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
667  CurrentLocalMemUsage += AllocSize;
668  break;
669  }
670  }
671  }
672 
673  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
674  F);
675 
676  // Restrict local memory usage so that we don't drastically reduce occupancy,
677  // unless it is already significantly reduced.
678 
679  // TODO: Have some sort of hint or other heuristics to guess occupancy based
680  // on other factors..
681  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
682  if (OccupancyHint == 0)
683  OccupancyHint = 7;
684 
685  // Clamp to max value.
686  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
687 
688  // Check the hint but ignore it if it's obviously wrong from the existing LDS
689  // usage.
690  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
691 
692 
693  // Round up to the next tier of usage.
694  unsigned MaxSizeWithWaveCount
695  = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
696 
697  // Program is possibly broken by using more local mem than available.
698  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
699  return false;
700 
701  LocalMemLimit = MaxSizeWithWaveCount;
702 
703  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
704  << " bytes of LDS\n"
705  << " Rounding size to " << MaxSizeWithWaveCount
706  << " with a maximum occupancy of " << MaxOccupancy << '\n'
707  << " and " << (LocalMemLimit - CurrentLocalMemUsage)
708  << " available for promotion\n");
709 
710  return true;
711 }
712 
// NOTE(review): extraction dropped a number of lines inside this function
// (inner 740-741 kernel calling-convention cases; 798-800 remaining
// GlobalVariable constructor arguments including the LOCAL_ADDRESS address
// space; 817 the first (zero) GEP index; 832 and 849 the `PointerType
// *NewTy = ...` declarations; 874-876, 904-907 and 916-918 further
// intrinsic switch cases and the objectsize intrinsic types). Comments
// below describe only what is visible.
713 // FIXME: Should try to pick the most likely to be profitable allocas first.
714 bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
715  // Array allocations are probably not worth handling, since an allocation of
716  // the array type is the canonical form.
717  if (!I.isStaticAlloca() || I.isArrayAllocation())
718  return false;
719 
720  IRBuilder<> Builder(&I);
721 
722  // First try to replace the alloca with a vector
723  Type *AllocaTy = I.getAllocatedType();
724 
725  LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
726 
727  if (tryPromoteAllocaToVector(&I))
728  return true; // Promoted to vector.
729 
730  if (DisablePromoteAllocaToLDS)
731  return false;
732 
733  const Function &ContainingFunction = *I.getParent()->getParent();
734  CallingConv::ID CC = ContainingFunction.getCallingConv();
735 
736  // Don't promote the alloca to LDS for shader calling conventions as the work
737  // item ID intrinsics are not supported for these calling conventions.
738  // Furthermore not all LDS is available for some of the stages.
739  switch (CC) {
742  break;
743  default:
744  LLVM_DEBUG(
745  dbgs()
746  << " promote alloca to LDS not supported with calling convention.\n");
747  return false;
748  }
749 
750  // Not likely to have sufficient local memory for promotion.
751  if (!SufficientLDS)
752  return false;
753 
// The LDS array is replicated once per work item, so the reservation is
// scaled by the maximum flat workgroup size.
754  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
755  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
756 
757  const DataLayout &DL = Mod->getDataLayout();
758 
759  unsigned Align = I.getAlignment();
760  if (Align == 0)
761  Align = DL.getABITypeAlignment(I.getAllocatedType());
762 
763  // FIXME: This computed padding is likely wrong since it depends on inverse
764  // usage order.
765  //
766  // FIXME: It is also possible that if we're allowed to use all of the memory
767  // could could end up using more than the maximum due to alignment padding.
768 
769  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
770  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
771  NewSize += AllocSize;
772 
773  if (NewSize > LocalMemLimit) {
774  LLVM_DEBUG(dbgs() << " " << AllocSize
775  << " bytes of local memory not available to promote\n");
776  return false;
777  }
778 
779  CurrentLocalMemUsage = NewSize;
780 
781  std::vector<Value*> WorkList;
782 
// Collect every pointer-typed use that must be retyped; bail out before
// making any change if any use cannot be handled.
783  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
784  LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
785  return false;
786  }
787 
788  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");
789 
790  Function *F = I.getParent()->getParent();
791 
// One array element per work item, named "<function>.<alloca name>".
792  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
793  GlobalVariable *GV = new GlobalVariable(
794  *Mod, GVTy, false, GlobalValue::InternalLinkage,
795  UndefValue::get(GVTy),
796  Twine(F->getName()) + Twine('.') + I.getName(),
797  nullptr,
801  GV->setAlignment(I.getAlignment());
802 
803  Value *TCntY, *TCntZ;
804 
// Compute the linearized work-item id:
// TID = ((tidx * cnty + tidy) * cntz) + tidz, built with nuw/nsw muls.
805  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
806  Value *TIdX = getWorkitemID(Builder, 0);
807  Value *TIdY = getWorkitemID(Builder, 1);
808  Value *TIdZ = getWorkitemID(Builder, 2);
809 
810  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
811  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
812  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
813  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
814  TID = Builder.CreateAdd(TID, TIdZ);
815 
816  Value *Indices[] = {
818  TID
819  };
820 
// Replace the alloca with this work item's slot in the LDS array.
// mutateType is needed because the address space changes.
821  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
822  I.mutateType(Offset->getType());
823  I.replaceAllUsesWith(Offset);
824  I.eraseFromParent();
825 
// Fix up every collected use: retype derived pointers, rewrite constant
// null operands, and re-emit or erase whitelisted intrinsics.
826  for (Value *V : WorkList) {
827  CallInst *Call = dyn_cast<CallInst>(V);
828  if (!Call) {
829  if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
830  Value *Src0 = CI->getOperand(0);
831  Type *EltTy = Src0->getType()->getPointerElementType();
833 
834  if (isa<ConstantPointerNull>(CI->getOperand(0)))
835  CI->setOperand(0, ConstantPointerNull::get(NewTy));
836 
837  if (isa<ConstantPointerNull>(CI->getOperand(1)))
838  CI->setOperand(1, ConstantPointerNull::get(NewTy));
839 
840  continue;
841  }
842 
843  // The operand's value should be corrected on its own and we don't want to
844  // touch the users.
845  if (isa<AddrSpaceCastInst>(V))
846  continue;
847 
848  Type *EltTy = V->getType()->getPointerElementType();
850 
851  // FIXME: It doesn't really make sense to try to do this for all
852  // instructions.
853  V->mutateType(NewTy);
854 
855  // Adjust the types of any constant operands.
856  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
857  if (isa<ConstantPointerNull>(SI->getOperand(1)))
858  SI->setOperand(1, ConstantPointerNull::get(NewTy));
859 
860  if (isa<ConstantPointerNull>(SI->getOperand(2)))
861  SI->setOperand(2, ConstantPointerNull::get(NewTy));
862  } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
863  for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
864  if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
865  Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
866  }
867  }
868 
869  continue;
870  }
871 
// Remaining WorkList entries are whitelisted intrinsic calls (guaranteed
// by isCallPromotable), so the cast<> below is safe.
872  IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
873  Builder.SetInsertPoint(Intr);
874  switch (Intr->getIntrinsicID()) {
877  // These intrinsics are for address space 0 only
878  Intr->eraseFromParent();
879  continue;
880  case Intrinsic::memcpy: {
// Recreate the mem* intrinsics so their signatures pick up the new
// pointer address space.
881  MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
882  Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
883  MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
884  MemCpy->getLength(), MemCpy->isVolatile());
885  Intr->eraseFromParent();
886  continue;
887  }
888  case Intrinsic::memmove: {
889  MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
890  Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
891  MemMove->getRawSource(), MemMove->getSourceAlignment(),
892  MemMove->getLength(), MemMove->isVolatile());
893  Intr->eraseFromParent();
894  continue;
895  }
896  case Intrinsic::memset: {
897  MemSetInst *MemSet = cast<MemSetInst>(Intr);
898  Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
899  MemSet->getLength(), MemSet->getDestAlignment(),
900  MemSet->isVolatile());
901  Intr->eraseFromParent();
902  continue;
903  }
908  Intr->eraseFromParent();
909  // FIXME: I think the invariant marker should still theoretically apply,
910  // but the intrinsics need to be changed to accept pointers with any
911  // address space.
912  continue;
913  case Intrinsic::objectsize: {
914  Value *Src = Intr->getOperand(0);
915  Type *SrcTy = Src->getType()->getPointerElementType();
916  Function *ObjectSize = Intrinsic::getDeclaration(Mod,
919  );
920 
921  CallInst *NewCall = Builder.CreateCall(
922  ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
923  Intr->replaceAllUsesWith(NewCall);
924  Intr->eraseFromParent();
925  continue;
926  }
927  default:
928  Intr->print(errs());
929  llvm_unreachable("Don't know how to promote alloca intrinsic use.");
930  }
931  }
932  return true;
933 }
934 
// Factory used by the AMDGPU target to add this pass to the legacy pass
// pipeline. NOTE(review): extraction dropped the signature line (inner
// line 935, presumably `FunctionPass *llvm::createAMDGPUPromoteAlloca() {`).
936  return new AMDGPUPromoteAlloca();
937 }
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* inrinsic call or load.
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1477
Value * getValueOperand()
Definition: Instructions.h:410
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool isSimple() const
Definition: Instructions.h:277
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1393
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:529
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:299
bool isPromoteAllocaEnabled() const
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
unsigned getSourceAlignment() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:630
Value * getValue() const
Address space for private memory.
Definition: AMDGPU.h:261
This class wraps the llvm.memset intrinsic.
FunctionPass * createAMDGPUPromoteAlloca()
Type * getPointerOperandType() const
Definition: Instructions.h:416
Metadata node.
Definition: Metadata.h:864
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
Definition: IRBuilder.h:404
An instruction for reading from memory.
Definition: Instructions.h:168
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:692
Hexagon Common GEP
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
Definition: InstrTypes.h:1261
Value * getLength() const
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:265
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static const AMDGPUSubtarget & get(const MachineFunction &MF)
static Value * calculateVectorIndex(Value *Ptr, const std::map< GetElementPtrInst *, Value *> &GEPIdx)
static bool canVectorizeInst(Instruction *Inst, User *User)
Address space for constant memory (VTX2)
Definition: AMDGPU.h:259
This class represents the LLVM 'select' instruction.
Type * getPointerElementType() const
Definition: Type.h:376
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:113
This class wraps the llvm.memmove intrinsic.
int getLocalMemorySize() const
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:652
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned Intr
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
CallInst * CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memmove between the specified pointers.
Definition: IRBuilder.h:494
unsigned getDestAlignment() const
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool tryPromoteAllocaToVector(AllocaInst *Alloca)
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:92
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:290
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
Class to represent array types.
Definition: DerivedTypes.h:369
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:621
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
An instruction for storing to memory.
Definition: Instructions.h:321
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
iterator begin()
Definition: Function.h:656
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
Value * getOperand(unsigned i) const
Definition: User.h:170
Class to represent pointers.
Definition: DerivedTypes.h:467
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1401
static Value * GEPToVectorIndex(GetElementPtrInst *GEP)
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition: IRBuilder.h:1580
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
Represent the analysis usage information of a pass.
Address space for local memory.
Definition: AMDGPU.h:260
This instruction compares its operands according to the predicate given to the constructor.
void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
Definition: AsmWriter.cpp:4148
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Value * getPointerOperand()
Definition: Instructions.h:285
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:495
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1415
bool isVolatile() const
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1048
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:106
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:34
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
This is the superclass of the array and vector type classes.
Definition: DerivedTypes.h:343
unsigned getNumOperands() const
Definition: User.h:192
#define DEBUG_TYPE
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static bool isCallPromotable(CallInst *CI)
iterator end()
Definition: BasicBlock.h:271
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213
Module.h This file contains the declarations for the Module class.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
CallInst * CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Definition: IRBuilder.h:446
This class wraps the llvm.memcpy intrinsic.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
Value * getRawSource() const
Return the arguments to the instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
The access may modify the value stored in memory.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:164
Class to represent vector types.
Definition: DerivedTypes.h:393
iterator_range< user_iterator > users()
Definition: Value.h:400
static VectorType * arrayTypeToVecType(ArrayType *ArrayTy)
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
void addDereferenceableAttr(unsigned i, uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
Definition: InstrTypes.h:1321
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:216
Type * getPointerOperandType() const
Definition: Instructions.h:288
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:581
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Rename collisions when linking (static functions).
Definition: GlobalValue.h:56
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:604
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:606
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1124
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
Type * getElementType() const
Definition: DerivedTypes.h:360
char & AMDGPUPromoteAllocaID
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size...
bool isSimple() const
Definition: Instructions.h:402
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
#define LLVM_DEBUG(X)
Definition: Debug.h:123
virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const =0
Value * getPointerOperand()
Definition: Instructions.h:413
Value * getRawDest() const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:67
an instruction to allocate memory on the stack
Definition: Instructions.h:60
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1245