LLVM  8.0.1
NVPTXLowerAggrCopies.cpp
Go to the documentation of this file.
1 //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // \file
11 // Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
12 // the size is large or is not a compile-time constant.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "NVPTXLowerAggrCopies.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DataLayout.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/Intrinsics.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/Support/Debug.h"
31 
32 #define DEBUG_TYPE "nvptx"
33 
34 using namespace llvm;
35 
36 namespace {
37 
38 // Function pass (derives from FunctionPass) that, per its pass name, lowers
// aggregate copies and memory intrinsics into loops.
39 struct NVPTXLowerAggrCopies : public FunctionPass {
// Pass identification member handed to the FunctionPass constructor below.
40  static char ID;
41 
42  NVPTXLowerAggrCopies() : FunctionPass(ID) {}
43 
// NOTE(review): the body of getAnalysisUsage is not visible in this listing
// (the embedded line numbers jump from 44 to 47). runOnFunction calls
// getAnalysis<TargetTransformInfoWrapperPass>(), so presumably that analysis
// is declared as required here -- confirm against the upstream source.
44  void getAnalysisUsage(AnalysisUsage &AU) const override {
47  }
48 
49  bool runOnFunction(Function &F) override;
50 
// Size threshold used by runOnFunction: aggregate loads whose type store size
// is at least this value, and mem* intrinsics whose constant length is at
// least this value, are expanded into loops. Both compared quantities are
// byte counts (DataLayout::getTypeStoreSize / MemIntrinsic::getLength).
51  static const unsigned MaxAggrCopySize = 128;
52 
// Human-readable pass name.
53  StringRef getPassName() const override {
54  return "Lower aggregate copies/intrinsics into loops";
55  }
56 };
57 
59 
63 
// NOTE(review): this is the visible part of a function body whose signature
// and first declarations are missing from the listing (embedded line numbers
// 58, 60-62 and 65 are absent). Presumably it is
// NVPTXLowerAggrCopies::runOnFunction(Function &F), and AggrLoads, MemCalls
// and Context are declared in the missing lines -- confirm against upstream.
//
// Visible behavior: collect qualifying aggregate load/store pairs and mem*
// intrinsic calls, expand each into a loop, erase the original instructions,
// and return true if anything was collected (false otherwise).
64  const DataLayout &DL = F.getParent()->getDataLayout();
66  const TargetTransformInfo &TTI =
67  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
68 
69  // Collect all aggregate loads and mem* calls.
// Collection is done up front; the instructions are only rewritten in the
// loops after this one.
70  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
71  for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
72  ++II) {
73  if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
// Only loads with exactly one use are candidates.
74  if (!LI->hasOneUse())
75  continue;
76 
// Loads whose store size is below the threshold are left untouched.
77  if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
78  continue;
79 
// The single user must be a store, and the load must be that store's
// operand 0 (operand 1 is later used as the destination address).
80  if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
81  if (SI->getOperand(0) != LI)
82  continue;
83  AggrLoads.push_back(LI);
84  }
85  } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
86  // Convert intrinsic calls with variable size or with constant size
87  // larger than the MaxAggrCopySize threshold.
// Note the comparison is >=, so a constant length equal to the threshold
// is also converted.
88  if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
89  if (LenCI->getZExtValue() >= MaxAggrCopySize) {
90  MemCalls.push_back(IntrCall);
91  }
92  } else {
93  MemCalls.push_back(IntrCall);
94  }
95  }
96  }
97  }
98 
// Nothing to lower: report that the function was not modified.
99  if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
100  return false;
101  }
102 
103  //
104  // Do the transformation of an aggr load/copy/set to a loop
105  //
106  for (LoadInst *LI : AggrLoads) {
// NOTE(review): the dyn_cast result is dereferenced without a null check.
// The collection loop above only queues loads whose single user is a
// StoreInst, so cast<StoreInst> would state that invariant explicitly.
107  StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
108  Value *SrcAddr = LI->getOperand(0);
109  Value *DstAddr = SI->getOperand(1);
// NOTE(review): getTypeStoreSize returns uint64_t; it is narrowed to
// unsigned here and then emitted as an i32 constant. Presumably aggregate
// sizes never exceed 32 bits in practice -- confirm.
110  unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
111  ConstantInt *CopyLen =
112  ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
113 
// The store is passed as the first argument (labelled ConvertedInst here);
// alignment and volatility are taken from the load for the source side and
// from the store for the destination side.
114  createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
115  /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
116  /* CopyLen */ CopyLen,
117  /* SrcAlign */ LI->getAlignment(),
118  /* DestAlign */ SI->getAlignment(),
119  /* SrcIsVolatile */ LI->isVolatile(),
120  /* DstIsVolatile */ SI->isVolatile(), TTI);
121 
// Erase the store first: it is the only user of the load.
122  SI->eraseFromParent();
123  LI->eraseFromParent();
124  }
125 
126  // Transform mem* intrinsic calls.
127  for (MemIntrinsic *MemCall : MemCalls) {
128  if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
129  expandMemCpyAsLoop(Memcpy, TTI);
130  } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
131  expandMemMoveAsLoop(Memmove);
132  } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
133  expandMemSetAsLoop(Memset);
134  }
// The original intrinsic call is erased here, after the expand* call.
135  MemCall->eraseFromParent();
136  }
137 
// At least one instruction was rewritten, so report the function as modified.
138  return true;
139 }
140 
141 } // namespace
142 
143 namespace llvm {
145 }
146 
147 INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
148  "Lower aggregate copies, and llvm.mem* intrinsics into loops",
149  false, false)
150 
// NOTE(review): the signature line of this function is missing from the
// listing (embedded line number 151 is absent); presumably it is
// `FunctionPass *llvm::createLowerAggrCopies() {` -- confirm against upstream.
// Returns a newly allocated NVPTXLowerAggrCopies pass instance.
152  return new NVPTXLowerAggrCopies();
153 }
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string, and methods for querying it. ...
Definition: DataLayout.h:111
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVMContext & Context
This class represents lattice values for constants.
Definition: AllocatorList.h:24
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
iterator end()
Definition: Function.h:658
This class wraps the llvm.memset intrinsic.
F(f)
An instruction for reading from memory.
Definition: Instructions.h:168
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
AnalysisUsage & addRequired()
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371
This class wraps the llvm.memmove intrinsic.
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
An instruction for storing to memory.
Definition: Instructions.h:321
iterator begin()
Definition: Function.h:656
Value * getOperand(unsigned i) const
Definition: User.h:170
static bool runOnFunction(Function &F, bool PostInlining)
Wrapper pass for TargetTransformInfo.
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &)
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
size_t size() const
Definition: SmallVector.h:53
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
This class wraps the llvm.memcpy intrinsic.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:354
FunctionPass * createLowerAggrCopies()
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:366
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:419
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
This pass exposes codegen information to IR-level passes.
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI)
Expand MemCpy as a loop. MemCpy is not deleted.
INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies", "Lower aggregate copies, and llvm.mem* intrinsics into loops", false, false) FunctionPass *llvm