19 if (
VectorType *VTy = dyn_cast<VectorType>(Type)) {
20 return VTy->getBitWidth() / 8;
28 unsigned SrcAlign,
unsigned DestAlign,
29 bool SrcIsVolatile,
bool DstIsVolatile,
45 uint64_t LoopEndCount = CopyLen->
getZExtValue() / LoopOpSize;
47 unsigned SrcAS = cast<PointerType>(SrcAddr->
getType())->getAddressSpace();
48 unsigned DstAS = cast<PointerType>(DstAddr->
getType())->getAddressSpace();
50 if (LoopEndCount != 0) {
63 if (SrcAddr->
getType() != SrcOpType) {
64 SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
66 if (DstAddr->
getType() != DstOpType) {
67 DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
79 LoopBuilder.
CreateStore(Load, DstGEP, DstIsVolatile);
91 uint64_t BytesCopied = LoopEndCount * LoopOpSize;
92 uint64_t RemainingBytes = CopyLen->
getZExtValue() - BytesCopied;
98 SrcAlign = std::min(SrcAlign, LoopOpSize);
99 DestAlign = std::min(DestAlign, LoopOpSize);
103 SrcAlign, DestAlign);
105 for (
auto OpTy : RemainingOps) {
108 uint64_t GepIndex = BytesCopied / OperandSize;
109 assert(GepIndex * OperandSize == BytesCopied &&
110 "Division should have no Remainder!");
115 : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
116 Value *SrcGEP = RBuilder.CreateInBoundsGEP(
118 Value *
Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
124 : RBuilder.CreateBitCast(DstAddr, DstPtrType);
125 Value *DstGEP = RBuilder.CreateInBoundsGEP(
127 RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
129 BytesCopied += OperandSize;
133 "Bytes copied should match size in the call!");
138 Value *CopyLen,
unsigned SrcAlign,
139 unsigned DestAlign,
bool SrcIsVolatile,
144 PreLoopBB->
splitBasicBlock(InsertBefore,
"post-loop-memcpy-expansion");
155 unsigned SrcAS = cast<PointerType>(SrcAddr->
getType())->getAddressSpace();
156 unsigned DstAS = cast<PointerType>(DstAddr->
getType())->getAddressSpace();
159 if (SrcAddr->
getType() != SrcOpType) {
160 SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
162 if (DstAddr->
getType() != DstOpType) {
163 DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
170 "expected size argument to memcpy to be an integer type!");
172 bool LoopOpIsInt8 = LoopOpType == Int8Type;
174 Value *RuntimeLoopCount = LoopOpIsInt8 ?
176 PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
187 LoopBuilder.
CreateStore(Load, DstGEP, DstIsVolatile);
195 Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
196 Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
204 Ctx,
"loop-memcpy-residual-header", PreLoopBB->
getParent(),
nullptr);
212 PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
213 LoopBB, ResHeaderBB);
217 LoopBuilder.
CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
223 ResLoopBB, PostLoopBB);
228 ResBuilder.
CreatePHI(CopyLenType, 2,
"residual-loop-index");
235 Value *FullOffset = ResBuilder.
CreateAdd(RuntimeBytesCopied, ResidualIndex);
241 ResBuilder.
CreateStore(Load, DstGEP, DstIsVolatile);
245 ResidualIndex->
addIncoming(ResNewIndex, ResLoopBB);
249 ResBuilder.
CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
257 PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
261 LoopBuilder.
CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
290 unsigned SrcAlign,
unsigned DestAlign,
291 bool SrcIsVolatile,
bool DstIsVolatile) {
303 SrcAddr, DstAddr,
"compare_src_dst");
314 CopyBackwardsBB->
setName(
"copy_backwards");
316 CopyForwardBB->
setName(
"copy_forward");
318 ExitBB->
setName(
"memmove_done");
330 PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
331 Value *IndexPtr = LoopBuilder.CreateSub(
333 Value *Element = LoopBuilder.CreateLoad(
334 LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr),
"element");
335 LoopBuilder.CreateStore(Element,
336 LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
337 LoopBuilder.CreateCondBr(
349 PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0,
"index_ptr");
350 Value *FwdElement = FwdLoopBuilder.CreateLoad(
351 FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi),
"element");
352 FwdLoopBuilder.CreateStore(
353 FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
354 Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
356 FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
379 unsigned dstAS = cast<PointerType>(DstAddr->
getType())->getAddressSpace();
380 DstAddr = Builder.CreateBitCast(DstAddr,
383 Builder.CreateCondBr(
static unsigned getLoopOperandSizeInBytes(Type *Type)
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount of casting needed in parts of the compiler.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
This class represents lattice values for constants.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
unsigned getSourceAlignment() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
This class wraps the llvm.memset intrinsic.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Value * getLength() const
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
LLVMContext & getContext() const
Get the context in which this basic block lives.
This class wraps the llvm.memmove intrinsic.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
void setName(const Twine &Name)
Change the name of the value.
unsigned getDestAlignment() const
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Type * getType() const
All values are typed, get the type of this value.
Class to represent pointers.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
This is an important base class in LLVM.
This instruction compares its operands according to the predicate given to the constructor.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Class to represent integer types.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
This is the shared class of boolean and integer constants.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
This class wraps the llvm.memcpy intrinsic.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Value * getRawSource() const
Return the arguments to the instruction.
Class to represent vector types.
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, unsigned Align, bool IsVolatile)
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant...
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile)
void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant...
static IntegerType * getInt8Ty(LLVMContext &C)
Value * getRawDest() const
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI)
Expand MemCpy as a loop. MemCpy is not deleted.
const BasicBlock * getParent() const