#define DEBUG_TYPE "vectorutils"

// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
    "max-interleave-group-factor", cl::Hidden,
    cl::desc("Maximum factor for an interleaved access group (default = 8)"),
    cl::init(8));
// Excerpt: bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
// Identifies if the intrinsic has a scalar operand; for the intrinsics handled
// here the scalar operand is the second one (ScalarOpdIdx == 1).
  // ...
    return (ScalarOpdIdx == 1);
  // ...
// Excerpt: unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
// Find the operand of the GEP that should be checked for consecutive stores.
  // ...
  std::advance(GEPTI, LastOperand - 2);
  // ...

// Excerpt: Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
// If the argument is a GEP, return the operand identified by
// getGEPInductionOperand.
  // ...
  if (i != InductionOperand &&
      // ...

// Excerpt: Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
// If a value has only one user that is a CastInst of the requested type, return it.
  Value *UniqueCast = nullptr;
  // ...
  if (CI && CI->getType() == Ty) {
    // ...
// Excerpt: Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
// Get the stride of a pointer access in a loop (the symbolic stride value).
  // ...
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;
  // ...
  Value *OrigPtr = Ptr;
  // ...
  int64_t PtrAccessSize = 1;
  // ...
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
      // ...
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }
  // ...
  // Strip off a casted recurrence so the caller can reapply the cast.
  Type *StripedOffRecurrenceCast = nullptr;
  // ...
    StripedOffRecurrenceCast = C->getType();
  // ...
  if (StripedOffRecurrenceCast)
    // ...
// Excerpt: Value *findScalarElement(Value *V, unsigned EltNo)
// Given a vector and an element number, see if the scalar value is already
// available as a register.
  // Constants hand the requested lane back directly.
  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  // insertelement with a constant index: if the indices match, the inserted
  // scalar is the answer.
  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
    // ...
      return III->getOperand(1);
    // ...
  }

  // shufflevector: follow the mask element into the corresponding source vector.
  int InEl = SVI->getMaskValue(EltNo);
  // ...
  if (InEl < (int)LHSWidth)
    // ...

  // A vector add whose constant operand is zero in lane EltNo can be looked
  // through to the other operand.
  if (Elt->isNullValue())
    // ...
// Excerpt: const Value *getSplatValue(const Value *V)
// Get splat value if the input is a splat vector or return nullptr.
  if (auto *C = dyn_cast<Constant>(V))
    if (isa<VectorType>(V->getType()))
      return C->getSplatValue();
  // ...
  // A splat built as insertelement + shufflevector: every mask element must
  // select lane 0 (or be undef) ...
  for (int MaskElt : ShuffleInst->getShuffleMask())
    if (MaskElt != 0 && MaskElt != -1)
      return nullptr;
  // ... and the shuffle's first operand must be an insertelement into lane 0.
  auto *InsertEltInst =
      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
    return nullptr;
  // ...
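For reference, the idiom the checks above recognize is the usual insertelement/shufflevector splat pair. A minimal standalone sketch (plain C++, no LLVM; isSplatMask is a hypothetical helper) that applies the same mask test:

#include <cassert>
#include <vector>

// Returns true when every mask element selects lane 0 of the first source
// (or is undef, encoded as -1), i.e. the shufflevector half of a splat:
//   %ins   = insertelement <4 x i32> undef, i32 %v, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
static bool isSplatMask(const std::vector<int> &Mask) {
  for (int Elt : Mask)
    if (Elt != 0 && Elt != -1)
      return false;
  return true;
}

int main() {
  assert(isSplatMask({0, 0, 0, 0}));
  assert(isSplatMask({0, -1, 0, 0}));  // undef lanes are tolerated
  assert(!isSplatMask({0, 1, 2, 3}));  // identity shuffle, not a splat
  return 0;
}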
// Excerpt: computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks,
//                                   DemandedBits &DB,
//                                   const TargetTransformInfo *TTI)
// Compute a map of integer instructions to their minimum legal type size.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      // ...
      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          // ...
        SeenExtFromIllegalType = true;

      // Only scalar integer truncations and comparisons up to 64 bits seed
      // the worklist.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Skip truncations to types that are already legal.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;
        // ...
      }
    }

  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Propagate demanded bits over equivalence classes of connected values.
  while (!Worklist.empty()) {
    // ...
    if (Visited.count(Val))
      continue;
    // ...
    if (!isa<Instruction>(Val))
      continue;
    // ...
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Pointer casts and other unhandled instructions pin the whole class to
    // the full 64 bits.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        // ...
      DBits[Leader] |= ~0ULL;
    // ...
    if (DBits[Leader] == ~0ULL)
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      // ...
    }
  }

  // A class with an integer user outside the map cannot be shrunk.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        // ...

  // ...
  uint64_t LeaderDemandedBits = 0;
  // ...
    LeaderDemandedBits |= DBits[*MI];

  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                   llvm::countLeadingZeros(LeaderDemandedBits);
  // ...
  if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
    // ...
  }
  // ...
  if (!isa<Instruction>(*MI))
    continue;
  Type *Ty = (*MI)->getType();
  // ...
    Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
  // ...
    MinBWs[cast<Instruction>(*MI)] = MinBW;
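To make the width computation concrete: the union of the demanded bits over the equivalence class gives the highest bit anyone needs, 64 minus the leading zeros gives that width, and the result is rounded up to a power of two (via isPowerOf2_64/NextPowerOf2 in the full function). A standalone arithmetic sketch; demandedWidth and roundUpToPow2 are hypothetical stand-ins for countLeadingZeros/NextPowerOf2:

#include <cassert>
#include <cstdint>

// 64 minus the number of leading zero bits = index of the highest demanded bit + 1.
static uint64_t demandedWidth(uint64_t DemandedBits) {
  unsigned LeadingZeros = 0;
  for (uint64_t Bit = 1ULL << 63; Bit && !(DemandedBits & Bit); Bit >>= 1)
    ++LeadingZeros;
  return 64 - LeadingZeros;
}

// Round up to the next power of two before recording the width in MinBWs.
static uint64_t roundUpToPow2(uint64_t W) {
  uint64_t P = 1;
  while (P < W)
    P <<= 1;
  return P;
}

int main() {
  // Only bits 0..11 are demanded anywhere in the equivalence class:
  uint64_t LeaderDemandedBits = 0xFFF;
  assert(demandedWidth(LeaderDemandedBits) == 12);
  assert(roundUpToPow2(12) == 16);  // the class can be narrowed to i16
  return 0;
}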
// Excerpt: static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
// Add all access groups in AccGroups to List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // A single access group is interpreted as a list containing itself.
  if (isValidAsAccessGroup(AccGroups)) {
    List.insert(AccGroups);
    return;
  }

  for (auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    // ...
  }
}
// Excerpt: MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
// Compute the union of two access-group lists.
  if (AccGroups1 == AccGroups2)
    return AccGroups1;
  // ...
  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());
  // ...
// Excerpt: MDNode *intersectAccessGroups(const Instruction *Inst1,
//                                        const Instruction *Inst2)
// Compute the access-group list of access groups that Inst1 and Inst2 are
// both in.
  // If neither instruction may access memory, the result is irrelevant.
  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  // ...
  if (AccGroupSet2.count(MD1))
    // ...
  // ...
    auto *Item = cast<MDNode>(Node.get());
    // ...
    if (AccGroupSet2.count(Item))
      // ...
  // ...
  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());
  // ...
// Excerpt: Instruction *propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL)
// Combine the metadata of kinds [MD_tbaa, MD_alias_scope, MD_noalias,
// MD_fpmath, MD_nontemporal, MD_access_group] across all instructions in VL
// and attach the result to Inst.
  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
    // ...
  }
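A hypothetical call site (a sketch, not taken from this file): a vectorizer that has replaced the scalar members of a group with one wide load and wants the wide instruction to carry only metadata valid for every member.

// Sketch only; annotateWideLoad is a made-up helper name.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"

static void annotateWideLoad(llvm::Instruction *WideLoad,
                             llvm::ArrayRef<llvm::Value *> GroupMembers) {
  // Combines tbaa/alias.scope/noalias/fpmath/nontemporal/access-group metadata
  // across GroupMembers and attaches the result to WideLoad.
  llvm::propagateMetadata(WideLoad, GroupMembers);
}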
// Excerpt: createBitMaskForGaps -- create a mask that filters the members of
// an interleave group where there are gaps: one i1 per lane, true only for
// positions backed by a member.
Constant *llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
                                     const InterleaveGroup<Instruction> &Group) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}
// Excerpt: createReplicatedMask -- create a shuffle mask with each of the VF
// source elements replicated ReplicationFactor times.
Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
                                     unsigned ReplicationFactor, unsigned VF) {
  SmallVector<Constant *, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(Builder.getInt32(i));

  return ConstantVector::get(MaskVec);
}
// Excerpt: createInterleaveMask -- create an interleave shuffle mask that
// picks lane i of each of the NumVecs concatenated sources in turn.
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
                                     unsigned NumVecs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(Builder.getInt32(j * VF + i));

  return ConstantVector::get(Mask);
}
// Excerpt: createStrideMask -- create a stride shuffle mask selecting every
// Stride-th element starting at Start.
Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
                                 unsigned Stride, unsigned VF) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Builder.getInt32(Start + i * Stride));

  return ConstantVector::get(Mask);
}
// Excerpt: createSequentialMask -- create a sequential shuffle mask of NumInts
// consecutive indices starting at Start, padded with NumUndefs undef lanes.
Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                                     unsigned NumInts, unsigned NumUndefs) {
  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Builder.getInt32(Start + i));

  Constant *Undef = UndefValue::get(Builder.getInt32Ty());
  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(Undef);

  return ConstantVector::get(Mask);
}
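For concreteness, the index patterns these helpers emit for small inputs. A standalone sketch (plain C++ that mirrors the loops above and just prints integer masks; it does not call the LLVM helpers, and undef lanes are shown as -1):

#include <cstdio>
#include <vector>

static void print(const char *Name, const std::vector<int> &Mask) {
  std::printf("%s: <", Name);
  for (size_t i = 0; i < Mask.size(); ++i)
    std::printf("%s%d", i ? ", " : "", Mask[i]);
  std::printf(">\n");
}

int main() {
  // createReplicatedMask(RF=3, VF=2) -> <0, 0, 0, 1, 1, 1>
  std::vector<int> Rep;
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      Rep.push_back(i);
  print("replicated", Rep);

  // createInterleaveMask(VF=4, NumVecs=2) -> <0, 4, 1, 5, 2, 6, 3, 7>
  std::vector<int> Ilv;
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 2; ++j)
      Ilv.push_back(j * 4 + i);
  print("interleave", Ilv);

  // createStrideMask(Start=0, Stride=2, VF=4) -> <0, 2, 4, 6>
  std::vector<int> Strided;
  for (int i = 0; i < 4; ++i)
    Strided.push_back(0 + i * 2);
  print("stride", Strided);

  // createSequentialMask(Start=0, NumInts=4, NumUndefs=2) -> <0, 1, 2, 3, undef, undef>
  std::vector<int> Seq;
  for (int i = 0; i < 4; ++i)
    Seq.push_back(i);
  Seq.push_back(-1);
  Seq.push_back(-1);
  print("sequential", Seq);
  return 0;
}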
// Excerpt: static Value *concatenateTwoVectors(IRBuilder<> &Builder,
//                                              Value *V1, Value *V2)
// Helper for concatenateVectors(): widens the narrower input with undef lanes,
// then emits a single shufflevector.
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = VecTy1->getNumElements();
  unsigned NumElts2 = VecTy2->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend the shorter vector with undefs.
    // ...
  }
  // ...
// Excerpt: Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs)
// Concatenate a list of vectors by pairwise reduction: each round
// concatenates neighbouring vectors until a single vector remains.
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");
  // ...
  do {
    // ...
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");
      // ...
    }

    // An odd vector count carries the last vector into the next round.
    if (NumVecs % 2 != 0)
      // ...

    // ...
    NumVecs = ResList.size();
  } while (NumVecs > 1);
  // ...
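A worked reduction order for five inputs. Standalone sketch: concatenation of IR vectors is modelled with std::vector appends, and concat2/concatAll are hypothetical stand-ins for concatenateTwoVectors/concatenateVectors:

#include <cassert>
#include <vector>

using Vec = std::vector<int>;

// Model concatenateTwoVectors() as plain appending.
static Vec concat2(const Vec &A, const Vec &B) {
  Vec R = A;
  R.insert(R.end(), B.begin(), B.end());
  return R;
}

// Pairwise reduction in the same order as the excerpt above:
// {A,B,C,D,E} -> {AB, CD, E} -> {ABCD, E} -> {ABCDE}.
static Vec concatAll(std::vector<Vec> ResList) {
  unsigned NumVecs = ResList.size();
  assert(NumVecs > 1 && "Should be at least two vectors");
  do {
    std::vector<Vec> Tmp;
    for (unsigned i = 0; i + 1 < NumVecs; i += 2)
      Tmp.push_back(concat2(ResList[i], ResList[i + 1]));
    if (NumVecs % 2 != 0)
      Tmp.push_back(ResList[NumVecs - 1]);  // odd element rides along
    ResList = Tmp;
    NumVecs = ResList.size();
  } while (NumVecs > 1);
  return ResList[0];
}

int main() {
  Vec Out = concatAll({{0}, {1}, {2}, {3}, {4}});
  assert((Out == Vec{0, 1, 2, 3, 4}));
  return 0;
}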
// Excerpt: a stride is "interleavable" if its magnitude is between 2 and the
// configurable maximum group factor.
bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}
// Excerpt: record a StrideDescriptor (stride, SCEV, access size, alignment)
// for every memory access in the loop.
void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const ValueToValueMap &Strides) {
  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
  // ...
    for (auto &I : *BB) {
      // ...
      int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
                                    // ...
      // ... (Scev, Size and Align come from replaceSymbolicStrideSCEV,
      //      DL.getTypeAllocSize and getLoadStoreAlignment)
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
    }
// Excerpt: analyze the interleaved accesses and collect them in interleave
// groups.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  // ...
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();
  // ...

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form an interleaved group.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride.
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      // ...
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // ...
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If A is already in a (store) group, release that group: its members
        // could otherwise be sunk below B, which this dependence forbids.
        if (isInterleaved(A)) {
          // ...
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }
        // ...
        break;
      }

      // Both A and B must have an allowable (constant, small) stride.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      if (isInterleaved(A) ||
          // ...
        continue;
      // ...
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // The distance from A to B must be a constant multiple of the size.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // Members of a predicated interleave group currently must reside in the
      // same block.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // A's index is B's index plus their distance in units of the access size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << " into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;
        // ...
      }
    }
  }

  // Remove interleaved store groups with gaps.
  for (auto *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }

  // Remove interleaved load groups whose first or last member may wrap around
  // the address space, and flag groups with gaps that need a scalar epilogue.
  for (auto *Group : LoadGroups) {
    // A full group needs none of the checks below.
    if (Group->getNumMembers() == Group->getFactor())
      continue;
    // ...
    // Case 1: the first member's pointer may wrap.
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }

    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      // Case 2: the last member's pointer may wrap.
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: a group with a gap at the end. Reverse groups are rejected;
      // forward groups require a scalar epilogue iteration so the widened
      // load does not read past the end of the data.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
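To make the index arithmetic concrete, assume a loop whose body reads p[2*i+1] first (access A) and p[2*i] second (access B, found first by the bottom-up scan) with 4-byte elements. B creates the group at index 0; A lies one element further along, so DistanceToB = 4 and IndexA = 0 + 4/4 = 1. A standalone arithmetic sketch of that membership test (variable names mirror the excerpt, not the LLVM API):

#include <cassert>
#include <cstdint>

int main() {
  const int64_t Size = 4;        // DesB.Size: bytes per element
  const int64_t Stride = 2 * Size; // both accesses advance 8 bytes per iteration
  const int64_t DistanceToB = Size; // addr(A) - addr(B) for the assumed layout

  assert(Stride / Size == 2 && "factor-2 group");
  assert(DistanceToB % Size == 0 && "distance must be a multiple of the size");

  const int IndexB = 0;                          // B created the group at index 0
  const int IndexA = IndexB + DistanceToB / Size;
  assert(IndexA == 1);                           // A fills the group's second slot
  return 0;
}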
// Excerpt: invalidate groups that require a scalar epilogue (due to gaps)
// when such an epilogue is not allowed or cannot be masked.
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group triggered the requirement, there is nothing to do.
  if (!requiresScalarEpilogue())
    return;

  // Collect the groups first so none is released twice while iterating.
  // ...
  for (auto &I : InterleaveGroupMap) {
    // ...
  }
  for (auto *Ptr : DelSet) {
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroup(Ptr);
  }

  RequiresScalarEpilogue = false;
}
// Excerpt: InterleaveGroup<InstT>::addMetadata -- the generic template is
// unreachable; the Instruction specialization gathers the member instructions
// into VL and calls propagateMetadata() on the newly created instruction.
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}
// ...
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  propagateMetadata(NewInst, VL);
Cross-referenced declarations for the symbols used above:

void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
A parsed version of the target data layout string and methods for querying it. ...
Tracking metadata reference owned by Metadata.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
void setInsertPos(InstTy *Inst)
uint64_t getZExtValue() const
Get zero extended value.
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock *> Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register...
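As an illustration (hypothetical IR, not from this file): given %v = insertelement <4 x float> %w, float %x, i32 2, findScalarElement(%v, 2) returns %x, while findScalarElement(%v, 1) keeps looking through %w. A standalone model of the insertelement case (plain C++, no LLVM; InsertElement and findScalar are made-up names):

#include <cassert>
#include <optional>
#include <vector>

// An "insertelement" places Scalar at lane Idx of Base.
struct InsertElement {
  std::vector<std::optional<int>> Base; // lanes of the source vector
  int Scalar;                           // value being inserted
  unsigned Idx;                         // constant insertion index
};

// Return the inserted scalar when the lanes match; otherwise fall back to the
// base vector (the real code recurses through further insertelements).
static std::optional<int> findScalar(const InsertElement &IE, unsigned EltNo) {
  if (EltNo == IE.Idx)
    return IE.Scalar;
  return IE.Base[EltNo];
}

int main() {
  InsertElement IE{{std::nullopt, 10, std::nullopt, 30}, 99, 2};
  assert(findScalar(IE, 2) == 99);  // lane 2 is the freshly inserted scalar
  assert(findScalar(IE, 1) == 10);  // lane 1 comes from the base vector
  return 0;
}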
const T & front() const
Return the first element of the SetVector.
ArrayRef< T > getArrayRef() const
size_type size() const
Determine the number of elements in the SetVector.
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void push_back(const T &Elt)
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value *> VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal, MD_access_group].
The main scalar evolution driver.
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
This class represents a function call, abstracting a target machine's calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
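As an illustration of the unit convention (the result is in elements, negative for reverse traversal): reading p[i] gives stride 1, p[N - i] gives -1, and p[2*i] gives 2, which is what isStrided() above accepts as an interleave candidate. A standalone sketch of the byte-to-element conversion (values are illustrative assumptions, not derived through LLVM):

#include <cassert>
#include <cstdint>

// Convert a constant per-iteration byte step into an element-sized stride.
static int64_t strideInElements(int64_t ByteStepPerIteration, int64_t ElementSize) {
  assert(ByteStepPerIteration % ElementSize == 0 && "not a whole-element step");
  return ByteStepPerIteration / ElementSize;
}

int main() {
  // float p[]; loop bodies reading p[i], p[N - i], p[2*i]:
  assert(strideInElements(4, 4) == 1);   // consecutive forward access
  assert(strideInElements(-4, 4) == -1); // reverse access
  assert(strideInElements(8, 4) == 2);   // every other element: interleave candidate
  return 0;
}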
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
This instruction constructs a fixed permutation of two input vectors.
LLVMContext & getContext() const
All values hold a context through their type.
This class implements a map that also provides access to all stored values in a deterministic order...
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
An instruction for reading from memory.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
bool isVectorTy() const
True if this is an instance of VectorType.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
This is the base class for unary cast operator classes.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Intrinsic::ID getIntrinsicForCallSite(ImmutableCallSite ICS, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
bool match(Val *V, const Pattern &P)
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
This is the base class for all instructions that perform data casts.
static Value * concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
bool isIntegerTy() const
True if this is an instance of IntegerType.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
RPOIterator endRPO() const
uint64_t getNumElements() const
bool remove(const value_type &X)
Remove an item from the set vector.
This node represents multiplication of some number of SCEVs.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
const APInt & getAPInt() const
InstTy * getMember(unsigned Index) const
Get the member with the given index Index.
int64_t getSExtValue() const
Get sign extended value.
Constant * createSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Type * getType() const
All values are typed, get the type of this value.
bool insert(const value_type &X)
Insert a new element into the SetVector.
member_iterator member_begin(iterator I) const
This node represents a polynomial recurrence on the trip count of the specified loop.
op_range operands() const
unsigned getNumMembers() const
LLVMContext & getContext() const
The group of interleaved loads/stores sharing the same stride and close to each other.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
static MDNode * intersect(MDNode *A, MDNode *B)
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction...
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Value * getOperand(unsigned i) const
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Class to represent pointers.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
This instruction inserts a single (scalar) element into a VectorType value.
Value * concatenateVectors(IRBuilder<> &Builder, ArrayRef< Value *> Vecs)
Concatenate a list of vectors.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
size_t size() const
size - Get the array size.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
This is an important base class in LLVM.
Constant * createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
member_iterator member_end() const
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
unsigned getIndex(const InstTy *Instr) const
Get the index for the given member.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
Constant * createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Type * getIndexedType() const
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
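A sketch of the list semantics (an access group is either a single distinct MDNode or a list of them; the size-0/size-1 cases in the excerpt collapse the result back to nullptr or the single node). Modeled standalone with string names instead of MDNodes:

#include <cassert>
#include <set>
#include <string>

// Access-group "lists" modeled as sets of node names.
using GroupList = std::set<std::string>;

static GroupList unite(const GroupList &A, const GroupList &B) {
  GroupList U = A;
  U.insert(B.begin(), B.end());
  return U;
}

int main() {
  GroupList A = {"!a"};
  GroupList AB = {"!a", "!b"};
  assert(unite(A, AB) == AB);      // {!a} united with {!a, !b} is {!a, !b}
  assert(unite({}, A) == A);       // singleton result collapses to the node itself
  assert(unite({}, {}).empty());   // empty result means no access group at all
  return 0;
}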
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A SetVector that performs no allocations if smaller than a certain size.
unsigned getNumOperands() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Provides information about what library functions are available for the current target.
LLVM_NODISCARD T pop_back_val()
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
reverse_iterator rbegin()
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
unsigned getFactor() const
APInt getDemandedBits(Instruction *I)
Return the bits demanded from instruction I.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned getVectorNumElements() const
Store the result of a depth first search within basic blocks contained by a single loop...
Class to represent vector types.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Class for arbitrary precision integers.
iterator_range< user_iterator > users()
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Constant * createStrideMask(IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
This class represents an analyzed expression in the program.
LLVM_NODISCARD bool empty() const
Represents a single loop in the control flow graph.
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Type * getResultElementType() const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
unsigned getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
Constant * createInterleaveMask(IRBuilder<> &Builder, unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
bool insertMember(InstTy *Instr, int Index, unsigned NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
unsigned getNumOperands() const
Return number of MDNode operands.
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
static Constant * get(ArrayRef< Constant *> V)
Type * getElementType() const
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
const BasicBlock * getParent() const
This class represents a constant integer value.
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
gep_type_iterator gep_type_begin(const User *GEP)
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.