#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

template <typename T> class ArrayRef;
class GetElementPtrInst;
/// Create a stride shuffle mask.
Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
                           unsigned Stride, unsigned VF);

/// Create a sequential shuffle mask.
Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
                               unsigned NumInts, unsigned NumUndefs);
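// Example (a sketch, not part of the header): the constant masks these two
// helpers produce for small arguments, with `Builder` an IRBuilder<>:
//
//   createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4)
//     -> <0, 2, 4, 6>                              ; every other element
//   createSequentialMask(Builder, /*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4)
//     -> <0, 1, 2, 3, undef, undef, undef, undef>  ; tail padded with undefs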
InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
    : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}

InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
    : Align(Align), InsertPos(Instr) {
  assert(Align && "The alignment should be non-zero");

  Factor = std::abs(Stride);
  assert(Factor > 1 && "Invalid interleave factor");

  Reverse = Stride < 0;
  Members[0] = Instr;
}
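// Example (a sketch): constructing a group from a single strided access,
// where `Store` is a hypothetical StoreInst with stride -2. A negative
// stride encodes a reverse access:
//
//   InterleaveGroup<Instruction> G(Store, /*Stride=*/-2, /*Align=*/4);
//   // => G.getFactor() == 2, G.isReverse() == true, G.getMember(0) == Store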
bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
  assert(NewAlign && "The new member's alignment should be non-zero");

  int Key = Index + SmallestKey;
  // Skip if there is already a member with the same index.
  if (Members.find(Key) != Members.end())
    return false;

  if (Key > LargestKey) {
    // The largest index is always less than the interleave factor.
    if (Index >= static_cast<int>(Factor))
      return false;
    LargestKey = Key;
  } else if (Key < SmallestKey) {
    // The largest index is always less than the interleave factor.
    if (LargestKey - Key >= static_cast<int>(Factor))
      return false;
    SmallestKey = Key;
  }

  // It's always safe to select the minimum alignment.
  Align = std::min(Align, NewAlign);
  Members[Key] = Instr;
  return true;
}
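// Example (a sketch): how the key bookkeeping admits out-of-order insertion.
// For a group G with Factor == 3 built around an initial member I0, so that
// SmallestKey == LargestKey == 0, and hypothetical members I1..I3:
//
//   G.insertMember(I1, /*Index=*/1, Align);  // Key = 1;  LargestKey becomes 1
//   G.insertMember(I2, /*Index=*/-1, Align); // Key = -1; SmallestKey becomes -1
//   G.insertMember(I3, /*Index=*/-1, Align); // Key = -2; rejected, because
//                                            // LargestKey - Key == 3 >= Factor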
InstTy *getMember(unsigned Index) const {
  int Key = SmallestKey + Index;
  auto Member = Members.find(Key);
  if (Member == Members.end())
    return nullptr;

  return Member->second;
}
unsigned getIndex(const InstTy *Instr) const {
  for (auto I : Members)
    if (I.second == Instr)
      return I.first - SmallestKey;

  llvm_unreachable("InterleaveGroup contains no such member");
}
void addMetadata(InstTy *NewInst) const;
bool requiresScalarEpilogue() const {
  // If the last member of the group exists, no scalar epilogue is needed.
  if (getMember(getFactor() - 1))
    return false;
  // This must be a group of loads with gaps: groups of stores and reversed
  // accesses with gaps get invalidated.
  assert(!getMember(0)->mayWriteToMemory() &&
         "Group should have been invalidated");
  assert(!isReverse() && "Group should have been invalidated");
  return true;
}
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                      DominatorTree *DT, LoopInfo *LI,
                      const LoopAccessInfo *LAI)
    : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
void analyzeInterleaving(bool EnableMaskedInterleavedGroup);
/// Invalidate groups, e.g., in case all blocks in loop will be predicated
/// contrary to the original assumption.
void reset() {
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
  // Avoid releasing a pointer twice.
  for (auto &I : InterleaveGroupMap)
    DelSet.insert(I.second);
  for (auto *Ptr : DelSet)
    delete Ptr;
  InterleaveGroupMap.clear();
  RequiresScalarEpilogue = false;
}
/// Check if \p Instr belongs to any interleave group.
bool isInterleaved(Instruction *Instr) const {
  return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
}
/// Get the interleave group that \p Instr belongs to, or nullptr if none.
InterleaveGroup<Instruction> *
getInterleaveGroup(const Instruction *Instr) const {
  if (InterleaveGroupMap.count(Instr))
    return InterleaveGroupMap.find(Instr)->second;
  return nullptr;
}
iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
getInterleaveGroups() {
  return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
}
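// Example (a sketch, hypothetical client code): driving the analysis and
// querying its results, where `IAI` is an InterleavedAccessInfo built for
// the loop and `I` is a memory instruction inside it:
//
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (auto *Group = IAI.getInterleaveGroup(&I)) {
//     unsigned Factor = Group->getFactor();  // e.g., 2 for pairs
//     (void)Factor;
//   }
//   for (auto *Group : IAI.getInterleaveGroups())
//     if (Group->requiresScalarEpilogue())
//       ; // plan a scalar tail for the vectorized loop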
void invalidateGroupsRequiringScalarEpilogue();
bool RequiresScalarEpilogue = false;
struct StrideDescriptor {
  StrideDescriptor() = default;
  StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                   unsigned Align)
      : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}

  // The access's stride. It is negative for a reverse access.
  int64_t Stride = 0;

  // The scalar expression of this access.
  const SCEV *Scev = nullptr;

  // The size of the memory object.
  uint64_t Size = 0;

  // The alignment of this access.
  unsigned Align = 0;
};
using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
504 "Already in an interleaved access group");
505 InterleaveGroupMap[Instr] =
507 InterleaveGroups.
insert(InterleaveGroupMap[Instr]);
508 return InterleaveGroupMap[Instr];
void releaseGroup(InterleaveGroup<Instruction> *Group) {
  for (unsigned i = 0; i < Group->getFactor(); i++)
    if (Instruction *Member = Group->getMember(i))
      InterleaveGroupMap.erase(Member);

  InterleaveGroups.erase(Group);
  delete Group;
}
/// Collect all the accesses with a constant stride in program order.
void collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const ValueToValueMap &Strides);
static bool isStrided(int Stride);
/// Returns true if dependence information is available from LoopAccessInfo.
bool areDependencesValid() const {
  return LAI && LAI->getDepChecker().getDependences() != nullptr;
}
bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                               StrideEntry *B) const {
  // A is potentially the source of a dependence.
  auto *Src = A->first;
  auto SrcDes = A->second;

  // B is potentially the sink of a dependence.
  auto *Sink = B->first;
  auto SinkDes = B->second;

  // Code motion for interleaved accesses can't violate WAR dependences.
  // Thus, reordering is legal if the source isn't a write.
  if (!Src->mayWriteToMemory())
    return true;

  // At least one of the accesses must be strided.
  if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
    return true;

  // If dependence information is not available from LoopAccessInfo,
  // conservatively assume the accesses can't be reordered.
  if (!areDependencesValid())
    return false;

  // Reordering is legal if there is no known dependence from Src to Sink.
  return Dependences.find(Src) == Dependences.end() ||
         !Dependences.lookup(Src).count(Sink);
}
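// Example (a sketch): for a hypothetical loop body
//
//   A[i] = X;          // Src: a unit-stride store (not "strided" here)
//   Y    = B[2 * i];   // Sink: a strided load that grouping may hoist
//
// Src writes to memory and the sink is strided, so the early returns do not
// fire; hoisting the load above the store is then reported legal only when
// LoopAccessInfo recorded no dependence from the store to the load, and is
// rejected outright when no dependence information is available.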
/// Collect the dependences from LoopAccessInfo.
void collectDependences() {
  if (!areDependencesValid())
    return;
  auto *Deps = LAI->getDepChecker().getDependences();
  for (auto Dep : *Deps)
    Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
}
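// Example (a sketch): after collectDependences(), `Dependences` maps each
// dependence source to the set of its destinations, so a recorded
// (Src -> Sink) dependence can be queried as:
//
//   Dependences.lookup(Src).count(Sink) == 1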
#endif // LLVM_ANALYSIS_VECTORUTILS_H