74 #define DEBUG_TYPE "interleaved-access" 77 "lower-interleaved-accesses",
78 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
91 StringRef getPassName()
const override {
return "Interleaved Access Pass"; }
108 bool lowerInterleavedLoad(
LoadInst *LI,
128 "Lower interleaved memory accesses to target specific intrinsics",
false,
136 return new InterleavedAccess();
145 for (Index = 0; Index < Factor; Index++) {
150 for (; i < Mask.
size(); i++)
151 if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
154 if (i == Mask.
size())
167 unsigned &
Index,
unsigned MaxFactor) {
172 for (Factor = 2; Factor <= MaxFactor; Factor++)
191 unsigned MaxFactor,
unsigned OpNumElts) {
192 unsigned NumElts = Mask.
size();
197 for (Factor = 2; Factor <= MaxFactor; Factor++) {
198 if (NumElts % Factor)
201 unsigned LaneLen = NumElts / Factor;
209 for (; I < Factor; I++) {
210 unsigned SavedLaneValue;
211 unsigned SavedNoUndefs = 0;
214 for (J = 0; J < LaneLen - 1; J++) {
216 unsigned Lane = J * Factor +
I;
217 unsigned NextLane = Lane + Factor;
218 int LaneValue = Mask[Lane];
219 int NextLaneValue = Mask[NextLane];
222 if (LaneValue >= 0 && NextLaneValue >= 0 &&
223 LaneValue + 1 != NextLaneValue)
227 if (LaneValue >= 0 && NextLaneValue < 0) {
228 SavedLaneValue = LaneValue;
237 if (SavedNoUndefs > 0 && LaneValue < 0) {
239 if (NextLaneValue >= 0 &&
240 SavedLaneValue + SavedNoUndefs != (
unsigned)NextLaneValue)
252 }
else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
254 StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
255 }
else if (SavedNoUndefs > 0) {
257 StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
264 if (StartMask + LaneLen > OpNumElts*2)
276 bool InterleavedAccess::lowerInterleavedLoad(
290 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
295 if (!SVI || !isa<UndefValue>(SVI->
getOperand(1)))
301 if (Shuffles.
empty())
304 unsigned Factor,
Index;
315 Type *VecTy = Shuffles[0]->getType();
319 for (
unsigned i = 1; i < Shuffles.
size(); i++) {
320 if (Shuffles[i]->
getType() != VecTy)
332 if (!tryReplaceExtracts(Extracts, Shuffles))
335 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
338 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
341 for (
auto SVI : Shuffles)
348 bool InterleavedAccess::tryReplaceExtracts(
353 if (Extracts.
empty())
360 for (
auto *Extract : Extracts) {
362 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
363 auto Index = IndexOperand->getSExtValue();
368 for (
auto *Shuffle : Shuffles) {
371 if (!DT->dominates(Shuffle, Extract))
378 Shuffle->getShuffleMask(Indices);
379 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
380 if (Indices[
I] == Index) {
381 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
382 "Vector operations do not match");
383 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
388 if (ReplacementMap.
count(Extract))
394 if (!ReplacementMap.
count(Extract))
400 for (
auto &Replacement : ReplacementMap) {
401 auto *Extract = Replacement.first;
402 auto *Vector = Replacement.second.first;
403 auto Index = Replacement.second.second;
406 Extract->eraseFromParent();
412 bool InterleavedAccess::lowerInterleavedStore(
427 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
430 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
440 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
446 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
448 TLI =
TM.getSubtargetImpl(F)->getTargetLowering();
449 MaxFactor = TLI->getMaxSupportedInterleaveFactor();
453 bool Changed =
false;
456 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I))
457 Changed |= lowerInterleavedLoad(LI, DeadInsts);
460 Changed |= lowerInterleavedStore(SI, DeadInsts);
463 for (
auto I : DeadInsts)
464 I->eraseFromParent();
Value * getValueOperand()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
This class represents lattice values for constants.
INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) INITIALIZE_PASS_END(InterleavedAccess
void push_back(const T &Elt)
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
This instruction constructs a fixed permutation of two input vectors.
An instruction for reading from memory.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
Type * getType() const
All values are typed, get the type of this value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
An instruction for storing to memory.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static bool isReInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned MaxFactor, unsigned OpNumElts)
Check if the mask can be used in an interleaved store.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * getOperand(unsigned i) const
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
The instances of the Type class are immutable: once they are created, they are never changed.
size_t size() const
size - Get the array size.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target specific intrinsics.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
static wasm::ValType getType(const TargetRegisterClass *RC)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void initializeInterleavedAccessPass(PassRegistry &)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned getVectorNumElements() const
Lower interleaved memory accesses to target specific intrinsics
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor)
Check if the mask is a DE-interleave mask for an interleaved load.
LLVM_NODISCARD bool empty() const
StringRef getName() const
Return a constant reference to the value's name.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Primary interface to the complete machine description for the target machine.
bool hasOneUse() const
Return true if there is exactly one user of this value.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
inst_range instructions(Function *F)
Legacy analysis pass which computes a DominatorTree.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
bool empty() const
empty - Check if the array is empty.
This file describes how to lower LLVM code to machine code.