51 #define DEBUG_TYPE "scalarizer" 58 cl::desc(
"Allow the scalarizer pass to scalarize loads and store"));
68 using ScatterMap = std::map<Value *, ValueVector>;
78 Scatterer() =
default;
84 ValueVector *cachePtr =
nullptr);
87 Value *operator[](
unsigned I);
90 unsigned size()
const {
return Size; }
96 ValueVector *CachePtr;
104 struct FCmpSplitter {
105 FCmpSplitter(
FCmpInst &fci) : FCI(fci) {}
117 struct ICmpSplitter {
118 ICmpSplitter(
ICmpInst &ici) : ICI(ici) {}
130 struct BinarySplitter {
146 uint64_t getElemAlign(
unsigned I) {
147 return MinAlign(VecAlign, I * ElemSize);
154 Type *ElemTy =
nullptr;
157 uint64_t VecAlign = 0;
160 uint64_t ElemSize = 0;
163 class ScalarizerVisitor :
public InstVisitor<ScalarizerVisitor, bool> {
165 ScalarizerVisitor(
unsigned ParallelLoopAccessMDKind)
166 : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) {
182 bool visitPHINode(
PHINode &PHI);
190 bool canTransferMetadata(
unsigned Kind);
191 void transferMetadata(
Instruction *
Op,
const ValueVector &CV);
196 template<
typename T>
bool splitBinary(
Instruction &,
const T &);
200 ScatterMap Scattered;
203 unsigned ParallelLoopAccessMDKind;
221 "Scalarize vector operations",
false,
false)
226 ValueVector *cachePtr)
227 : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
228 Type *Ty = V->getType();
231 Ty = PtrTy->getElementType();
234 Tmp.resize(
Size,
nullptr);
235 else if (CachePtr->empty())
236 CachePtr->resize(
Size,
nullptr);
238 assert(
Size == CachePtr->size() &&
"Inconsistent vector sizes");
242 Value *Scatterer::operator[](
unsigned I) {
243 ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
252 PtrTy->getAddressSpace());
257 V->getName() +
".i" +
Twine(I));
292 unsigned ParallelLoopAccessMDKind =
294 ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
295 return Impl.visit(F);
299 return new ScalarizerLegacyPass();
302 bool ScalarizerVisitor::visit(
Function &F) {
303 assert(Gathered.empty() && Scattered.empty());
323 if (
Argument *VArg = dyn_cast<Argument>(V)) {
328 return Scatterer(BB, BB->
begin(), V, &Scattered[V]);
346 void ScalarizerVisitor::gather(
Instruction *
Op,
const ValueVector &CV) {
352 transferMetadata(Op, CV);
356 ValueVector &SV = Scattered[
Op];
358 for (
unsigned I = 0,
E = SV.size(); I !=
E; ++
I) {
370 Gathered.
push_back(GatherList::value_type(Op, &SV));
375 bool ScalarizerVisitor::canTransferMetadata(
unsigned Tag) {
382 || Tag == ParallelLoopAccessMDKind
388 void ScalarizerVisitor::transferMetadata(
Instruction *Op,
const ValueVector &CV) {
391 for (
unsigned I = 0,
E = CV.size(); I !=
E; ++
I) {
392 if (
Instruction *New = dyn_cast<Instruction>(CV[I])) {
393 for (
const auto &MD : MDs)
394 if (canTransferMetadata(MD.first))
395 New->setMetadata(MD.first, MD.second);
404 bool ScalarizerVisitor::getVectorLayout(
Type *Ty,
unsigned Alignment,
412 Layout.ElemTy = Layout.VecTy->getElementType();
418 Layout.VecAlign = Alignment;
427 template<
typename Splitter>
428 bool ScalarizerVisitor::splitBinary(
Instruction &I,
const Splitter &
Split) {
437 assert(Op0.size() == NumElems &&
"Mismatched binary operation");
438 assert(Op1.size() == NumElems &&
"Mismatched binary operation");
440 Res.resize(NumElems);
441 for (
unsigned Elem = 0; Elem < NumElems; ++Elem)
442 Res[Elem] =
Split(Builder, Op0[Elem], Op1[Elem],
461 bool ScalarizerVisitor::splitCall(
CallInst &CI) {
477 ValueVector ScalarOperands(NumArgs);
480 Scattered.
resize(NumArgs);
484 for (
unsigned I = 0; I != NumArgs; ++
I) {
487 Scattered[
I] = scatter(&CI, OpI);
488 assert(Scattered[I].
size() == NumElems &&
"mismatched call operands");
490 ScalarOperands[
I] = OpI;
494 ValueVector Res(NumElems);
495 ValueVector ScalarCallOps(NumArgs);
501 for (
unsigned Elem = 0; Elem < NumElems; ++Elem) {
502 ScalarCallOps.clear();
504 for (
unsigned J = 0; J != NumArgs; ++J) {
506 ScalarCallOps.push_back(ScalarOperands[J]);
508 ScalarCallOps.push_back(Scattered[J][Elem]);
511 Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
519 bool ScalarizerVisitor::visitSelectInst(
SelectInst &
SI) {
526 Scatterer Op1 = scatter(&SI, SI.
getOperand(1));
527 Scatterer Op2 = scatter(&SI, SI.
getOperand(2));
528 assert(Op1.size() == NumElems &&
"Mismatched select");
529 assert(Op2.size() == NumElems &&
"Mismatched select");
531 Res.resize(NumElems);
534 Scatterer Op0 = scatter(&SI, SI.
getOperand(0));
535 assert(Op0.size() == NumElems &&
"Mismatched select");
536 for (
unsigned I = 0; I < NumElems; ++
I)
541 for (
unsigned I = 0; I < NumElems; ++
I)
549 bool ScalarizerVisitor::visitICmpInst(
ICmpInst &ICI) {
550 return splitBinary(ICI, ICmpSplitter(ICI));
553 bool ScalarizerVisitor::visitFCmpInst(
FCmpInst &FCI) {
554 return splitBinary(FCI, FCmpSplitter(FCI));
558 return splitBinary(BO, BinarySplitter(BO));
575 Scatterer
Base = scatter(&GEPI, Op0);
579 for (
unsigned I = 0; I < NumIndices; ++
I) {
587 Ops[
I] = scatter(&GEPI, Op);
591 Res.resize(NumElems);
592 for (
unsigned I = 0; I < NumElems; ++
I) {
594 Indices.
resize(NumIndices);
595 for (
unsigned J = 0; J < NumIndices; ++J)
596 Indices[J] = Ops[J][I];
601 NewGEPI->setIsInBounds();
607 bool ScalarizerVisitor::visitCastInst(
CastInst &CI) {
614 Scatterer Op0 = scatter(&CI, CI.
getOperand(0));
615 assert(Op0.size() == NumElems &&
"Mismatched cast");
617 Res.resize(NumElems);
618 for (
unsigned I = 0; I < NumElems; ++
I)
625 bool ScalarizerVisitor::visitBitCastInst(
BitCastInst &BCI) {
628 if (!DstVT || !SrcVT)
632 unsigned SrcNumElems = SrcVT->getNumElements();
634 Scatterer Op0 = scatter(&BCI, BCI.
getOperand(0));
636 Res.resize(DstNumElems);
638 if (DstNumElems == SrcNumElems) {
639 for (
unsigned I = 0; I < DstNumElems; ++
I)
642 }
else if (DstNumElems > SrcNumElems) {
645 unsigned FanOut = DstNumElems / SrcNumElems;
648 for (
unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
649 Value *V = Op0[Op0I];
653 while ((VI = dyn_cast<Instruction>(V)) &&
657 Scatterer Mid = scatter(&BCI, V);
658 for (
unsigned MidI = 0; MidI < FanOut; ++MidI)
659 Res[ResI++] = Mid[MidI];
663 unsigned FanIn = SrcNumElems / DstNumElems;
666 for (
unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
668 for (
unsigned MidI = 0; MidI < FanIn; ++MidI)
671 +
".upto" +
Twine(MidI));
686 Scatterer Op0 = scatter(&SVI, SVI.
getOperand(0));
687 Scatterer Op1 = scatter(&SVI, SVI.
getOperand(1));
689 Res.resize(NumElems);
691 for (
unsigned I = 0; I < NumElems; ++
I) {
695 else if (
unsigned(Selector) < Op0.size())
696 Res[I] = Op0[Selector];
698 Res[
I] = Op1[Selector - Op0.size()];
704 bool ScalarizerVisitor::visitPHINode(
PHINode &PHI) {
712 Res.resize(NumElems);
715 for (
unsigned I = 0; I < NumElems; ++
I)
719 for (
unsigned I = 0; I < NumOps; ++
I) {
722 for (
unsigned J = 0; J < NumElems; ++J)
723 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
729 bool ScalarizerVisitor::visitLoadInst(
LoadInst &LI) {
740 unsigned NumElems = Layout.VecTy->getNumElements();
744 Res.resize(NumElems);
746 for (
unsigned I = 0; I < NumElems; ++
I)
753 bool ScalarizerVisitor::visitStoreInst(
StoreInst &SI) {
765 unsigned NumElems = Layout.VecTy->getNumElements();
768 Scatterer Val = scatter(&SI, FullValue);
771 Stores.resize(NumElems);
772 for (
unsigned I = 0; I < NumElems; ++
I) {
773 unsigned Align = Layout.getElemAlign(I);
776 transferMetadata(&SI, Stores);
780 bool ScalarizerVisitor::visitCallInst(
CallInst &CI) {
781 return splitCall(CI);
786 bool ScalarizerVisitor::finish() {
789 if (Gathered.empty() && Scattered.empty())
791 for (
const auto &GMI : Gathered) {
793 ValueVector &CV = *GMI.second;
802 if (isa<PHINode>(Op))
804 for (
unsigned I = 0; I < Count; ++
I)
819 unsigned ParallelLoopAccessMDKind =
821 ScalarizerVisitor Impl(ParallelLoopAccessMDKind);
822 bool Changed = Impl.visit(F);
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Value * getValueOperand()
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string, and methods for querying it. ...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Type * getSrcTy() const
Return the source type, as a convenience.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
This class represents an incoming formal argument to a Function.
Base class for instruction visitors.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly, instead of converting the strin...
This class represents a function call, abstracting a target machine's calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
This instruction constructs a fixed permutation of two input vectors.
An instruction for reading from memory.
bool isVectorTy() const
True if this is an instance of VectorType.
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
iterator begin()
Instruction iterator methods.
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
amdgpu Simplify well known AMD library false Value Value const Twine & Name
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", "Scalarize vector operations", false, false) INITIALIZE_PASS_END(ScalarizerLegacyPass
void initializeScalarizerLegacyPassPass(PassRegistry &)
This class represents the LLVM 'select' instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This is the base class for all instructions that perform data casts.
LLVMContext & getContext() const
Get the global data context.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
uint64_t getNumElements() const
Type * getSourceElementType() const
void visit(Iterator Start, Iterator End)
unsigned getNumIndices() const
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Type * getType() const
All values are typed, get the type of this value.
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
An instruction for storing to memory.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void takeName(Value *V)
Transfer the name from V to this value.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Value * getOperand(unsigned i) const
Class to represent pointers.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
bool isVoidTy() const
Return true if this is 'void'.
const BasicBlock & getEntryBlock() const
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
An instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
This instruction inserts a single (scalar) element into a VectorType value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
A set of analyses that are preserved following a run of a transformation pass.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
static bool isTriviallyScalariable(Intrinsic::ID ID)
This instruction compares its operands according to the predicate given to the constructor.
FunctionPass class - This class is used to implement most global optimizations.
Scalarize vector operations
Value * getPointerOperand()
self_iterator getIterator()
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
static int getMaskValue(const Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
This is the shared class of boolean and integer constants.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Module.h This file contains the declarations for the Module class.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
This file declares helper objects for defining debug options that can be configured via the command l...
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Type * getDestTy() const
Return the destination type, as a convenience.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
void setOperand(unsigned i, Value *Val)
unsigned getVectorNumElements() const
This pass converts vector operations into scalar operations, in order to expose optimization opportun...
Class to represent vector types.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
void push_back(pointer val)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
static cl::opt< bool > ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and store"))
unsigned getNumArgOperands() const
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
StringRef getName() const
Return a constant reference to the value's name.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
FunctionPass * createScalarizerPass()
Create a legacy pass manager instance of the Scalarizer pass.
A container for analyses that lazily runs them and caches their results.
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma-separated items into a vector of strings.
VectorType * getType() const
Overload to return most specific vector type.
Value * getPointerOperand()
const BasicBlock * getParent() const
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.