22 #define DEBUG_TYPE "aarch64tti" 38 return (CallerBits & CalleeBits) == CalleeBits;
54 return (64 - LZ + 15) / 16;
68 ImmVal = Imm.
sext((BitSize + 63) & ~0x3fU);
73 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
92 unsigned ImmIdx = ~0U;
96 case Instruction::GetElementPtr:
105 case Instruction::Sub:
106 case Instruction::Mul:
107 case Instruction::UDiv:
108 case Instruction::SDiv:
109 case Instruction::URem:
110 case Instruction::SRem:
111 case Instruction::And:
112 case Instruction::Or:
113 case Instruction::Xor:
114 case Instruction::ICmp:
118 case Instruction::Shl:
119 case Instruction::LShr:
120 case Instruction::AShr:
124 case Instruction::Trunc:
125 case Instruction::ZExt:
126 case Instruction::SExt:
127 case Instruction::IntToPtr:
128 case Instruction::PtrToInt:
129 case Instruction::BitCast:
130 case Instruction::PHI:
139 int NumConstants = (BitSize + 63) / 64;
168 int NumConstants = (BitSize + 63) / 64;
191 if (TyWidth == 32 || TyWidth == 64)
197 bool AArch64TTIImpl::isWideningInstruction(
Type *DstTy,
unsigned Opcode,
202 auto toVectorTy = [&](
Type *ArgTy) {
221 case Instruction::Sub:
231 if (Args.
size() != 2 ||
232 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
233 !Args[1]->hasOneUse())
235 auto *Extend = cast<CastInst>(Args[1]);
240 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
246 Type *SrcTy = toVectorTy(Extend->getSrcTy());
248 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
253 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
254 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
258 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
264 assert(ISD &&
"Invalid opcode");
269 auto *SingleUser = cast<Instruction>(*I->
user_begin());
271 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
279 if (
auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
281 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
402 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
410 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) &&
"Invalid type");
424 if (!VecLT.second.isVector() || !TLI->
isTypeLegal(DstVT))
429 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
438 case Instruction::SExt:
443 case Instruction::ZExt:
444 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
461 if (!LT.second.isVector())
465 unsigned Width = LT.second.getVectorNumElements();
466 Index = Index % Width;
491 if (isWideningInstruction(Ty, Opcode, Args))
499 Opd1PropInfo, Opd2PropInfo);
541 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
546 Opd1PropInfo, Opd2PropInfo);
551 Opd2Info, Opd1PropInfo, Opd2PropInfo);
553 Opd2Info, Opd1PropInfo, Opd2PropInfo);
567 return (Cost + 1) * LT.first;
577 unsigned NumVectorInstToHideOverhead = 10;
578 int MaxMergeDistance = 64;
582 return NumVectorInstToHideOverhead;
597 const int AmortizationCost = 20;
599 VectorSelectTbl[] = {
626 LT.second.is128BitVector() && Alignment < 16) {
632 const int AmortizationCost = 6;
634 return LT.first * 2 * AmortizationCost;
638 unsigned ProfitableNumElements;
641 ProfitableNumElements = 4;
645 ProfitableNumElements = 8;
649 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
651 return NumVectorizableInstsToAmortize * NumVecElts * 2;
664 bool UseMaskForGaps) {
665 assert(Factor >= 2 &&
"Invalid interleave factor");
666 assert(isa<VectorType>(VecTy) &&
"Expect a vector type");
668 if (!UseMaskForCond && !UseMaskForGaps &&
669 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
676 if (NumElts % Factor == 0 &&
682 Alignment, AddressSpace,
683 UseMaskForCond, UseMaskForGaps);
688 for (
auto *
I : Tys) {
689 if (!
I->isVectorTy())
691 if (
I->getScalarSizeInBits() *
I->getVectorNumElements() == 128)
709 enum { MaxStridedLoads = 7 };
711 int StridedLoads = 0;
714 for (
const auto BB : L->
blocks()) {
715 for (
auto &
I : *BB) {
726 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
735 if (StridedLoads > MaxStridedLoads / 2)
742 int StridedLoads = countStridedLoads(L, SE);
744 <<
" strided loads\n");
774 Type *ExpectedType) {
788 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
794 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
803 if (Inst->
getType() == ExpectedType)
855 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) {
856 bool Considerable =
false;
857 AllowPromotionWithoutCommonHeader =
false;
858 if (!isa<SExtInst>(&I))
860 Type *ConsideredSExtType =
862 if (I.
getType() != ConsideredSExtType)
872 if (GEPInst->getNumOperands() > 2) {
873 AllowPromotionWithoutCommonHeader =
true;
899 assert(isa<VectorType>(Ty) &&
"Expected Ty to be a vector type");
902 case Instruction::FAdd:
903 case Instruction::FMul:
904 case Instruction::And:
905 case Instruction::Or:
906 case Instruction::Xor:
907 case Instruction::Mul:
911 case Instruction::ICmp:
912 return (ScalarBits < 64) &&
914 case Instruction::FCmp:
923 bool IsPairwiseForm) {
931 assert(ISD &&
"Invalid opcode");
945 return LT.first * Entry->Cost;
997 return LT.first * Entry->Cost;
Type * getVectorElementType() const
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
APInt sext(unsigned width) const
Sign extend to a new width.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isMisaligned128StoreSlow() const
This class represents lattice values for constants.
Type * getElementType(unsigned N) const
Cost tables and simple lookup functions.
unsigned getPrefetchDistance()
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
unsigned getLoopDepth() const
Return the nesting level of this loop.
unsigned getNumElements() const
Random access to the elements.
The main scalar evolution driver.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Type Conversion Cost Table.
An instruction for reading from memory.
static IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getMaxInterleaveFactor(unsigned VF)
unsigned getMinPrefetchStride()
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lowering.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
unsigned getBitWidth() const
Return the number of bits in the APInt.
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
Value * getArgOperand(unsigned i) const
unsigned getCacheLineSize() const
unsigned short MatchingId
const FeatureBitset & getFeatureBits() const
Class to represent struct types.
int getIntImmCost(const APInt &Imm, Type *Ty)
Calculate the cost of materializing the given constant.
bool isIntegerTy() const
True if this is an instance of IntegerType.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table; TypeTy must be comparable to CompareTy by ==. ...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
int64_t getSExtValue() const
Get sign extended value.
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Type * getType() const
All values are typed, get the type of this value.
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
This node represents a polynomial recurrence on the trip count of the specified loop.
Simple integer binary arithmetic operators.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned getPrefetchDistance() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
unsigned getWideningBaseCost() const
Value * getOperand(unsigned i) const
Analysis containing CSE Info
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
initializer< Ty > init(const Ty &Val)
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
Container class for subtarget features.
int getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
The instances of the Type class are immutable: once they are created, they are never changed...
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
size_t size() const
size - Get the array size.
unsigned getMaxPrefetchIterationsAhead()
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
This file provides a helper that implements much of the TTI interface in terms of the target-independent...
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I=nullptr)
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
const TargetMachine & getTargetMachine() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
unsigned getMinPrefetchStride() const
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
unsigned getCacheLineSize()
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
int getCostOfKeepingLiveOverCall(ArrayRef< Type *> Tys)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
unsigned getVectorNumElements() const
Class to represent vector types.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
unsigned getMaxInterleaveFactor() const
int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)
Select(COND, TRUEVAL, FALSEVAL).
iterator_range< user_iterator > users()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
const CostTblEntry * CostTableLookup(ArrayRef< CostTblEntry > Tbl, int ISD, MVT Ty)
Find in cost table; TypeTy must be comparable to CompareTy by ==.
unsigned getNumArgOperands() const
Bitwise operators - logical and, logical or, logical xor.
This class represents an analyzed expression in the program.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Represents a single loop in the control flow graph.
const Function * getParent() const
Return the enclosing method, or null if none.
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
unsigned getMaxPrefetchIterationsAhead() const
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Primary interface to the complete machine description for the target machine.
Type * getElementType() const
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
bool hasOneUse() const
Return true if there is exactly one user of this value.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
TRUNCATE - Completely drop the high bits.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
iterator_range< block_iterator > blocks() const
Information about a load/store intrinsic defined by the target.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
A wrapper class for inspecting calls to intrinsic functions.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
unsigned getVectorInsertExtractBaseCost() const
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...