26 #define DEBUG_TYPE "systemztti" 82 case Instruction::GetElementPtr:
99 case Instruction::ICmp:
110 case Instruction::Sub:
120 case Instruction::Mul:
127 case Instruction::Or:
128 case Instruction::Xor:
138 case Instruction::And:
156 case Instruction::Shl:
157 case Instruction::LShr:
158 case Instruction::AShr:
163 case Instruction::UDiv:
164 case Instruction::SDiv:
165 case Instruction::URem:
166 case Instruction::SRem:
167 case Instruction::Trunc:
168 case Instruction::ZExt:
169 case Instruction::SExt:
170 case Instruction::IntToPtr:
171 case Instruction::PtrToInt:
172 case Instruction::BitCast:
173 case Instruction::PHI:
245 bool HasCall =
false;
246 unsigned NumStores = 0;
247 for (
auto &BB : L->
blocks())
248 for (
auto &
I : *BB) {
249 if (isa<CallInst>(&
I) || isa<InvokeInst>(&
I)) {
261 if (isa<StoreInst>(&
I)) {
262 Type *MemAccessTy =
I.getOperand(0)->getType();
270 unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
336 assert(Size > 0 &&
"Element must have non-zero size.");
346 assert(WideBits > 0 &&
"Could not compute size of vector");
347 return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
351 unsigned Opcode,
Type *Ty,
369 const unsigned DivInstrCost = 20;
370 const unsigned DivMulSeqCost = 10;
371 const unsigned SDivPow2Cost = 4;
374 Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
375 bool UnsignedDivRem =
376 Opcode == Instruction::UDiv || Opcode == Instruction::URem;
379 bool DivRemConst =
false;
380 bool DivRemConstPow2 =
false;
381 if ((SignedDivRem || UnsignedDivRem) && Args.
size() == 2) {
382 if (
const Constant *
C = dyn_cast<Constant>(Args[1])) {
385 ? dyn_cast_or_null<const ConstantInt>(
C->getSplatValue())
386 : dyn_cast<const ConstantInt>(
C));
387 if (CVal !=
nullptr &&
389 DivRemConstPow2 =
true;
397 "getArithmeticInstrCost() called with vector type.");
403 if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
404 Opcode == Instruction::AShr) {
409 return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
412 if ((SignedDivRem || UnsignedDivRem) && VF > 4)
423 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
424 Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
425 switch (ScalarBits) {
432 unsigned ScalarCost =
450 if (Opcode == Instruction::FRem) {
453 if (VF == 2 && ScalarBits == 32)
462 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
463 Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
467 if (Opcode == Instruction::FRem)
471 if (Opcode == Instruction::Or)
474 if (Opcode == Instruction::Xor && ScalarBits == 1) {
481 return (SignedDivRem ? SDivPow2Cost : 1);
483 return DivMulSeqCost;
484 if (SignedDivRem || UnsignedDivRem)
490 Opd1PropInfo, Opd2PropInfo, Args);
512 return (Index == 0 ? 0 : NumVectors);
519 return NumVectors - 1;
546 "Packing must reduce size of vector type.");
548 "Packing should not change number of elements.");
564 for (
unsigned P = 0;
P < Log2Diff; ++
P) {
585 "Should only be called with vector types.");
587 unsigned PackCost = 0;
591 if (SrcScalarBits > DstScalarBits)
594 else if (SrcScalarBits < DstScalarBits) {
597 PackCost = Log2Diff * DstNumParts;
599 PackCost += DstNumParts - 1;
608 Type *OpTy =
nullptr;
610 OpTy = CI->getOperand(0)->getType();
612 if (LogicI->getNumOperands() == 2)
613 if (
CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
614 if (isa<CmpInst>(LogicI->getOperand(1)))
615 OpTy = CI0->getOperand(0)->getType();
617 if (OpTy !=
nullptr) {
642 if (CmpOpTy !=
nullptr)
644 if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
656 assert (ST->
hasVector() &&
"getCastInstrCost() called with vector type.");
662 if (Opcode == Instruction::Trunc) {
668 if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
669 if (SrcScalarBits >= 8) {
675 unsigned NumSrcVectorOps =
676 (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
677 : (NumDstVectors / 2));
679 return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
681 else if (SrcScalarBits == 1)
685 if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
686 Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
691 if (DstScalarBits == 64) {
692 if (SrcScalarBits == 64)
693 return NumDstVectors;
695 if (SrcScalarBits == 1)
704 unsigned TotCost = VF * ScalarCost;
705 bool NeedsInserts =
true, NeedsExtracts =
true;
707 if (DstScalarBits == 128 &&
708 (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
709 NeedsInserts =
false;
710 if (SrcScalarBits == 128 &&
711 (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
712 NeedsExtracts =
false;
718 if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
724 if (Opcode == Instruction::FPTrunc) {
725 if (SrcScalarBits == 128)
728 return VF / 2 +
std::max(1U, VF / 4 );
731 if (Opcode == Instruction::FPExt) {
732 if (SrcScalarBits == 32 && DstScalarBits == 64) {
745 if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
746 if (SrcScalarBits >= 32 ||
747 (I !=
nullptr && isa<LoadInst>(I->
getOperand(0))))
749 return SrcScalarBits > 1 ? 2 : 5 ;
752 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
760 if (Opcode == Instruction::SExt)
761 Cost = (DstScalarBits < 64 ? 3 : 4);
762 if (Opcode == Instruction::ZExt)
778 unsigned ExtCost = 0;
781 if (!isa<LoadInst>(
Op) && !isa<ConstantInt>(
Op))
790 assert (ST->
hasVector() &&
"getCmpSelInstrCost() called with vector type.");
794 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
795 unsigned PredicateExtraCost = 0;
799 case CmpInst::Predicate::ICMP_NE:
800 case CmpInst::Predicate::ICMP_UGE:
801 case CmpInst::Predicate::ICMP_ULE:
802 case CmpInst::Predicate::ICMP_SGE:
803 case CmpInst::Predicate::ICMP_SLE:
804 PredicateExtraCost = 1;
806 case CmpInst::Predicate::FCMP_ONE:
807 case CmpInst::Predicate::FCMP_ORD:
808 case CmpInst::Predicate::FCMP_UEQ:
809 case CmpInst::Predicate::FCMP_UNO:
810 PredicateExtraCost = 2;
822 unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
830 unsigned PackCost = 0;
832 if (CmpOpTy !=
nullptr)
841 case Instruction::ICmp: {
845 if (I !=
nullptr && ScalarBits >= 32)
848 if (!Ld->hasOneUse() && Ld->getParent() == I->
getParent() &&
849 C->getZExtValue() == 0)
872 return ((Index % 2 == 0) ? 1 : 0);
874 if (Opcode == Instruction::ExtractElement) {
895 unsigned TruncBits = 0;
896 unsigned SExtBits = 0;
897 unsigned ZExtBits = 0;
900 if (isa<TruncInst>(UserI))
901 TruncBits = UserBits;
902 else if (isa<SExtInst>(UserI))
904 else if (isa<ZExtInst>(UserI))
907 if (TruncBits || SExtBits || ZExtBits) {
909 UserI = cast<Instruction>(*UserI->
user_begin());
912 if ((UserI->
getOpcode() == Instruction::Sub ||
913 UserI->
getOpcode() == Instruction::SDiv ||
914 UserI->
getOpcode() == Instruction::UDiv) &&
919 unsigned LoadOrTruncBits =
920 ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
923 case Instruction::Sub:
924 case Instruction::ICmp:
925 if (LoadedBits == 32 && ZExtBits == 64)
928 case Instruction::Mul:
929 if (UserI->
getOpcode() != Instruction::ICmp) {
930 if (LoadedBits == 16 &&
934 if (LoadOrTruncBits == 16)
938 case Instruction::SDiv:
939 if (LoadedBits == 32 && SExtBits == 64)
942 case Instruction::UDiv:
943 case Instruction::And:
944 case Instruction::Or:
945 case Instruction::Xor:
957 if (UserI->
getOpcode() == Instruction::ICmp)
961 return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
969 if (
auto *CI = dyn_cast<CallInst>(
I))
970 if (
auto *
F = CI->getCalledFunction())
990 for (
unsigned i = 0; i < 2; ++i) {
997 (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
998 isa<ZExtInst>(OtherOp)))
1013 if (!Src->
isVectorTy() && NumOps == 1 && I !=
nullptr) {
1021 else if (
const StoreInst *
SI = dyn_cast<StoreInst>(I)) {
1022 const Value *StoredVal =
SI->getValueOperand();
1045 bool UseMaskForCond,
1046 bool UseMaskForGaps) {
1047 if (UseMaskForCond || UseMaskForGaps)
1049 Alignment, AddressSpace,
1050 UseMaskForCond, UseMaskForGaps);
1051 assert(isa<VectorType>(VecTy) &&
1052 "Expect a vector type for interleaved memory op");
1055 auto ceil = [](
unsigned A,
unsigned B) {
return (A +
B - 1) /
B; };
1058 assert(Factor > 1 && NumElts % Factor == 0 &&
"Invalid interleave factor");
1059 unsigned VF = NumElts / Factor;
1062 unsigned NumPermutes = 0;
1068 BitVector UsedInsts(NumVectorMemOps,
false);
1069 std::vector<BitVector> ValueVecs(Factor,
BitVector(NumVectorMemOps,
false));
1070 for (
unsigned Index : Indices)
1071 for (
unsigned Elt = 0; Elt < VF; ++Elt) {
1072 unsigned Vec = (
Index + Elt * Factor) / NumEltsPerVecReg;
1076 NumVectorMemOps = UsedInsts.
count();
1078 for (
unsigned Index : Indices) {
1082 unsigned NumSrcVecs = ValueVecs[
Index].count();
1084 assert (NumSrcVecs >= NumDstVecs &&
"Expected at least as many sources");
1085 NumPermutes +=
std::max(1U, NumSrcVecs - NumDstVecs);
1091 unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
1092 unsigned NumDstVecs = NumVectorMemOps;
1093 assert (NumSrcVecs > 1 &&
"Expected at least two source vectors.");
1094 NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
1098 return NumVectorMemOps + NumPermutes;
1119 unsigned ScalarizationCostPassed) {
1124 FMF, ScalarizationCostPassed);
constexpr bool isUInt< 32 >(uint64_t x)
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
This class is the base class for the comparison instructions.
uint64_t getZExtValue() const
Get zero extended value.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
Cost tables and simple lookup functions.
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy)
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2)
The main scalar evolution driver.
bool isFP128Ty() const
Return true if this is 'fp128'.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
constexpr bool isInt< 16 >(int64_t x)
An instruction for reading from memory.
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getBitWidth() const
Return the number of bits in the APInt.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value * > Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
const HexagonInstrInfo * TII
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool hasMiscellaneousExtensions2() const
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
int64_t getSExtValue() const
Get sign extended value.
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Type * getType() const
All values are typed, get the type of this value.
unsigned getNumberOfRegisters(bool Vector)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
const APInt & getValue() const
Return the constant as an APInt value reference.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
An instruction for storing to memory.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Value * getOperand(unsigned i) const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isVoidTy() const
Return true if this is 'void'.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
The instances of the Type class are immutable: once they are created, they are never changed...
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
size_t size() const
size - Get the array size.
This is an important base class in LLVM.
bool hasDivRemOp(Type *DataType, bool IsSigned)
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I=nullptr)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
unsigned getNumberOfParts(Type *Tp)
bool hasVectorEnhancements1() const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, const Instruction *I)
const SystemZInstrInfo * getInstrInfo() const override
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
unsigned getNumOperands() const
This is the shared class of boolean and integer constants.
constexpr bool isInt< 32 >(int64_t x)
size_type count() const
count - Returns the number of bits which are set.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
unsigned getRegisterBitWidth(bool Vector) const
int getIntImmCost(const APInt &Imm, Type *Ty)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
unsigned getVectorNumElements() const
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Represents a single loop in the control flow graph.
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Establish a view to a call site for examination.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
constexpr bool isUInt< 16 >(uint64_t x)
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
bool isRxSBGMask(uint64_t Mask, unsigned BitSize, unsigned &Start, unsigned &End) const
bool hasOneUse() const
Return true if there is exactly one user of this value.
Convenience struct for specifying and reasoning about fast-math flags.
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue)
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
bool hasLoadStoreOnCond2() const
iterator_range< block_iterator > blocks() const
bool hasPopulationCount() const
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy)