22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 64 unsigned short MatchingId = 0;
67 bool WriteMem =
false;
136 case TCK_RecipThroughput:
137 return getInstructionThroughput(I);
140 return getInstructionLatency(I);
143 return getUserCost(I);
186 int getOperationCost(
unsigned Opcode,
Type *Ty,
Type *OpTy =
nullptr)
const;
193 int getGEPCost(
Type *PointeeType,
const Value *Ptr,
212 int getCallCost(
FunctionType *FTy,
int NumArgs = -1)
const;
218 int getCallCost(
const Function *
F,
int NumArgs = -1)
const;
231 unsigned getInliningThresholdMultiplier()
const;
248 unsigned getEstimatedNumberOfCaseClusters(
const SwitchInst &
SI,
249 unsigned &JTSize)
const;
279 return getUserCost(U, Operands);
287 bool hasBranchDivergence()
const;
295 bool isSourceOfDivergence(
const Value *V)
const;
300 bool isAlwaysUniform(
const Value *V)
const;
318 unsigned getFlatAddressSpace()
const;
332 bool isLoweredToCall(
const Function *
F)
const;
458 bool isLegalAddImmediate(int64_t Imm)
const;
464 bool isLegalICmpImmediate(int64_t Imm)
const;
472 bool isLegalAddressingMode(
Type *Ty,
GlobalValue *BaseGV, int64_t BaseOffset,
473 bool HasBaseReg, int64_t Scale,
474 unsigned AddrSpace = 0,
484 bool canMacroFuseCmp()
const;
488 bool shouldFavorPostInc()
const;
493 bool isLegalMaskedLoad(
Type *DataType)
const;
498 bool isLegalMaskedScatter(
Type *DataType)
const;
499 bool isLegalMaskedGather(
Type *DataType)
const;
506 bool hasDivRemOp(
Type *DataType,
bool IsSigned)
const;
513 bool hasVolatileVariant(
Instruction *
I,
unsigned AddrSpace)
const;
516 bool prefersVectorizedAddressing()
const;
525 bool HasBaseReg, int64_t Scale,
526 unsigned AddrSpace = 0)
const;
532 bool LSRWithInstrQueries()
const;
537 bool isTruncateFree(
Type *Ty1,
Type *Ty2)
const;
546 bool isTypeLegal(
Type *Ty)
const;
549 unsigned getJumpBufAlignment()
const;
552 unsigned getJumpBufSize()
const;
556 bool shouldBuildLookupTables()
const;
560 bool shouldBuildLookupTablesForConstant(
Constant *
C)
const;
564 bool useColdCCForColdCall(
Function &F)
const;
574 bool supportsEfficientVectorElementLoadStore()
const;
577 bool enableAggressiveInterleaving(
bool LoopHasReductions)
const;
587 bool AllowOverlappingLoads =
false;
592 bool enableInterleavedAccessVectorization()
const;
597 bool enableMaskedInterleavedAccessVectorization()
const;
606 bool isFPVectorizationPotentiallyUnsafe()
const;
611 unsigned Alignment = 1,
612 bool *
Fast =
nullptr)
const;
618 bool haveFastSqrt(
Type *Ty)
const;
624 bool isFCmpOrdCheaperThanFCmpZero(
Type *Ty)
const;
628 int getFPOpCost(
Type *Ty)
const;
632 int getIntImmCost(
const APInt &Imm,
Type *Ty)
const;
637 int getIntImmCost(
unsigned Opc,
unsigned Idx,
const APInt &Imm,
649 int getIntImmCodeSizeCost(
unsigned Opc,
unsigned Idx,
const APInt &Imm,
677 OK_NonUniformConstantValue
686 unsigned getNumberOfRegisters(
bool Vector)
const;
689 unsigned getRegisterBitWidth(
bool Vector)
const;
692 unsigned getMinVectorRegisterBitWidth()
const;
700 bool shouldMaximizeVectorBandwidth(
bool OptSize)
const;
705 unsigned getMinimumVF(
unsigned ElemWidth)
const;
710 bool shouldConsiderAddressTypePromotion(
711 const Instruction &I,
bool &AllowPromotionWithoutCommonHeader)
const;
714 unsigned getCacheLineSize()
const;
734 unsigned getPrefetchDistance()
const;
739 unsigned getMinPrefetchStride()
const;
744 unsigned getMaxPrefetchIterationsAhead()
const;
749 unsigned getMaxInterleaveFactor(
unsigned VF)
const;
769 int getArithmeticInstrCost(
782 Type *SubTp =
nullptr)
const;
787 int getCastInstrCost(
unsigned Opcode,
Type *Dst,
Type *Src,
792 int getExtractWithExtendCost(
unsigned Opcode,
Type *Dst,
VectorType *VecTy,
793 unsigned Index = -1)
const;
797 int getCFInstrCost(
unsigned Opcode)
const;
802 int getCmpSelInstrCost(
unsigned Opcode,
Type *ValTy,
807 int getVectorInstrCost(
unsigned Opcode,
Type *Val,
unsigned Index = -1)
const;
810 int getMemoryOpCost(
unsigned Opcode,
Type *Src,
unsigned Alignment,
814 int getMaskedMemoryOpCost(
unsigned Opcode,
Type *Src,
unsigned Alignment,
815 unsigned AddressSpace)
const;
824 int getGatherScatterOpCost(
unsigned Opcode,
Type *DataTy,
Value *Ptr,
825 bool VariableMask,
unsigned Alignment)
const;
837 int getInterleavedMemoryOpCost(
unsigned Opcode,
Type *VecTy,
unsigned Factor,
839 unsigned AddressSpace,
840 bool UseMaskForCond =
false,
841 bool UseMaskForGaps =
false)
const;
856 int getArithmeticReductionCost(
unsigned Opcode,
Type *Ty,
857 bool IsPairwiseForm)
const;
858 int getMinMaxReductionCost(
Type *Ty,
Type *CondTy,
bool IsPairwiseForm,
859 bool IsUnsigned)
const;
866 unsigned VF = 1)
const;
873 unsigned ScalarizationCostPassed = UINT_MAX)
const;
880 unsigned getNumberOfParts(
Type *Tp)
const;
890 const SCEV *Ptr =
nullptr)
const;
907 unsigned getAtomicMemIntrinsicMaxElementSize()
const;
914 Type *ExpectedType)
const;
918 unsigned SrcAlign,
unsigned DestAlign)
const;
928 unsigned RemainingBytes,
930 unsigned DestAlign)
const;
962 unsigned getLoadStoreVecRegBitWidth(
unsigned AddrSpace)
const;
965 bool isLegalToVectorizeLoad(
LoadInst *LI)
const;
971 bool isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes,
973 unsigned AddrSpace)
const;
976 bool isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes,
978 unsigned AddrSpace)
const;
982 unsigned getLoadVectorFactor(
unsigned VF,
unsigned LoadSize,
983 unsigned ChainSizeInBytes,
988 unsigned getStoreVectorFactor(
unsigned VF,
unsigned StoreSize,
989 unsigned ChainSizeInBytes,
1002 bool useReductionIntrinsic(
unsigned Opcode,
Type *Ty,
1013 int getInstructionLatency(
const Instruction *I)
const;
1017 int getInstructionThroughput(
const Instruction *I)
const;
1025 template <
typename T>
class Model;
1027 std::unique_ptr<Concept> TTIImpl;
1033 virtual const DataLayout &getDataLayout()
const = 0;
1034 virtual int getOperationCost(
unsigned Opcode,
Type *Ty,
Type *OpTy) = 0;
1035 virtual int getGEPCost(
Type *PointeeType,
const Value *Ptr,
1038 virtual int getCallCost(
FunctionType *FTy,
int NumArgs) = 0;
1039 virtual int getCallCost(
const Function *
F,
int NumArgs) = 0;
1040 virtual int getCallCost(
const Function *F,
1042 virtual unsigned getInliningThresholdMultiplier() = 0;
1047 virtual unsigned getEstimatedNumberOfCaseClusters(
const SwitchInst &
SI,
1048 unsigned &JTSize) = 0;
1051 virtual bool hasBranchDivergence() = 0;
1052 virtual bool isSourceOfDivergence(
const Value *V) = 0;
1053 virtual bool isAlwaysUniform(
const Value *V) = 0;
1054 virtual unsigned getFlatAddressSpace() = 0;
1055 virtual bool isLoweredToCall(
const Function *F) = 0;
1058 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1059 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1061 int64_t BaseOffset,
bool HasBaseReg,
1067 virtual bool canMacroFuseCmp() = 0;
1068 virtual bool shouldFavorPostInc()
const = 0;
1070 virtual bool isLegalMaskedLoad(
Type *DataType) = 0;
1071 virtual bool isLegalMaskedScatter(
Type *DataType) = 0;
1072 virtual bool isLegalMaskedGather(
Type *DataType) = 0;
1073 virtual bool hasDivRemOp(
Type *DataType,
bool IsSigned) = 0;
1074 virtual bool hasVolatileVariant(
Instruction *I,
unsigned AddrSpace) = 0;
1075 virtual bool prefersVectorizedAddressing() = 0;
1077 int64_t BaseOffset,
bool HasBaseReg,
1078 int64_t Scale,
unsigned AddrSpace) = 0;
1079 virtual bool LSRWithInstrQueries() = 0;
1080 virtual bool isTruncateFree(
Type *Ty1,
Type *Ty2) = 0;
1081 virtual bool isProfitableToHoist(
Instruction *I) = 0;
1082 virtual bool useAA() = 0;
1083 virtual bool isTypeLegal(
Type *Ty) = 0;
1084 virtual unsigned getJumpBufAlignment() = 0;
1085 virtual unsigned getJumpBufSize() = 0;
1086 virtual bool shouldBuildLookupTables() = 0;
1087 virtual bool shouldBuildLookupTablesForConstant(
Constant *
C) = 0;
1088 virtual bool useColdCCForColdCall(
Function &F) = 0;
1093 virtual bool supportsEfficientVectorElementLoadStore() = 0;
1094 virtual bool enableAggressiveInterleaving(
bool LoopHasReductions) = 0;
1096 bool IsZeroCmp)
const = 0;
1097 virtual bool enableInterleavedAccessVectorization() = 0;
1098 virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1099 virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1106 virtual bool haveFastSqrt(
Type *Ty) = 0;
1107 virtual bool isFCmpOrdCheaperThanFCmpZero(
Type *Ty) = 0;
1108 virtual int getFPOpCost(
Type *Ty) = 0;
1109 virtual int getIntImmCodeSizeCost(
unsigned Opc,
unsigned Idx,
const APInt &Imm,
1111 virtual int getIntImmCost(
const APInt &Imm,
Type *Ty) = 0;
1112 virtual int getIntImmCost(
unsigned Opc,
unsigned Idx,
const APInt &Imm,
1116 virtual unsigned getNumberOfRegisters(
bool Vector) = 0;
1117 virtual unsigned getRegisterBitWidth(
bool Vector)
const = 0;
1118 virtual unsigned getMinVectorRegisterBitWidth() = 0;
1119 virtual bool shouldMaximizeVectorBandwidth(
bool OptSize)
const = 0;
1120 virtual unsigned getMinimumVF(
unsigned ElemWidth)
const = 0;
1121 virtual bool shouldConsiderAddressTypePromotion(
1122 const Instruction &I,
bool &AllowPromotionWithoutCommonHeader) = 0;
1123 virtual unsigned getCacheLineSize() = 0;
1126 virtual unsigned getPrefetchDistance() = 0;
1127 virtual unsigned getMinPrefetchStride() = 0;
1128 virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1129 virtual unsigned getMaxInterleaveFactor(
unsigned VF) = 0;
1138 virtual int getCastInstrCost(
unsigned Opcode,
Type *Dst,
Type *Src,
1140 virtual int getExtractWithExtendCost(
unsigned Opcode,
Type *Dst,
1142 virtual int getCFInstrCost(
unsigned Opcode) = 0;
1143 virtual int getCmpSelInstrCost(
unsigned Opcode,
Type *ValTy,
1145 virtual int getVectorInstrCost(
unsigned Opcode,
Type *Val,
1146 unsigned Index) = 0;
1147 virtual int getMemoryOpCost(
unsigned Opcode,
Type *Src,
unsigned Alignment,
1149 virtual int getMaskedMemoryOpCost(
unsigned Opcode,
Type *Src,
1151 unsigned AddressSpace) = 0;
1152 virtual int getGatherScatterOpCost(
unsigned Opcode,
Type *DataTy,
1153 Value *Ptr,
bool VariableMask,
1154 unsigned Alignment) = 0;
1155 virtual int getInterleavedMemoryOpCost(
unsigned Opcode,
Type *VecTy,
1159 unsigned AddressSpace,
1160 bool UseMaskForCond =
false,
1161 bool UseMaskForGaps =
false) = 0;
1162 virtual int getArithmeticReductionCost(
unsigned Opcode,
Type *Ty,
1163 bool IsPairwiseForm) = 0;
1164 virtual int getMinMaxReductionCost(
Type *Ty,
Type *CondTy,
1165 bool IsPairwiseForm,
bool IsUnsigned) = 0;
1168 unsigned ScalarizationCostPassed) = 0;
1173 virtual unsigned getNumberOfParts(
Type *Tp) = 0;
1175 const SCEV *Ptr) = 0;
1179 virtual unsigned getAtomicMemIntrinsicMaxElementSize()
const = 0;
1181 Type *ExpectedType) = 0;
1184 unsigned DestAlign)
const = 0;
1185 virtual void getMemcpyLoopResidualLoweringType(
1187 unsigned RemainingBytes,
unsigned SrcAlign,
unsigned DestAlign)
const = 0;
1195 virtual unsigned getLoadStoreVecRegBitWidth(
unsigned AddrSpace)
const = 0;
1196 virtual bool isLegalToVectorizeLoad(
LoadInst *LI)
const = 0;
1197 virtual bool isLegalToVectorizeStore(
StoreInst *SI)
const = 0;
1198 virtual bool isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes,
1200 unsigned AddrSpace)
const = 0;
1201 virtual bool isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes,
1203 unsigned AddrSpace)
const = 0;
1204 virtual unsigned getLoadVectorFactor(
unsigned VF,
unsigned LoadSize,
1205 unsigned ChainSizeInBytes,
1207 virtual unsigned getStoreVectorFactor(
unsigned VF,
unsigned StoreSize,
1208 unsigned ChainSizeInBytes,
1210 virtual bool useReductionIntrinsic(
unsigned Opcode,
Type *Ty,
1212 virtual bool shouldExpandReduction(
const IntrinsicInst *II)
const = 0;
1213 virtual int getInstructionLatency(
const Instruction *I) = 0;
1216 template <
typename T>
1221 Model(
T Impl) : Impl(std::move(Impl)) {}
1222 ~
Model()
override {}
1224 const DataLayout &getDataLayout()
const override {
1225 return Impl.getDataLayout();
1228 int getOperationCost(
unsigned Opcode,
Type *Ty,
Type *OpTy)
override {
1229 return Impl.getOperationCost(Opcode, Ty, OpTy);
1231 int getGEPCost(
Type *PointeeType,
const Value *Ptr,
1233 return Impl.getGEPCost(PointeeType, Ptr, Operands);
1236 return Impl.getExtCost(I, Src);
1238 int getCallCost(
FunctionType *FTy,
int NumArgs)
override {
1239 return Impl.getCallCost(FTy, NumArgs);
1241 int getCallCost(
const Function *
F,
int NumArgs)
override {
1242 return Impl.getCallCost(F, NumArgs);
1246 return Impl.getCallCost(F, Arguments);
1248 unsigned getInliningThresholdMultiplier()
override {
1249 return Impl.getInliningThresholdMultiplier();
1253 return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
1257 return Impl.getIntrinsicCost(IID, RetTy, Arguments);
1260 return Impl.getUserCost(U, Operands);
1262 bool hasBranchDivergence()
override {
return Impl.hasBranchDivergence(); }
1263 bool isSourceOfDivergence(
const Value *V)
override {
1264 return Impl.isSourceOfDivergence(V);
1267 bool isAlwaysUniform(
const Value *V)
override {
1268 return Impl.isAlwaysUniform(V);
1271 unsigned getFlatAddressSpace()
override {
1272 return Impl.getFlatAddressSpace();
1275 bool isLoweredToCall(
const Function *F)
override {
1276 return Impl.isLoweredToCall(F);
1280 return Impl.getUnrollingPreferences(L, SE, UP);
1282 bool isLegalAddImmediate(int64_t Imm)
override {
1283 return Impl.isLegalAddImmediate(Imm);
1285 bool isLegalICmpImmediate(int64_t Imm)
override {
1286 return Impl.isLegalICmpImmediate(Imm);
1288 bool isLegalAddressingMode(
Type *Ty,
GlobalValue *BaseGV, int64_t BaseOffset,
1289 bool HasBaseReg, int64_t Scale,
1292 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1293 Scale, AddrSpace, I);
1297 return Impl.isLSRCostLess(C1, C2);
1299 bool canMacroFuseCmp()
override {
1300 return Impl.canMacroFuseCmp();
1302 bool shouldFavorPostInc()
const override {
1303 return Impl.shouldFavorPostInc();
1306 return Impl.isLegalMaskedStore(DataType);
1308 bool isLegalMaskedLoad(
Type *DataType)
override {
1309 return Impl.isLegalMaskedLoad(DataType);
1311 bool isLegalMaskedScatter(
Type *DataType)
override {
1312 return Impl.isLegalMaskedScatter(DataType);
1314 bool isLegalMaskedGather(
Type *DataType)
override {
1315 return Impl.isLegalMaskedGather(DataType);
1317 bool hasDivRemOp(
Type *DataType,
bool IsSigned)
override {
1318 return Impl.hasDivRemOp(DataType, IsSigned);
1320 bool hasVolatileVariant(
Instruction *I,
unsigned AddrSpace)
override {
1321 return Impl.hasVolatileVariant(I, AddrSpace);
1323 bool prefersVectorizedAddressing()
override {
1324 return Impl.prefersVectorizedAddressing();
1327 bool HasBaseReg, int64_t Scale,
1328 unsigned AddrSpace)
override {
1329 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1332 bool LSRWithInstrQueries()
override {
1333 return Impl.LSRWithInstrQueries();
1335 bool isTruncateFree(
Type *Ty1,
Type *Ty2)
override {
1336 return Impl.isTruncateFree(Ty1, Ty2);
1338 bool isProfitableToHoist(
Instruction *I)
override {
1339 return Impl.isProfitableToHoist(I);
1341 bool useAA()
override {
return Impl.useAA(); }
1342 bool isTypeLegal(
Type *Ty)
override {
return Impl.isTypeLegal(Ty); }
1343 unsigned getJumpBufAlignment()
override {
return Impl.getJumpBufAlignment(); }
1344 unsigned getJumpBufSize()
override {
return Impl.getJumpBufSize(); }
1345 bool shouldBuildLookupTables()
override {
1346 return Impl.shouldBuildLookupTables();
1348 bool shouldBuildLookupTablesForConstant(
Constant *
C)
override {
1349 return Impl.shouldBuildLookupTablesForConstant(C);
1351 bool useColdCCForColdCall(
Function &F)
override {
1352 return Impl.useColdCCForColdCall(F);
1356 bool Extract)
override {
1357 return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1360 unsigned VF)
override {
1361 return Impl.getOperandsScalarizationOverhead(Args, VF);
1364 bool supportsEfficientVectorElementLoadStore()
override {
1365 return Impl.supportsEfficientVectorElementLoadStore();
1368 bool enableAggressiveInterleaving(
bool LoopHasReductions)
override {
1369 return Impl.enableAggressiveInterleaving(LoopHasReductions);
1372 bool IsZeroCmp)
const override {
1373 return Impl.enableMemCmpExpansion(IsZeroCmp);
1375 bool enableInterleavedAccessVectorization()
override {
1376 return Impl.enableInterleavedAccessVectorization();
1378 bool enableMaskedInterleavedAccessVectorization()
override {
1379 return Impl.enableMaskedInterleavedAccessVectorization();
1381 bool isFPVectorizationPotentiallyUnsafe()
override {
1382 return Impl.isFPVectorizationPotentiallyUnsafe();
1386 unsigned Alignment,
bool *
Fast)
override {
1387 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1391 return Impl.getPopcntSupport(IntTyWidthInBit);
1393 bool haveFastSqrt(
Type *Ty)
override {
return Impl.haveFastSqrt(Ty); }
1395 bool isFCmpOrdCheaperThanFCmpZero(
Type *Ty)
override {
1396 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1399 int getFPOpCost(
Type *Ty)
override {
return Impl.getFPOpCost(Ty); }
1401 int getIntImmCodeSizeCost(
unsigned Opc,
unsigned Idx,
const APInt &Imm,
1402 Type *Ty)
override {
1403 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1405 int getIntImmCost(
const APInt &Imm,
Type *Ty)
override {
1406 return Impl.getIntImmCost(Imm, Ty);
1408 int getIntImmCost(
unsigned Opc,
unsigned Idx,
const APInt &Imm,
1409 Type *Ty)
override {
1410 return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
1413 Type *Ty)
override {
1414 return Impl.getIntImmCost(IID, Idx, Imm, Ty);
1416 unsigned getNumberOfRegisters(
bool Vector)
override {
1417 return Impl.getNumberOfRegisters(Vector);
1419 unsigned getRegisterBitWidth(
bool Vector)
const override {
1420 return Impl.getRegisterBitWidth(Vector);
1422 unsigned getMinVectorRegisterBitWidth()
override {
1423 return Impl.getMinVectorRegisterBitWidth();
1425 bool shouldMaximizeVectorBandwidth(
bool OptSize)
const override {
1426 return Impl.shouldMaximizeVectorBandwidth(OptSize);
1428 unsigned getMinimumVF(
unsigned ElemWidth)
const override {
1429 return Impl.getMinimumVF(ElemWidth);
1431 bool shouldConsiderAddressTypePromotion(
1432 const Instruction &I,
bool &AllowPromotionWithoutCommonHeader)
override {
1433 return Impl.shouldConsiderAddressTypePromotion(
1434 I, AllowPromotionWithoutCommonHeader);
1436 unsigned getCacheLineSize()
override {
1437 return Impl.getCacheLineSize();
1440 return Impl.getCacheSize(Level);
1443 return Impl.getCacheAssociativity(Level);
1445 unsigned getPrefetchDistance()
override {
return Impl.getPrefetchDistance(); }
1446 unsigned getMinPrefetchStride()
override {
1447 return Impl.getMinPrefetchStride();
1449 unsigned getMaxPrefetchIterationsAhead()
override {
1450 return Impl.getMaxPrefetchIterationsAhead();
1452 unsigned getMaxInterleaveFactor(
unsigned VF)
override {
1453 return Impl.getMaxInterleaveFactor(VF);
1455 unsigned getEstimatedNumberOfCaseClusters(
const SwitchInst &
SI,
1456 unsigned &JTSize)
override {
1457 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
1465 return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1466 Opd1PropInfo, Opd2PropInfo, Args);
1469 Type *SubTp)
override {
1470 return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1472 int getCastInstrCost(
unsigned Opcode,
Type *Dst,
Type *Src,
1474 return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1476 int getExtractWithExtendCost(
unsigned Opcode,
Type *Dst,
VectorType *VecTy,
1477 unsigned Index)
override {
1478 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1480 int getCFInstrCost(
unsigned Opcode)
override {
1481 return Impl.getCFInstrCost(Opcode);
1483 int getCmpSelInstrCost(
unsigned Opcode,
Type *ValTy,
Type *CondTy,
1485 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1487 int getVectorInstrCost(
unsigned Opcode,
Type *Val,
unsigned Index)
override {
1488 return Impl.getVectorInstrCost(Opcode, Val, Index);
1490 int getMemoryOpCost(
unsigned Opcode,
Type *Src,
unsigned Alignment,
1491 unsigned AddressSpace,
const Instruction *I)
override {
1492 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1494 int getMaskedMemoryOpCost(
unsigned Opcode,
Type *Src,
unsigned Alignment,
1495 unsigned AddressSpace)
override {
1496 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1498 int getGatherScatterOpCost(
unsigned Opcode,
Type *DataTy,
1499 Value *Ptr,
bool VariableMask,
1500 unsigned Alignment)
override {
1501 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1504 int getInterleavedMemoryOpCost(
unsigned Opcode,
Type *VecTy,
unsigned Factor,
1506 unsigned AddressSpace,
bool UseMaskForCond,
1507 bool UseMaskForGaps)
override {
1508 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1509 Alignment, AddressSpace,
1510 UseMaskForCond, UseMaskForGaps);
1512 int getArithmeticReductionCost(
unsigned Opcode,
Type *Ty,
1513 bool IsPairwiseForm)
override {
1514 return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1516 int getMinMaxReductionCost(
Type *Ty,
Type *CondTy,
1517 bool IsPairwiseForm,
bool IsUnsigned)
override {
1518 return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1521 FastMathFlags FMF,
unsigned ScalarizationCostPassed)
override {
1522 return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1523 ScalarizationCostPassed);
1527 return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1531 return Impl.getCallInstrCost(F, RetTy, Tys);
1533 unsigned getNumberOfParts(
Type *Tp)
override {
1534 return Impl.getNumberOfParts(Tp);
1537 const SCEV *Ptr)
override {
1538 return Impl.getAddressComputationCost(Ty, SE, Ptr);
1541 return Impl.getCostOfKeepingLiveOverCall(Tys);
1545 return Impl.getTgtMemIntrinsic(Inst, Info);
1547 unsigned getAtomicMemIntrinsicMaxElementSize()
const override {
1548 return Impl.getAtomicMemIntrinsicMaxElementSize();
1551 Type *ExpectedType)
override {
1552 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1556 unsigned DestAlign)
const override {
1557 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1561 unsigned RemainingBytes,
1563 unsigned DestAlign)
const override {
1564 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1565 SrcAlign, DestAlign);
1569 return Impl.areInlineCompatible(Caller, Callee);
1574 return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1577 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1580 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1582 unsigned getLoadStoreVecRegBitWidth(
unsigned AddrSpace)
const override {
1583 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1585 bool isLegalToVectorizeLoad(
LoadInst *LI)
const override {
1586 return Impl.isLegalToVectorizeLoad(LI);
1588 bool isLegalToVectorizeStore(
StoreInst *SI)
const override {
1589 return Impl.isLegalToVectorizeStore(SI);
1591 bool isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes,
1593 unsigned AddrSpace)
const override {
1594 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1597 bool isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes,
1599 unsigned AddrSpace)
const override {
1600 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1603 unsigned getLoadVectorFactor(
unsigned VF,
unsigned LoadSize,
1604 unsigned ChainSizeInBytes,
1606 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1608 unsigned getStoreVectorFactor(
unsigned VF,
unsigned StoreSize,
1609 unsigned ChainSizeInBytes,
1611 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1613 bool useReductionIntrinsic(
unsigned Opcode,
Type *Ty,
1615 return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1617 bool shouldExpandReduction(
const IntrinsicInst *II)
const override {
1618 return Impl.shouldExpandReduction(II);
1620 int getInstructionLatency(
const Instruction *I)
override {
1621 return Impl.getInstructionLatency(I);
1625 template <
typename T>
1627 : TTIImpl(new
Model<
T>(Impl)) {}
1658 : TTICallback(Arg.TTICallback) {}
1660 : TTICallback(
std::move(
Arg.TTICallback)) {}
1662 TTICallback = RHS.TTICallback;
1666 TTICallback = std::move(RHS.TTICallback);
1686 std::function<Result(const Function &)> TTICallback;
1689 static Result getDefaultTTI(
const Function &F);
1700 virtual void anchor();
A parsed version of the target data layout string in and methods for querying it. ...
Atomic ordering constants.
This class represents lattice values for constants.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
static unsigned getScalarizationOverhead(Instruction *I, unsigned VF, const TargetTransformInfo &TTI)
Estimate the overhead of scalarizing an instruction.
value_op_iterator value_op_begin()
The main scalar evolution driver.
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
Analysis pass providing the TargetTransformInfo.
value_op_iterator value_op_end()
An instruction for reading from memory.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
bool areInlineCompatible(const Function &Caller, const Function &Callee)
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
AtomicOrdering
Atomic ordering for LLVM's memory model.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Class to represent function types.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
An instruction for storing to memory.
amdgpu Simplify well known AMD library false Value * Callee
Analysis containing CSE Info
A set of analyses that are preserved following a run of a transformation pass.
The instances of the Type class are immutable: once they are created, they are never changed...
This is an important class for using LLVM in a threaded context.
This is an important base class in LLVM.
A CRTP mix-in that provides informational APIs needed for analysis passes.
AMDGPU Lower Kernel Arguments
TargetIRAnalysis(const TargetIRAnalysis &Arg)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ImmutablePass class - This class is used to provide information that does not need to be run...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Class to represent vector types.
Class for arbitrary precision integers.
amdgpu Simplify well known AMD library false Value Value * Arg
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
TargetIRAnalysis(TargetIRAnalysis &&Arg)
API to communicate dependencies between analyses during invalidation.
TargetTransformInfo Result
LLVM Value Representation.
print Print MemDeps of function
Convenience struct for specifying and reasoning about fast-math flags.
A container for analyses that lazily runs them and caches their results.
This header defines various interfaces for pass management in LLVM.
Information about a load/store intrinsic defined by the target.
A special type used by analysis passes to provide an address that identifies that particular analysis...
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
A wrapper class for inspecting calls to intrinsic functions.