//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
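// Because this is a plain cl::opt, reduction matching is off by default and
// can be toggled from the command line of any tool linking this library.
// Illustrative invocation (mirrors the cost-model regression tests):
//
//   opt < reduction.ll -cost-model -analyze -costmodel-reduxcost=true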
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}
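// Usage sketch (illustrative, not part of this file): legacy-PM passes reach
// this interface through TargetTransformInfoWrapperPass, defined at the end
// of this file. 'MyPass' below is a hypothetical FunctionPass:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   bool MyPass::runOnFunction(Function &F) {
//     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     // e.g. query a cost: TTI.getUserCost(&SomeInstruction);
//     return false;
//   }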
int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getCallCost(F, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
    const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non uniform vector of constants
  // and check if the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue)
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}
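// Worked example (illustrative): in 'mul i32 %x, 8' the constant operand is a
// scalar ConstantInt that is a power of two, so the early return above gives
// OK_UniformConstantValue with OpProps == OP_PowerOf2. A constant splat such
// as <i32 4, i32 4, i32 4, i32 4> also reports OK_UniformConstantValue, while
// a mixed vector like <i32 1, i32 2, i32 4, i32 8> reports
// OK_NonUniformConstantValue and keeps OP_PowerOf2 because every element is a
// power of two.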
int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                 Type *Src, const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy, const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
    bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace,
                                                 UseMaskForCond,
                                                 UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
           ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}

bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}

bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}
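// Example (illustrative): for Level == 1 on a <4 x float> value the expected
// "left" mask is <0, 2, -1, -1> and the "right" mask is <1, 3, -1, -1>, i.e.
// this matches shuffles of the form:
//
//   %l = shufflevector <4 x float> %v, <4 x float> undef,
//                      <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
//   %r = shufflevector <4 x float> %v, <4 x float> undef,
//                      <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>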
namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
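// Example (illustrative): given
//
//   %cmp = icmp sgt i32 %a, %b
//   %max = select i1 %cmp, i32 %a, i32 %b
//
// the m_SMax matcher fires, so getReductionData returns RK_MinMax carrying
// the icmp opcode and %a/%b as LHS/RHS. A plain binary op such as
// 'fadd %x, %y' is classified as RK_Arithmetic; anything else yields
// llvm::None.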
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next levels binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}
static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check the current reduction operation and the shuffle use the same value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that shuffle masks matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by a extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst * IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
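// Note: this routine backs TargetTransformInfo::getInstructionCost() when it
// is asked for TCK_RecipThroughput (see the header), and the -1 returns above
// mean "no throughput information available". A hypothetical caller:
//
//   int Cost = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
//   if (Cost >= 0)
//     TotalCost += Cost;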
TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}
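// Usage sketch (new pass manager; illustrative, not part of this file):
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([] { return TargetIRAnalysis(); }); // NoTTIImpl default
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//
// Targets normally construct TargetIRAnalysis with a callback that returns a
// target-specific TTI rather than the default above.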
// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}