BasicTTIImpl.h
1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file provides a helper that implements much of the TTI interface in
12 /// terms of the target-independent code generator and TargetLowering
13 /// interfaces.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
18 #define LLVM_CODEGEN_BASICTTIIMPL_H
19 
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/Analysis/TargetTransformInfoImpl.h"
28 #include "llvm/CodeGen/ISDOpcodes.h"
29 #include "llvm/CodeGen/TargetLowering.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/CodeGen/ValueTypes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/Operator.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/MC/MCSchedule.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <limits>
55 #include <utility>
56 
57 namespace llvm {
58 
59 class Function;
60 class GlobalValue;
61 class LLVMContext;
62 class ScalarEvolution;
63 class SCEV;
64 class TargetMachine;
65 
66 extern cl::opt<unsigned> PartialUnrollingThreshold;
67 
68 /// Base class which can be used to help build a TTI implementation.
69 ///
70 /// This class provides as much implementation of the TTI interface as is
71 /// possible using the target independent parts of the code generator.
72 ///
73 /// In order to subclass it, your class must implement a getST() method to
74 /// return the subtarget, and a getTLI() method to return the target lowering.
75 /// We need these methods implemented in the derived class so that this class
76 /// doesn't have to duplicate storage for them.
77 template <typename T>
78 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
79 private:
80  using BaseT = TargetTransformInfoImplCRTPBase<T>;
81  using TTI = TargetTransformInfo;
82 
83  /// Estimate a cost of Broadcast as an extract and sequence of insert
84  /// operations.
85  unsigned getBroadcastShuffleOverhead(Type *Ty) {
86  assert(Ty->isVectorTy() && "Can only shuffle vectors");
87  unsigned Cost = 0;
88  // Broadcast cost is equal to the cost of extracting the zero'th element
89  // plus the cost of inserting it into every element of the result vector.
90  Cost += static_cast<T *>(this)->getVectorInstrCost(
91  Instruction::ExtractElement, Ty, 0);
92 
93  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
94  Cost += static_cast<T *>(this)->getVectorInstrCost(
95  Instruction::InsertElement, Ty, i);
96  }
97  return Cost;
98  }
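  // Worked example (illustrative; the per-element costs are hypothetical):
  // for a <4 x float> broadcast on a target where every extractelement and
  // insertelement is modelled as cost 1, the code above produces
  //   1 (extract lane 0) + 4 (insert into lanes 0..3) = 5.
  // Targets with a real splat/dup instruction are expected to override
  // getShuffleCost() and report something much cheaper.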
99 
100  /// Estimate a cost of shuffle as a sequence of extract and insert
101  /// operations.
102  unsigned getPermuteShuffleOverhead(Type *Ty) {
103  assert(Ty->isVectorTy() && "Can only shuffle vectors");
104  unsigned Cost = 0;
105  // Shuffle cost is equal to the cost of extracting elements from its argument
106  // plus the cost of inserting them into the result vector.
107 
108  // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
109  // index 0 of the first vector, index 1 of the second vector, index 2 of the
110  // first vector and finally index 3 of the second vector, and insert them at
111  // indices <0,1,2,3> of the result vector.
112  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
113  Cost += static_cast<T *>(this)
114  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
115  Cost += static_cast<T *>(this)
116  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
117  }
118  return Cost;
119  }
120 
121  /// Estimate a cost of subvector extraction as a sequence of extract and
122  /// insert operations.
123  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
124  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
125  "Can only extract subvectors from vectors");
126  int NumSubElts = SubTy->getVectorNumElements();
127  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
128  "SK_ExtractSubvector index out of range");
129 
130  unsigned Cost = 0;
131  // Subvector extraction cost is equal to the cost of extracting elements from
132  // the source type plus the cost of inserting them into the result vector
133  // type.
134  for (int i = 0; i != NumSubElts; ++i) {
135  Cost += static_cast<T *>(this)->getVectorInstrCost(
136  Instruction::ExtractElement, Ty, i + Index);
137  Cost += static_cast<T *>(this)->getVectorInstrCost(
138  Instruction::InsertElement, SubTy, i);
139  }
140  return Cost;
141  }
142 
143  /// Estimate a cost of subvector insertion as a sequence of extract and
144  /// insert operations.
145  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
146  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
147  "Can only insert subvectors into vectors");
148  int NumSubElts = SubTy->getVectorNumElements();
149  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
150  "SK_InsertSubvector index out of range");
151 
152  unsigned Cost = 0;
153  // Subvector insertion cost is equal to the cost of extracting elements from
154  // the subvector plus the cost of inserting them into the result vector
155  // type.
156  for (int i = 0; i != NumSubElts; ++i) {
157  Cost += static_cast<T *>(this)->getVectorInstrCost(
158  Instruction::ExtractElement, SubTy, i);
159  Cost += static_cast<T *>(this)->getVectorInstrCost(
160  Instruction::InsertElement, Ty, i + Index);
161  }
162  return Cost;
163  }
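  // Worked example (illustrative, with hypothetical unit extract/insert
  // costs): inserting a <2 x i32> subvector at Index 2 of a <4 x i32> vector
  // is modelled as 2 extracts from the subvector plus 2 inserts into lanes 2
  // and 3 of the result, i.e. a cost of 4. SK_ExtractSubvector above is the
  // mirror image: extract from lanes Index..Index+NumSubElts-1 and insert
  // into lanes 0..NumSubElts-1 of the subvector.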
164 
165  /// Local query method delegates up to T which *must* implement this!
166  const TargetSubtargetInfo *getST() const {
167  return static_cast<const T *>(this)->getST();
168  }
169 
170  /// Local query method delegates up to T which *must* implement this!
171  const TargetLoweringBase *getTLI() const {
172  return static_cast<const T *>(this)->getTLI();
173  }
174 
175  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
176  switch (M) {
177  case TTI::MIM_Unindexed:
178  return ISD::UNINDEXED;
179  case TTI::MIM_PreInc:
180  return ISD::PRE_INC;
181  case TTI::MIM_PreDec:
182  return ISD::PRE_DEC;
183  case TTI::MIM_PostInc:
184  return ISD::POST_INC;
185  case TTI::MIM_PostDec:
186  return ISD::POST_DEC;
187  }
188  llvm_unreachable("Unexpected MemIndexedMode");
189  }
190 
191 protected:
192  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
193  : BaseT(DL) {}
194 
195  using TargetTransformInfoImplBase::DL;
196 
197 public:
198  /// \name Scalar TTI Implementations
199  /// @{
200  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
201  unsigned BitWidth, unsigned AddressSpace,
202  unsigned Alignment, bool *Fast) const {
203  EVT E = EVT::getIntegerVT(Context, BitWidth);
204  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
205  }
206 
207  bool hasBranchDivergence() { return false; }
208 
209  bool isSourceOfDivergence(const Value *V) { return false; }
210 
211  bool isAlwaysUniform(const Value *V) { return false; }
212 
213  unsigned getFlatAddressSpace() {
214  // Return an invalid address space.
215  return -1;
216  }
217 
218  bool isLegalAddImmediate(int64_t imm) {
219  return getTLI()->isLegalAddImmediate(imm);
220  }
221 
222  bool isLegalICmpImmediate(int64_t imm) {
223  return getTLI()->isLegalICmpImmediate(imm);
224  }
225 
226  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
227  bool HasBaseReg, int64_t Scale,
228  unsigned AddrSpace, Instruction *I = nullptr) {
229  TargetLoweringBase::AddrMode AM;
230  AM.BaseGV = BaseGV;
231  AM.BaseOffs = BaseOffset;
232  AM.HasBaseReg = HasBaseReg;
233  AM.Scale = Scale;
234  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235  }
236 
237  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
238  const DataLayout &DL) const {
239  EVT VT = getTLI()->getValueType(DL, Ty);
240  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241  }
242 
243  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
244  const DataLayout &DL) const {
245  EVT VT = getTLI()->getValueType(DL, Ty);
246  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247  }
248 
249  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251  }
252 
253  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
254  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255  TargetLoweringBase::AddrMode AM;
256  AM.BaseGV = BaseGV;
257  AM.BaseOffs = BaseOffset;
258  AM.HasBaseReg = HasBaseReg;
259  AM.Scale = Scale;
260  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261  }
262 
263  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264  return getTLI()->isTruncateFree(Ty1, Ty2);
265  }
266 
267  bool isProfitableToHoist(Instruction *I) {
268  return getTLI()->isProfitableToHoist(I);
269  }
270 
271  bool useAA() const { return getST()->useAA(); }
272 
273  bool isTypeLegal(Type *Ty) {
274  EVT VT = getTLI()->getValueType(DL, Ty);
275  return getTLI()->isTypeLegal(VT);
276  }
277 
278  int getGEPCost(Type *PointeeType, const Value *Ptr,
279  ArrayRef<const Value *> Operands) {
280  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281  }
282 
283  int getExtCost(const Instruction *I, const Value *Src) {
284  if (getTLI()->isExtFree(I))
285  return TargetTransformInfo::TCC_Free;
286 
287  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
288  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289  if (getTLI()->isExtLoad(LI, I, DL))
290  return TargetTransformInfo::TCC_Free;
291 
292  return TargetTransformInfo::TCC_Basic;
293  }
294 
295  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
296  ArrayRef<const Value *> Arguments) {
297  return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
298  }
299 
300  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
301  ArrayRef<Type *> ParamTys) {
302  if (IID == Intrinsic::cttz) {
303  if (getTLI()->isCheapToSpeculateCttz())
304  return TargetTransformInfo::TCC_Basic;
305  return TargetTransformInfo::TCC_Expensive;
306  }
307 
308  if (IID == Intrinsic::ctlz) {
309  if (getTLI()->isCheapToSpeculateCtlz())
310  return TargetTransformInfo::TCC_Basic;
311  return TargetTransformInfo::TCC_Expensive;
312  }
313 
314  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
315  }
316 
317  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
318  unsigned &JumpTableSize) {
319  /// Try to find the estimated number of clusters. Note that the number of
320  /// clusters identified in this function could be different from the actual
321  /// numbers found in lowering. This function ignores switches that are
322  /// lowered with a mix of jump table / bit test / BTree. This function was
323  /// initially intended to be used when estimating the cost of switch in
324  /// inline cost heuristic, but it's a generic cost model to be used in other
325  /// places (e.g., in loop unrolling).
326  unsigned N = SI.getNumCases();
327  const TargetLoweringBase *TLI = getTLI();
328  const DataLayout &DL = this->getDataLayout();
329 
330  JumpTableSize = 0;
331  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332 
333  // Early exit if both a jump table and bit test are not allowed.
334  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
335  return N;
336 
337  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338  APInt MinCaseVal = MaxCaseVal;
339  for (auto CI : SI.cases()) {
340  const APInt &CaseVal = CI.getCaseValue()->getValue();
341  if (CaseVal.sgt(MaxCaseVal))
342  MaxCaseVal = CaseVal;
343  if (CaseVal.slt(MinCaseVal))
344  MinCaseVal = CaseVal;
345  }
346 
347  // Check if suitable for a bit test
348  if (N <= DL.getIndexSizeInBits(0u)) {
349  SmallPtrSet<const BasicBlock *, 4> Dests;
350  for (auto I : SI.cases())
351  Dests.insert(I.getCaseSuccessor());
352 
353  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354  DL))
355  return 1;
356  }
357 
358  // Check if suitable for a jump table.
359  if (IsJTAllowed) {
360  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361  return N;
362  uint64_t Range =
363  (MaxCaseVal - MinCaseVal)
364  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365  // Check whether a range of clusters is dense enough for a jump table
366  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367  JumpTableSize = Range;
368  return 1;
369  }
370  }
371  return N;
372  }
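  // Worked example (illustrative): a switch with 8 cases whose values are
  // 0..7, so Range = (7 - 0) + 1 = 8. If the target allows jump tables and
  // TLI->isSuitableForJumpTable() considers 8 cases over a range of 8 dense
  // enough, the function reports a single cluster and sets JumpTableSize to
  // 8; if instead the destinations pass the bit-test check it also reports
  // one cluster. Failing both, it reports N = 8 separate clusters, which
  // callers such as the inline cost model can then price case by case.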
373 
374  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
375 
376  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
377 
378  bool shouldBuildLookupTables() {
379  const TargetLoweringBase *TLI = getTLI();
380  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382  }
383 
384  bool haveFastSqrt(Type *Ty) {
385  const TargetLoweringBase *TLI = getTLI();
386  EVT VT = TLI->getValueType(DL, Ty);
387  return TLI->isTypeLegal(VT) &&
388  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389  }
390 
391  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392  return true;
393  }
394 
395  unsigned getFPOpCost(Type *Ty) {
396  // Check whether FADD is available, as a proxy for floating-point in
397  // general.
398  const TargetLoweringBase *TLI = getTLI();
399  EVT VT = TLI->getValueType(DL, Ty);
400  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401  return TargetTransformInfo::TCC_Basic;
402  return TargetTransformInfo::TCC_Expensive;
403  }
404 
405  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406  const TargetLoweringBase *TLI = getTLI();
407  switch (Opcode) {
408  default: break;
409  case Instruction::Trunc:
410  if (TLI->isTruncateFree(OpTy, Ty))
411  return TargetTransformInfo::TCC_Free;
412  return TargetTransformInfo::TCC_Basic;
413  case Instruction::ZExt:
414  if (TLI->isZExtFree(OpTy, Ty))
415  return TargetTransformInfo::TCC_Free;
416  return TargetTransformInfo::TCC_Basic;
417  }
418 
419  return BaseT::getOperationCost(Opcode, Ty, OpTy);
420  }
421 
422  unsigned getInliningThresholdMultiplier() { return 1; }
423 
424  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
425  TTI::UnrollingPreferences &UP) {
426  // This unrolling functionality is target independent, but to provide some
427  // motivation for its intended use, for x86:
428 
429  // According to the Intel 64 and IA-32 Architectures Optimization Reference
430  // Manual, Intel Core models and later have a loop stream detector (and
431  // associated uop queue) that can benefit from partial unrolling.
432  // The relevant requirements are:
433  // - The loop must have no more than 4 (8 for Nehalem and later) branches
434  // taken, and none of them may be calls.
435  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
436 
437  // According to the Software Optimization Guide for AMD Family 15h
438  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
439  // and loop buffer which can benefit from partial unrolling.
440  // The relevant requirements are:
441  // - The loop must have fewer than 16 branches
442  // - The loop must have less than 40 uops in all executed loop branches
443 
444  // The number of taken branches in a loop is hard to estimate here, and
445  // benchmarking has revealed that it is better not to be conservative when
446  // estimating the branch count. As a result, we'll ignore the branch limits
447  // until someone finds a case where it matters in practice.
448 
449  unsigned MaxOps;
450  const TargetSubtargetInfo *ST = getST();
451  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
452  MaxOps = PartialUnrollingThreshold;
453  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
454  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
455  else
456  return;
457 
458  // Scan the loop: don't unroll loops with calls.
459  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
460  ++I) {
461  BasicBlock *BB = *I;
462 
463  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
464  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
465  ImmutableCallSite CS(&*J);
466  if (const Function *F = CS.getCalledFunction()) {
467  if (!static_cast<T *>(this)->isLoweredToCall(F))
468  continue;
469  }
470 
471  return;
472  }
473  }
474 
475  // Enable runtime and partial unrolling up to the specified size.
476  // Enable using trip count upper bound to unroll loops.
477  UP.Partial = UP.Runtime = UP.UpperBound = true;
478  UP.PartialThreshold = MaxOps;
479 
480  // Avoid unrolling when optimizing for size.
481  UP.OptSizeThreshold = 0;
482  UP.PartialOptSizeThreshold = 0;
483 
484  // Set number of instructions optimized when "back edge"
485  // becomes "fall through" to default value of 2.
486  UP.BEInsns = 2;
487  }
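  // Worked example (illustrative): on a subtarget whose MCSchedModel reports
  // LoopMicroOpBufferSize == 28 (and with PartialUnrollingThreshold left
  // unset on the command line), a call-free loop ends up with UP.Partial =
  // UP.Runtime = UP.UpperBound = true and UP.PartialThreshold = 28, while the
  // zeroed OptSizeThreshold / PartialOptSizeThreshold keep the unroller from
  // growing code when optimizing for size.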
488 
489  unsigned getInstructionLatency(const Instruction *I) {
490  if (isa<LoadInst>(I))
491  return getST()->getSchedModel().DefaultLoadLatency;
492 
493  return BaseT::getInstructionLatency(I);
494  }
495 
496  /// @}
497 
498  /// \name Vector TTI Implementations
499  /// @{
500 
501  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
502 
503  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
504 
505  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
506  /// are set if the result needs to be inserted and/or extracted from vectors.
507  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
508  assert(Ty->isVectorTy() && "Can only scalarize vectors");
509  unsigned Cost = 0;
510 
511  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
512  if (Insert)
513  Cost += static_cast<T *>(this)
514  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
515  if (Extract)
516  Cost += static_cast<T *>(this)
517  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
518  }
519 
520  return Cost;
521  }
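  // Worked example (illustrative, hypothetical unit costs): scalarizing a
  // <4 x i32> value with both Insert and Extract set is modelled as 4
  // insertelement plus 4 extractelement operations, i.e. a cost of 8.
  // Callers use this to price instructions that cannot be kept in vector
  // form and must be executed lane by lane.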
522 
523  /// Estimate the overhead of scalarizing an instruction's unique
524  /// non-constant operands. The types of the arguments are ordinarily
525  /// scalar, in which case the costs are multiplied with VF.
526  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
527  unsigned VF) {
528  unsigned Cost = 0;
529  SmallPtrSet<const Value*, 4> UniqueOperands;
530  for (const Value *A : Args) {
531  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
532  Type *VecTy = nullptr;
533  if (A->getType()->isVectorTy()) {
534  VecTy = A->getType();
535  // If A is a vector operand, VF should be 1 or correspond to A.
536  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
537  "Vector argument does not match VF");
538  }
539  else
540  VecTy = VectorType::get(A->getType(), VF);
541 
542  Cost += getScalarizationOverhead(VecTy, false, true);
543  }
544  }
545 
546  return Cost;
547  }
548 
549  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
550  assert(VecTy->isVectorTy());
551 
552  unsigned Cost = 0;
553 
554  Cost += getScalarizationOverhead(VecTy, true, false);
555  if (!Args.empty())
556  Cost += getOperandsScalarizationOverhead(Args,
557  VecTy->getVectorNumElements());
558  else
559  // When no information on arguments is provided, we add the cost
560  // associated with one argument as a heuristic.
561  Cost += getScalarizationOverhead(VecTy, false, true);
562 
563  return Cost;
564  }
565 
566  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
567 
568  unsigned getArithmeticInstrCost(
569  unsigned Opcode, Type *Ty,
570  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
571  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
572  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
573  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
574  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
575  // Check if any of the operands are vector operands.
576  const TargetLoweringBase *TLI = getTLI();
577  int ISD = TLI->InstructionOpcodeToISD(Opcode);
578  assert(ISD && "Invalid opcode");
579 
580  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
581 
582  bool IsFloat = Ty->isFPOrFPVectorTy();
583  // Assume that floating point arithmetic operations cost twice as much as
584  // integer operations.
585  unsigned OpCost = (IsFloat ? 2 : 1);
586 
587  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
588  // The operation is legal. Assume it costs 1.
589  // TODO: Once we have extract/insert subvector cost we need to use them.
590  return LT.first * OpCost;
591  }
592 
593  if (!TLI->isOperationExpand(ISD, LT.second)) {
594  // If the operation is custom lowered, then assume that the code is twice
595  // as expensive.
596  return LT.first * 2 * OpCost;
597  }
598 
599  // Else, assume that we need to scalarize this op.
600  // TODO: If one of the types get legalized by splitting, handle this
601  // similarly to what getCastInstrCost() does.
602  if (Ty->isVectorTy()) {
603  unsigned Num = Ty->getVectorNumElements();
604  unsigned Cost = static_cast<T *>(this)
605  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
606  // Return the cost of multiple scalar invocation plus the cost of
607  // inserting and extracting the values.
608  return getScalarizationOverhead(Ty, Args) + Num * Cost;
609  }
610 
611  // We don't know anything about this scalar instruction.
612  return OpCost;
613  }
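  // Worked example (illustrative): an integer add on <8 x i32> on a target
  // whose widest legal vector type is v4i32. getTypeLegalizationCost() splits
  // the type once, so LT.first == 2; the add is legal on v4i32, giving a cost
  // of 2 * OpCost = 2. The same operation on <8 x float> would be 2 * 2 = 4
  // because of the IsFloat factor above. If the opcode had to be expanded
  // instead, the scalarization path charges 8 scalar ops plus the
  // insert/extract overhead from getScalarizationOverhead().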
614 
615  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
616  Type *SubTp) {
617  switch (Kind) {
618  case TTI::SK_Broadcast:
619  return getBroadcastShuffleOverhead(Tp);
620  case TTI::SK_Select:
621  case TTI::SK_Reverse:
622  case TTI::SK_Transpose:
623  case TTI::SK_PermuteSingleSrc:
624  case TTI::SK_PermuteTwoSrc:
625  return getPermuteShuffleOverhead(Tp);
626  case TTI::SK_ExtractSubvector:
627  return getExtractSubvectorOverhead(Tp, Index, SubTp);
628  case TTI::SK_InsertSubvector:
629  return getInsertSubvectorOverhead(Tp, Index, SubTp);
630  }
631  llvm_unreachable("Unknown TTI::ShuffleKind");
632  }
633 
634  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
635  const Instruction *I = nullptr) {
636  const TargetLoweringBase *TLI = getTLI();
637  int ISD = TLI->InstructionOpcodeToISD(Opcode);
638  assert(ISD && "Invalid opcode");
639  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
640  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
641 
642  // Check for NOOP conversions.
643  if (SrcLT.first == DstLT.first &&
644  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
645 
646  // Bitcast between types that are legalized to the same type are free.
647  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
648  return 0;
649  }
650 
651  if (Opcode == Instruction::Trunc &&
652  TLI->isTruncateFree(SrcLT.second, DstLT.second))
653  return 0;
654 
655  if (Opcode == Instruction::ZExt &&
656  TLI->isZExtFree(SrcLT.second, DstLT.second))
657  return 0;
658 
659  if (Opcode == Instruction::AddrSpaceCast &&
660  TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
661  Dst->getPointerAddressSpace()))
662  return 0;
663 
664  // If this is a zext/sext of a load, return 0 if the corresponding
665  // extending load exists on target.
666  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
667  I && isa<LoadInst>(I->getOperand(0))) {
668  EVT ExtVT = EVT::getEVT(Dst);
669  EVT LoadVT = EVT::getEVT(Src);
670  unsigned LType =
671  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
672  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
673  return 0;
674  }
675 
676  // If the cast is marked as legal (or promote) then assume low cost.
677  if (SrcLT.first == DstLT.first &&
678  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
679  return 1;
680 
681  // Handle scalar conversions.
682  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
683  // Scalar bitcasts are usually free.
684  if (Opcode == Instruction::BitCast)
685  return 0;
686 
687  // Just check the op cost. If the operation is legal then assume it costs
688  // 1.
689  if (!TLI->isOperationExpand(ISD, DstLT.second))
690  return 1;
691 
692  // Assume that illegal scalar instruction are expensive.
693  return 4;
694  }
695 
696  // Check vector-to-vector casts.
697  if (Dst->isVectorTy() && Src->isVectorTy()) {
698  // If the cast is between same-sized registers, then the check is simple.
699  if (SrcLT.first == DstLT.first &&
700  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
701 
702  // Assume that Zext is done using AND.
703  if (Opcode == Instruction::ZExt)
704  return 1;
705 
706  // Assume that sext is done using SHL and SRA.
707  if (Opcode == Instruction::SExt)
708  return 2;
709 
710  // Just check the op cost. If the operation is legal then assume it
711  // costs
712  // 1 and multiply by the type-legalization overhead.
713  if (!TLI->isOperationExpand(ISD, DstLT.second))
714  return SrcLT.first * 1;
715  }
716 
717  // If we are legalizing by splitting, query the concrete TTI for the cost
718  // of casting the original vector twice. We also need to factor in the
719  // cost of the split itself. Count that as 1, to be consistent with
720  // TLI->getTypeLegalizationCost().
721  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
722  TargetLowering::TypeSplitVector) ||
723  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
724  TargetLowering::TypeSplitVector)) {
725  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
726  Dst->getVectorNumElements() / 2);
727  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
728  Src->getVectorNumElements() / 2);
729  T *TTI = static_cast<T *>(this);
730  return TTI->getVectorSplitCost() +
731  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
732  }
733 
734  // In other cases where the source or destination are illegal, assume
735  // the operation will get scalarized.
736  unsigned Num = Dst->getVectorNumElements();
737  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
738  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
739 
740  // Return the cost of multiple scalar invocation plus the cost of
741  // inserting and extracting the values.
742  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
743  }
744 
745  // We already handled vector-to-vector and scalar-to-scalar conversions.
746  // This
747  // is where we handle bitcast between vectors and scalars. We need to assume
748  // that the conversion is scalarized in one way or another.
749  if (Opcode == Instruction::BitCast)
750  // Illegal bitcasts are done by storing and loading from a stack slot.
751  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
752  : 0) +
753  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
754  : 0);
755 
756  llvm_unreachable("Unhandled cast");
757  }
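  // Worked example (illustrative): a zext from <8 x i16> to <8 x i32> whose
  // operand is a load, on a target with a legal zero-extending vector load
  // (ISD::ZEXTLOAD) for that pair of types, is reported as free (cost 0) by
  // the extending-load check above. Otherwise, if ISD::ZERO_EXTEND is legal
  // or promoted on the legalized destination type and no extra splitting is
  // required, the cast costs 1; if the types have to be split, the cost is
  // built recursively from two half-width casts plus getVectorSplitCost().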
758 
759  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
760  VectorType *VecTy, unsigned Index) {
761  return static_cast<T *>(this)->getVectorInstrCost(
762  Instruction::ExtractElement, VecTy, Index) +
763  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
764  VecTy->getElementType());
765  }
766 
767  unsigned getCFInstrCost(unsigned Opcode) {
768  // Branches are assumed to be predicted.
769  return 0;
770  }
771 
772  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
773  const Instruction *I) {
774  const TargetLoweringBase *TLI = getTLI();
775  int ISD = TLI->InstructionOpcodeToISD(Opcode);
776  assert(ISD && "Invalid opcode");
777 
778  // Selects on vectors are actually vector selects.
779  if (ISD == ISD::SELECT) {
780  assert(CondTy && "CondTy must exist");
781  if (CondTy->isVectorTy())
782  ISD = ISD::VSELECT;
783  }
784  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
785 
786  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
787  !TLI->isOperationExpand(ISD, LT.second)) {
788  // The operation is legal. Assume it costs 1. Multiply
789  // by the type-legalization overhead.
790  return LT.first * 1;
791  }
792 
793  // Otherwise, assume that the cast is scalarized.
794  // TODO: If one of the types get legalized by splitting, handle this
795  // similarly to what getCastInstrCost() does.
796  if (ValTy->isVectorTy()) {
797  unsigned Num = ValTy->getVectorNumElements();
798  if (CondTy)
799  CondTy = CondTy->getScalarType();
800  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
801  Opcode, ValTy->getScalarType(), CondTy, I);
802 
803  // Return the cost of multiple scalar invocation plus the cost of
804  // inserting and extracting the values.
805  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
806  }
807 
808  // Unknown scalar opcode.
809  return 1;
810  }
811 
812  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
813  std::pair<unsigned, MVT> LT =
814  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
815 
816  return LT.first;
817  }
818 
819  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
820  unsigned AddressSpace, const Instruction *I = nullptr) {
821  assert(!Src->isVoidTy() && "Invalid type");
822  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
823 
824  // Assuming that all loads of legal types cost 1.
825  unsigned Cost = LT.first;
826 
827  if (Src->isVectorTy() &&
828  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
829  // This is a vector load that legalizes to a larger type than the vector
830  // itself. Unless the corresponding extending load or truncating store is
831  // legal, then this will scalarize.
832  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
833  EVT MemVT = getTLI()->getValueType(DL, Src);
834  if (Opcode == Instruction::Store)
835  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
836  else
837  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
838 
839  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
840  // This is a vector load/store for some illegal type that is scalarized.
841  // We must account for the cost of building or decomposing the vector.
842  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
843  Opcode == Instruction::Store);
844  }
845  }
846 
847  return Cost;
848  }
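  // Worked example (illustrative, hypothetical unit insert/extract costs): a
  // load of <4 x i32> that is legal as v4i32 costs LT.first == 1. A load of
  // <3 x i32> also legalizes to v4i32, but its 96 bits are narrower than the
  // 128-bit legal type; unless the target has a legal or custom extending
  // load for that pair, the code above adds getScalarizationOverhead(Src,
  // /*Insert=*/true, /*Extract=*/false) == 3, for a total estimate of 4.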
849 
850  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
851  unsigned Factor,
852  ArrayRef<unsigned> Indices,
853  unsigned Alignment, unsigned AddressSpace,
854  bool UseMaskForCond = false,
855  bool UseMaskForGaps = false) {
856  VectorType *VT = dyn_cast<VectorType>(VecTy);
857  assert(VT && "Expect a vector type for interleaved memory op");
858 
859  unsigned NumElts = VT->getNumElements();
860  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
861 
862  unsigned NumSubElts = NumElts / Factor;
863  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
864 
865  // Firstly, the cost of load/store operation.
866  unsigned Cost;
867  if (UseMaskForCond || UseMaskForGaps)
868  Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
869  Opcode, VecTy, Alignment, AddressSpace);
870  else
871  Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
872  AddressSpace);
873 
874  // Legalize the vector type, and get the legalized and unlegalized type
875  // sizes.
876  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
877  unsigned VecTySize =
878  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
879  unsigned VecTyLTSize = VecTyLT.getStoreSize();
880 
881  // Return the ceiling of dividing A by B.
882  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
883 
884  // Scale the cost of the memory operation by the fraction of legalized
885  // instructions that will actually be used. We shouldn't account for the
886  // cost of dead instructions since they will be removed.
887  //
888  // E.g., An interleaved load of factor 8:
889  // %vec = load <16 x i64>, <16 x i64>* %ptr
890  // %v0 = shufflevector %vec, undef, <0, 8>
891  //
892  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
893  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
894  // type). The other loads are unused.
895  //
896  // We only scale the cost of loads since interleaved store groups aren't
897  // allowed to have gaps.
898  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
899  // The number of loads of a legal type it will take to represent a load
900  // of the unlegalized vector type.
901  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
902 
903  // The number of elements of the unlegalized type that correspond to a
904  // single legal instruction.
905  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
906 
907  // Determine which legal instructions will be used.
908  BitVector UsedInsts(NumLegalInsts, false);
909  for (unsigned Index : Indices)
910  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
911  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
912 
913  // Scale the cost of the load by the fraction of legal instructions that
914  // will be used.
915  Cost *= UsedInsts.count() / NumLegalInsts;
916  }
917 
918  // Then plus the cost of interleave operation.
919  if (Opcode == Instruction::Load) {
920  // The interleave cost is similar to extract sub vectors' elements
921  // from the wide vector, and insert them into sub vectors.
922  //
923  // E.g. An interleaved load of factor 2 (with one member of index 0):
924  // %vec = load <8 x i32>, <8 x i32>* %ptr
925  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
926  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
927  // <8 x i32> vector and insert them into a <4 x i32> vector.
928 
929  assert(Indices.size() <= Factor &&
930  "Interleaved memory op has too many members");
931 
932  for (unsigned Index : Indices) {
933  assert(Index < Factor && "Invalid index for interleaved memory op");
934 
935  // Extract elements from loaded vector for each sub vector.
936  for (unsigned i = 0; i < NumSubElts; i++)
937  Cost += static_cast<T *>(this)->getVectorInstrCost(
938  Instruction::ExtractElement, VT, Index + i * Factor);
939  }
940 
941  unsigned InsSubCost = 0;
942  for (unsigned i = 0; i < NumSubElts; i++)
943  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
944  Instruction::InsertElement, SubVT, i);
945 
946  Cost += Indices.size() * InsSubCost;
947  } else {
948  // The interleave cost is extract all elements from sub vectors, and
949  // insert them into the wide vector.
950  //
951  // E.g. An interleaved store of factor 2:
952  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
953  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
954  // The cost is estimated as extract all elements from both <4 x i32>
955  // vectors and insert into the <8 x i32> vector.
956 
957  unsigned ExtSubCost = 0;
958  for (unsigned i = 0; i < NumSubElts; i++)
959  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
960  Instruction::ExtractElement, SubVT, i);
961  Cost += ExtSubCost * Factor;
962 
963  for (unsigned i = 0; i < NumElts; i++)
964  Cost += static_cast<T *>(this)
965  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
966  }
967 
968  if (!UseMaskForCond)
969  return Cost;
970 
971  Type *I8Type = Type::getInt8Ty(VT->getContext());
972  VectorType *MaskVT = VectorType::get(I8Type, NumElts);
973  SubVT = VectorType::get(I8Type, NumSubElts);
974 
975  // The Mask shuffling cost is extract all the elements of the Mask
976  // and insert each of them Factor times into the wide vector:
977  //
978  // E.g. an interleaved group with factor 3:
979  // %mask = icmp ult <8 x i32> %vec1, %vec2
980  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
981  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
982  // The cost is estimated as extract all mask elements from the <8xi1> mask
983  // vector and insert them factor times into the <24xi1> shuffled mask
984  // vector.
985  for (unsigned i = 0; i < NumSubElts; i++)
986  Cost += static_cast<T *>(this)->getVectorInstrCost(
987  Instruction::ExtractElement, SubVT, i);
988 
989  for (unsigned i = 0; i < NumElts; i++)
990  Cost += static_cast<T *>(this)->getVectorInstrCost(
991  Instruction::InsertElement, MaskVT, i);
992 
993  // The Gaps mask is invariant and created outside the loop, therefore the
994  // cost of creating it is not accounted for here. However if we have both
995  // a MaskForGaps and some other mask that guards the execution of the
996  // memory access, we need to account for the cost of And-ing the two masks
997  // inside the loop.
998  if (UseMaskForGaps)
999  Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1000  BinaryOperator::And, MaskVT);
1001 
1002  return Cost;
1003  }
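  // Worked example (illustrative, hypothetical unit costs, 128-bit legal
  // vectors): an interleaved load of factor 2 from <8 x i32> with a single
  // member at index 0. The plain <8 x i32> load legalizes to two v4i32 loads
  // (memory cost 2), and both legalized loads feed used lanes (0, 2, 4, 6),
  // so the scaling step leaves that cost unchanged. The shuffle part then
  // adds 4 extracts from the wide vector plus 4 inserts into the <4 x i32>
  // subvector, for an overall estimate of 2 + 8 = 10.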
1004 
1005  /// Get intrinsic cost based on arguments.
1006  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1007  ArrayRef<Value *> Args, FastMathFlags FMF,
1008  unsigned VF = 1) {
1009  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1010  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1011  auto *ConcreteTTI = static_cast<T *>(this);
1012 
1013  switch (IID) {
1014  default: {
1015  // Assume that we need to scalarize this intrinsic.
1016  SmallVector<Type *, 4> Types;
1017  for (Value *Op : Args) {
1018  Type *OpTy = Op->getType();
1019  assert(VF == 1 || !OpTy->isVectorTy());
1020  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1021  }
1022 
1023  if (VF > 1 && !RetTy->isVoidTy())
1024  RetTy = VectorType::get(RetTy, VF);
1025 
1026  // Compute the scalarization overhead based on Args for a vector
1027  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1028  // CostModel will pass a vector RetTy and VF is 1.
1029  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1030  if (RetVF > 1 || VF > 1) {
1031  ScalarizationCost = 0;
1032  if (!RetTy->isVoidTy())
1033  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1034  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1035  }
1036 
1037  return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1038  ScalarizationCost);
1039  }
1040  case Intrinsic::masked_scatter: {
1041  assert(VF == 1 && "Can't vectorize types here.");
1042  Value *Mask = Args[3];
1043  bool VarMask = !isa<Constant>(Mask);
1044  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1045  return ConcreteTTI->getGatherScatterOpCost(
1046  Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1047  }
1048  case Intrinsic::masked_gather: {
1049  assert(VF == 1 && "Can't vectorize types here.");
1050  Value *Mask = Args[2];
1051  bool VarMask = !isa<Constant>(Mask);
1052  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1053  return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1054  Args[0], VarMask, Alignment);
1055  }
1056  case Intrinsic::experimental_vector_reduce_add:
1057  case Intrinsic::experimental_vector_reduce_mul:
1058  case Intrinsic::experimental_vector_reduce_and:
1059  case Intrinsic::experimental_vector_reduce_or:
1060  case Intrinsic::experimental_vector_reduce_xor:
1061  case Intrinsic::experimental_vector_reduce_fadd:
1062  case Intrinsic::experimental_vector_reduce_fmul:
1063  case Intrinsic::experimental_vector_reduce_smax:
1064  case Intrinsic::experimental_vector_reduce_smin:
1065  case Intrinsic::experimental_vector_reduce_fmax:
1066  case Intrinsic::experimental_vector_reduce_fmin:
1067  case Intrinsic::experimental_vector_reduce_umax:
1068  case Intrinsic::experimental_vector_reduce_umin:
1069  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1070  case Intrinsic::fshl:
1071  case Intrinsic::fshr: {
1072  Value *X = Args[0];
1073  Value *Y = Args[1];
1074  Value *Z = Args[2];
1075  TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1076  TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1077  TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1078  TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1079  TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1080  OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1081  : TTI::OP_None;
1082  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1083  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1084  unsigned Cost = 0;
1085  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1086  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1087  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1088  OpKindX, OpKindZ, OpPropsX);
1089  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1090  OpKindY, OpKindZ, OpPropsY);
1091  // Non-constant shift amounts require a modulo.
1092  if (OpKindZ != TTI::OK_UniformConstantValue &&
1093  OpKindZ != TTI::OK_NonUniformConstantValue)
1094  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1095  OpKindZ, OpKindBW, OpPropsZ,
1096  OpPropsBW);
1097  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1098  if (X != Y) {
1099  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1100  if (RetVF > 1)
1101  CondTy = VectorType::get(CondTy, RetVF);
1102  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1103  CondTy, nullptr);
1104  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1105  CondTy, nullptr);
1106  }
1107  return Cost;
1108  }
1109  }
1110  }
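  // Worked example (illustrative, hypothetical unit arithmetic costs): a
  // scalar i32 rotate, i.e. fshl with X == Y and a constant shift amount, is
  // priced from the expansion above as or + sub + shl + lshr = 4. A funnel
  // shift with X != Y and a variable shift amount additionally pays for the
  // urem on the shift amount and for the icmp/select pair that guards the
  // shift-by-zero case.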
1111 
1112  /// Get intrinsic cost based on argument types.
1113  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1114  /// cost of scalarizing the arguments and the return value will be computed
1115  /// based on types.
1116  unsigned getIntrinsicInstrCost(
1117  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1118  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1119  SmallVector<unsigned, 2> ISDs;
1120  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1121  switch (IID) {
1122  default: {
1123  // Assume that we need to scalarize this intrinsic.
1124  unsigned ScalarizationCost = ScalarizationCostPassed;
1125  unsigned ScalarCalls = 1;
1126  Type *ScalarRetTy = RetTy;
1127  if (RetTy->isVectorTy()) {
1128  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1129  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1130  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1131  ScalarRetTy = RetTy->getScalarType();
1132  }
1133  SmallVector<Type *, 4> ScalarTys;
1134  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1135  Type *Ty = Tys[i];
1136  if (Ty->isVectorTy()) {
1137  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1138  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1139  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1140  Ty = Ty->getScalarType();
1141  }
1142  ScalarTys.push_back(Ty);
1143  }
1144  if (ScalarCalls == 1)
1145  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1146 
1147  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1148  IID, ScalarRetTy, ScalarTys, FMF);
1149 
1150  return ScalarCalls * ScalarCost + ScalarizationCost;
1151  }
1152  // Look for intrinsics that can be lowered directly or turned into a scalar
1153  // intrinsic call.
1154  case Intrinsic::sqrt:
1155  ISDs.push_back(ISD::FSQRT);
1156  break;
1157  case Intrinsic::sin:
1158  ISDs.push_back(ISD::FSIN);
1159  break;
1160  case Intrinsic::cos:
1161  ISDs.push_back(ISD::FCOS);
1162  break;
1163  case Intrinsic::exp:
1164  ISDs.push_back(ISD::FEXP);
1165  break;
1166  case Intrinsic::exp2:
1167  ISDs.push_back(ISD::FEXP2);
1168  break;
1169  case Intrinsic::log:
1170  ISDs.push_back(ISD::FLOG);
1171  break;
1172  case Intrinsic::log10:
1173  ISDs.push_back(ISD::FLOG10);
1174  break;
1175  case Intrinsic::log2:
1176  ISDs.push_back(ISD::FLOG2);
1177  break;
1178  case Intrinsic::fabs:
1179  ISDs.push_back(ISD::FABS);
1180  break;
1181  case Intrinsic::canonicalize:
1182  ISDs.push_back(ISD::FCANONICALIZE);
1183  break;
1184  case Intrinsic::minnum:
1185  ISDs.push_back(ISD::FMINNUM);
1186  if (FMF.noNaNs())
1187  ISDs.push_back(ISD::FMINIMUM);
1188  break;
1189  case Intrinsic::maxnum:
1190  ISDs.push_back(ISD::FMAXNUM);
1191  if (FMF.noNaNs())
1192  ISDs.push_back(ISD::FMAXIMUM);
1193  break;
1194  case Intrinsic::copysign:
1195  ISDs.push_back(ISD::FCOPYSIGN);
1196  break;
1197  case Intrinsic::floor:
1198  ISDs.push_back(ISD::FFLOOR);
1199  break;
1200  case Intrinsic::ceil:
1201  ISDs.push_back(ISD::FCEIL);
1202  break;
1203  case Intrinsic::trunc:
1204  ISDs.push_back(ISD::FTRUNC);
1205  break;
1206  case Intrinsic::nearbyint:
1207  ISDs.push_back(ISD::FNEARBYINT);
1208  break;
1209  case Intrinsic::rint:
1210  ISDs.push_back(ISD::FRINT);
1211  break;
1212  case Intrinsic::round:
1213  ISDs.push_back(ISD::FROUND);
1214  break;
1215  case Intrinsic::pow:
1216  ISDs.push_back(ISD::FPOW);
1217  break;
1218  case Intrinsic::fma:
1219  ISDs.push_back(ISD::FMA);
1220  break;
1221  case Intrinsic::fmuladd:
1222  ISDs.push_back(ISD::FMA);
1223  break;
1224  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1225  case Intrinsic::lifetime_start:
1226  case Intrinsic::lifetime_end:
1227  case Intrinsic::sideeffect:
1228  return 0;
1229  case Intrinsic::masked_store:
1230  return static_cast<T *>(this)
1231  ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
1232  case Intrinsic::masked_load:
1233  return static_cast<T *>(this)
1234  ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1235  case Intrinsic::experimental_vector_reduce_add:
1236  return static_cast<T *>(this)->getArithmeticReductionCost(
1237  Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
1238  case Intrinsic::experimental_vector_reduce_mul:
1239  return static_cast<T *>(this)->getArithmeticReductionCost(
1240  Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
1241  case Intrinsic::experimental_vector_reduce_and:
1242  return static_cast<T *>(this)->getArithmeticReductionCost(
1243  Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
1244  case Intrinsic::experimental_vector_reduce_or:
1245  return static_cast<T *>(this)->getArithmeticReductionCost(
1246  Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
1247  case Intrinsic::experimental_vector_reduce_xor:
1248  return static_cast<T *>(this)->getArithmeticReductionCost(
1249  Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
1250  case Intrinsic::experimental_vector_reduce_fadd:
1251  return static_cast<T *>(this)->getArithmeticReductionCost(
1252  Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
1253  case Intrinsic::experimental_vector_reduce_fmul:
1254  return static_cast<T *>(this)->getArithmeticReductionCost(
1255  Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
1256  case Intrinsic::experimental_vector_reduce_smax:
1257  case Intrinsic::experimental_vector_reduce_smin:
1258  case Intrinsic::experimental_vector_reduce_fmax:
1259  case Intrinsic::experimental_vector_reduce_fmin:
1260  return static_cast<T *>(this)->getMinMaxReductionCost(
1261  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1262  /*IsSigned=*/true);
1263  case Intrinsic::experimental_vector_reduce_umax:
1264  case Intrinsic::experimental_vector_reduce_umin:
1265  return static_cast<T *>(this)->getMinMaxReductionCost(
1266  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1267  /*IsSigned=*/false);
1268  case Intrinsic::ctpop:
1269  ISDs.push_back(ISD::CTPOP);
1270  // In case of legalization use TCC_Expensive. This is cheaper than a
1271  // library call but still not a cheap instruction.
1272  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1273  break;
1274  // FIXME: ctlz, cttz, ...
1275  }
1276 
1277  const TargetLoweringBase *TLI = getTLI();
1278  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1279 
1280  SmallVector<unsigned, 2> LegalCost;
1281  SmallVector<unsigned, 2> CustomCost;
1282  for (unsigned ISD : ISDs) {
1283  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1284  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1285  TLI->isFAbsFree(LT.second)) {
1286  return 0;
1287  }
1288 
1289  // The operation is legal. Assume it costs 1.
1290  // If the type is split to multiple registers, assume that there is some
1291  // overhead to this.
1292  // TODO: Once we have extract/insert subvector cost we need to use them.
1293  if (LT.first > 1)
1294  LegalCost.push_back(LT.first * 2);
1295  else
1296  LegalCost.push_back(LT.first * 1);
1297  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1298  // If the operation is custom lowered then assume
1299  // that the code is twice as expensive.
1300  CustomCost.push_back(LT.first * 2);
1301  }
1302  }
1303 
1304  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1305  if (MinLegalCostI != LegalCost.end())
1306  return *MinLegalCostI;
1307 
1308  auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
1309  if (MinCustomCostI != CustomCost.end())
1310  return *MinCustomCostI;
1311 
1312  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1313  // point mul followed by an add.
1314  if (IID == Intrinsic::fmuladd)
1315  return static_cast<T *>(this)
1316  ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1317  static_cast<T *>(this)
1318  ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1319 
1320  // Else, assume that we need to scalarize this intrinsic. For math builtins
1321  // this will emit a costly libcall, adding call overhead and spills. Make it
1322  // very expensive.
1323  if (RetTy->isVectorTy()) {
1324  unsigned ScalarizationCost =
1325  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1326  ? ScalarizationCostPassed
1327  : getScalarizationOverhead(RetTy, true, false));
1328  unsigned ScalarCalls = RetTy->getVectorNumElements();
1329  SmallVector<Type *, 4> ScalarTys;
1330  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1331  Type *Ty = Tys[i];
1332  if (Ty->isVectorTy())
1333  Ty = Ty->getScalarType();
1334  ScalarTys.push_back(Ty);
1335  }
1336  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1337  IID, RetTy->getScalarType(), ScalarTys, FMF);
1338  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1339  if (Tys[i]->isVectorTy()) {
1340  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1341  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1342  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1343  }
1344  }
1345 
1346  return ScalarCalls * ScalarCost + ScalarizationCost;
1347  }
1348 
1349  // This is going to be turned into a library call, make it expensive.
1350  return SingleCallCost;
1351  }
1352 
1353  /// Compute a cost of the given call instruction.
1354  ///
1355  /// Compute the cost of calling function F with return type RetTy and
1356  /// argument types Tys. F might be nullptr, in this case the cost of an
1357  /// arbitrary call with the specified signature will be returned.
1358  /// This is used, for instance, when we estimate call of a vector
1359  /// counterpart of the given function.
1360  /// \param F Called function, might be nullptr.
1361  /// \param RetTy Return value types.
1362  /// \param Tys Argument types.
1363  /// \returns The cost of Call instruction.
1364  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1365  return 10;
1366  }
1367 
1368  unsigned getNumberOfParts(Type *Tp) {
1369  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1370  return LT.first;
1371  }
1372 
1373  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1374  const SCEV *) {
1375  return 0;
1376  }
1377 
1378  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1379  /// We're assuming that reduction operation are performing the following way:
1380  /// 1. Non-pairwise reduction
1381  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1382  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1383  /// \----------------v-------------/ \----------v------------/
1384  /// n/2 elements n/2 elements
1385  /// %red1 = op <n x t> %val, <n x t> val1
1386  /// After this operation we have a vector %red1 where only the first n/2
1387  /// elements are meaningful, the second n/2 elements are undefined and can be
1388  /// dropped. All other operations are actually working with the vector of
1389  /// length n/2, not n, though the real vector length is still n.
1390  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1391  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1392  /// \----------------v-------------/ \----------v------------/
1393  /// n/4 elements 3*n/4 elements
1394  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1395  /// length n/2, the resulting vector has length n/4 etc.
1396  /// 2. Pairwise reduction:
1397  /// Everything is the same except for an additional shuffle operation which
1398  /// is used to produce operands for pairwise kind of reductions.
1399  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1400  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1401  /// \-------------v----------/ \----------v------------/
1402  /// n/2 elements n/2 elements
1403  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1404  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1405  /// \-------------v----------/ \----------v------------/
1406  /// n/2 elements n/2 elements
1407  /// %red1 = op <n x t> %val1, <n x t> val2
1408  /// Again, the operation is performed on <n x t> vector, but the resulting
1409  /// vector %red1 is <n/2 x t> vector.
1410  ///
1411  /// The cost model should take into account that the actual length of the
1412  /// vector is reduced on each iteration.
1413  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1414  bool IsPairwise) {
1415  assert(Ty->isVectorTy() && "Expect a vector type");
1416  Type *ScalarTy = Ty->getVectorElementType();
1417  unsigned NumVecElts = Ty->getVectorNumElements();
1418  unsigned NumReduxLevels = Log2_32(NumVecElts);
1419  unsigned ArithCost = 0;
1420  unsigned ShuffleCost = 0;
1421  auto *ConcreteTTI = static_cast<T *>(this);
1422  std::pair<unsigned, MVT> LT =
1423  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1424  unsigned LongVectorCount = 0;
1425  unsigned MVTLen =
1426  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1427  while (NumVecElts > MVTLen) {
1428  NumVecElts /= 2;
1429  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1430  // Assume the pairwise shuffles add a cost.
1431  ShuffleCost += (IsPairwise + 1) *
1432  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1433  NumVecElts, SubTy);
1434  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1435  Ty = SubTy;
1436  ++LongVectorCount;
1437  }
1438 
1439  NumReduxLevels -= LongVectorCount;
1440 
1441  // The minimal length of the vector is limited by the real length of vector
1442  // operations performed on the current platform. That's why several final
1443  // reduction operations are performed on the vectors with the same
1444  // architecture-dependent length.
1445 
1446  // Non pairwise reductions need one shuffle per reduction level. Pairwise
1447  // reductions need two shuffles on every level, but the last one. On that
1448  // level one of the shuffles is <0, u, u, ...> which is identity.
1449  unsigned NumShuffles = NumReduxLevels;
1450  if (IsPairwise && NumReduxLevels >= 1)
1451  NumShuffles += NumReduxLevels - 1;
1452  ShuffleCost += NumShuffles *
1453  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1454  0, Ty);
1455  ArithCost += NumReduxLevels *
1456  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1457  return ShuffleCost + ArithCost +
1458  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1459  }
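  // Worked example (illustrative, hypothetical unit shuffle/arithmetic/
  // extract costs): a non-pairwise add reduction over <8 x i32> on a target
  // whose widest legal type is v4i32. The while loop runs once (8 -> 4
  // elements), charging one SK_ExtractSubvector shuffle and one v4i32 add.
  // Two reduction levels remain, charging two SK_PermuteSingleSrc shuffles
  // and two more adds, and the final scalar result costs one extractelement:
  // 1 + 1 + 2 + 2 + 1 = 7 in total.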
1460 
1461  /// Try to calculate op costs for min/max reduction operations.
1462  /// \param CondTy Conditional type for the Select instruction.
1463  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1464  bool) {
1465  assert(Ty->isVectorTy() && "Expect a vector type");
1466  Type *ScalarTy = Ty->getVectorElementType();
1467  Type *ScalarCondTy = CondTy->getVectorElementType();
1468  unsigned NumVecElts = Ty->getVectorNumElements();
1469  unsigned NumReduxLevels = Log2_32(NumVecElts);
1470  unsigned CmpOpcode;
1471  if (Ty->isFPOrFPVectorTy()) {
1472  CmpOpcode = Instruction::FCmp;
1473  } else {
1474  assert(Ty->isIntOrIntVectorTy() &&
1475  "expecting floating point or integer type for min/max reduction");
1476  CmpOpcode = Instruction::ICmp;
1477  }
1478  unsigned MinMaxCost = 0;
1479  unsigned ShuffleCost = 0;
1480  auto *ConcreteTTI = static_cast<T *>(this);
1481  std::pair<unsigned, MVT> LT =
1482  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1483  unsigned LongVectorCount = 0;
1484  unsigned MVTLen =
1485  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1486  while (NumVecElts > MVTLen) {
1487  NumVecElts /= 2;
1488  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1489  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1490 
1491  // Assume the pairwise shuffles add a cost.
1492  ShuffleCost += (IsPairwise + 1) *
1493  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1494  NumVecElts, SubTy);
1495  MinMaxCost +=
1496  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1497  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1498  nullptr);
1499  Ty = SubTy;
1500  ++LongVectorCount;
1501  }
1502 
1503  NumReduxLevels -= LongVectorCount;
1504 
1505  // The minimal length of the vector is limited by the real length of vector
1506  // operations performed on the current platform. That's why several final
1507  // reduction operations are performed on the vectors with the same
1508  // architecture-dependent length.
1509 
1510  // Non pairwise reductions need one shuffle per reduction level. Pairwise
1511  // reductions need two shuffles on every level, but the last one. On that
1512  // level one of the shuffles is <0, u, u, ...> which is identity.
1513  unsigned NumShuffles = NumReduxLevels;
1514  if (IsPairwise && NumReduxLevels >= 1)
1515  NumShuffles += NumReduxLevels - 1;
1516  ShuffleCost += NumShuffles *
1517  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1518  0, Ty);
1519  MinMaxCost +=
1520  NumReduxLevels *
1521  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1522  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1523  nullptr));
1524  // The last min/max should be in vector registers and we counted it above.
1525  // So just need a single extractelement.
1526  return ShuffleCost + MinMaxCost +
1527  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1528  }
1529 
1530  unsigned getVectorSplitCost() { return 1; }
1531 
1532  /// @}
1533 };
1534 
1535 /// Concrete BasicTTIImpl that can be used if no further customization
1536 /// is needed.
1537 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1538  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1539 
1540  friend BaseT;
1541 
1542  const TargetSubtargetInfo *ST;
1543  const TargetLoweringBase *TLI;
1544 
1545  const TargetSubtargetInfo *getST() const { return ST; }
1546  const TargetLoweringBase *getTLI() const { return TLI; }
1547 
1548 public:
1549  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1550 };
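// Illustrative sketch (hypothetical target, not from the LLVM sources): a
// backend-specific TTI normally derives from BasicTTIImplBase via CRTP and
// supplies getST()/getTLI(), exactly as the class comment above requires.
// All names prefixed with "MyTarget" are assumptions for the example.
//
//   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
//     using BaseT = BasicTTIImplBase<MyTargetTTIImpl>;
//     friend BaseT;
//
//     const TargetSubtargetInfo *ST;   // target-specific subtarget in practice
//     const TargetLoweringBase *TLI;
//
//     const TargetSubtargetInfo *getST() const { return ST; }
//     const TargetLoweringBase *getTLI() const { return TLI; }
//
//   public:
//     explicit MyTargetTTIImpl(const TargetMachine *TM, const Function &F)
//         : BaseT(TM, F.getParent()->getDataLayout()),
//           ST(TM->getSubtargetImpl(F)),
//           TLI(ST->getTargetLowering()) {}
//
//     // Override only the hooks whose generic cost model is wrong for the
//     // target, e.g. getArithmeticInstrCost() or getShuffleCost().
//   };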
1551 
1552 } // end namespace llvm
1553 
1554 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
Definition: BasicBlock.h:269
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:526
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:759
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:253
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:200
zlib-gnu style compression
This file implements a class to represent arbitrary precision integral constant values and operations...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it&#39;s free to truncate a value of type FromTy to type ToTy.
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:503
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:772
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:634
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:391
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Selects elements from the corresponding lane of either source operand.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:203
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Reverse the order of the vector.
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:243
unsigned getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:395
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:263
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
ExtractSubvector Index indicates start offset.
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:424
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
Machine Value Type.
Concrete BasicTTIImpl that can be used if no further customization is needed.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:850
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed=std::numeric_limits< unsigned >::max())
Get intrinsic cost based on argument types.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Expected to fold away in lowering.
AMDGPU Lower Kernel Arguments
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys)
Definition: BasicTTIImpl.h:300
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Merge elements from two source vectors into one with any shuffle mask.
unsigned getNumberOfParts(Type *Tp)
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool isProfitableToHoist(Instruction *I) const
Extended Value Type.
Definition: ValueTypes.h:34
static wasm::ValType getType(const TargetRegisterClass *RC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:218
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
OperandValueProperties
Additional properties of an operand&#39;s values.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:767
size_type size() const
Definition: SmallPtrSet.h:93
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:615
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:501
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:319
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:489
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
iterator end()
Definition: BasicBlock.h:271
unsigned getJumpBufSize() const
Returns the target&#39;s jmp_buf size in bytes (if never set, the default is 200)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
unsigned getVectorSplitCost()
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
AddressSpace
Definition: NVPTXBaseInfo.h:22
cl::opt< unsigned > PartialUnrollingThreshold
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:286
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:819
unsigned LoopMicroOpBufferSize
Definition: MCSchedule.h:281
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:211
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:213
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2)
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:638
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Definition: BasicTTIImpl.h:295
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
Definition: BasicTTIImpl.h:405
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
Parameters that control the generic loop unrolling transformation.
unsigned getJumpBufAlignment()
Definition: BasicTTIImpl.h:374
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
Establish a view to a call site for examination.
Definition: CallSite.h:711
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return &#39;Legal&#39;) or we ...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:422
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:812
block_iterator block_end() const
Definition: LoopInfo.h:155
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
InsertSubvector. Index indicates start offset.
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:249
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize)
Definition: BasicTTIImpl.h:317
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it&#39;s an indirect...
Definition: CallSite.h:107
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
const unsigned Kind
Multiway switch.
unsigned getScalarizationOverhead(Type *VecTy, ArrayRef< const Value *> Args)
Definition: BasicTTIImpl.h:549
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical &#39;add&#39; instruction.
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:209
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:419
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:606
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
bool isOperationLegalOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal using promotion...
Broadcast element 0 to all other elements.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:273
Type * getElementType() const
Definition: DerivedTypes.h:360
bool UpperBound
Allow using trip count upper bound to unroll loops.
virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
const DataLayout & getDataLayout() const
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:237
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand&#39;s possible values.
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:384
This pass exposes codegen information to IR-level passes.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:226
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:222
block_iterator block_begin() const
Definition: LoopInfo.h:154
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
The cost of a &#39;div&#39; instruction on x86.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
static OperandValueKind getOperandInfo(Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:914
BRIND - Indirect branch.
Definition: ISDOpcodes.h:634