LLVM  8.0.1
HexagonTargetTransformInfo.cpp
Go to the documentation of this file.
1 //===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 /// \file
9 /// This file implements a TargetTransformInfo analysis pass specific to the
10 /// Hexagon target machine. It uses the target's detailed information to provide
11 /// more precise answers to certain TTI queries, while letting the target
12 /// independent and default TTI implementations handle the rest.
13 ///
14 //===----------------------------------------------------------------------===//
15 
17 #include "HexagonSubtarget.h"
20 #include "llvm/IR/InstrTypes.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/User.h"
23 #include "llvm/Support/Casting.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "hexagontti"
30 
31 static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
32  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
33 
34 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
35  cl::init(true), cl::Hidden,
36  cl::desc("Control lookup table emission on Hexagon target"));
37 
// Fixed multiplier that makes floating point operations look more expensive
// in the vectorization cost estimates. This is a crude approximation;
// ultimately the cost should be expressed in cycles.
static constexpr unsigned FloatFactor = 4;
42 
43 bool HexagonTTIImpl::useHVX() const {
44  return ST.useHVXOps() && HexagonAutoHVX;
45 }
46 
47 bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
48  assert(VecTy->isVectorTy());
49  // Avoid types like <2 x i32*>.
50  if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
51  return false;
52  EVT VecVT = EVT::getEVT(VecTy);
53  if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
54  return false;
55  if (ST.isHVXVectorType(VecVT.getSimpleVT()))
56  return true;
57  auto Action = TLI.getPreferredVectorAction(VecVT.getSimpleVT());
58  return Action == TargetLoweringBase::TypeWidenVector;
59 }
60 
61 unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
62  if (Ty->isVectorTy())
63  return Ty->getVectorNumElements();
64  assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
65  "Expecting scalar type");
66  return 1;
67 }
68 
70 HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
71  // Return fast hardware support as every input < 64 bits will be promoted
72  // to 64 bits.
74 }
75 
76 // The Hexagon target can unroll loops with run-time trip counts.
79  UP.Runtime = UP.Partial = true;
80  // Only try to peel innermost loops with small runtime trip counts.
81  if (L && L->empty() && canPeel(L) &&
82  SE.getSmallConstantTripCount(L) == 0 &&
83  SE.getSmallConstantMaxTripCount(L) > 0 &&
84  SE.getSmallConstantMaxTripCount(L) <= 5) {
85  UP.PeelCount = 2;
86  }
87 }
88 
90  return true;
91 }
92 
93 /// --- Vector TTI begin ---
94 
95 unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
96  if (Vector)
97  return useHVX() ? 32 : 0;
98  return 32;
99 }
100 
102  return useHVX() ? 2 : 0;
103 }
104 
105 unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
106  return Vector ? getMinVectorRegisterBitWidth() : 32;
107 }
108 
110  return useHVX() ? ST.getVectorLength()*8 : 0;
111 }
112 
113 unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
114  return (8 * ST.getVectorLength()) / ElemWidth;
115 }
116 
118  bool Extract) {
119  return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
120 }
121 
123  ArrayRef<const Value*> Args, unsigned VF) {
125 }
126 
128  ArrayRef<Type*> Tys) {
129  return BaseT::getCallInstrCost(F, RetTy, Tys);
130 }
131 
133  ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
134  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
135 }
136 
139  unsigned ScalarizationCostPassed) {
140  if (ID == Intrinsic::bswap) {
141  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
142  return LT.first + 2;
143  }
144  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
145  ScalarizationCostPassed);
146 }
147 
149  ScalarEvolution *SE, const SCEV *S) {
150  return 0;
151 }
152 
153 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
154  unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
155  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
156  if (Opcode == Instruction::Store)
157  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
158 
159  if (Src->isVectorTy()) {
160  VectorType *VecTy = cast<VectorType>(Src);
161  unsigned VecWidth = VecTy->getBitWidth();
162  if (useHVX() && isTypeForHVX(VecTy)) {
163  unsigned RegWidth = getRegisterBitWidth(true);
164  Alignment = std::min(Alignment, RegWidth/8);
165  // Cost of HVX loads.
166  if (VecWidth % RegWidth == 0)
167  return VecWidth / RegWidth;
168  // Cost of constructing HVX vector from scalar loads.
169  unsigned AlignWidth = 8 * std::max(1u, Alignment);
170  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
171  return 3*NumLoads;
172  }
173 
174  // Non-HVX vectors.
175  // Add extra cost for floating point types.
176  unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
177  : 1;
178  Alignment = std::min(Alignment, 8u);
179  unsigned AlignWidth = 8 * std::max(1u, Alignment);
180  unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
181  if (Alignment == 4 || Alignment == 8)
182  return Cost * NumLoads;
183  // Loads of less than 32 bits will need extra inserts to compose a vector.
184  unsigned LogA = Log2_32(Alignment);
185  return (3 - LogA) * Cost * NumLoads;
186  }
187 
188  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
189 }
190 
191 unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
192  Type *Src, unsigned Alignment, unsigned AddressSpace) {
193  return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
194 }
195 
197  int Index, Type *SubTp) {
198  return 1;
199 }
200 
201 unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
202  Value *Ptr, bool VariableMask, unsigned Alignment) {
203  return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
204  Alignment);
205 }
206 
208  Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
209  unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
210  bool UseMaskForGaps) {
211  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
212  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
213  Alignment, AddressSpace,
214  UseMaskForCond, UseMaskForGaps);
215  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
216 }
217 
218 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
219  Type *CondTy, const Instruction *I) {
220  if (ValTy->isVectorTy()) {
221  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
222  if (Opcode == Instruction::FCmp)
223  return LT.first + FloatFactor * getTypeNumElements(ValTy);
224  }
225  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
226 }
227 
228 unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
229  TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
230  TTI::OperandValueProperties Opd1PropInfo,
232  if (Ty->isVectorTy()) {
233  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
234  if (LT.second.isFloatingPoint())
235  return LT.first + FloatFactor * getTypeNumElements(Ty);
236  }
237  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
238  Opd1PropInfo, Opd2PropInfo, Args);
239 }
240 
241 unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
242  Type *SrcTy, const Instruction *I) {
243  if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
244  unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
245  unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
246 
247  std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
248  std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
249  return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
250  }
251  return 1;
252 }
253 
254 unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
255  unsigned Index) {
256  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
257  : Val;
258  if (Opcode == Instruction::InsertElement) {
259  // Need two rotations for non-zero index.
260  unsigned Cost = (Index != 0) ? 2 : 0;
261  if (ElemTy->isIntegerTy(32))
262  return Cost;
263  // If it's not a 32-bit value, there will need to be an extract.
264  return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
265  }
266 
267  if (Opcode == Instruction::ExtractElement)
268  return 2;
269 
270  return 1;
271 }
272 
273 /// --- Vector TTI end ---
274 
276  return ST.getL1PrefetchDistance();
277 }
278 
280  return ST.getL1CacheLineSize();
281 }
282 
284  ArrayRef<const Value *> Operands) {
285  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
286  if (!CI->isIntegerCast())
287  return false;
288  // Only extensions from an integer type shorter than 32-bit to i32
289  // can be folded into the load.
290  const DataLayout &DL = getDataLayout();
291  unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
292  unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
293  if (DBW != 32 || SBW >= DBW)
294  return false;
295 
296  const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
297  // Technically, this code could allow multiple uses of the load, and
298  // check if all the uses are the same extension operation, but this
299  // should be sufficient for most cases.
300  return LI && LI->hasOneUse();
301  };
302 
303  if (const CastInst *CI = dyn_cast<const CastInst>(U))
304  if (isCastFoldedIntoLoad(CI))
306  return BaseT::getUserCost(U, Operands);
307 }
308 
310  return EmitLookupTables;
311 }
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
Definition: BasicTTIImpl.h:568
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
int getUserCost(const User *U, ArrayRef< const Value *> Operands)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getMinimumVF(unsigned ElemWidth) const
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:507
The main scalar evolution driver.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
An instruction for reading from memory.
Definition: Instructions.h:168
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
unsigned getL1CacheLineSize() const
unsigned getSmallConstantMaxTripCount(const Loop *L)
Returns the upper bound of the loop trip count as a normal unsigned value.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value * > Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
unsigned getBitWidth() const
Return the number of bits in the Vector type.
Definition: DerivedTypes.h:452
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:526
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:772
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
unsigned getL1PrefetchDistance() const
PopcntSupportKind
Flags indicating the kind of support for population count.
unsigned getMinVectorRegisterBitWidth() const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S)
This file implements a TargetTransformInfo analysis pass specific to the Hexagon target machine...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:850
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
Expected to fold away in lowering.
unsigned getUserCost(const User *U, ArrayRef< const Value * > Operands)
unsigned getRegisterBitWidth(bool Vector) const
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Extended Value Type.
Definition: ValueTypes.h:34
OperandValueProperties
Additional properties of an operand's values.
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool shouldFavorPostInc() const
Bias LSR towards creating post-increment opportunities.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
AddressSpace
Definition: NVPTXBaseInfo.h:22
static cl::opt< bool > HexagonAutoHVX("hexagon-autohvx", cl::init(false), cl::Hidden, cl::desc("Enable loop vectorizer for HVX"))
bool canPeel(Loop *L)
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:819
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:568
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
unsigned getPrefetchDistance() const
— Vector TTI end —
unsigned getVectorLength() const
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
This class represents an analyzed expression in the program.
unsigned getNumberOfRegisters(bool vector) const
— Vector TTI begin —
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
Parameters that control the generic loop unrolling transformation.
static cl::opt< bool > EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target"))
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getMaxInterleaveFactor(unsigned VF)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
bool empty() const
Definition: LoopInfo.h:146
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Type * getElementType() const
Definition: DerivedTypes.h:360
const DataLayout & getDataLayout() const
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
This pass exposes codegen information to IR-level passes.
static const unsigned FloatFactor
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
ShuffleKind
The various kinds of shuffle patterns for vector queries.