LLVM 8.0.1
AMDGPUTargetTransformInfo.h
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

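// Usage sketch (illustrative, not part of the original header): IR-level
// passes normally reach these classes through the generic TargetTransformInfo
// interface rather than instantiating them directly. Assuming a
// new-pass-manager FunctionAnalysisManager `FAM` and a Function `F` compiled
// for an AMDGPU target, a pass could query:
//
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//   bool Divergent = TTI.hasBranchDivergence();   // true for GCN (see below)
//   unsigned FlatAS = TTI.getFlatAddressSpace();  // -1 for graphics shaders
//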
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,
    AMDGPU::FeatureDebuggerEmitPrologue,
    AMDGPU::FeatureDebuggerInsertNops,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
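  // Sketch of how this list is intended to be used (the actual logic lives in
  // AMDGPUTargetTransformInfo.cpp): areInlineCompatible(), declared later in
  // this class, compares the caller's and callee's subtarget feature bits only
  // after masking them with ~InlineFeatureIgnoreList, so the ignored features
  // above never block inlining between functions built with different options.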

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
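  // Illustration (values as in the rate helpers above, with TCC_Basic == 1): a
  // 64-bit operation costed through get64BitInstrCost() is reported as 2 on
  // subtargets where hasHalfRate64Ops() is true and as 3 otherwise, versus 1
  // for a full-rate 32-bit operation.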

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return ST->hasFlatAddressSpace() ?
      AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  }
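  // Note: the unsigned -1 above is an intentionally invalid address-space
  // number; callers such as the InferAddressSpaces pass treat it as "no flat
  // address space" and skip rewriting generic pointers for graphics shaders.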

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 9; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H