LLVM8Doxygen/AMDGPUPerfHintAnalysis_8cpp_source.html

 //===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// \brief Analyzes whether a function is potentially memory bound and whether
 /// a kernel may benefit from limiting the number of waves to reduce cache
 /// thrashing.
 ///
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
 #include "AMDGPUPerfHintAnalysis.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Support/CommandLine.h"

 using namespace llvm;

 #define DEBUG_TYPE "amdgpu-perf-hint"

 // Hidden tuning knobs for the heuristics in this file.

 // Percentage that MemInstCount * 100 / InstCount must exceed for a function
 // to be reported as memory bound (see AMDGPUPerfHint::isMemBound).
 static cl::opt<unsigned>
     MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden,
                    cl::desc("Function mem bound threshold in %"));

 // Percentage that the weighted memory-instruction ratio must exceed for a
 // function to be reported as needing the wave limiter (see
 // AMDGPUPerfHint::needLimitWave).
 static cl::opt<unsigned>
     LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden,
                     cl::desc("Kernel limit wave threshold in %"));

 // Multiplier applied to the indirect-access instruction count (IAMInstCount)
 // in the wave-limiter ratio.
 static cl::opt<unsigned>
     IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden,
              cl::desc("Indirect access memory instruction weight"));

 // Multiplier applied to the large-stride instruction count (LSMInstCount) in
 // the wave-limiter ratio.
 static cl::opt<unsigned>
     LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden,
              cl::desc("Large stride memory access weight"));

 // Two accesses off the same base whose constant offsets differ by more than
 // this value count as a large stride.
 // NOTE(review): the unit is presumably bytes, as produced by
 // GetPointerBaseWithConstantOffset - confirm.
 static cl::opt<unsigned>
     LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden,
                       cl::desc("Large stride memory access threshold"));

 // Statistics counters incremented in AMDGPUPerfHint::runOnFunction when a
 // newly analyzed function is classified as memory bound / needing the wave
 // limiter.
 STATISTIC(NumMemBound, "Number of functions marked as memory bound");
 STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

 // Pass identification; the ID object is also exported by reference as
 // AMDGPUPerfHintAnalysisID.
 char llvm::AMDGPUPerfHintAnalysis::ID = 0;
 char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

 // Registers the pass under the DEBUG_TYPE name.
 // NOTE(review): the two trailing flags are presumably "CFG only" and
 // "is analysis" - confirm against the INITIALIZE_PASS macro definition.
 INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
                 "Analysis if a function is memory bound", true, true)

 namespace {

 /// Per-invocation worker that fills the pass's FuncInfoMap with instruction
 /// counts (total, memory, indirect-access, large-stride) for a function and
 /// the defined functions it calls directly.
 struct AMDGPUPerfHint {
   // The pass calls the private static predicates isMemBound/needLimitWave.
   friend AMDGPUPerfHintAnalysis;

 public:
   /// \param FIM_ map owned by the pass; results are written into it.
   /// \param TLI_ target lowering used to query addressing-mode legality.
   /// The data layout is left null here and set by runOnFunction().
   AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
                  const TargetLowering *TLI_)
       : FIM(FIM_), DL(nullptr), TLI(TLI_) {}

   /// Analyze \p F (unless it already has an entry in the map) and update the
   /// statistics counters.
   void runOnFunction(Function &F);

 private:
   /// Decomposition of a memory access address into a base value plus a
   /// constant offset, as computed by makeMemAccessInfo().
   struct MemAccessInfo {
     const Value *V;    // Pointer operand of the memory instruction.
     const Value *Base; // Base pointer; null when no info was recorded.
     int64_t Offset;    // Constant offset from Base.
     MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {}
     /// True when this access and \p Reference share the same non-null base
     /// and their offsets differ by more than LargeStrideThresh.
     bool isLargeStride(MemAccessInfo &Reference) const;
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     // Debug/dump builds only. Dereferences V and Base, so both must be
     // non-null when the returned Printable is streamed.
     Printable print() const {
       return Printable([this](raw_ostream &OS) {
         OS << "Value: " << *V << '\n'
            << "Base: " << *Base << " Offset: " << Offset << '\n';
       });
     }
 #endif
   };

   /// Build the MemAccessInfo for a memory instruction; the result is left
   /// default-constructed for local-address accesses.
   MemAccessInfo makeMemAccessInfo(Instruction *) const;

   // Reset at the start of every basic block by visit() and updated by
   // isLargeStride(const Instruction *).
   MemAccessInfo LastAccess; // Last memory access info

   // Per-function results, shared with the owning pass.
   AMDGPUPerfHintAnalysis::FuncInfoMap &FIM;

   // Data layout of the module being analyzed; set in runOnFunction().
   const DataLayout *DL;

   // Used by visit() to decide whether a GEP is a legal addressing mode.
   const TargetLowering *TLI;

   /// Count instructions of \p F into FIM, recursing into defined callees.
   void visit(const Function &F);
   /// Ratio tests over a FuncInfo against the command-line thresholds.
   static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
   static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);

   /// True when the address of a global/flat memory access is itself derived
   /// from a load out of global, local or constant memory.
   bool isIndirectAccess(const Instruction *Inst) const;

   /// Check if the instruction is large stride.
   /// The purpose is to identify memory access pattern like:
   /// x = a[i];
   /// y = a[i+1000];
   /// z = a[i+2000];
   /// In the above example, the second and third memory access will be marked
   /// large stride memory access.
   bool isLargeStride(const Instruction *Inst);

   // Address-space classification of a value by its pointer type; all return
   // false for non-pointer values.
   bool isGlobalAddr(const Value *V) const;
   bool isLocalAddr(const Value *V) const;
   bool isConstantAddr(const Value *V) const;
 };

 /// Return the address operand of \p Inst when it is a load, store, atomic
 /// cmpxchg, atomic RMW or memory intrinsic (destination pointer); return null
 /// for every other instruction.
 static const Value *getMemoryInstrPtr(const Instruction *Inst) {
   if (const auto *Load = dyn_cast<LoadInst>(Inst))
     return Load->getPointerOperand();

   if (const auto *Store = dyn_cast<StoreInst>(Inst))
     return Store->getPointerOperand();

   if (const auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Inst))
     return CmpXchg->getPointerOperand();

   if (const auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
     return RMW->getPointerOperand();

   // For memory intrinsics only the destination is considered.
   if (const auto *MemOp = dyn_cast<AnyMemIntrinsic>(Inst))
     return MemOp->getRawDest();

   return nullptr;
 }

 bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
   LLVM_DEBUG(dbgs() << "[isIndirectAccess] " << *Inst << '\n');
   SmallSet<const Value *, 32> WorkSet;
   SmallSet<const Value *, 32> Visited;
   if (const Value *MO = getMemoryInstrPtr(Inst)) {
     if (isGlobalAddr(MO))
       WorkSet.insert(MO);
   }

   while (!WorkSet.empty()) {
     const Value *V = *WorkSet.begin();
     WorkSet.erase(*WorkSet.begin());
     if (!Visited.insert(V).second)
       continue;
     LLVM_DEBUG(dbgs() << "  check: " << *V << '\n');

     if (auto LD = dyn_cast<LoadInst>(V)) {
       auto M = LD->getPointerOperand();
       if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) {
         LLVM_DEBUG(dbgs() << "    is IA\n");
         return true;
       }
       continue;
     }

     if (auto GEP = dyn_cast<GetElementPtrInst>(V)) {
       auto P = GEP->getPointerOperand();
       WorkSet.insert(P);
       for (unsigned I = 1, E = GEP->getNumIndices() + 1; I != E; ++I)
         WorkSet.insert(GEP->getOperand(I));
       continue;
     }

     if (auto U = dyn_cast<UnaryInstruction>(V)) {
       WorkSet.insert(U->getOperand(0));
       continue;
     }

     if (auto BO = dyn_cast<BinaryOperator>(V)) {
       WorkSet.insert(BO->getOperand(0));
       WorkSet.insert(BO->getOperand(1));
       continue;
     }

     if (auto S = dyn_cast<SelectInst>(V)) {
       WorkSet.insert(S->getFalseValue());
       WorkSet.insert(S->getTrueValue());
       continue;
     }

     if (auto E = dyn_cast<ExtractElementInst>(V)) {
       WorkSet.insert(E->getVectorOperand());
       continue;
     }

     LLVM_DEBUG(dbgs() << "    dropped\n");
   }

   LLVM_DEBUG(dbgs() << "  is not IA\n");
   return false;
 }

 /// Count the instructions of \p F into its FuncInfo entry in FIM.
 ///
 /// Does nothing when \p F already has an entry; this also terminates mutual
 /// recursion. Otherwise each basic block is scanned:
 ///  - memory instructions bump MemInstCount and InstCount, plus IAMInstCount
 ///    and LSMInstCount when classified as indirect / large stride;
 ///  - calls to indirect callees or declarations count as one instruction;
 ///  - direct self-recursive calls are skipped;
 ///  - calls to other defined functions are visited recursively and their
 ///    counts are added to this function's;
 ///  - GEPs that form a legal addressing mode are not counted, since they will
 ///    likely be folded into the memory instruction;
 ///  - everything else counts as one instruction.
 ///
 /// DL must have been set before calling (see runOnFunction).
 void AMDGPUPerfHint::visit(const Function &F) {
   auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
   if (!FIP.second)
     return;

   // Hold the entry through a pointer, not a reference: the recursive visit()
   // below inserts into FIM, and growing the underlying hash table relocates
   // the stored values. The pointer is refreshed after every recursive call.
   AMDGPUPerfHintAnalysis::FuncInfo *FI = &FIP.first->second;

   LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');

   for (auto &B : F) {
     // Stride tracking does not carry across basic blocks.
     LastAccess = MemAccessInfo();
     for (auto &I : B) {
       if (getMemoryInstrPtr(&I)) {
         if (isIndirectAccess(&I))
           ++FI->IAMInstCount;
         if (isLargeStride(&I))
           ++FI->LSMInstCount;
         ++FI->MemInstCount;
         ++FI->InstCount;
         continue;
       }
       CallSite CS(const_cast<Instruction *>(&I));
       if (CS) {
         Function *Callee = CS.getCalledFunction();
         if (!Callee || Callee->isDeclaration()) {
           ++FI->InstCount;
           continue;
         }
         if (&F == Callee) // Handle immediate recursion
           continue;

         visit(*Callee);

         // FIM may have been rehashed by the call above; look both entries up
         // again before touching them.
         auto Self = FIM.find(&F);
         auto Loc = FIM.find(Callee);

         assert(Self != FIM.end() && "No func info");
         assert(Loc != FIM.end() && "No func info");
         FI = &Self->second;
         FI->MemInstCount += Loc->second.MemInstCount;
         FI->InstCount += Loc->second.InstCount;
         FI->IAMInstCount += Loc->second.IAMInstCount;
         FI->LSMInstCount += Loc->second.LSMInstCount;
       } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
         TargetLoweringBase::AddrMode AM;
         auto *Ptr = GetPointerBaseWithConstantOffset(GEP, AM.BaseOffs, *DL);
         AM.BaseGV = dyn_cast_or_null<GlobalValue>(const_cast<Value *>(Ptr));
         AM.HasBaseReg = !AM.BaseGV;
         if (TLI->isLegalAddressingMode(*DL, AM, GEP->getResultElementType(),
                                        GEP->getPointerAddressSpace()))
           // Offset will likely be folded into load or store
           continue;
         ++FI->InstCount;
       } else {
         ++FI->InstCount;
       }
     }
   }
 }

 /// Analyze \p F unless it already has an entry in FIM, then update the
 /// NumMemBound / NumLimitWave statistics. The wave-limiter statistic is only
 /// considered for functions with an entry-function calling convention.
 void AMDGPUPerfHint::runOnFunction(Function &F) {
   // Already handled, e.g. as a callee of a previously analyzed function.
   if (FIM.count(&F))
     return;

   DL = &F.getParent()->getDataLayout();

   visit(F);

   auto Entry = FIM.find(&F);
   assert(Entry != FIM.end() && "No func info");
   auto &Info = Entry->second;

   LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info.MemInstCount
                     << '\n'
                     << " IAMInst: " << Info.IAMInstCount << '\n'
                     << " LSMInst: " << Info.LSMInstCount << '\n'
                     << " TotalInst: " << Info.InstCount << '\n');

   if (isMemBound(Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
     ++NumMemBound;
   }

   const bool IsEntry = AMDGPU::isEntryFunctionCC(F.getCallingConv());
   if (IsEntry && needLimitWave(Info)) {
     LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
     ++NumLimitWave;
   }
 }

 /// Return true when memory instructions make up more than MemBoundThresh
 /// percent of the instructions counted in \p FI.
 ///
 /// An entry with no counted instructions is never memory bound; this also
 /// avoids dividing by zero. The percentage is computed in 64 bits so that
 /// MemInstCount * 100 cannot wrap.
 bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
   if (FI.InstCount == 0)
     return false;

   const unsigned Thresh = MemBoundThresh;
   const uint64_t Percent =
       static_cast<uint64_t>(FI.MemInstCount) * 100 / FI.InstCount;
   return Percent > Thresh;
 }

 /// Return true when the weighted share of memory instructions in \p FI
 /// exceeds LimitWaveThresh percent. Indirect accesses are weighted by IAWeight
 /// and large-stride accesses by LSWeight, on top of the plain memory
 /// instruction count.
 ///
 /// An entry with no counted instructions never needs the limiter; this also
 /// avoids dividing by zero. The sum is accumulated in 64 bits: with the
 /// default weight of 1000 the 32-bit product (count * weight * 100) wraps after
 /// roughly 43k weighted accesses.
 bool AMDGPUPerfHint::needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
   if (FI.InstCount == 0)
     return false;

   const unsigned IndirectWeight = IAWeight;
   const unsigned StrideWeight = LSWeight;
   const unsigned Thresh = LimitWaveThresh;

   const uint64_t Weighted =
       static_cast<uint64_t>(FI.MemInstCount) +
       static_cast<uint64_t>(FI.IAMInstCount) * IndirectWeight +
       static_cast<uint64_t>(FI.LSMInstCount) * StrideWeight;
   return Weighted * 100 / FI.InstCount > Thresh;
 }

 /// Return true when \p V has pointer type in the global or flat address
 /// space; false for any other pointer and for non-pointer values.
 bool AMDGPUPerfHint::isGlobalAddr(const Value *V) const {
   const auto *PtrTy = dyn_cast<PointerType>(V->getType());
   if (!PtrTy)
     return false;

   const unsigned AddrSpace = PtrTy->getAddressSpace();
   if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS)
     return true;
   // Flat likely points to global too.
   return AddrSpace == AMDGPUAS::FLAT_ADDRESS;
 }

 /// Return true when \p V has pointer type in the local address space; false
 /// for any other pointer and for non-pointer values.
 bool AMDGPUPerfHint::isLocalAddr(const Value *V) const {
   const auto *PtrTy = dyn_cast<PointerType>(V->getType());
   if (!PtrTy)
     return false;
   return PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }

 bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) {
   LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n');

   MemAccessInfo MAI = makeMemAccessInfo(const_cast<Instruction *>(Inst));
   bool IsLargeStride = MAI.isLargeStride(LastAccess);
   if (MAI.Base)
     LastAccess = std::move(MAI);

   return IsLargeStride;
 }

 /// Describe the address accessed by memory instruction \p Inst as a base value
 /// plus constant offset. The result stays default-constructed (null base) for
 /// accesses through a local-address pointer.
 ///
 /// \p Inst must be an instruction for which getMemoryInstrPtr() is non-null;
 /// the pointer is dereferenced for debug output.
 AMDGPUPerfHint::MemAccessInfo
 AMDGPUPerfHint::makeMemAccessInfo(Instruction *Inst) const {
   const Value *Ptr = getMemoryInstrPtr(Inst);
   LLVM_DEBUG(dbgs() << "[isLargeStride] MO: " << *Ptr << '\n');

   MemAccessInfo Info;
   // Do not treat local-addr memory access as large stride.
   if (!isLocalAddr(Ptr)) {
     Info.V = Ptr;
     Info.Base = GetPointerBaseWithConstantOffset(Ptr, Info.Offset, *DL);
   }
   return Info;
 }

 /// Return true when \p V has pointer type in the constant or 32-bit constant
 /// address space; false for any other pointer and for non-pointer values.
 bool AMDGPUPerfHint::isConstantAddr(const Value *V) const {
   const auto *PtrTy = dyn_cast<PointerType>(V->getType());
   if (!PtrTy)
     return false;

   const unsigned AddrSpace = PtrTy->getAddressSpace();
   if (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
     return true;
   return AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
 }

 /// Return true when this access and \p Reference share the same known base
 /// and their constant offsets are more than LargeStrideThresh apart. Accesses
 /// with an unknown base, or with different bases, are never a large stride.
 bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
     MemAccessInfo &Reference) const {

   const bool SameKnownBase = Base && Reference.Base && Base == Reference.Base;
   if (!SameKnownBase)
     return false;

   // Absolute distance between the two offsets.
   uint64_t Distance;
   if (Offset > Reference.Offset)
     Distance = Offset - Reference.Offset;
   else
     Distance = Reference.Offset - Offset;

   bool Result = Distance > LargeStrideThresh;
   LLVM_DEBUG(dbgs() << "[isLargeStride compare]\n"
                << print() << "<=>\n"
                << Reference.print() << "Result:" << Result << '\n');
   return Result;
 }
 } // namespace

 /// Pass entry point: run the perf-hint analysis on \p F using the target
 /// lowering of its subtarget. Does nothing when no TargetPassConfig is
 /// available. Always returns false.
 bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
   auto *PassConfig = getAnalysisIfAvailable<TargetPassConfig>();
   if (PassConfig == nullptr)
     return false;

   const TargetMachine &Machine = PassConfig->getTM<TargetMachine>();
   const TargetSubtargetInfo *Subtarget = Machine.getSubtargetImpl(F);
   const TargetLowering *Lowering = Subtarget->getTargetLowering();

   AMDGPUPerfHint Worker(FIM, Lowering);
   Worker.runOnFunction(F);
   return false;
 }

 bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
   auto FI = FIM.find(F);
   if (FI == FIM.end())
     return false;

   return AMDGPUPerfHint::isMemBound(FI->second);
 }

 bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
   auto FI = FIM.find(F);
   if (FI == FIM.end())
     return false;

   return AMDGPUPerfHint::needLimitWave(FI->second);
 }
llvm::AMDGPUPerfHintAnalysis
Definition: AMDGPUPerfHintAnalysis.h:23

TargetSubtargetInfo.h

SmallSet.h

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111

Base

llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2068

TargetPassConfig.h

Instructions.h

llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:24

llvm::Module
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65

llvm::ARM_MB::ST
Definition: ARMBaseInfo.h:74

IntrinsicInst.h

llvm::AMDGPUPerfHintAnalysis::needsWaveLimiter
bool needsWaveLimiter(const Function *F) const
Definition: AMDGPUPerfHintAnalysis.cpp:389

AMDGPU.h

llvm::AMDGPUPerfHintAnalysis::FuncInfo::IAMInstCount
unsigned IAMInstCount
Definition: AMDGPUPerfHintAnalysis.h:42

LimitWaveThresh
static cl::opt< unsigned > LimitWaveThresh("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden, cl::desc("Kernel limit wave threshold in %"))

AMDGPUAS::CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
Definition: AMDGPU.h:263

llvm::AMDGPUPerfHintAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass...
Definition: AMDGPUPerfHintAnalysis.cpp:368

AMDGPUBaseInfo.h

llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")

F
F(f)

llvm::Function
Definition: Function.h:60

GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:170

Statistic.h

llvm::SmallSet::erase
bool erase(const T &V)
Definition: SmallSet.h:208

llvm::cl::Hidden
Definition: CommandLine.h:145

llvm::SmallSet::empty
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:156

llvm::SmallSet::begin
const_iterator begin() const
Definition: SmallSet.h:224

AMDGPUAS::CONSTANT_ADDRESS
Address space for constant memory (VTX2)
Definition: AMDGPU.h:259

llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:371

ValueMap.h

llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2071

llvm::Instruction
Definition: Instruction.h:44

llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:2760

SI
Definition: SIInstrInfo.cpp:5509

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245

llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset...
Definition: ValueTracking.cpp:3368

LSWeight
static cl::opt< unsigned > LSWeight("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden, cl::desc("Large stride memory access weight"))

llvm::SystemZISD::TM
Definition: SystemZISelLowering.h:68

LargeStrideThresh
static cl::opt< unsigned > LargeStrideThresh("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden, cl::desc("Large stride memory access threshold"))

Callee
amdgpu Simplify well known AMD library false Value * Callee
Definition: AMDGPULibCalls.cpp:220

llvm::AMDGPUPerfHintAnalysis::FuncInfo
Definition: AMDGPUPerfHintAnalysis.h:39

llvm::AMDGPUPerfHintAnalysis::FuncInfo::InstCount
unsigned InstCount
Definition: AMDGPUPerfHintAnalysis.h:41

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:66

P
#define P(N)

CommandLine.h

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423

llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2069

llvm::AMDGPUPerfHintAnalysis::ID
static char ID
Definition: AMDGPUPerfHintAnalysis.h:24

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

AMDGPUAS::FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:255

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

llvm::cl::desc
Definition: CommandLine.h:394

llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:633

llvm::AMDGPUPerfHintAnalysisID
char & AMDGPUPerfHintAnalysisID
Definition: AMDGPUPerfHintAnalysis.cpp:60

AMDGPUAS::LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:260

print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:145

IAWeight
static cl::opt< unsigned > IAWeight("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden, cl::desc("Indirect access memory instruction weight"))

INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE, "Analysis if a function is memory bound", true, true) namespace
Definition: AMDGPUPerfHintAnalysis.cpp:62

llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181

AMDGPUAS::GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:256

llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:296

llvm::ValueMap< const Function *, FuncInfo >

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:213

MemBoundThresh
static cl::opt< unsigned > MemBoundThresh("amdgpu-membound-threshold", cl::init(50), cl::Hidden, cl::desc("Function mem bound threshold in %"))

Module.h
Module.h This file contains the declarations for the Module class.

llvm::AMDGPUPerfHintAnalysis::FuncInfo::LSMInstCount
unsigned LSMInstCount
Definition: AMDGPUPerfHintAnalysis.h:43

llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: TargetMachine.h:111

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133

DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPerfHintAnalysis.cpp:34

llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2070

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:62

llvm::cl::opt
Definition: CommandLine.h:1300

llvm::AMDGPUPerfHintAnalysis::isMemoryBound
bool isMemoryBound(const Function *F) const
Definition: AMDGPUPerfHintAnalysis.cpp:381

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214

I
#define I(x, y, z)
Definition: MD5.cpp:58

llvm::ARM_MB::LD
Definition: ARMBaseInfo.h:73

ValueTracking.h

llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206

llvm::CallSiteBase::getCalledFunction
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

llvm::AMDGPUPerfHintAnalysis::FuncInfo::MemInstCount
unsigned MemInstCount
Definition: AMDGPUPerfHintAnalysis.h:40

llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566

llvm::Value
LLVM Value Representation.
Definition: Value.h:73

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:89

llvm::Printable
Simple wrapper around std::function<void(raw_ostream&)>.
Definition: Printable.h:38

llvm::CallSite
Definition: CallSite.h:663

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:123

AMDGPUPerfHintAnalysis.h
Analyzes whether a function is potentially memory bound and whether a kernel may benefit from limiting numb...

TargetLowering.h
This file describes how to lower LLVM code to machine code.