Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting the number of waves to reduce cache thrashing.
More...
|
static cl::opt< unsigned > | MemBoundThresh ("amdgpu-membound-threshold", cl::init(50), cl::Hidden, cl::desc("Function mem bound threshold in %")) |
|
static cl::opt< unsigned > | LimitWaveThresh ("amdgpu-limit-wave-threshold", cl::init(50), cl::Hidden, cl::desc("Kernel limit wave threshold in %")) |
|
static cl::opt< unsigned > | IAWeight ("amdgpu-indirect-access-weight", cl::init(1000), cl::Hidden, cl::desc("Indirect access memory instruction weight")) |
|
static cl::opt< unsigned > | LSWeight ("amdgpu-large-stride-weight", cl::init(1000), cl::Hidden, cl::desc("Large stride memory access weight")) |
|
static cl::opt< unsigned > | LargeStrideThresh ("amdgpu-large-stride-threshold", cl::init(64), cl::Hidden, cl::desc("Large stride memory access threshold")) |
|
Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting the number of waves to reduce cache thrashing.
Definition in file AMDGPUPerfHintAnalysis.cpp.
Check if the instruction is a large stride memory access. The purpose is to identify memory access patterns like: x = a[i]; y = a[i+1000]; z = a[i+2000]; In the above example, the second and third memory accesses will be marked as large stride memory accesses.
Definition at line 62 of file AMDGPUPerfHintAnalysis.cpp.
References assert(), B, llvm::TargetLoweringBase::AddrMode::BaseGV, llvm::TargetLoweringBase::AddrMode::BaseOffs, llvm::SmallSet< T, N, C >::begin(), Callee, AMDGPUAS::CONSTANT_ADDRESS, AMDGPUAS::CONSTANT_ADDRESS_32BIT, llvm::dbgs(), E, llvm::SmallSet< T, N, C >::empty(), llvm::SmallSet< T, N, C >::erase(), F(), AMDGPUAS::FLAT_ADDRESS, GEP, llvm::CallSiteBase< FunTy, BBTy, ValTy, UserTy, UseTy, InstrTy, CallTy, InvokeTy, IterTy >::getCalledFunction(), llvm::Function::getCallingConv(), llvm::Module::getDataLayout(), llvm::Value::getName(), llvm::GlobalValue::getParent(), llvm::GetPointerBaseWithConstantOffset(), llvm::Value::getType(), AMDGPUAS::GLOBAL_ADDRESS, llvm::TargetLoweringBase::AddrMode::HasBaseReg, I, llvm::AMDGPUPerfHintAnalysis::FuncInfo::IAMInstCount, IAWeight, llvm::SmallSet< T, N, C >::insert(), llvm::AMDGPUPerfHintAnalysis::FuncInfo::InstCount, llvm::GlobalValue::isDeclaration(), llvm::AMDGPU::isEntryFunctionCC(), LargeStrideThresh, llvm::ARM_MB::LD, LimitWaveThresh, LLVM_DEBUG, AMDGPUAS::LOCAL_ADDRESS, llvm::AMDGPUPerfHintAnalysis::FuncInfo::LSMInstCount, LSWeight, MemBoundThresh, llvm::AMDGPUPerfHintAnalysis::FuncInfo::MemInstCount, MI, P, print(), runOnFunction(), and SI.