16 #define DEBUG_TYPE "loop-data-prefetch" 42 cl::desc(
"Prefetch write addresses"));
46 cl::desc(
"Number of instructions to prefetch ahead"),
54 "max-prefetch-iters-ahead",
57 STATISTIC(NumPrefetches,
"Number of prefetches inserted");
62 class LoopDataPrefetch {
67 : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
72 bool runOnLoop(
Loop *L);
78 unsigned getMinPrefetchStride() {
81 return TTI->getMinPrefetchStride();
84 unsigned getPrefetchDistance() {
87 return TTI->getPrefetchDistance();
90 unsigned getMaxPrefetchIterationsAhead() {
93 return TTI->getMaxPrefetchIterationsAhead();
104 class LoopDataPrefetchLegacyPass :
public FunctionPass {
128 "Loop Data Prefetch",
false,
false)
138 return new LoopDataPrefetchLegacyPass();
141 bool LoopDataPrefetch::isStrideLargeEnough(
const SCEVAddRecExpr *AR) {
142 unsigned TargetMinStride = getMinPrefetchStride();
144 if (TargetMinStride <= 1)
153 unsigned AbsStride =
std::abs(ConstStride->getAPInt().getSExtValue());
154 return TargetMinStride <= AbsStride;
166 LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
167 bool Changed = LDP.run();
183 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
184 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
186 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
188 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
190 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
192 LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
196 bool LoopDataPrefetch::run() {
200 if (getPrefetchDistance() == 0)
202 assert(TTI->getCacheLineSize() &&
"Cache line size is not set for target");
204 bool MadeChange =
false;
208 MadeChange |= runOnLoop(*L);
213 bool LoopDataPrefetch::runOnLoop(
Loop *L) {
214 bool MadeChange =
false;
225 for (
const auto BB : L->
blocks()) {
229 if (
CallInst *CI = dyn_cast<CallInst>(&
I))
230 if (
Function *F = CI->getCalledFunction())
236 unsigned LoopSize = Metrics.
NumInsts;
240 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
244 if (ItersAhead > getMaxPrefetchIterationsAhead())
248 <<
" iterations ahead (loop size: " << LoopSize <<
") in " 252 for (
const auto BB : L->
blocks()) {
253 for (
auto &
I : *BB) {
257 if (
LoadInst *LMemI = dyn_cast<LoadInst>(&
I)) {
259 PtrValue = LMemI->getPointerOperand();
260 }
else if (
StoreInst *SMemI = dyn_cast<StoreInst>(&
I)) {
263 PtrValue = SMemI->getPointerOperand();
273 const SCEV *LSCEV = SE->getSCEV(PtrValue);
280 if (!isStrideLargeEnough(LSCEVAddRec))
286 bool DupPref =
false;
287 for (
const auto &PrefLoad : PrefLoads) {
288 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, PrefLoad.second);
290 dyn_cast<SCEVConstant>(PtrDiff)) {
291 int64_t
PD =
std::abs(ConstPtrDiff->getValue()->getSExtValue());
292 if (PD < (int64_t) TTI->getCacheLineSize()) {
301 const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
302 SE->getConstant(LSCEVAddRec->
getType(), ItersAhead),
307 PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
310 SCEVExpander SCEVE(*SE,
I.getModule()->getDataLayout(),
"prefaddr");
311 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
314 Module *M = BB->getParent()->getParent();
323 LLVM_DEBUG(
dbgs() <<
" Access: " << *PtrValue <<
", SCEV: " << *LSCEV
327 <<
"prefetched memory access";
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents lattice values for constants.
A Module instance is used to store all the information related to an LLVM module. ...
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
The main scalar evolution driver.
This class represents a function call, abstracting a target machine's calling convention.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
An instruction for reading from memory.
FunctionPass * createLoopDataPrefetchPass()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Analysis pass that exposes the LoopInfo for a function.
BlockT * getHeader() const
Type * getType() const
All values are typed, get the type of this value.
This node represents a polynomial recurrence on the trip count of the specified loop.
An instruction for storing to memory.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
A set of analyses that are preserved following a run of a transformation pass.
The instances of the Type class are immutable: once they are created, they are never changed...
df_iterator< T > df_end(const T &G)
Represent the analysis usage information of a pass.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues)
Add information about a block to the current state.
FunctionPass class - This class is used to implement most global optimizations.
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
This file provides the interface for LLVM's Loop Data Prefetching Pass.
A function analysis which provides an AssumptionCache.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Module.h This file contains the declarations for the Module class.
Utility to calculate the size and a few similar metrics for a set of basic blocks.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
df_iterator< T > df_begin(const T &G)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This class uses information about analyze scalars to rewrite expressions in canonical form...
loop data Loop Data Prefetch
Analysis pass that exposes the ScalarEvolution for a function.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This class represents an analyzed expression in the program.
static IntegerType * getInt32Ty(LLVMContext &C)
Represents a single loop in the control flow graph.
StringRef getName() const
Return a constant reference to the value's name.
const Function * getParent() const
Return the enclosing method, or null if none.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool mayReadFromMemory() const
Return true if this instruction may read memory.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void preserve()
Mark an analysis as preserved.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
The legacy pass manager's analysis pass to compute loop information.
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
unsigned NumInsts
Number of instructions in the analyzed blocks.
iterator_range< block_iterator > blocks() const
This class represents a constant integer value.