LLVM  8.0.1
LoopUnrollAndJamPass.cpp
Go to the documentation of this file.
1 //===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements an unroll and jam pass. Most of the work is done by
11 // Utils/UnrollLoopAndJam.cpp.
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/None.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/LoopPass.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/Constant.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Metadata.h"
38 #include "llvm/IR/PassManager.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Debug.h"
45 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Utils.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <string>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "loop-unroll-and-jam"
58 
/// @{
/// Metadata attribute names
///
/// Names of the follow-up attributes that may be attached to a loop's
/// llvm.loop metadata; they are passed to makeFollowupLoopID() to build the
/// loop IDs of the loops produced by unroll-and-jam.
static const char *const LLVMLoopUnrollAndJamFollowupAll =
    "llvm.loop.unroll_and_jam.followup_all";
static const char *const LLVMLoopUnrollAndJamFollowupInner =
    "llvm.loop.unroll_and_jam.followup_inner";
static const char *const LLVMLoopUnrollAndJamFollowupOuter =
    "llvm.loop.unroll_and_jam.followup_outer";
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
    "llvm.loop.unroll_and_jam.followup_remainder_inner";
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
    "llvm.loop.unroll_and_jam.followup_remainder_outer";
/// @}
72 
73 static cl::opt<bool>
74  AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
75  cl::desc("Allows loops to be unroll-and-jammed."));
76 
78  "unroll-and-jam-count", cl::Hidden,
79  cl::desc("Use this unroll count for all loops including those with "
80  "unroll_and_jam_count pragma values, for testing purposes"));
81 
83  "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
84  cl::desc("Threshold to use for inner loop when doing unroll and jam."));
85 
87  "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
88  cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
89  "unroll_count pragma."));
90 
91 // Returns the loop hint metadata node with the given name (for example,
92 // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
93 // returned.
95  if (MDNode *LoopID = L->getLoopID())
96  return GetUnrollMetadata(LoopID, Name);
97  return nullptr;
98 }
99 
100 // Returns true if the loop has any metadata starting with Prefix. For example a
101 // Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
102 static bool HasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
103  if (MDNode *LoopID = L->getLoopID()) {
104  // First operand should refer to the loop id itself.
105  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
106  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
107 
108  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
109  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
110  if (!MD)
111  continue;
112 
113  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
114  if (!S)
115  continue;
116 
117  if (S->getString().startswith(Prefix))
118  return true;
119  }
120  }
121  return false;
122 }
123 
124 // Returns true if the loop has an unroll_and_jam(enable) pragma.
125 static bool HasUnrollAndJamEnablePragma(const Loop *L) {
126  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
127 }
128 
129 // If loop has an unroll_and_jam_count pragma return the (necessarily
130 // positive) value from the pragma. Otherwise return 0.
131 static unsigned UnrollAndJamCountPragmaValue(const Loop *L) {
132  MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");
133  if (MD) {
134  assert(MD->getNumOperands() == 2 &&
135  "Unroll count hint metadata should have two operands.");
136  unsigned Count =
137  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
138  assert(Count >= 1 && "Unroll count must be positive.");
139  return Count;
140  }
141  return 0;
142 }
143 
144 // Returns loop size estimation for unrolled loop.
145 static uint64_t
146 getUnrollAndJammedLoopSize(unsigned LoopSize,
148  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
149  return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
150 }
151 
152 // Calculates unroll and jam count and writes it to UP.Count. Returns true if
153 // unroll count was set explicitly.
155  Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
156  LoopInfo *LI, ScalarEvolution &SE,
157  const SmallPtrSetImpl<const Value *> &EphValues,
158  OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
159  unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount,
160  unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) {
161  // First up use computeUnrollCount from the loop unroller to get a count
162  // for unrolling the outer loop, plus any loops requiring explicit
163  // unrolling we leave to the unroller. This uses UP.Threshold /
164  // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
165  // We have already checked that the loop has no unroll.* pragmas.
166  unsigned MaxTripCount = 0;
167  bool UseUpperBound = false;
168  bool ExplicitUnroll = computeUnrollCount(
169  L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
170  OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
171  if (ExplicitUnroll || UseUpperBound) {
172  // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
173  // for the unroller instead.
174  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
175  "computeUnrollCount\n");
176  UP.Count = 0;
177  return false;
178  }
179 
180  // Override with any explicit Count from the "unroll-and-jam-count" option.
181  bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
182  if (UserUnrollCount) {
184  UP.Force = true;
185  if (UP.AllowRemainder &&
186  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
187  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
189  return true;
190  }
191 
192  // Check for unroll_and_jam pragmas
193  unsigned PragmaCount = UnrollAndJamCountPragmaValue(L);
194  if (PragmaCount > 0) {
195  UP.Count = PragmaCount;
196  UP.Runtime = true;
197  UP.Force = true;
198  if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
199  getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
200  getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
202  return true;
203  }
204 
205  bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L);
206  bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
207  bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;
208 
209  // If the loop has an unrolling pragma, we want to be more aggressive with
210  // unrolling limits.
211  if (ExplicitUnrollAndJam)
213 
214  if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
216  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
217  "inner loop too large\n");
218  UP.Count = 0;
219  return false;
220  }
221 
222  // We have a sensible limit for the outer loop, now adjust it for the inner
223  // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
224  // explicitly, we want to stick to it.
225  if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
226  while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
228  UP.Count--;
229  }
230 
231  // If we are explicitly unroll and jamming, we are done. Otherwise there are a
232  // number of extra performance heuristics to check.
233  if (ExplicitUnrollAndJam)
234  return true;
235 
236  // If the inner loop count is known and small, leave the entire loop nest to
237  // be the unroller
238  if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {
239  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
240  "being left for the unroller\n");
241  UP.Count = 0;
242  return false;
243  }
244 
245  // Check for situations where UnJ is likely to be unprofitable. Including
246  // subloops with more than 1 block.
247  if (SubLoop->getBlocks().size() != 1) {
248  LLVM_DEBUG(
249  dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
250  UP.Count = 0;
251  return false;
252  }
253 
254  // Limit to loops where there is something to gain from unrolling and
255  // jamming the loop. In this case, look for loads that are invariant in the
256  // outer loop and can become shared.
257  unsigned NumInvariant = 0;
258  for (BasicBlock *BB : SubLoop->getBlocks()) {
259  for (Instruction &I : *BB) {
260  if (auto *Ld = dyn_cast<LoadInst>(&I)) {
261  Value *V = Ld->getPointerOperand();
262  const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
263  if (SE.isLoopInvariant(LSCEV, L))
264  NumInvariant++;
265  }
266  }
267  }
268  if (NumInvariant == 0) {
269  LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
270  UP.Count = 0;
271  return false;
272  }
273 
274  return false;
275 }
276 
277 static LoopUnrollResult
279  ScalarEvolution &SE, const TargetTransformInfo &TTI,
281  OptimizationRemarkEmitter &ORE, int OptLevel) {
282  // Quick checks of the correct loop form
283  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1)
285  Loop *SubLoop = L->getSubLoops()[0];
286  if (!SubLoop->isLoopSimplifyForm())
288 
289  BasicBlock *Latch = L->getLoopLatch();
290  BasicBlock *Exit = L->getExitingBlock();
291  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
292  BasicBlock *SubLoopExit = SubLoop->getExitingBlock();
293 
294  if (Latch != Exit || SubLoopLatch != SubLoopExit)
296 
298  L, SE, TTI, OptLevel, None, None, None, None, None, None);
299  if (AllowUnrollAndJam.getNumOccurrences() > 0)
301  if (UnrollAndJamThreshold.getNumOccurrences() > 0)
303  // Exit early if unrolling is disabled.
306 
307  LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
308  << L->getHeader()->getParent()->getName() << "] Loop %"
309  << L->getHeader()->getName() << "\n");
310 
312  if (EnableMode & TM_Disable)
314 
315  // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
316  // the unroller, so long as it does not explicitly have unroll_and_jam
317  // metadata. This means #pragma nounroll will disable unroll and jam as well
318  // as unrolling
319  if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
320  !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
321  LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");
323  }
324 
325  if (!isSafeToUnrollAndJam(L, SE, DT, DI)) {
326  LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");
328  }
329 
330  // Approximate the loop size and collect useful info
331  unsigned NumInlineCandidates;
332  bool NotDuplicatable;
333  bool Convergent;
335  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
336  unsigned InnerLoopSize =
337  ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
338  Convergent, TTI, EphValues, UP.BEInsns);
339  unsigned OuterLoopSize =
340  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
341  TTI, EphValues, UP.BEInsns);
342  LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
343  LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
344  if (NotDuplicatable) {
345  LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
346  "instructions.\n");
348  }
349  if (NumInlineCandidates != 0) {
350  LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
352  }
353  if (Convergent) {
354  LLVM_DEBUG(
355  dbgs() << " Not unrolling loop with convergent instructions.\n");
357  }
358 
359  // Save original loop IDs for after the transformation.
360  MDNode *OrigOuterLoopID = L->getLoopID();
361  MDNode *OrigSubLoopID = SubLoop->getLoopID();
362 
363  // To assign the loop id of the epilogue, assign it before unrolling it so it
364  // is applied to every inner loop of the epilogue. We later apply the loop ID
365  // for the jammed inner loop.
366  Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
367  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
368  LLVMLoopUnrollAndJamFollowupRemainderInner});
369  if (NewInnerEpilogueLoopID.hasValue())
370  SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
371 
372  // Find trip count and trip multiple
373  unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
374  unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
375  unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
376 
377  // Decide if, and by how much, to unroll
378  bool IsCountSetExplicitly = computeUnrollAndJamCount(
379  L, SubLoop, TTI, DT, LI, SE, EphValues, &ORE, OuterTripCount,
380  OuterTripMultiple, OuterLoopSize, InnerTripCount, InnerLoopSize, UP);
381  if (UP.Count <= 1)
383  // Unroll factor (Count) must be less or equal to TripCount.
384  if (OuterTripCount && UP.Count > OuterTripCount)
385  UP.Count = OuterTripCount;
386 
387  Loop *EpilogueOuterLoop = nullptr;
388  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
389  L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
390  &SE, &DT, &AC, &ORE, &EpilogueOuterLoop);
391 
392  // Assign new loop attributes.
393  if (EpilogueOuterLoop) {
394  Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
395  OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
396  LLVMLoopUnrollAndJamFollowupRemainderOuter});
397  if (NewOuterEpilogueLoopID.hasValue())
398  EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
399  }
400 
401  Optional<MDNode *> NewInnerLoopID =
403  LLVMLoopUnrollAndJamFollowupInner});
404  if (NewInnerLoopID.hasValue())
405  SubLoop->setLoopID(NewInnerLoopID.getValue());
406  else
407  SubLoop->setLoopID(OrigSubLoopID);
408 
409  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
410  Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
411  OrigOuterLoopID,
412  {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
413  if (NewOuterLoopID.hasValue()) {
414  L->setLoopID(NewOuterLoopID.getValue());
415 
416  // Do not setLoopAlreadyUnrolled if a followup was given.
417  return UnrollResult;
418  }
419  }
420 
421  // If loop has an unroll count pragma or unrolled by explicitly set count
422  // mark loop as unrolled to prevent unrolling beyond that requested.
423  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
425 
426  return UnrollResult;
427 }
428 
429 namespace {
430 
431 class LoopUnrollAndJam : public LoopPass {
432 public:
433  static char ID; // Pass ID, replacement for typeid
434  unsigned OptLevel;
435 
436  LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
438  }
439 
440  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
441  if (skipLoop(L))
442  return false;
443 
444  Function &F = *L->getHeader()->getParent();
445 
446  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
447  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
448  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
449  const TargetTransformInfo &TTI =
450  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
451  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
452  auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
453  // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
454  // pass. Function analyses need to be preserved across loop transformations
455  // but ORE cannot be preserved (see comment before the pass definition).
457 
458  LoopUnrollResult Result =
459  tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
460 
461  if (Result == LoopUnrollResult::FullyUnrolled)
462  LPM.markLoopAsDeleted(*L);
463 
464  return Result != LoopUnrollResult::Unmodified;
465  }
466 
467  /// This transformation requires natural loop information & requires that
468  /// loop preheaders be inserted into the CFG...
469  void getAnalysisUsage(AnalysisUsage &AU) const override {
474  }
475 };
476 
477 } // end anonymous namespace
478 
479 char LoopUnrollAndJam::ID = 0;
480 
481 INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
482  "Unroll and Jam loops", false, false)
487 INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
488  "Unroll and Jam loops", false, false)
489 
491  return new LoopUnrollAndJam(OptLevel);
492 }
493 
496  LPMUpdater &) {
497  const auto &FAM =
498  AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
499  Function *F = L.getHeader()->getParent();
500 
501  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
502  // FIXME: This should probably be optional rather than required.
503  if (!ORE)
505  "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at "
506  "a higher level");
507 
508  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
509 
511  &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel);
512 
513  if (Result == LoopUnrollResult::Unmodified)
514  return PreservedAnalyses::all();
515 
517 }
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:81
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value *> &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop)...
Definition: CodeMetrics.cpp:72
const NoneType None
Definition: None.h:24
unsigned getSmallConstantTripCount(const Loop *L)
Returns the maximum trip count of the loop if it is a single-exit loop and we can compute a small max...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
unsigned getSmallConstantTripMultiple(const Loop *L)
Returns the largest constant divisor of the trip count of the loop if it is a single-exit loop and we...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
This header provides classes for managing a pipeline of passes over loops in LLVM IR...
static bool HasUnrollAndJamEnablePragma(const Loop *L)
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam", "Unroll and Jam loops", false, false) INITIALIZE_PASS_END(LoopUnrollAndJam
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))
Legacy pass manager pass to access dependence information.
The main scalar evolution driver.
This file contains the declarations for metadata subclasses.
An immutable pass that tracks lazily created AssumptionCache objects.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
A cache of @llvm.assume calls within a function.
static const char *const LLVMLoopUnrollAndJamFollowupOuter
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Metadata node.
Definition: Metadata.h:864
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1069
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound)
DependenceInfo - This class is the main dependence-analysis driver.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
amdgpu Simplify well known AMD library false Value Value const Twine & Name
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
TransformationMode hasUnrollAndJamTransformation(Loop *L)
Definition: LoopUtils.cpp:352
static const char *const LLVMLoopUnrollAndJamFollowupInner
BlockT * getHeader() const
Definition: LoopInfo.h:100
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:239
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:161
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI)
This header provides classes for managing per-loop analyses.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Pass * createLoopUnrollAndJamPass(int OptLevel=2)
StringRef getString() const
Definition: Metadata.cpp:464
loop unroll and Unroll and Jam loops
The loop was fully unrolled into straight-line code.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< bool > UserAllowPeeling)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:257
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value *> &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP)
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Represent the analysis usage information of a pass.
static unsigned UnrollAndJamCountPragmaValue(const Loop *L)
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
static MDNode * GetUnrollMetadataForLoop(const Loop *L, StringRef Name)
The loop was partially unrolled – we still have a loop, but with a smaller trip count.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
The transformation should not be applied.
Definition: LoopUtils.h:221
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value *> &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:1154
void markLoopAsDeleted(Loop &L)
Definition: LoopPass.cpp:143
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
Optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:246
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:896
loop unroll and jam
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:131
static const char *const LLVMLoopUnrollAndJamFollowupAll
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
bool hasValue() const
Definition: Optional.h:165
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to...
Definition: LoopInfo.cpp:193
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:215
unsigned Threshold
The cost threshold for the unrolled loop.
This class represents an analyzed expression in the program.
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))
Parameters that control the generic loop unrolling transformation.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass's AnalysisUsage.
Definition: LoopUtils.cpp:132
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:212
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
The loop was not modified.
void initializeLoopUnrollAndJamPass(PassRegistry &)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A single uniqued string.
Definition: Metadata.h:604
A container for analyses that lazily runs them and caches their results.
static bool HasAnyUnrollPragma(const Loop *L, StringRef Prefix)
This pass exposes codegen information to IR-level passes.
This header defines various interfaces for pass management in LLVM.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
#define LLVM_DEBUG(X)
Definition: Debug.h:123
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
The optimization diagnostic interface.
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:52
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)