LLVM  8.0.1
LoopVectorizationLegality.cpp
Go to the documentation of this file.
1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file provides loop vectorization legality analysis. Original code
11 // resided in LoopVectorize.cpp for a long time.
12 //
13 // At this point, it is implemented as a utility class, not as an analysis
14 // pass. It should be easy to create an analysis pass around it if there
15 // is a need (but D45420 needs to happen first).
16 //
19 #include "llvm/IR/IntrinsicInst.h"
20 
21 using namespace llvm;
22 
23 #define LV_NAME "loop-vectorize"
24 #define DEBUG_TYPE LV_NAME
25 
26 static cl::opt<bool>
27  EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
28  cl::desc("Enable if-conversion during vectorization."));
29 
31  "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
32  cl::desc("The maximum allowed number of runtime memory checks with a "
33  "vectorize(enable) pragma."));
34 
36  "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
37  cl::desc("The maximum number of SCEV checks allowed."));
38 
40  "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
41  cl::desc("The maximum number of SCEV checks allowed with a "
42  "vectorize(enable) pragma"));
43 
44 /// Maximum vectorization interleave count.
45 static const unsigned MaxInterleaveFactor = 16;
46 
47 namespace llvm {
48 
50  StringRef RemarkName,
51  Loop *TheLoop,
52  Instruction *I) {
53  Value *CodeRegion = TheLoop->getHeader();
54  DebugLoc DL = TheLoop->getStartLoc();
55 
56  if (I) {
57  CodeRegion = I->getParent();
58  // If there is no debug location attached to the instruction, revert back to
59  // using the loop's.
60  if (I->getDebugLoc())
61  DL = I->getDebugLoc();
62  }
63 
64  OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
65  R << "loop not vectorized: ";
66  return R;
67 }
68 
69 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
70  switch (Kind) {
71  case HK_WIDTH:
72  return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
73  case HK_UNROLL:
74  return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
75  case HK_FORCE:
76  return (Val <= 1);
77  case HK_ISVECTORIZED:
78  return (Val == 0 || Val == 1);
79  }
80  return false;
81 }
82 
84  bool InterleaveOnlyWhenForced,
86  : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
87  Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
88  Force("vectorize.enable", FK_Undefined, HK_FORCE),
89  IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
90  // Populate values with existing loop metadata.
91  getHintsFromMetadata();
92 
93  // force-vector-interleave overrides DisableInterleaving.
96 
97  if (IsVectorized.Value != 1)
98  // If the vectorization width and interleaving count are both 1 then
99  // consider the loop to have been already vectorized because there's
100  // nothing more that we can do.
101  IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
102  LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
103  << "LV: Interleaving disabled by the pass manager\n");
104 }
105 
107  Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
109  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
111  return false;
112  }
113 
114  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
115  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
117  return false;
118  }
119 
120  if (getIsVectorized() == 1) {
121  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
122  // FIXME: Add interleave.disable metadata. This will allow
123  // vectorize.disable to be used without disabling the pass and errors
124  // to differentiate between disabled vectorization and a width of 1.
125  ORE.emit([&]() {
127  "AllDisabled", L->getStartLoc(),
128  L->getHeader())
129  << "loop not vectorized: vectorization and interleaving are "
130  "explicitly disabled, or the loop has already been "
131  "vectorized";
132  });
133  return false;
134  }
135 
136  return true;
137 }
138 
140  using namespace ore;
141 
142  ORE.emit([&]() {
143  if (Force.Value == LoopVectorizeHints::FK_Disabled)
144  return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
145  TheLoop->getStartLoc(),
146  TheLoop->getHeader())
147  << "loop not vectorized: vectorization is explicitly disabled";
148  else {
149  OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
150  TheLoop->getStartLoc(), TheLoop->getHeader());
151  R << "loop not vectorized";
152  if (Force.Value == LoopVectorizeHints::FK_Enabled) {
153  R << " (Force=" << NV("Force", true);
154  if (Width.Value != 0)
155  R << ", Vector Width=" << NV("VectorWidth", Width.Value);
156  if (Interleave.Value != 0)
157  R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
158  R << ")";
159  }
160  return R;
161  }
162  });
163 }
164 
166  if (getWidth() == 1)
167  return LV_NAME;
169  return LV_NAME;
171  return LV_NAME;
173 }
174 
175 void LoopVectorizeHints::getHintsFromMetadata() {
176  MDNode *LoopID = TheLoop->getLoopID();
177  if (!LoopID)
178  return;
179 
180  // First operand should refer to the loop id itself.
181  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
182  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
183 
184  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
185  const MDString *S = nullptr;
187 
188  // The expected hint is either a MDString or a MDNode with the first
189  // operand a MDString.
190  if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
191  if (!MD || MD->getNumOperands() == 0)
192  continue;
193  S = dyn_cast<MDString>(MD->getOperand(0));
194  for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
195  Args.push_back(MD->getOperand(i));
196  } else {
197  S = dyn_cast<MDString>(LoopID->getOperand(i));
198  assert(Args.size() == 0 && "too many arguments for MDString");
199  }
200 
201  if (!S)
202  continue;
203 
204  // Check if the hint starts with the loop metadata prefix.
205  StringRef Name = S->getString();
206  if (Args.size() == 1)
207  setHint(Name, Args[0]);
208  }
209 }
210 
211 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
212  if (!Name.startswith(Prefix()))
213  return;
214  Name = Name.substr(Prefix().size(), StringRef::npos);
215 
216  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
217  if (!C)
218  return;
219  unsigned Val = C->getZExtValue();
220 
221  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
222  for (auto H : Hints) {
223  if (Name == H->Name) {
224  if (H->validate(Val))
225  H->Value = Val;
226  else
227  LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
228  break;
229  }
230  }
231 }
232 
233 MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
234  unsigned V) const {
235  LLVMContext &Context = TheLoop->getHeader()->getContext();
236  Metadata *MDs[] = {
237  MDString::get(Context, Name),
239  return MDNode::get(Context, MDs);
240 }
241 
242 bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
243  ArrayRef<Hint> HintTypes) {
244  MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
245  if (!Name)
246  return false;
247 
248  for (auto H : HintTypes)
249  if (Name->getString().endswith(H.Name))
250  return true;
251  return false;
252 }
253 
254 void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
255  if (HintTypes.empty())
256  return;
257 
258  // Reserve the first element to LoopID (see below).
260  // If the loop already has metadata, then ignore the existing operands.
261  MDNode *LoopID = TheLoop->getLoopID();
262  if (LoopID) {
263  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
264  MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
265  // If node in update list, ignore old value.
266  if (!matchesHintMetadataName(Node, HintTypes))
267  MDs.push_back(Node);
268  }
269  }
270 
271  // Now, add the missing hints.
272  for (auto H : HintTypes)
273  MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
274 
275  // Replace current metadata node with new one.
276  LLVMContext &Context = TheLoop->getHeader()->getContext();
277  MDNode *NewLoopID = MDNode::get(Context, MDs);
278  // Set operand 0 to refer to the loop id itself.
279  NewLoopID->replaceOperandWith(0, NewLoopID);
280 
281  TheLoop->setLoopID(NewLoopID);
282 }
283 
285  Function *F, Loop *L, const LoopVectorizeHints &Hints) {
286  const char *PassName = Hints.vectorizeAnalysisPassName();
287  bool Failed = false;
288  if (UnsafeAlgebraInst && !Hints.allowReordering()) {
289  ORE.emit([&]() {
291  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
292  UnsafeAlgebraInst->getParent())
293  << "loop not vectorized: cannot prove it is safe to reorder "
294  "floating-point operations";
295  });
296  Failed = true;
297  }
298 
299  // Test if runtime memcheck thresholds are exceeded.
300  bool PragmaThresholdReached =
301  NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
302  bool ThresholdReached =
303  NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
304  if ((ThresholdReached && !Hints.allowReordering()) ||
305  PragmaThresholdReached) {
306  ORE.emit([&]() {
307  return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
308  L->getStartLoc(),
309  L->getHeader())
310  << "loop not vectorized: cannot prove it is safe to reorder "
311  "memory operations";
312  });
313  LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
314  Failed = true;
315  }
316 
317  return Failed;
318 }
319 
320 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
321 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
322 // executing the inner loop will execute the same iterations). This check is
323 // very constrained for now but it will be relaxed in the future. \p Lp is
324 // considered uniform if it meets all the following conditions:
325 // 1) it has a canonical IV (starting from 0 and with stride 1),
326 // 2) its latch terminator is a conditional branch and,
327 // 3) its latch condition is a compare instruction whose operands are the
328 // canonical IV and an OuterLp invariant.
329 // This check doesn't take into account the uniformity of other conditions not
330 // related to the loop latch because they don't affect the loop uniformity.
331 //
332 // NOTE: We decided to keep all these checks and its associated documentation
333 // together so that we can easily have a picture of the current supported loop
334 // nests. However, some of the current checks don't depend on \p OuterLp and
335 // would be redundantly executed for each \p Lp if we invoked this function for
336 // different candidate outer loops. This is not the case for now because we
337 // don't currently have the infrastructure to evaluate multiple candidate outer
338 // loops and \p OuterLp will be a fixed parameter while we only support explicit
339 // outer loop vectorization. It's also very likely that these checks go away
340 // before introducing the aforementioned infrastructure. However, if this is not
341 // the case, we should move the \p OuterLp independent checks to a separate
342 // function that is only executed once for each \p Lp.
343 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
344  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
345 
346  // If Lp is the outer loop, it's uniform by definition.
347  if (Lp == OuterLp)
348  return true;
349  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
350 
351  // 1.
353  if (!IV) {
354  LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
355  return false;
356  }
357 
358  // 2.
359  BasicBlock *Latch = Lp->getLoopLatch();
360  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
361  if (!LatchBr || LatchBr->isUnconditional()) {
362  LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
363  return false;
364  }
365 
366  // 3.
367  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
368  if (!LatchCmp) {
369  LLVM_DEBUG(
370  dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
371  return false;
372  }
373 
374  Value *CondOp0 = LatchCmp->getOperand(0);
375  Value *CondOp1 = LatchCmp->getOperand(1);
376  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
377  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
378  !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
379  LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
380  return false;
381  }
382 
383  return true;
384 }
385 
386 // Return true if \p Lp and all its nested loops are uniform with regard to \p
387 // OuterLp.
388 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
389  if (!isUniformLoop(Lp, OuterLp))
390  return false;
391 
392  // Check if nested loops are uniform.
393  for (Loop *SubLp : *Lp)
394  if (!isUniformLoopNest(SubLp, OuterLp))
395  return false;
396 
397  return true;
398 }
399 
400 /// Check whether it is safe to if-convert this phi node.
401 ///
402 /// Phi nodes with constant expressions that can trap are not safe to if
403 /// convert.
405  for (PHINode &Phi : BB->phis()) {
406  for (Value *V : Phi.incoming_values())
407  if (auto *C = dyn_cast<Constant>(V))
408  if (C->canTrap())
409  return false;
410  }
411  return true;
412 }
413 
415  if (Ty->isPointerTy())
416  return DL.getIntPtrType(Ty);
417 
418  // It is possible that char's or short's overflow when we ask for the loop's
419  // trip count, work around this by changing the type size.
420  if (Ty->getScalarSizeInBits() < 32)
421  return Type::getInt32Ty(Ty->getContext());
422 
423  return Ty;
424 }
425 
426 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
427  Ty0 = convertPointerToIntegerType(DL, Ty0);
428  Ty1 = convertPointerToIntegerType(DL, Ty1);
429  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
430  return Ty0;
431  return Ty1;
432 }
433 
434 /// Check that the instruction has outside loop users and is not an
435 /// identified reduction variable.
436 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
437  SmallPtrSetImpl<Value *> &AllowedExit) {
438  // Reductions, Inductions and non-header phis are allowed to have exit users. All
439  // other instructions must not have external users.
440  if (!AllowedExit.count(Inst))
441  // Check that all of the users of the loop are inside the BB.
442  for (User *U : Inst->users()) {
443  Instruction *UI = cast<Instruction>(U);
444  // This user may be a reduction exit value.
445  if (!TheLoop->contains(UI)) {
446  LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
447  return true;
448  }
449  }
450  return false;
451 }
452 
454  const ValueToValueMap &Strides =
455  getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
456 
457  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
458  if (Stride == 1 || Stride == -1)
459  return Stride;
460  return 0;
461 }
462 
464  return LAI->isUniform(V);
465 }
466 
467 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
468  assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
469  // Store the result and return it at the end instead of exiting early, in case
470  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
471  bool Result = true;
472  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
473 
474  for (BasicBlock *BB : TheLoop->blocks()) {
475  // Check whether the BB terminator is a BranchInst. Any other terminator is
476  // not supported yet.
477  auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
478  if (!Br) {
479  LLVM_DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
480  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
481  << "loop control flow is not understood by vectorizer");
482  if (DoExtraAnalysis)
483  Result = false;
484  else
485  return false;
486  }
487 
488  // Check whether the BranchInst is a supported one. Only unconditional
489  // branches, conditional branches with an outer loop invariant condition or
490  // backedges are supported.
491  if (Br && Br->isConditional() &&
492  !TheLoop->isLoopInvariant(Br->getCondition()) &&
493  !LI->isLoopHeader(Br->getSuccessor(0)) &&
494  !LI->isLoopHeader(Br->getSuccessor(1))) {
495  LLVM_DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
496  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
497  << "loop control flow is not understood by vectorizer");
498  if (DoExtraAnalysis)
499  Result = false;
500  else
501  return false;
502  }
503  }
504 
505  // Check whether inner loops are uniform. At this point, we only support
506  // simple outer loops scenarios with uniform nested loops.
507  if (!isUniformLoopNest(TheLoop /*loop nest*/,
508  TheLoop /*context outer loop*/)) {
509  LLVM_DEBUG(
510  dbgs()
511  << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
512  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
513  << "loop control flow is not understood by vectorizer");
514  if (DoExtraAnalysis)
515  Result = false;
516  else
517  return false;
518  }
519 
520  // Check whether we are able to set up outer loop induction.
521  if (!setupOuterLoopInductions()) {
522  LLVM_DEBUG(
523  dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
524  ORE->emit(createMissedAnalysis("UnsupportedPhi")
525  << "Unsupported outer loop Phi(s)");
526  if (DoExtraAnalysis)
527  Result = false;
528  else
529  return false;
530  }
531 
532  return Result;
533 }
534 
535 void LoopVectorizationLegality::addInductionPhi(
536  PHINode *Phi, const InductionDescriptor &ID,
537  SmallPtrSetImpl<Value *> &AllowedExit) {
538  Inductions[Phi] = ID;
539 
540  // In case this induction also comes with casts that we know we can ignore
541  // in the vectorized loop body, record them here. All casts could be recorded
542  // here for ignoring, but suffices to record only the first (as it is the
543  // only one that may bw used outside the cast sequence).
544  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
545  if (!Casts.empty())
546  InductionCastsToIgnore.insert(*Casts.begin());
547 
548  Type *PhiTy = Phi->getType();
549  const DataLayout &DL = Phi->getModule()->getDataLayout();
550 
551  // Get the widest type.
552  if (!PhiTy->isFloatingPointTy()) {
553  if (!WidestIndTy)
554  WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
555  else
556  WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
557  }
558 
559  // Int inductions are special because we only allow one IV.
562  isa<Constant>(ID.getStartValue()) &&
563  cast<Constant>(ID.getStartValue())->isNullValue()) {
564 
565  // Use the phi node with the widest type as induction. Use the last
566  // one if there are multiple (no good reason for doing this other
567  // than it is expedient). We've checked that it begins at zero and
568  // steps by one, so this is a canonical induction variable.
569  if (!PrimaryInduction || PhiTy == WidestIndTy)
570  PrimaryInduction = Phi;
571  }
572 
573  // Both the PHI node itself, and the "post-increment" value feeding
574  // back into the PHI node may have external users.
575  // We can allow those uses, except if the SCEVs we have for them rely
576  // on predicates that only hold within the loop, since allowing the exit
577  // currently means re-using this SCEV outside the loop (see PR33706 for more
578  // details).
579  if (PSE.getUnionPredicate().isAlwaysTrue()) {
580  AllowedExit.insert(Phi);
581  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
582  }
583 
584  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
585 }
586 
587 bool LoopVectorizationLegality::setupOuterLoopInductions() {
588  BasicBlock *Header = TheLoop->getHeader();
589 
590  // Returns true if a given Phi is a supported induction.
591  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
593  if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
595  addInductionPhi(&Phi, ID, AllowedExit);
596  return true;
597  } else {
598  // Bail out for any Phi in the outer loop header that is not a supported
599  // induction.
600  LLVM_DEBUG(
601  dbgs()
602  << "LV: Found unsupported PHI for outer loop vectorization.\n");
603  return false;
604  }
605  };
606 
607  if (llvm::all_of(Header->phis(), isSupportedPhi))
608  return true;
609  else
610  return false;
611 }
612 
613 bool LoopVectorizationLegality::canVectorizeInstrs() {
614  BasicBlock *Header = TheLoop->getHeader();
615 
616  // Look for the attribute signaling the absence of NaNs.
617  Function &F = *Header->getParent();
618  HasFunNoNaNAttr =
619  F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
620 
621  // For each block in the loop.
622  for (BasicBlock *BB : TheLoop->blocks()) {
623  // Scan the instructions in the block and look for hazards.
624  for (Instruction &I : *BB) {
625  if (auto *Phi = dyn_cast<PHINode>(&I)) {
626  Type *PhiTy = Phi->getType();
627  // Check that this PHI type is allowed.
628  if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
629  !PhiTy->isPointerTy()) {
630  ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
631  << "loop control flow is not understood by vectorizer");
632  LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
633  return false;
634  }
635 
636  // If this PHINode is not in the header block, then we know that we
637  // can convert it to select during if-conversion. No need to check if
638  // the PHIs in this block are induction or reduction variables.
639  if (BB != Header) {
640  // Non-header phi nodes that have outside uses can be vectorized. Add
641  // them to the list of allowed exits.
642  // Unsafe cyclic dependencies with header phis are identified during
643  // legalization for reduction, induction and first order
644  // recurrences.
645  continue;
646  }
647 
648  // We only allow if-converted PHIs with exactly two incoming values.
649  if (Phi->getNumIncomingValues() != 2) {
650  ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
651  << "control flow not understood by vectorizer");
652  LLVM_DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
653  return false;
654  }
655 
656  RecurrenceDescriptor RedDes;
657  if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
658  DT)) {
659  if (RedDes.hasUnsafeAlgebra())
660  Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
661  AllowedExit.insert(RedDes.getLoopExitInstr());
662  Reductions[Phi] = RedDes;
663  continue;
664  }
665 
666  // TODO: Instead of recording the AllowedExit, it would be good to record the
667  // complementary set: NotAllowedExit. These include (but may not be
668  // limited to):
669  // 1. Reduction phis as they represent the one-before-last value, which
670  // is not available when vectorized
671  // 2. Induction phis and increment when SCEV predicates cannot be used
672  // outside the loop - see addInductionPhi
673  // 3. Non-Phis with outside uses when SCEV predicates cannot be used
674  // outside the loop - see call to hasOutsideLoopUser in the non-phi
675  // handling below
676  // 4. FirstOrderRecurrence phis that can possibly be handled by
677  // extraction.
678  // By recording these, we can then reason about ways to vectorize each
679  // of these NotAllowedExit.
681  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
682  addInductionPhi(Phi, ID, AllowedExit);
683  if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
684  Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
685  continue;
686  }
687 
689  SinkAfter, DT)) {
690  FirstOrderRecurrences.insert(Phi);
691  continue;
692  }
693 
694  // As a last resort, coerce the PHI to a AddRec expression
695  // and re-try classifying it a an induction PHI.
696  if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
697  addInductionPhi(Phi, ID, AllowedExit);
698  continue;
699  }
700 
701  ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
702  << "value that could not be identified as "
703  "reduction is used outside the loop");
704  LLVM_DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
705  return false;
706  } // end of PHI handling
707 
708  // We handle calls that:
709  // * Are debug info intrinsics.
710  // * Have a mapping to an IR intrinsic.
711  // * Have a vector version available.
712  auto *CI = dyn_cast<CallInst>(&I);
713  if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
714  !isa<DbgInfoIntrinsic>(CI) &&
715  !(CI->getCalledFunction() && TLI &&
716  TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
717  // If the call is a recognized math libary call, it is likely that
718  // we can vectorize it given loosened floating-point constraints.
719  LibFunc Func;
720  bool IsMathLibCall =
721  TLI && CI->getCalledFunction() &&
722  CI->getType()->isFloatingPointTy() &&
723  TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
724  TLI->hasOptimizedCodeGen(Func);
725 
726  if (IsMathLibCall) {
727  // TODO: Ideally, we should not use clang-specific language here,
728  // but it's hard to provide meaningful yet generic advice.
729  // Also, should this be guarded by allowExtraAnalysis() and/or be part
730  // of the returned info from isFunctionVectorizable()?
731  ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
732  << "library call cannot be vectorized. "
733  "Try compiling with -fno-math-errno, -ffast-math, "
734  "or similar flags");
735  } else {
736  ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
737  << "call instruction cannot be vectorized");
738  }
739  LLVM_DEBUG(
740  dbgs() << "LV: Found a non-intrinsic callsite.\n");
741  return false;
742  }
743 
744  // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
745  // second argument is the same (i.e. loop invariant)
747  getVectorIntrinsicIDForCall(CI, TLI), 1)) {
748  auto *SE = PSE.getSE();
749  if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
750  ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
751  << "intrinsic instruction cannot be vectorized");
752  LLVM_DEBUG(dbgs()
753  << "LV: Found unvectorizable intrinsic " << *CI << "\n");
754  return false;
755  }
756  }
757 
758  // Check that the instruction return type is vectorizable.
759  // Also, we can't vectorize extractelement instructions.
760  if ((!VectorType::isValidElementType(I.getType()) &&
761  !I.getType()->isVoidTy()) ||
762  isa<ExtractElementInst>(I)) {
763  ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
764  << "instruction return type cannot be vectorized");
765  LLVM_DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
766  return false;
767  }
768 
769  // Check that the stored type is vectorizable.
770  if (auto *ST = dyn_cast<StoreInst>(&I)) {
771  Type *T = ST->getValueOperand()->getType();
773  ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
774  << "store instruction cannot be vectorized");
775  return false;
776  }
777 
778  // FP instructions can allow unsafe algebra, thus vectorizable by
779  // non-IEEE-754 compliant SIMD units.
780  // This applies to floating-point math operations and calls, not memory
781  // operations, shuffles, or casts, as they don't change precision or
782  // semantics.
783  } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
784  !I.isFast()) {
785  LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
786  Hints->setPotentiallyUnsafe();
787  }
788 
789  // Reduction instructions are allowed to have exit users.
790  // All other instructions must not have external users.
791  if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
792  // We can safely vectorize loops where instructions within the loop are
793  // used outside the loop only if the SCEV predicates within the loop is
794  // same as outside the loop. Allowing the exit means reusing the SCEV
795  // outside the loop.
796  if (PSE.getUnionPredicate().isAlwaysTrue()) {
797  AllowedExit.insert(&I);
798  continue;
799  }
800  ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
801  << "value cannot be used outside the loop");
802  return false;
803  }
804  } // next instr.
805  }
806 
807  if (!PrimaryInduction) {
808  LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
809  if (Inductions.empty()) {
810  ORE->emit(createMissedAnalysis("NoInductionVariable")
811  << "loop induction variable could not be identified");
812  return false;
813  } else if (!WidestIndTy) {
814  ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
815  << "integer loop induction variable could not be identified");
816  return false;
817  }
818  }
819 
820  // Now we know the widest induction type, check if our found induction
821  // is the same size. If it's not, unset it here and InnerLoopVectorizer
822  // will create another.
823  if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
824  PrimaryInduction = nullptr;
825 
826  return true;
827 }
828 
829 bool LoopVectorizationLegality::canVectorizeMemory() {
830  LAI = &(*GetLAA)(*TheLoop);
831  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
832  if (LAR) {
833  ORE->emit([&]() {
834  return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
835  "loop not vectorized: ", *LAR);
836  });
837  }
838  if (!LAI->canVectorizeMemory())
839  return false;
840 
841  if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
842  ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
843  << "write to a loop invariant address could not "
844  "be vectorized");
845  LLVM_DEBUG(
846  dbgs() << "LV: Non vectorizable stores to a uniform address\n");
847  return false;
848  }
849  Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
850  PSE.addPredicate(LAI->getPSE().getUnionPredicate());
851 
852  return true;
853 }
854 
856  Value *In0 = const_cast<Value *>(V);
857  PHINode *PN = dyn_cast_or_null<PHINode>(In0);
858  if (!PN)
859  return false;
860 
861  return Inductions.count(PN);
862 }
863 
865  auto *Inst = dyn_cast<Instruction>(V);
866  return (Inst && InductionCastsToIgnore.count(Inst));
867 }
868 
870  return isInductionPhi(V) || isCastedInductionVariable(V);
871 }
872 
874  return FirstOrderRecurrences.count(Phi);
875 }
876 
878  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
879 }
880 
881 bool LoopVectorizationLegality::blockCanBePredicated(
882  BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
883  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
884 
885  for (Instruction &I : *BB) {
886  // Check that we don't have a constant expression that can trap as operand.
887  for (Value *Operand : I.operands()) {
888  if (auto *C = dyn_cast<Constant>(Operand))
889  if (C->canTrap())
890  return false;
891  }
892  // We might be able to hoist the load.
893  if (I.mayReadFromMemory()) {
894  auto *LI = dyn_cast<LoadInst>(&I);
895  if (!LI)
896  return false;
897  if (!SafePtrs.count(LI->getPointerOperand())) {
898  // !llvm.mem.parallel_loop_access implies if-conversion safety.
899  // Otherwise, record that the load needs (real or emulated) masking
900  // and let the cost model decide.
901  if (!IsAnnotatedParallel)
902  MaskedOp.insert(LI);
903  continue;
904  }
905  }
906 
907  if (I.mayWriteToMemory()) {
908  auto *SI = dyn_cast<StoreInst>(&I);
909  if (!SI)
910  return false;
911  // Predicated store requires some form of masking:
912  // 1) masked store HW instruction,
913  // 2) emulation via load-blend-store (only if safe and legal to do so,
914  // be aware on the race conditions), or
915  // 3) element-by-element predicate check and scalar store.
916  MaskedOp.insert(SI);
917  continue;
918  }
919  if (I.mayThrow())
920  return false;
921  }
922 
923  return true;
924 }
925 
926 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
927  if (!EnableIfConversion) {
928  ORE->emit(createMissedAnalysis("IfConversionDisabled")
929  << "if-conversion is disabled");
930  return false;
931  }
932 
933  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
934 
935  // A list of pointers that we can safely read and write to.
936  SmallPtrSet<Value *, 8> SafePointes;
937 
938  // Collect safe addresses.
939  for (BasicBlock *BB : TheLoop->blocks()) {
940  if (blockNeedsPredication(BB))
941  continue;
942 
943  for (Instruction &I : *BB)
944  if (auto *Ptr = getLoadStorePointerOperand(&I))
945  SafePointes.insert(Ptr);
946  }
947 
948  // Collect the blocks that need predication.
949  BasicBlock *Header = TheLoop->getHeader();
950  for (BasicBlock *BB : TheLoop->blocks()) {
951  // We don't support switch statements inside loops.
952  if (!isa<BranchInst>(BB->getTerminator())) {
953  ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
954  << "loop contains a switch statement");
955  return false;
956  }
957 
958  // We must be able to predicate all blocks that need to be predicated.
959  if (blockNeedsPredication(BB)) {
960  if (!blockCanBePredicated(BB, SafePointes)) {
961  ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
962  << "control flow cannot be substituted for a select");
963  return false;
964  }
965  } else if (BB != Header && !canIfConvertPHINodes(BB)) {
966  ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
967  << "control flow cannot be substituted for a select");
968  return false;
969  }
970  }
971 
972  // We can if-convert this loop.
973  return true;
974 }
975 
976 // Helper function to canVectorizeLoopNestCFG.
977 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
978  bool UseVPlanNativePath) {
979  assert((UseVPlanNativePath || Lp->empty()) &&
980  "VPlan-native path is not enabled.");
981 
982  // TODO: ORE should be improved to show more accurate information when an
983  // outer loop can't be vectorized because a nested loop is not understood or
984  // legal. Something like: "outer_loop_location: loop not vectorized:
985  // (inner_loop_location) loop control flow is not understood by vectorizer".
986 
987  // Store the result and return it at the end instead of exiting early, in case
988  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
989  bool Result = true;
990  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
991 
992  // We must have a loop in canonical form. Loops with indirectbr in them cannot
993  // be canonicalized.
994  if (!Lp->getLoopPreheader()) {
995  LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
996  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
997  << "loop control flow is not understood by vectorizer");
998  if (DoExtraAnalysis)
999  Result = false;
1000  else
1001  return false;
1002  }
1003 
1004  // We must have a single backedge.
1005  if (Lp->getNumBackEdges() != 1) {
1006  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
1007  << "loop control flow is not understood by vectorizer");
1008  if (DoExtraAnalysis)
1009  Result = false;
1010  else
1011  return false;
1012  }
1013 
1014  // We must have a single exiting block.
1015  if (!Lp->getExitingBlock()) {
1016  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
1017  << "loop control flow is not understood by vectorizer");
1018  if (DoExtraAnalysis)
1019  Result = false;
1020  else
1021  return false;
1022  }
1023 
1024  // We only handle bottom-tested loops, i.e. loop in which the condition is
1025  // checked at the end of each iteration. With that we can assume that all
1026  // instructions in the loop are executed the same number of times.
1027  if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1028  ORE->emit(createMissedAnalysis("CFGNotUnderstood")
1029  << "loop control flow is not understood by vectorizer");
1030  if (DoExtraAnalysis)
1031  Result = false;
1032  else
1033  return false;
1034  }
1035 
1036  return Result;
1037 }
1038 
1039 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1040  Loop *Lp, bool UseVPlanNativePath) {
1041  // Store the result and return it at the end instead of exiting early, in case
1042  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1043  bool Result = true;
1044  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1045  if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1046  if (DoExtraAnalysis)
1047  Result = false;
1048  else
1049  return false;
1050  }
1051 
1052  // Recursively check whether the loop control flow of nested loops is
1053  // understood.
1054  for (Loop *SubLp : *Lp)
1055  if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1056  if (DoExtraAnalysis)
1057  Result = false;
1058  else
1059  return false;
1060  }
1061 
1062  return Result;
1063 }
1064 
1065 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1066  // Store the result and return it at the end instead of exiting early, in case
1067  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1068  bool Result = true;
1069 
1070  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1071  // Check whether the loop-related control flow in the loop nest is expected by
1072  // vectorizer.
1073  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1074  if (DoExtraAnalysis)
1075  Result = false;
1076  else
1077  return false;
1078  }
1079 
1080  // We need to have a loop header.
1081  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1082  << '\n');
1083 
1084  // Specific checks for outer loops. We skip the remaining legal checks at this
1085  // point because they don't support outer loops.
1086  if (!TheLoop->empty()) {
1087  assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1088 
1089  if (!canVectorizeOuterLoop()) {
1090  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
1091  // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1092  // outer loops.
1093  return false;
1094  }
1095 
1096  LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1097  return Result;
1098  }
1099 
1100  assert(TheLoop->empty() && "Inner loop expected.");
1101  // Check if we can if-convert non-single-bb loops.
1102  unsigned NumBlocks = TheLoop->getNumBlocks();
1103  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1104  LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1105  if (DoExtraAnalysis)
1106  Result = false;
1107  else
1108  return false;
1109  }
1110 
1111  // Check if we can vectorize the instructions and CFG in this loop.
1112  if (!canVectorizeInstrs()) {
1113  LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1114  if (DoExtraAnalysis)
1115  Result = false;
1116  else
1117  return false;
1118  }
1119 
1120  // Go over each instruction and look at memory deps.
1121  if (!canVectorizeMemory()) {
1122  LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1123  if (DoExtraAnalysis)
1124  Result = false;
1125  else
1126  return false;
1127  }
1128 
1129  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1130  << (LAI->getRuntimePointerChecking()->Need
1131  ? " (with a runtime bound check)"
1132  : "")
1133  << "!\n");
1134 
1135  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1136  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1137  SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1138 
1139  if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1140  ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
1141  << "Too many SCEV assumptions need to be made and checked "
1142  << "at runtime");
1143  LLVM_DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
1144  if (DoExtraAnalysis)
1145  Result = false;
1146  else
1147  return false;
1148  }
1149 
1150  // Okay! We've done all the tests. If any have failed, return false. Otherwise
1151  // we can vectorize, and at this point we don't have any other mem analysis
1152  // which may limit our maximum vectorization factor, so just return true with
1153  // no restrictions.
1154  return Result;
1155 }
1156 
1158 
1159  LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1160 
1161  if (!PrimaryInduction) {
1162  ORE->emit(createMissedAnalysis("NoPrimaryInduction")
1163  << "Missing a primary induction variable in the loop, which is "
1164  << "needed in order to fold tail by masking as required.");
1165  LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
1166  << "masking.\n");
1167  return false;
1168  }
1169 
1170  // TODO: handle reductions when tail is folded by masking.
1171  if (!Reductions.empty()) {
1172  ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
1173  << "Cannot fold tail by masking in the presence of reductions.");
1174  LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
1175  << "masking.\n");
1176  return false;
1177  }
1178 
1179  // TODO: handle outside users when tail is folded by masking.
1180  for (auto *AE : AllowedExit) {
1181  // Check that all users of allowed exit values are inside the loop.
1182  for (User *U : AE->users()) {
1183  Instruction *UI = cast<Instruction>(U);
1184  if (TheLoop->contains(UI))
1185  continue;
1186  ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
1187  << "Cannot fold tail by masking in the presence of live outs.");
1188  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
1189  << "outside user for : " << *UI << '\n');
1190  return false;
1191  }
1192  }
1193 
1194  // The list of pointers that we can safely read and write to remains empty.
1195  SmallPtrSet<Value *, 8> SafePointers;
1196 
1197  // Check and mark all blocks for predication, including those that ordinarily
1198  // do not need predication such as the header block.
1199  for (BasicBlock *BB : TheLoop->blocks()) {
1200  if (!blockCanBePredicated(BB, SafePointers)) {
1201  ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
1202  << "control flow cannot be substituted for a select");
1203  LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
1204  return false;
1205  }
1206  }
1207 
1208  LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1209  return true;
1210 }
1211 
1212 } // namespace llvm
static bool isUniformLoop(Loop *Lp, Loop *OuterLp)
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
uint64_t CallInst * C
#define LV_NAME
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:636
Diagnostic information for missed-optimization remarks.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
LLVMContext & Context
static Type * getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1)
DiagnosticInfoOptimizationBase::Argument NV
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Instruction * getUnsafeAlgebraInst()
Returns first unsafe algebra instruction in the PHI node&#39;s use-chain.
bool isCastedInductionVariable(const Value *V)
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
static bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction *> *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:859
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:454
ConstantInt * getConstIntStepValue() const
bool isUniform(Value *V)
Returns true if the value V is uniform within the loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class...
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
InductionKind getKind() const
A debug info location.
Definition: DebugLoc.h:34
Metadata node.
Definition: Metadata.h:864
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1069
An instruction for reading from memory.
Definition: Instructions.h:168
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
Value * getStartValue() const
#define DEBUG_TYPE
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:279
bool hasUnsafeAlgebra()
Returns true if the recurrence has unsafe algebra which requires a relaxed floating-point model...
amdgpu Simplify well known AMD library false Value Value const Twine & Name
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:227
This file defines the LoopVectorizationLegality class.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
static const unsigned MaxVectorWidth
Maximum SIMD width.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
static cl::opt< bool > EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization."))
Diagnostic information for optimization analysis remarks.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
BlockT * getHeader() const
Definition: LoopInfo.h:100
int isConsecutivePtr(Value *Ptr)
Check if this pointer is consecutive when vectorizing.
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:201
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInductionVariable(const Value *V)
Returns True if V can be considered as an induction variable in this loop.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:621
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:321
static cl::opt< unsigned > PragmaVectorizeMemoryCheckThreshold("pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma."))
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
bool blockNeedsPredication(BasicBlock *BB)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Diagnostic information for optimization analysis remarks related to pointer aliasing.
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:410
StringRef getString() const
Definition: Metadata.cpp:464
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
Integer induction variable. Step = C.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl< Value *> &AllowedExit)
Check that the instruction has outside loop users and is not an identified reduction variable...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
Instruction * getUnsafeAlgebraInst()
Returns induction operator that does not have "fast-math" property and requires FP unsafe mode...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
Create an analysis remark that explains why vectorization failed.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Returns true if Phi is a reduction in TheLoop.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp)
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints)
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:365
size_t size() const
Definition: SmallVector.h:53
static bool isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, DenseMap< Instruction *, Instruction *> &SinkAfter, DominatorTree *DT)
Returns true if Phi is a first-order recurrence.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:63
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
Diagnostic information for optimization analysis remarks related to floating-point non-commutativity...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file. ...
A struct for saving information about induction variables.
bool canFoldTailByMasking()
Return true if we can vectorize this loop while folding its tail by masking.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:148
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
DenseMap< const Value *, Value * > ValueToValueMap
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool isFirstOrderRecurrence(const PHINode *Phi)
Returns True if Phi is a first-order recurrence in this loop.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:89
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
PHINode * getCanonicalInductionVariable() const
Check to see if the loop has a canonical induction variable: an integer recurrence that starts at 0 a...
Definition: LoopInfo.cpp:113
static bool canIfConvertPHINodes(BasicBlock *BB)
Check whether it is safe to if-convert this phi node.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
bool isInductionPhi(const Value *V)
Returns True if V is a Phi node of an induction variable in this loop.
iterator_range< user_iterator > users()
Definition: Value.h:400
bool hasUnsafeAlgebra()
Returns true if the induction type is FP and the binary operator does not have the "fast-math" proper...
amdgpu Simplify well known AMD library false Value Value * Arg
const SmallVectorImpl< Instruction * > & getCastInsts() const
Returns a reference to the type cast instructions in the induction update chain, that are redundant w...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:311
static const size_t npos
Definition: StringRef.h:51
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Instruction * getLoopExitInstr()
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:195
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:325
Collection of parameters shared beetween the Loop Vectorizer and the Loop Access Analysis.
bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:18
bool empty() const
Definition: LoopInfo.h:146
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:331
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
static Type * convertPointerToIntegerType(const DataLayout &DL, Type *Ty)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
static cl::opt< unsigned > VectorizeSCEVCheckThreshold("vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed."))
A single uniqued string.
Definition: Metadata.h:604
static cl::opt< unsigned > PragmaVectorizeSCEVCheckThreshold("pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma"))
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
#define LLVM_DEBUG(X)
Definition: Debug.h:123
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
Root of the metadata hierarchy.
Definition: Metadata.h:58
The optimization diagnostic interface.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
void emitRemarkWithHints() const
Dumps all the hint information.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
const BasicBlock * getParent() const
Definition: Instruction.h:67