LLVM  8.0.1
LoopIdiomRecognize.cpp
Go to the documentation of this file.
1 //===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass implements an idiom recognizer that transforms simple loops into a
11 // non-loop form. In cases that this kicks in, it can be a significant
12 // performance win.
13 //
14 // If compiling for code size we avoid idiom recognition if the resulting
15 // code could be larger than the code for the original loop. One way this could
16 // happen is if the loop is not removable after idiom recognition due to the
17 // presence of non-idiom instructions. The initial implementation of the
18 // heuristics applies to idioms in multi-block loops.
19 //
20 //===----------------------------------------------------------------------===//
21 //
22 // TODO List:
23 //
24 // Future loop memory idioms to recognize:
25 // memcmp, memmove, strlen, etc.
26 // Future floating point idioms to recognize in -ffast-math mode:
27 // fpowi
28 // Future integer operation idioms to recognize:
29 // ctpop
30 //
31 // Beware that isel's default lowering for ctpop is highly inefficient for
32 // i64 and larger types when i64 is legal and the value has few bits set. It
33 // would be good to enhance isel to emit a loop for ctpop in this case.
34 //
35 // This could recognize common matrix multiplies and dot product idioms and
36 // replace them with calls to BLAS (if linked in??).
37 //
38 //===----------------------------------------------------------------------===//
39 
40 #include "llvm/ADT/APInt.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/DenseMap.h"
43 #include "llvm/ADT/MapVector.h"
44 #include "llvm/ADT/SetVector.h"
45 #include "llvm/ADT/SmallPtrSet.h"
46 #include "llvm/ADT/SmallVector.h"
47 #include "llvm/ADT/Statistic.h"
48 #include "llvm/ADT/StringRef.h"
51 #include "llvm/Analysis/LoopInfo.h"
52 #include "llvm/Analysis/LoopPass.h"
61 #include "llvm/IR/Attributes.h"
62 #include "llvm/IR/BasicBlock.h"
63 #include "llvm/IR/Constant.h"
64 #include "llvm/IR/Constants.h"
65 #include "llvm/IR/DataLayout.h"
66 #include "llvm/IR/DebugLoc.h"
67 #include "llvm/IR/DerivedTypes.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/GlobalValue.h"
70 #include "llvm/IR/GlobalVariable.h"
71 #include "llvm/IR/IRBuilder.h"
72 #include "llvm/IR/InstrTypes.h"
73 #include "llvm/IR/Instruction.h"
74 #include "llvm/IR/Instructions.h"
75 #include "llvm/IR/IntrinsicInst.h"
76 #include "llvm/IR/Intrinsics.h"
77 #include "llvm/IR/LLVMContext.h"
78 #include "llvm/IR/Module.h"
79 #include "llvm/IR/PassManager.h"
80 #include "llvm/IR/Type.h"
81 #include "llvm/IR/User.h"
82 #include "llvm/IR/Value.h"
83 #include "llvm/IR/ValueHandle.h"
84 #include "llvm/Pass.h"
85 #include "llvm/Support/Casting.h"
87 #include "llvm/Support/Debug.h"
89 #include "llvm/Transforms/Scalar.h"
93 #include <algorithm>
94 #include <cassert>
95 #include <cstdint>
96 #include <utility>
97 #include <vector>
98 
99 using namespace llvm;
100 
101 #define DEBUG_TYPE "loop-idiom"
102 
103 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
104 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
105 
107  "use-lir-code-size-heurs",
108  cl::desc("Use loop idiom recognition code size heuristics when compiling"
109  "with -Os/-Oz"),
110  cl::init(true), cl::Hidden);
111 
112 namespace {
113 
114 class LoopIdiomRecognize {
115  Loop *CurLoop = nullptr;
116  AliasAnalysis *AA;
117  DominatorTree *DT;
118  LoopInfo *LI;
119  ScalarEvolution *SE;
120  TargetLibraryInfo *TLI;
121  const TargetTransformInfo *TTI;
122  const DataLayout *DL;
123  bool ApplyCodeSizeHeuristics;
124 
125 public:
126  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
127  LoopInfo *LI, ScalarEvolution *SE,
128  TargetLibraryInfo *TLI,
129  const TargetTransformInfo *TTI,
130  const DataLayout *DL)
131  : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
132 
133  bool runOnLoop(Loop *L);
134 
135 private:
136  using StoreList = SmallVector<StoreInst *, 8>;
137  using StoreListMap = MapVector<Value *, StoreList>;
138 
139  StoreListMap StoreRefsForMemset;
140  StoreListMap StoreRefsForMemsetPattern;
141  StoreList StoreRefsForMemcpy;
142  bool HasMemset;
143  bool HasMemsetPattern;
144  bool HasMemcpy;
145 
146  /// Return code for isLegalStore()
147  enum LegalStoreKind {
148  None = 0,
149  Memset,
150  MemsetPattern,
151  Memcpy,
152  UnorderedAtomicMemcpy,
153  DontUse // Dummy retval never to be used. Allows catching errors in retval
154  // handling.
155  };
156 
157  /// \name Countable Loop Idiom Handling
158  /// @{
159 
160  bool runOnCountableLoop();
161  bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
162  SmallVectorImpl<BasicBlock *> &ExitBlocks);
163 
164  void collectStores(BasicBlock *BB);
165  LegalStoreKind isLegalStore(StoreInst *SI);
166  enum class ForMemset { No, Yes };
167  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
168  ForMemset For);
169  bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
170 
171  bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
172  unsigned StoreAlignment, Value *StoredVal,
173  Instruction *TheStore,
175  const SCEVAddRecExpr *Ev, const SCEV *BECount,
176  bool NegStride, bool IsLoopMemset = false);
177  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
178  bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
179  bool IsLoopMemset = false);
180 
181  /// @}
182  /// \name Noncountable Loop Idiom Handling
183  /// @{
184 
185  bool runOnNoncountableLoop();
186 
187  bool recognizePopcount();
188  void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
189  PHINode *CntPhi, Value *Var);
190  bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
191  void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
192  Instruction *CntInst, PHINode *CntPhi,
193  Value *Var, Instruction *DefX,
194  const DebugLoc &DL, bool ZeroCheck,
195  bool IsCntPhiUsedOutsideLoop);
196 
197  /// @}
198 };
199 
200 class LoopIdiomRecognizeLegacyPass : public LoopPass {
201 public:
202  static char ID;
203 
204  explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
207  }
208 
209  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
210  if (skipLoop(L))
211  return false;
212 
213  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
214  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
215  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
216  ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
217  TargetLibraryInfo *TLI =
218  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
219  const TargetTransformInfo *TTI =
220  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
221  *L->getHeader()->getParent());
222  const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
223 
224  LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
225  return LIR.runOnLoop(L);
226  }
227 
228  /// This transformation requires natural loop information & requires that
229  /// loop preheaders be inserted into the CFG.
230  void getAnalysisUsage(AnalysisUsage &AU) const override {
234  }
235 };
236 
237 } // end anonymous namespace
238 
240 
243  LPMUpdater &) {
244  const auto *DL = &L.getHeader()->getModule()->getDataLayout();
245 
246  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
247  if (!LIR.runOnLoop(&L))
248  return PreservedAnalyses::all();
249 
251 }
252 
253 INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
254  "Recognize loop idioms", false, false)
258 INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
259  "Recognize loop idioms", false, false)
260 
261 Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
262 
265  I->eraseFromParent();
266 }
267 
268 //===----------------------------------------------------------------------===//
269 //
270 // Implementation of LoopIdiomRecognize
271 //
272 //===----------------------------------------------------------------------===//
273 
274 bool LoopIdiomRecognize::runOnLoop(Loop *L) {
275  CurLoop = L;
276  // If the loop could not be converted to canonical form, it must have an
277  // indirectbr in it, just give up.
278  if (!L->getLoopPreheader())
279  return false;
280 
281  // Disable loop idiom recognition if the function's name is a common idiom.
283  if (Name == "memset" || Name == "memcpy")
284  return false;
285 
286  // Determine if code size heuristics need to be applied.
287  ApplyCodeSizeHeuristics =
289 
290  HasMemset = TLI->has(LibFunc_memset);
291  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
292  HasMemcpy = TLI->has(LibFunc_memcpy);
293 
294  if (HasMemset || HasMemsetPattern || HasMemcpy)
296  return runOnCountableLoop();
297 
298  return runOnNoncountableLoop();
299 }
300 
301 bool LoopIdiomRecognize::runOnCountableLoop() {
302  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
303  assert(!isa<SCEVCouldNotCompute>(BECount) &&
304  "runOnCountableLoop() called on a loop without a predictable"
305  "backedge-taken count");
306 
307  // If this loop executes exactly one time, then it should be peeled, not
308  // optimized by this pass.
309  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
310  if (BECst->getAPInt() == 0)
311  return false;
312 
313  SmallVector<BasicBlock *, 8> ExitBlocks;
314  CurLoop->getUniqueExitBlocks(ExitBlocks);
315 
316  LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
317  << CurLoop->getHeader()->getParent()->getName()
318  << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
319 
320  bool MadeChange = false;
321 
322  // The following transforms hoist stores/memsets into the loop pre-header.
323  // Give up if the loop has instructions may throw.
324  SimpleLoopSafetyInfo SafetyInfo;
325  SafetyInfo.computeLoopSafetyInfo(CurLoop);
326  if (SafetyInfo.anyBlockMayThrow())
327  return MadeChange;
328 
329  // Scan all the blocks in the loop that are not in subloops.
330  for (auto *BB : CurLoop->getBlocks()) {
331  // Ignore blocks in subloops.
332  if (LI->getLoopFor(BB) != CurLoop)
333  continue;
334 
335  MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);
336  }
337  return MadeChange;
338 }
339 
340 static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
341  const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
342  return ConstStride->getAPInt();
343 }
344 
345 /// getMemSetPatternValue - If a strided store of the specified value is safe to
346 /// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
347 /// be passed in. Otherwise, return null.
348 ///
349 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
350 /// just replicate their input array and then pass on to memset_pattern16.
352  // FIXME: This could check for UndefValue because it can be merged into any
353  // other valid pattern.
354 
355  // If the value isn't a constant, we can't promote it to being in a constant
356  // array. We could theoretically do a store to an alloca or something, but
357  // that doesn't seem worthwhile.
358  Constant *C = dyn_cast<Constant>(V);
359  if (!C)
360  return nullptr;
361 
362  // Only handle simple values that are a power of two bytes in size.
363  uint64_t Size = DL->getTypeSizeInBits(V->getType());
364  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
365  return nullptr;
366 
367  // Don't care enough about darwin/ppc to implement this.
368  if (DL->isBigEndian())
369  return nullptr;
370 
371  // Convert to size in bytes.
372  Size /= 8;
373 
374  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
375  // if the top and bottom are the same (e.g. for vectors and large integers).
376  if (Size > 16)
377  return nullptr;
378 
379  // If the constant is exactly 16 bytes, just use it.
380  if (Size == 16)
381  return C;
382 
383  // Otherwise, we'll use an array of the constants.
384  unsigned ArraySize = 16 / Size;
385  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
386  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
387 }
388 
389 LoopIdiomRecognize::LegalStoreKind
390 LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
391  // Don't touch volatile stores.
392  if (SI->isVolatile())
393  return LegalStoreKind::None;
394  // We only want simple or unordered-atomic stores.
395  if (!SI->isUnordered())
396  return LegalStoreKind::None;
397 
398  // Don't convert stores of non-integral pointer types to memsets (which stores
399  // integers).
400  if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
401  return LegalStoreKind::None;
402 
403  // Avoid merging nontemporal stores.
405  return LegalStoreKind::None;
406 
407  Value *StoredVal = SI->getValueOperand();
408  Value *StorePtr = SI->getPointerOperand();
409 
410  // Reject stores that are so large that they overflow an unsigned.
411  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
412  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
413  return LegalStoreKind::None;
414 
415  // See if the pointer expression is an AddRec like {base,+,1} on the current
416  // loop, which indicates a strided store. If we have something else, it's a
417  // random store we can't handle.
418  const SCEVAddRecExpr *StoreEv =
419  dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
420  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
421  return LegalStoreKind::None;
422 
423  // Check to see if we have a constant stride.
424  if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
425  return LegalStoreKind::None;
426 
427  // See if the store can be turned into a memset.
428 
429  // If the stored value is a byte-wise value (like i32 -1), then it may be
430  // turned into a memset of i8 -1, assuming that all the consecutive bytes
431  // are stored. A store of i32 0x01020304 can never be turned into a memset,
432  // but it can be turned into memset_pattern if the target supports it.
433  Value *SplatValue = isBytewiseValue(StoredVal);
434  Constant *PatternValue = nullptr;
435 
436  // Note: memset and memset_pattern on unordered-atomic is yet not supported
437  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
438 
439  // If we're allowed to form a memset, and the stored value would be
440  // acceptable for memset, use it.
441  if (!UnorderedAtomic && HasMemset && SplatValue &&
442  // Verify that the stored value is loop invariant. If not, we can't
443  // promote the memset.
444  CurLoop->isLoopInvariant(SplatValue)) {
445  // It looks like we can use SplatValue.
446  return LegalStoreKind::Memset;
447  } else if (!UnorderedAtomic && HasMemsetPattern &&
448  // Don't create memset_pattern16s with address spaces.
449  StorePtr->getType()->getPointerAddressSpace() == 0 &&
450  (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
451  // It looks like we can use PatternValue!
452  return LegalStoreKind::MemsetPattern;
453  }
454 
455  // Otherwise, see if the store can be turned into a memcpy.
456  if (HasMemcpy) {
457  // Check to see if the stride matches the size of the store. If so, then we
458  // know that every byte is touched in the loop.
459  APInt Stride = getStoreStride(StoreEv);
460  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
461  if (StoreSize != Stride && StoreSize != -Stride)
462  return LegalStoreKind::None;
463 
464  // The store must be feeding a non-volatile load.
466 
467  // Only allow non-volatile loads
468  if (!LI || LI->isVolatile())
469  return LegalStoreKind::None;
470  // Only allow simple or unordered-atomic loads
471  if (!LI->isUnordered())
472  return LegalStoreKind::None;
473 
474  // See if the pointer expression is an AddRec like {base,+,1} on the current
475  // loop, which indicates a strided load. If we have something else, it's a
476  // random load we can't handle.
477  const SCEVAddRecExpr *LoadEv =
479  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
480  return LegalStoreKind::None;
481 
482  // The store and load must share the same stride.
483  if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
484  return LegalStoreKind::None;
485 
486  // Success. This store can be converted into a memcpy.
487  UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
488  return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
489  : LegalStoreKind::Memcpy;
490  }
491  // This store can't be transformed into a memset/memcpy.
492  return LegalStoreKind::None;
493 }
494 
495 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
496  StoreRefsForMemset.clear();
497  StoreRefsForMemsetPattern.clear();
498  StoreRefsForMemcpy.clear();
499  for (Instruction &I : *BB) {
500  StoreInst *SI = dyn_cast<StoreInst>(&I);
501  if (!SI)
502  continue;
503 
504  // Make sure this is a strided store with a constant stride.
505  switch (isLegalStore(SI)) {
507  // Nothing to do
508  break;
509  case LegalStoreKind::Memset: {
510  // Find the base pointer.
511  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
512  StoreRefsForMemset[Ptr].push_back(SI);
513  } break;
514  case LegalStoreKind::MemsetPattern: {
515  // Find the base pointer.
516  Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
517  StoreRefsForMemsetPattern[Ptr].push_back(SI);
518  } break;
519  case LegalStoreKind::Memcpy:
520  case LegalStoreKind::UnorderedAtomicMemcpy:
521  StoreRefsForMemcpy.push_back(SI);
522  break;
523  default:
524  assert(false && "unhandled return value");
525  break;
526  }
527  }
528 }
529 
530 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
531 /// with the specified backedge count. This block is known to be in the current
532 /// loop and not in any subloops.
533 bool LoopIdiomRecognize::runOnLoopBlock(
534  BasicBlock *BB, const SCEV *BECount,
535  SmallVectorImpl<BasicBlock *> &ExitBlocks) {
536  // We can only promote stores in this block if they are unconditionally
537  // executed in the loop. For a block to be unconditionally executed, it has
538  // to dominate all the exit blocks of the loop. Verify this now.
539  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
540  if (!DT->dominates(BB, ExitBlocks[i]))
541  return false;
542 
543  bool MadeChange = false;
544  // Look for store instructions, which may be optimized to memset/memcpy.
545  collectStores(BB);
546 
547  // Look for a single store or sets of stores with a common base, which can be
548  // optimized into a memset (memset_pattern). The latter most commonly happens
549  // with structs and handunrolled loops.
550  for (auto &SL : StoreRefsForMemset)
551  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);
552 
553  for (auto &SL : StoreRefsForMemsetPattern)
554  MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);
555 
556  // Optimize the store into a memcpy, if it feeds an similarly strided load.
557  for (auto &SI : StoreRefsForMemcpy)
558  MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
559 
560  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
561  Instruction *Inst = &*I++;
562  // Look for memset instructions, which may be optimized to a larger memset.
563  if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
564  WeakTrackingVH InstPtr(&*I);
565  if (!processLoopMemSet(MSI, BECount))
566  continue;
567  MadeChange = true;
568 
569  // If processing the memset invalidated our iterator, start over from the
570  // top of the block.
571  if (!InstPtr)
572  I = BB->begin();
573  continue;
574  }
575  }
576 
577  return MadeChange;
578 }
579 
580 /// See if this store(s) can be promoted to a memset.
581 bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
582  const SCEV *BECount, ForMemset For) {
583  // Try to find consecutive stores that can be transformed into memsets.
584  SetVector<StoreInst *> Heads, Tails;
586 
587  // Do a quadratic search on all of the given stores and find
588  // all of the pairs of stores that follow each other.
589  SmallVector<unsigned, 16> IndexQueue;
590  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
591  assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
592 
593  Value *FirstStoredVal = SL[i]->getValueOperand();
594  Value *FirstStorePtr = SL[i]->getPointerOperand();
595  const SCEVAddRecExpr *FirstStoreEv =
596  cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
597  APInt FirstStride = getStoreStride(FirstStoreEv);
598  unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
599 
600  // See if we can optimize just this store in isolation.
601  if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
602  Heads.insert(SL[i]);
603  continue;
604  }
605 
606  Value *FirstSplatValue = nullptr;
607  Constant *FirstPatternValue = nullptr;
608 
609  if (For == ForMemset::Yes)
610  FirstSplatValue = isBytewiseValue(FirstStoredVal);
611  else
612  FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
613 
614  assert((FirstSplatValue || FirstPatternValue) &&
615  "Expected either splat value or pattern value.");
616 
617  IndexQueue.clear();
618  // If a store has multiple consecutive store candidates, search Stores
619  // array according to the sequence: from i+1 to e, then from i-1 to 0.
620  // This is because usually pairing with immediate succeeding or preceding
621  // candidate create the best chance to find memset opportunity.
622  unsigned j = 0;
623  for (j = i + 1; j < e; ++j)
624  IndexQueue.push_back(j);
625  for (j = i; j > 0; --j)
626  IndexQueue.push_back(j - 1);
627 
628  for (auto &k : IndexQueue) {
629  assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
630  Value *SecondStorePtr = SL[k]->getPointerOperand();
631  const SCEVAddRecExpr *SecondStoreEv =
632  cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
633  APInt SecondStride = getStoreStride(SecondStoreEv);
634 
635  if (FirstStride != SecondStride)
636  continue;
637 
638  Value *SecondStoredVal = SL[k]->getValueOperand();
639  Value *SecondSplatValue = nullptr;
640  Constant *SecondPatternValue = nullptr;
641 
642  if (For == ForMemset::Yes)
643  SecondSplatValue = isBytewiseValue(SecondStoredVal);
644  else
645  SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
646 
647  assert((SecondSplatValue || SecondPatternValue) &&
648  "Expected either splat value or pattern value.");
649 
650  if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
651  if (For == ForMemset::Yes) {
652  if (isa<UndefValue>(FirstSplatValue))
653  FirstSplatValue = SecondSplatValue;
654  if (FirstSplatValue != SecondSplatValue)
655  continue;
656  } else {
657  if (isa<UndefValue>(FirstPatternValue))
658  FirstPatternValue = SecondPatternValue;
659  if (FirstPatternValue != SecondPatternValue)
660  continue;
661  }
662  Tails.insert(SL[k]);
663  Heads.insert(SL[i]);
664  ConsecutiveChain[SL[i]] = SL[k];
665  break;
666  }
667  }
668  }
669 
670  // We may run into multiple chains that merge into a single chain. We mark the
671  // stores that we transformed so that we don't visit the same store twice.
672  SmallPtrSet<Value *, 16> TransformedStores;
673  bool Changed = false;
674 
675  // For stores that start but don't end a link in the chain:
676  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
677  it != e; ++it) {
678  if (Tails.count(*it))
679  continue;
680 
681  // We found a store instr that starts a chain. Now follow the chain and try
682  // to transform it.
683  SmallPtrSet<Instruction *, 8> AdjacentStores;
684  StoreInst *I = *it;
685 
686  StoreInst *HeadStore = I;
687  unsigned StoreSize = 0;
688 
689  // Collect the chain into a list.
690  while (Tails.count(I) || Heads.count(I)) {
691  if (TransformedStores.count(I))
692  break;
693  AdjacentStores.insert(I);
694 
695  StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());
696  // Move to the next value in the chain.
697  I = ConsecutiveChain[I];
698  }
699 
700  Value *StoredVal = HeadStore->getValueOperand();
701  Value *StorePtr = HeadStore->getPointerOperand();
702  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
703  APInt Stride = getStoreStride(StoreEv);
704 
705  // Check to see if the stride matches the size of the stores. If so, then
706  // we know that every byte is touched in the loop.
707  if (StoreSize != Stride && StoreSize != -Stride)
708  continue;
709 
710  bool NegStride = StoreSize == -Stride;
711 
712  if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
713  StoredVal, HeadStore, AdjacentStores, StoreEv,
714  BECount, NegStride)) {
715  TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
716  Changed = true;
717  }
718  }
719 
720  return Changed;
721 }
722 
723 /// processLoopMemSet - See if this memset can be promoted to a large memset.
724 bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
725  const SCEV *BECount) {
726  // We can only handle non-volatile memsets with a constant size.
727  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
728  return false;
729 
730  // If we're not allowed to hack on memset, we fail.
731  if (!HasMemset)
732  return false;
733 
734  Value *Pointer = MSI->getDest();
735 
736  // See if the pointer expression is an AddRec like {base,+,1} on the current
737  // loop, which indicates a strided store. If we have something else, it's a
738  // random store we can't handle.
739  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
740  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
741  return false;
742 
743  // Reject memsets that are so large that they overflow an unsigned.
744  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
745  if ((SizeInBytes >> 32) != 0)
746  return false;
747 
748  // Check to see if the stride matches the size of the memset. If so, then we
749  // know that every byte is touched in the loop.
750  const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
751  if (!ConstStride)
752  return false;
753 
754  APInt Stride = ConstStride->getAPInt();
755  if (SizeInBytes != Stride && SizeInBytes != -Stride)
756  return false;
757 
758  // Verify that the memset value is loop invariant. If not, we can't promote
759  // the memset.
760  Value *SplatValue = MSI->getValue();
761  if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
762  return false;
763 
765  MSIs.insert(MSI);
766  bool NegStride = SizeInBytes == -Stride;
767  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
768  MSI->getDestAlignment(), SplatValue, MSI, MSIs,
769  Ev, BECount, NegStride, /*IsLoopMemset=*/true);
770 }
771 
772 /// mayLoopAccessLocation - Return true if the specified loop might access the
773 /// specified pointer location, which is a loop-strided access. The 'Access'
774 /// argument specifies what the verboten forms of access are (read or write).
775 static bool
777  const SCEV *BECount, unsigned StoreSize,
778  AliasAnalysis &AA,
779  SmallPtrSetImpl<Instruction *> &IgnoredStores) {
780  // Get the location that may be stored across the loop. Since the access is
781  // strided positively through memory, we say that the modified location starts
782  // at the pointer and has infinite size.
783  LocationSize AccessSize = LocationSize::unknown();
784 
785  // If the loop iterates a fixed number of times, we can refine the access size
786  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
787  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
788  AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
789  StoreSize);
790 
791  // TODO: For this to be really effective, we have to dive into the pointer
792  // operand in the store. Store to &A[i] of 100 will always return may alias
793  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
794  // which will then no-alias a store to &A[100].
795  MemoryLocation StoreLoc(Ptr, AccessSize);
796 
797  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
798  ++BI)
799  for (Instruction &I : **BI)
800  if (IgnoredStores.count(&I) == 0 &&
802  intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
803  return true;
804 
805  return false;
806 }
807 
808 // If we have a negative stride, Start refers to the end of the memory location
809 // we're trying to memset. Therefore, we need to recompute the base pointer,
810 // which is just Start - BECount*Size.
811 static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
812  Type *IntPtr, unsigned StoreSize,
813  ScalarEvolution *SE) {
814  const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
815  if (StoreSize != 1)
816  Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
817  SCEV::FlagNUW);
818  return SE->getMinusSCEV(Start, Index);
819 }
820 
821 /// Compute the number of bytes as a SCEV from the backedge taken count.
822 ///
823 /// This also maps the SCEV into the provided type and tries to handle the
824 /// computation in a way that will fold cleanly.
825 static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
826  unsigned StoreSize, Loop *CurLoop,
827  const DataLayout *DL, ScalarEvolution *SE) {
828  const SCEV *NumBytesS;
829  // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
830  // pointer size if it isn't already.
831  //
832  // If we're going to need to zero extend the BE count, check if we can add
833  // one to it prior to zero extending without overflow. Provided this is safe,
834  // it allows better simplification of the +1.
835  if (DL->getTypeSizeInBits(BECount->getType()) <
836  DL->getTypeSizeInBits(IntPtr) &&
838  CurLoop, ICmpInst::ICMP_NE, BECount,
839  SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
840  NumBytesS = SE->getZeroExtendExpr(
841  SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
842  IntPtr);
843  } else {
844  NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
845  SE->getOne(IntPtr), SCEV::FlagNUW);
846  }
847 
848  // And scale it based on the store size.
849  if (StoreSize != 1) {
850  NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
851  SCEV::FlagNUW);
852  }
853  return NumBytesS;
854 }
855 
856 /// processLoopStridedStore - We see a strided store of some value. If we can
857 /// transform this into a memset or memset_pattern in the loop preheader, do so.
858 bool LoopIdiomRecognize::processLoopStridedStore(
859  Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
860  Value *StoredVal, Instruction *TheStore,
862  const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
863  Value *SplatValue = isBytewiseValue(StoredVal);
864  Constant *PatternValue = nullptr;
865 
866  if (!SplatValue)
867  PatternValue = getMemSetPatternValue(StoredVal, DL);
868 
869  assert((SplatValue || PatternValue) &&
870  "Expected either splat value or pattern value.");
871 
872  // The trip count of the loop and the base pointer of the addrec SCEV is
873  // guaranteed to be loop invariant, which means that it should dominate the
874  // header. This allows us to insert code for it in the preheader.
875  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
876  BasicBlock *Preheader = CurLoop->getLoopPreheader();
877  IRBuilder<> Builder(Preheader->getTerminator());
878  SCEVExpander Expander(*SE, *DL, "loop-idiom");
879 
880  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
881  Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS);
882 
883  const SCEV *Start = Ev->getStart();
884  // Handle negative strided loops.
885  if (NegStride)
886  Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE);
887 
888  // TODO: ideally we should still be able to generate memset if SCEV expander
889  // is taught to generate the dependencies at the latest point.
890  if (!isSafeToExpand(Start, *SE))
891  return false;
892 
893  // Okay, we have a strided store "p[i]" of a splattable value. We can turn
894  // this into a memset in the loop preheader now if we want. However, this
895  // would be unsafe to do if there is anything else in the loop that may read
896  // or write to the aliased location. Check for any overlap by generating the
897  // base pointer and checking the region.
898  Value *BasePtr =
899  Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
900  if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
901  StoreSize, *AA, Stores)) {
902  Expander.clear();
903  // If we generated new code for the base pointer, clean up.
905  return false;
906  }
907 
908  if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
909  return false;
910 
911  // Okay, everything looks good, insert the memset.
912 
913  const SCEV *NumBytesS =
914  getNumBytes(BECount, IntPtr, StoreSize, CurLoop, DL, SE);
915 
916  // TODO: ideally we should still be able to generate memset if SCEV expander
917  // is taught to generate the dependencies at the latest point.
918  if (!isSafeToExpand(NumBytesS, *SE))
919  return false;
920 
921  Value *NumBytes =
922  Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
923 
924  CallInst *NewCall;
925  if (SplatValue) {
926  NewCall =
927  Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment);
928  } else {
929  // Everything is emitted in default address space
930  Type *Int8PtrTy = DestInt8PtrTy;
931 
932  Module *M = TheStore->getModule();
933  StringRef FuncName = "memset_pattern16";
934  Value *MSP =
935  M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
936  Int8PtrTy, Int8PtrTy, IntPtr);
937  inferLibFuncAttributes(M, FuncName, *TLI);
938 
939  // Otherwise we should form a memset_pattern16. PatternValue is known to be
940  // an constant array of 16-bytes. Plop the value into a mergable global.
941  GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
943  PatternValue, ".memset_pattern");
944  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
945  GV->setAlignment(16);
946  Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
947  NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
948  }
949 
950  LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
951  << " from store to: " << *Ev << " at: " << *TheStore
952  << "\n");
953  NewCall->setDebugLoc(TheStore->getDebugLoc());
954 
955  // Okay, the memset has been formed. Zap the original store and anything that
956  // feeds into it.
957  for (auto *I : Stores)
959  ++NumMemSet;
960  return true;
961 }
962 
963 /// If the stored value is a strided load in the same loop with the same stride
964 /// this may be transformable into a memcpy. This kicks in for stuff like
965 /// for (i) A[i] = B[i];
966 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
967  const SCEV *BECount) {
968  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
969 
970  Value *StorePtr = SI->getPointerOperand();
971  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
972  APInt Stride = getStoreStride(StoreEv);
973  unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
974  bool NegStride = StoreSize == -Stride;
975 
976  // The store must be feeding a non-volatile load.
977  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
978  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
979 
980  // See if the pointer expression is an AddRec like {base,+,1} on the current
981  // loop, which indicates a strided load. If we have something else, it's a
982  // random load we can't handle.
983  const SCEVAddRecExpr *LoadEv =
984  cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
985 
986  // The trip count of the loop and the base pointer of the addrec SCEV is
987  // guaranteed to be loop invariant, which means that it should dominate the
988  // header. This allows us to insert code for it in the preheader.
989  BasicBlock *Preheader = CurLoop->getLoopPreheader();
990  IRBuilder<> Builder(Preheader->getTerminator());
991  SCEVExpander Expander(*SE, *DL, "loop-idiom");
992 
993  const SCEV *StrStart = StoreEv->getStart();
994  unsigned StrAS = SI->getPointerAddressSpace();
995  Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
996 
997  // Handle negative strided loops.
998  if (NegStride)
999  StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
1000 
1001  // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1002  // this into a memcpy in the loop preheader now if we want. However, this
1003  // would be unsafe to do if there is anything else in the loop that may read
1004  // or write the memory region we're storing to. This includes the load that
1005  // feeds the stores. Check for an alias by generating the base address and
1006  // checking everything.
1007  Value *StoreBasePtr = Expander.expandCodeFor(
1008  StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
1009 
1011  Stores.insert(SI);
1012  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
1013  StoreSize, *AA, Stores)) {
1014  Expander.clear();
1015  // If we generated new code for the base pointer, clean up.
1016  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1017  return false;
1018  }
1019 
1020  const SCEV *LdStart = LoadEv->getStart();
1021  unsigned LdAS = LI->getPointerAddressSpace();
1022 
1023  // Handle negative strided loops.
1024  if (NegStride)
1025  LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
1026 
1027  // For a memcpy, we have to make sure that the input array is not being
1028  // mutated by the loop.
1029  Value *LoadBasePtr = Expander.expandCodeFor(
1030  LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
1031 
1032  if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
1033  StoreSize, *AA, Stores)) {
1034  Expander.clear();
1035  // If we generated new code for the base pointer, clean up.
1037  RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1038  return false;
1039  }
1040 
1041  if (avoidLIRForMultiBlockLoop())
1042  return false;
1043 
1044  // Okay, everything is safe, we can transform this!
1045 
1046  const SCEV *NumBytesS =
1047  getNumBytes(BECount, IntPtrTy, StoreSize, CurLoop, DL, SE);
1048 
1049  Value *NumBytes =
1050  Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1051 
1052  CallInst *NewCall = nullptr;
1053  // Check whether to generate an unordered atomic memcpy:
1054  // If the load or store are atomic, then they must necessarily be unordered
1055  // by previous checks.
1056  if (!SI->isAtomic() && !LI->isAtomic())
1057  NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
1058  LoadBasePtr, LI->getAlignment(), NumBytes);
1059  else {
1060  // We cannot allow unaligned ops for unordered load/store, so reject
1061  // anything where the alignment isn't at least the element size.
1062  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
1063  if (Align < StoreSize)
1064  return false;
1065 
1066  // If the element.atomic memcpy is not lowered into explicit
1067  // loads/stores later, then it will be lowered into an element-size
1068  // specific lib call. If the lib call doesn't exist for our store size, then
1069  // we shouldn't generate the memcpy.
1070  if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
1071  return false;
1072 
1073  // Create the call.
1074  // Note that unordered atomic loads/stores are *required* by the spec to
1075  // have an alignment but non-atomic loads/stores may not.
1076  NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
1077  StoreBasePtr, SI->getAlignment(), LoadBasePtr, LI->getAlignment(),
1078  NumBytes, StoreSize);
1079  }
1080  NewCall->setDebugLoc(SI->getDebugLoc());
1081 
1082  LLVM_DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
1083  << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1084  << " from store ptr=" << *StoreEv << " at: " << *SI
1085  << "\n");
1086 
1087  // Okay, the memcpy has been formed. Zap the original store and anything that
1088  // feeds into it.
1090  ++NumMemCpy;
1091  return true;
1092 }
1093 
1094 // When compiling for codesize we avoid idiom recognition for a multi-block loop
1095 // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
1096 //
1097 bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
1098  bool IsLoopMemset) {
1099  if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
1100  if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
1101  LLVM_DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
1102  << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
1103  << " avoided: multi-block top-level loop\n");
1104  return true;
1105  }
1106  }
1107 
1108  return false;
1109 }
1110 
1111 bool LoopIdiomRecognize::runOnNoncountableLoop() {
1112  return recognizePopcount() || recognizeAndInsertFFS();
1113 }
1114 
1115 /// Check if the given conditional branch is based on the comparison between
1116 /// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
1117 /// true), the control yields to the loop entry. If the branch matches the
1118 /// behavior, the variable involved in the comparison is returned. This function
1119 /// will be called to see if the precondition and postcondition of the loop are
1120 /// in desirable form.
1121 static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1122  bool JmpOnZero = false) {
1123  if (!BI || !BI->isConditional())
1124  return nullptr;
1125 
1126  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1127  if (!Cond)
1128  return nullptr;
1129 
1130  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1131  if (!CmpZero || !CmpZero->isZero())
1132  return nullptr;
1133 
1134  BasicBlock *TrueSucc = BI->getSuccessor(0);
1135  BasicBlock *FalseSucc = BI->getSuccessor(1);
1136  if (JmpOnZero)
1137  std::swap(TrueSucc, FalseSucc);
1138 
1139  ICmpInst::Predicate Pred = Cond->getPredicate();
1140  if ((Pred == ICmpInst::ICMP_NE && TrueSucc == LoopEntry) ||
1141  (Pred == ICmpInst::ICMP_EQ && FalseSucc == LoopEntry))
1142  return Cond->getOperand(0);
1143 
1144  return nullptr;
1145 }
1146 
1147 // Check if the recurrence variable `VarX` is in the right form to create
1148 // the idiom. Returns the value coerced to a PHINode if so.
1150  BasicBlock *LoopEntry) {
1151  auto *PhiX = dyn_cast<PHINode>(VarX);
1152  if (PhiX && PhiX->getParent() == LoopEntry &&
1153  (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
1154  return PhiX;
1155  return nullptr;
1156 }
1157 
1158 /// Return true iff the idiom is detected in the loop.
1159 ///
1160 /// Additionally:
1161 /// 1) \p CntInst is set to the instruction counting the population bit.
1162 /// 2) \p CntPhi is set to the corresponding phi node.
1163 /// 3) \p Var is set to the value whose population bits are being counted.
1164 ///
1165 /// The core idiom we are trying to detect is:
1166 /// \code
1167 /// if (x0 != 0)
1168 /// goto loop-exit // the precondition of the loop
1169 /// cnt0 = init-val;
1170 /// do {
1171 /// x1 = phi (x0, x2);
1172 /// cnt1 = phi(cnt0, cnt2);
1173 ///
1174 /// cnt2 = cnt1 + 1;
1175 /// ...
1176 /// x2 = x1 & (x1 - 1);
1177 /// ...
1178 /// } while(x != 0);
1179 ///
1180 /// loop-exit:
1181 /// \endcode
1182 static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
1183  Instruction *&CntInst, PHINode *&CntPhi,
1184  Value *&Var) {
1185  // step 1: Check to see if the look-back branch match this pattern:
1186  // "if (a!=0) goto loop-entry".
1187  BasicBlock *LoopEntry;
1188  Instruction *DefX2, *CountInst;
1189  Value *VarX1, *VarX0;
1190  PHINode *PhiX, *CountPhi;
1191 
1192  DefX2 = CountInst = nullptr;
1193  VarX1 = VarX0 = nullptr;
1194  PhiX = CountPhi = nullptr;
1195  LoopEntry = *(CurLoop->block_begin());
1196 
1197  // step 1: Check if the loop-back branch is in desirable form.
1198  {
1199  if (Value *T = matchCondition(
1200  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1201  DefX2 = dyn_cast<Instruction>(T);
1202  else
1203  return false;
1204  }
1205 
1206  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
1207  {
1208  if (!DefX2 || DefX2->getOpcode() != Instruction::And)
1209  return false;
1210 
1211  BinaryOperator *SubOneOp;
1212 
1213  if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
1214  VarX1 = DefX2->getOperand(1);
1215  else {
1216  VarX1 = DefX2->getOperand(0);
1217  SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
1218  }
1219  if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)
1220  return false;
1221 
1222  ConstantInt *Dec = dyn_cast<ConstantInt>(SubOneOp->getOperand(1));
1223  if (!Dec ||
1224  !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||
1225  (SubOneOp->getOpcode() == Instruction::Add &&
1226  Dec->isMinusOne()))) {
1227  return false;
1228  }
1229  }
1230 
1231  // step 3: Check the recurrence of variable X
1232  PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
1233  if (!PhiX)
1234  return false;
1235 
1236  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
1237  {
1238  CountInst = nullptr;
1239  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1240  IterE = LoopEntry->end();
1241  Iter != IterE; Iter++) {
1242  Instruction *Inst = &*Iter;
1243  if (Inst->getOpcode() != Instruction::Add)
1244  continue;
1245 
1246  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1247  if (!Inc || !Inc->isOne())
1248  continue;
1249 
1250  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1251  if (!Phi)
1252  continue;
1253 
1254  // Check if the result of the instruction is live of the loop.
1255  bool LiveOutLoop = false;
1256  for (User *U : Inst->users()) {
1257  if ((cast<Instruction>(U))->getParent() != LoopEntry) {
1258  LiveOutLoop = true;
1259  break;
1260  }
1261  }
1262 
1263  if (LiveOutLoop) {
1264  CountInst = Inst;
1265  CountPhi = Phi;
1266  break;
1267  }
1268  }
1269 
1270  if (!CountInst)
1271  return false;
1272  }
1273 
1274  // step 5: check if the precondition is in this form:
1275  // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
1276  {
1277  auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1278  Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
1279  if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
1280  return false;
1281 
1282  CntInst = CountInst;
1283  CntPhi = CountPhi;
1284  Var = T;
1285  }
1286 
1287  return true;
1288 }
1289 
1290 /// Return true if the idiom is detected in the loop.
1291 ///
1292 /// Additionally:
1293 /// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1294 /// or nullptr if there is no such.
1295 /// 2) \p CntPhi is set to the corresponding phi node
1296 /// or nullptr if there is no such.
1297 /// 3) \p Var is set to the value whose CTLZ could be used.
1298 /// 4) \p DefX is set to the instruction calculating Loop exit condition.
1299 ///
1300 /// The core idiom we are trying to detect is:
1301 /// \code
1302 /// if (x0 == 0)
1303 /// goto loop-exit // the precondition of the loop
1304 /// cnt0 = init-val;
1305 /// do {
1306 /// x = phi (x0, x.next); //PhiX
1307 /// cnt = phi(cnt0, cnt.next);
1308 ///
1309 /// cnt.next = cnt + 1;
1310 /// ...
1311 /// x.next = x >> 1; // DefX
1312 /// ...
1313 /// } while(x.next != 0);
1314 ///
1315 /// loop-exit:
1316 /// \endcode
1317 static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1318  Intrinsic::ID &IntrinID, Value *&InitX,
1319  Instruction *&CntInst, PHINode *&CntPhi,
1320  Instruction *&DefX) {
1321  BasicBlock *LoopEntry;
1322  Value *VarX = nullptr;
1323 
1324  DefX = nullptr;
1325  CntInst = nullptr;
1326  CntPhi = nullptr;
1327  LoopEntry = *(CurLoop->block_begin());
1328 
1329  // step 1: Check if the loop-back branch is in desirable form.
1330  if (Value *T = matchCondition(
1331  dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))
1332  DefX = dyn_cast<Instruction>(T);
1333  else
1334  return false;
1335 
1336  // step 2: detect instructions corresponding to "x.next = x >> 1 or x << 1"
1337  if (!DefX || !DefX->isShift())
1338  return false;
1339  IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :
1341  ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1342  if (!Shft || !Shft->isOne())
1343  return false;
1344  VarX = DefX->getOperand(0);
1345 
1346  // step 3: Check the recurrence of variable X
1347  PHINode *PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
1348  if (!PhiX)
1349  return false;
1350 
1351  InitX = PhiX->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1352 
1353  // Make sure the initial value can't be negative otherwise the ashr in the
1354  // loop might never reach zero which would make the loop infinite.
1355  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, DL))
1356  return false;
1357 
1358  // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1359  // TODO: We can skip the step. If loop trip count is known (CTLZ),
1360  // then all uses of "cnt.next" could be optimized to the trip count
1361  // plus "cnt0". Currently it is not optimized.
1362  // This step could be used to detect POPCNT instruction:
1363  // cnt.next = cnt + (x.next & 1)
1364  for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1365  IterE = LoopEntry->end();
1366  Iter != IterE; Iter++) {
1367  Instruction *Inst = &*Iter;
1368  if (Inst->getOpcode() != Instruction::Add)
1369  continue;
1370 
1371  ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
1372  if (!Inc || !Inc->isOne())
1373  continue;
1374 
1375  PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
1376  if (!Phi)
1377  continue;
1378 
1379  CntInst = Inst;
1380  CntPhi = Phi;
1381  break;
1382  }
1383  if (!CntInst)
1384  return false;
1385 
1386  return true;
1387 }
1388 
1389 /// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1390 /// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1391 /// trip count returns true; otherwise, returns false.
1392 bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1393  // Give up if the loop has multiple blocks or multiple backedges.
1394  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1395  return false;
1396 
1397  Intrinsic::ID IntrinID;
1398  Value *InitX;
1399  Instruction *DefX = nullptr;
1400  PHINode *CntPhi = nullptr;
1401  Instruction *CntInst = nullptr;
1402  // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1403  // this is always 6.
1404  size_t IdiomCanonicalSize = 6;
1405 
1406  if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1407  CntInst, CntPhi, DefX))
1408  return false;
1409 
1410  bool IsCntPhiUsedOutsideLoop = false;
1411  for (User *U : CntPhi->users())
1412  if (!CurLoop->contains(cast<Instruction>(U))) {
1413  IsCntPhiUsedOutsideLoop = true;
1414  break;
1415  }
1416  bool IsCntInstUsedOutsideLoop = false;
1417  for (User *U : CntInst->users())
1418  if (!CurLoop->contains(cast<Instruction>(U))) {
1419  IsCntInstUsedOutsideLoop = true;
1420  break;
1421  }
1422  // If both CntInst and CntPhi are used outside the loop the profitability
1423  // is questionable.
1424  if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1425  return false;
1426 
1427  // For some CPUs result of CTLZ(X) intrinsic is undefined
1428  // when X is 0. If we can not guarantee X != 0, we need to check this
1429  // when expand.
1430  bool ZeroCheck = false;
1431  // It is safe to assume Preheader exist as it was checked in
1432  // parent function RunOnLoop.
1433  BasicBlock *PH = CurLoop->getLoopPreheader();
1434 
1435  // If we are using the count instruction outside the loop, make sure we
1436  // have a zero check as a precondition. Without the check the loop would run
1437  // one iteration for before any check of the input value. This means 0 and 1
1438  // would have identical behavior in the original loop and thus
1439  if (!IsCntPhiUsedOutsideLoop) {
1440  auto *PreCondBB = PH->getSinglePredecessor();
1441  if (!PreCondBB)
1442  return false;
1443  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1444  if (!PreCondBI)
1445  return false;
1446  if (matchCondition(PreCondBI, PH) != InitX)
1447  return false;
1448  ZeroCheck = true;
1449  }
1450 
1451  // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1452  // profitable if we delete the loop.
1453 
1454  // the loop has only 6 instructions:
1455  // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1456  // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1457  // %shr = ashr %n.addr.0, 1
1458  // %tobool = icmp eq %shr, 0
1459  // %inc = add nsw %i.0, 1
1460  // br i1 %tobool
1461 
1462  const Value *Args[] =
1463  {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
1464  : ConstantInt::getFalse(InitX->getContext())};
1465  if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
1466  TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
1468  return false;
1469 
1470  transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1471  DefX->getDebugLoc(), ZeroCheck,
1472  IsCntPhiUsedOutsideLoop);
1473  return true;
1474 }
1475 
1476 /// Recognizes a population count idiom in a non-countable loop.
1477 ///
1478 /// If detected, transforms the relevant code to issue the popcount intrinsic
1479 /// function call, and returns true; otherwise, returns false.
1480 bool LoopIdiomRecognize::recognizePopcount() {
1482  return false;
1483 
1484  // Counting population are usually conducted by few arithmetic instructions.
1485  // Such instructions can be easily "absorbed" by vacant slots in a
1486  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
1487  // in a compact loop.
1488 
1489  // Give up if the loop has multiple blocks or multiple backedges.
1490  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1491  return false;
1492 
1493  BasicBlock *LoopBody = *(CurLoop->block_begin());
1494  if (LoopBody->size() >= 20) {
1495  // The loop is too big, bail out.
1496  return false;
1497  }
1498 
1499  // It should have a preheader containing nothing but an unconditional branch.
1500  BasicBlock *PH = CurLoop->getLoopPreheader();
1501  if (!PH || &PH->front() != PH->getTerminator())
1502  return false;
1503  auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
1504  if (!EntryBI || EntryBI->isConditional())
1505  return false;
1506 
1507  // It should have a precondition block where the generated popcount intrinsic
1508  // function can be inserted.
1509  auto *PreCondBB = PH->getSinglePredecessor();
1510  if (!PreCondBB)
1511  return false;
1512  auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
1513  if (!PreCondBI || PreCondBI->isUnconditional())
1514  return false;
1515 
1516  Instruction *CntInst;
1517  PHINode *CntPhi;
1518  Value *Val;
1519  if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val))
1520  return false;
1521 
1522  transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);
1523  return true;
1524 }
1525 
1527  const DebugLoc &DL) {
1528  Value *Ops[] = {Val};
1529  Type *Tys[] = {Val->getType()};
1530 
1531  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1533  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1534  CI->setDebugLoc(DL);
1535 
1536  return CI;
1537 }
1538 
1540  const DebugLoc &DL, bool ZeroCheck,
1541  Intrinsic::ID IID) {
1542  Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1543  Type *Tys[] = {Val->getType()};
1544 
1545  Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1546  Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
1547  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1548  CI->setDebugLoc(DL);
1549 
1550  return CI;
1551 }
1552 
1553 /// Transform the following loop (Using CTLZ, CTTZ is similar):
1554 /// loop:
1555 /// CntPhi = PHI [Cnt0, CntInst]
1556 /// PhiX = PHI [InitX, DefX]
1557 /// CntInst = CntPhi + 1
1558 /// DefX = PhiX >> 1
1559 /// LOOP_BODY
1560 /// Br: loop if (DefX != 0)
1561 /// Use(CntPhi) or Use(CntInst)
1562 ///
1563 /// Into:
1564 /// If CntPhi used outside the loop:
1565 /// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1566 /// Count = CountPrev + 1
1567 /// else
1568 /// Count = BitWidth(InitX) - CTLZ(InitX)
1569 /// loop:
1570 /// CntPhi = PHI [Cnt0, CntInst]
1571 /// PhiX = PHI [InitX, DefX]
1572 /// PhiCount = PHI [Count, Dec]
1573 /// CntInst = CntPhi + 1
1574 /// DefX = PhiX >> 1
1575 /// Dec = PhiCount - 1
1576 /// LOOP_BODY
1577 /// Br: loop if (Dec != 0)
1578 /// Use(CountPrev + Cnt0) // Use(CntPhi)
1579 /// or
1580 /// Use(Count + Cnt0) // Use(CntInst)
1581 ///
1582 /// If LOOP_BODY is empty the loop will be deleted.
1583 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1584 void LoopIdiomRecognize::transformLoopToCountable(
1585  Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
1586  PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
1587  bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1588  BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
1589 
1590  // Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
1591  IRBuilder<> Builder(PreheaderBr);
1592  Builder.SetCurrentDebugLocation(DL);
1593  Value *FFS, *Count, *CountPrev, *NewCount, *InitXNext;
1594 
1595  // Count = BitWidth - CTLZ(InitX);
1596  // If there are uses of CntPhi create:
1597  // CountPrev = BitWidth - CTLZ(InitX >> 1);
1598  if (IsCntPhiUsedOutsideLoop) {
1599  if (DefX->getOpcode() == Instruction::AShr)
1600  InitXNext =
1601  Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1));
1602  else if (DefX->getOpcode() == Instruction::LShr)
1603  InitXNext =
1604  Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
1605  else if (DefX->getOpcode() == Instruction::Shl) // cttz
1606  InitXNext =
1607  Builder.CreateShl(InitX, ConstantInt::get(InitX->getType(), 1));
1608  else
1609  llvm_unreachable("Unexpected opcode!");
1610  } else
1611  InitXNext = InitX;
1612  FFS = createFFSIntrinsic(Builder, InitXNext, DL, ZeroCheck, IntrinID);
1613  Count = Builder.CreateSub(
1614  ConstantInt::get(FFS->getType(),
1615  FFS->getType()->getIntegerBitWidth()),
1616  FFS);
1617  if (IsCntPhiUsedOutsideLoop) {
1618  CountPrev = Count;
1619  Count = Builder.CreateAdd(
1620  CountPrev,
1621  ConstantInt::get(CountPrev->getType(), 1));
1622  }
1623 
1624  NewCount = Builder.CreateZExtOrTrunc(
1625  IsCntPhiUsedOutsideLoop ? CountPrev : Count,
1626  cast<IntegerType>(CntInst->getType()));
1627 
1628  // If the counter's initial value is not zero, insert Add Inst.
1629  Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1630  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1631  if (!InitConst || !InitConst->isZero())
1632  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1633 
1634  // Step 2: Insert new IV and loop condition:
1635  // loop:
1636  // ...
1637  // PhiCount = PHI [Count, Dec]
1638  // ...
1639  // Dec = PhiCount - 1
1640  // ...
1641  // Br: loop if (Dec != 0)
1642  BasicBlock *Body = *(CurLoop->block_begin());
1643  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1644  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1645  Type *Ty = Count->getType();
1646 
1647  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1648 
1649  Builder.SetInsertPoint(LbCond);
1650  Instruction *TcDec = cast<Instruction>(
1651  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1652  "tcdec", false, true));
1653 
1654  TcPhi->addIncoming(Count, Preheader);
1655  TcPhi->addIncoming(TcDec, Body);
1656 
1657  CmpInst::Predicate Pred =
1658  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1659  LbCond->setPredicate(Pred);
1660  LbCond->setOperand(0, TcDec);
1661  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1662 
1663  // Step 3: All the references to the original counter outside
1664  // the loop are replaced with the NewCount
1665  if (IsCntPhiUsedOutsideLoop)
1666  CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1667  else
1668  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1669 
1670  // step 4: Forget the "non-computable" trip-count SCEV associated with the
1671  // loop. The loop would otherwise not be deleted even if it becomes empty.
1672  SE->forgetLoop(CurLoop);
1673 }
1674 
1675 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
1676  Instruction *CntInst,
1677  PHINode *CntPhi, Value *Var) {
1678  BasicBlock *PreHead = CurLoop->getLoopPreheader();
1679  auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());
1680  const DebugLoc &DL = CntInst->getDebugLoc();
1681 
1682  // Assuming before transformation, the loop is following:
1683  // if (x) // the precondition
1684  // do { cnt++; x &= x - 1; } while(x);
1685 
1686  // Step 1: Insert the ctpop instruction at the end of the precondition block
1687  IRBuilder<> Builder(PreCondBr);
1688  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
1689  {
1690  PopCnt = createPopcntIntrinsic(Builder, Var, DL);
1691  NewCount = PopCntZext =
1692  Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
1693 
1694  if (NewCount != PopCnt)
1695  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1696 
1697  // TripCnt is exactly the number of iterations the loop has
1698  TripCnt = NewCount;
1699 
1700  // If the population counter's initial value is not zero, insert Add Inst.
1701  Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
1702  ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
1703  if (!InitConst || !InitConst->isZero()) {
1704  NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1705  (cast<Instruction>(NewCount))->setDebugLoc(DL);
1706  }
1707  }
1708 
1709  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
1710  // "if (NewCount == 0) loop-exit". Without this change, the intrinsic
1711  // function would be partial dead code, and downstream passes will drag
1712  // it back from the precondition block to the preheader.
1713  {
1714  ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
1715 
1716  Value *Opnd0 = PopCntZext;
1717  Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
1718  if (PreCond->getOperand(0) != Var)
1719  std::swap(Opnd0, Opnd1);
1720 
1721  ICmpInst *NewPreCond = cast<ICmpInst>(
1722  Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
1723  PreCondBr->setCondition(NewPreCond);
1724 
1726  }
1727 
1728  // Step 3: Note that the population count is exactly the trip count of the
1729  // loop in question, which enable us to convert the loop from noncountable
1730  // loop into a countable one. The benefit is twofold:
1731  //
1732  // - If the loop only counts population, the entire loop becomes dead after
1733  // the transformation. It is a lot easier to prove a countable loop dead
1734  // than to prove a noncountable one. (In some C dialects, an infinite loop
1735  // isn't dead even if it computes nothing useful. In general, DCE needs
1736  // to prove a noncountable loop finite before safely delete it.)
1737  //
1738  // - If the loop also performs something else, it remains alive.
1739  // Since it is transformed to countable form, it can be aggressively
1740  // optimized by some optimizations which are in general not applicable
1741  // to a noncountable loop.
1742  //
1743  // After this step, this loop (conceptually) would look like following:
1744  // newcnt = __builtin_ctpop(x);
1745  // t = newcnt;
1746  // if (x)
1747  // do { cnt++; x &= x-1; t--) } while (t > 0);
1748  BasicBlock *Body = *(CurLoop->block_begin());
1749  {
1750  auto *LbBr = cast<BranchInst>(Body->getTerminator());
1751  ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
1752  Type *Ty = TripCnt->getType();
1753 
1754  PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1755 
1756  Builder.SetInsertPoint(LbCond);
1757  Instruction *TcDec = cast<Instruction>(
1758  Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1759  "tcdec", false, true));
1760 
1761  TcPhi->addIncoming(TripCnt, PreHead);
1762  TcPhi->addIncoming(TcDec, Body);
1763 
1764  CmpInst::Predicate Pred =
1765  (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
1766  LbCond->setPredicate(Pred);
1767  LbCond->setOperand(0, TcDec);
1768  LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1769  }
1770 
1771  // Step 4: All the references to the original population counter outside
1772  // the loop are replaced with the NewCount -- the value returned from
1773  // __builtin_ctpop().
1774  CntInst->replaceUsesOutsideBlock(NewCount, Body);
1775 
1776  // step 5: Forget the "non-computable" trip-count SCEV associated with the
1777  // loop. The loop would otherwise not be deleted even if it becomes empty.
1778  SE->forgetLoop(CurLoop);
1779 }
The access may reference and may modify the value stored in memory.
virtual void computeLoopSafetyInfo(const Loop *CurLoop)
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
Definition: MustExecute.cpp:45
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:81
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:410
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:585
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1949
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1669
const SCEV * getConstant(ConstantInt *V)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:316
Value * isBytewiseValue(Value *V)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Constant * getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:144
ArrayRef< BasicBlock *>::const_iterator block_iterator
Definition: LoopInfo.h:153
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static constexpr LocationSize unknown()
bool isUnordered() const
Definition: Instructions.h:404
void push_back(const T &Elt)
Definition: SmallVector.h:218
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
Value * getValue() const
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:57
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memset intrinsic.
BasicBlock * getSuccessor(unsigned i) const
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:168
static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)
Check if the given conditional branch is based on the comparison between a variable and zero...
Value * getCondition() const
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
void setAlignment(unsigned Align)
Definition: Globals.cpp:116
Value * getLength() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:983
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
Definition: MustExecute.h:98
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:134
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:232
amdgpu Simplify well known AMD library false Value Value const Twine & Name
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:690
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
This file contains the simple types necessary to represent the attributes associated with functions a...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
unsigned getDestAlignment() const
This file implements a class to represent arbitrary precision integral constant values and operations...
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:100
static bool isSimple(Instruction *I)
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:201
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:419
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
This node represents a polynomial recurrence on the trip count of the specified loop.
static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)
#define T
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:121
Class to represent array types.
Definition: DerivedTypes.h:369
static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling" "with -Os/-Oz"), cl::init(true), cl::Hidden)
bool has(LibFunc F) const
Tests whether a library function is available.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:221
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1031
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry &)
An instruction for storing to memory.
Definition: Instructions.h:321
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:209
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
static void deleteDeadInstruction(Instruction *I)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
Value * getOperand(unsigned i) const
Definition: User.h:170
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS)
Test whether entry to the loop is protected by a conditional between LHS and RHS. ...
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1773
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI)
Analyze the name and prototype of the given function and set any applicable attributes.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
const SCEV * getOperand(unsigned i) const
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:190
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:308
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:234
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:287
Conditional or Unconditional Branch instruction.
static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
Value * getIncomingValueForBlock(const BasicBlock *BB) const
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const Instruction & front() const
Definition: BasicBlock.h:281
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_t size() const
Definition: BasicBlock.h:279
bool isUnordered() const
Definition: Instructions.h:279
Represent the analysis usage information of a pass.
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
This instruction compares its operands according to the predicate given to the constructor.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)
Return true if the idiom is detected in the loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
Definition: Instructions.h:285
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
size_t size() const
Definition: SmallVector.h:53
static wasm::ValType getType(const TargetRegisterClass *RC)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
bool isVolatile() const
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Representation for a specific memory location.
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:271
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)
getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...
static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, unsigned StoreSize, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute the number of bytes as a SCEV from the backedge taken count.
static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)
Return true iff the idiom is detected in the loop.
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:578
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
The access may modify the value stored in memory.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
signed less or equal
Definition: InstrTypes.h:676
loop Recognize loop idioms
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:70
loop idiom
iterator_range< user_iterator > users()
Definition: Value.h:400
This class uses information about analyze scalars to rewrite expressions in canonical form...
Pass * createLoopIdiomPass()
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:292
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:568
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
bool isVolatile() const
Return true if this is a store to a volatile memory location.
Definition: Instructions.h:354
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:311
INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:216
unsigned getAtomicMemIntrinsicMaxElementSize() const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:241
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
LLVM_NODISCARD ModRefInfo intersectModRef(const ModRefInfo MRI1, const ModRefInfo MRI2)
#define I(x, y, z)
Definition: MD5.cpp:58
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:132
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:581
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:155
uint32_t Size
Definition: Profile.cpp:47
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:366
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:291
static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, unsigned StoreSize, ScalarEvolution *SE)
const SCEV * getBackedgeTakenCount(const Loop *L)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical &#39;add&#39; instruction.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys) const
Estimate the cost of an intrinsic when lowered.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
A vector that has set insertion semantics.
Definition: SetVector.h:41
static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction *> &IgnoredStores)
mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
unsigned greater than
Definition: InstrTypes.h:669
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)
replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
Definition: Value.cpp:439
const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
This pass exposes codegen information to IR-level passes.
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
bool isSimple() const
Definition: Instructions.h:402
This header defines various interfaces for pass management in LLVM.
bool isBigEndian() const
Definition: DataLayout.h:222
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
virtual bool anyBlockMayThrow() const
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
Definition: MustExecute.cpp:41
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE)
Return true if the given expression is safe to expand in the sense that all materialized values are s...
block_iterator block_begin() const
Definition: LoopInfo.h:154
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count...
Value * getPointerOperand()
Definition: Instructions.h:413
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
This class represents a constant integer value.