LLVM 8.0.1
InstCombineLoadStoreAlloca.cpp
1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visit functions for load, store and alloca.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/Loads.h"
20 #include "llvm/IR/ConstantRange.h"
21 #include "llvm/IR/DataLayout.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/LLVMContext.h"
25 #include "llvm/IR/MDBuilder.h"
26 #include "llvm/IR/PatternMatch.h"
28 using namespace llvm;
29 using namespace PatternMatch;
30 
31 #define DEBUG_TYPE "instcombine"
32 
33 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
34 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
35 
36 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
37 /// some part of a constant global variable. This intentionally only accepts
38 /// constant expressions because we can't rewrite arbitrary instructions.
39 static bool pointsToConstantGlobal(Value *V) {
40  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
41  return GV->isConstant();
42 
43  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
44  if (CE->getOpcode() == Instruction::BitCast ||
45  CE->getOpcode() == Instruction::AddrSpaceCast ||
46  CE->getOpcode() == Instruction::GetElementPtr)
47  return pointsToConstantGlobal(CE->getOperand(0));
48  }
49  return false;
50 }
51 
52 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
53 /// pointer to an alloca. Ignore any reads of the pointer, return false if we
54 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
55 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
56 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
57 /// the alloca, and if the source pointer is a pointer to a constant global, we
58 /// can optimize this.
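/// For illustration, a hypothetical IR pattern this analysis looks for:
///
///   @cst = private constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
///   ...
///   %buf = alloca [4 x i32]
///   %p = bitcast [4 x i32]* %buf to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p,
///            i8* bitcast ([4 x i32]* @cst to i8*), i64 16, i1 false)
///   ; ... only reads of %buf afterwards ...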
59 static bool
60 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
61  SmallVectorImpl<Instruction *> &ToDelete) {
62  // We track lifetime intrinsics as we encounter them. If we decide to go
63  // ahead and replace the value with the global, this lets the caller quickly
64  // eliminate the markers.
65 
66  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
67  ValuesToInspect.emplace_back(V, false);
68  while (!ValuesToInspect.empty()) {
69  auto ValuePair = ValuesToInspect.pop_back_val();
70  const bool IsOffset = ValuePair.second;
71  for (auto &U : ValuePair.first->uses()) {
72  auto *I = cast<Instruction>(U.getUser());
73 
74  if (auto *LI = dyn_cast<LoadInst>(I)) {
75  // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
76  if (!LI->isSimple()) return false;
77  continue;
78  }
79 
80  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
81  // If uses of the bitcast are ok, we are ok.
82  ValuesToInspect.emplace_back(I, IsOffset);
83  continue;
84  }
85  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
86  // If the GEP has all zero indices, it doesn't offset the pointer. If it
87  // doesn't, it does.
88  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
89  continue;
90  }
91 
92  if (auto CS = CallSite(I)) {
93  // If this is the function being called then we treat it like a load and
94  // ignore it.
95  if (CS.isCallee(&U))
96  continue;
97 
98  unsigned DataOpNo = CS.getDataOperandNo(&U);
99  bool IsArgOperand = CS.isArgOperand(&U);
100 
101  // Inalloca arguments are clobbered by the call.
102  if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
103  return false;
104 
105  // If this is a readonly/readnone call site, then we know it is just a
106  // load (but one that potentially returns the value itself), so we can
107  // ignore it if we know that the value isn't captured.
108  if (CS.onlyReadsMemory() &&
109  (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
110  continue;
111 
112  // If this is being passed as a byval argument, the caller is making a
113  // copy, so it is only a read of the alloca.
114  if (IsArgOperand && CS.isByValArgument(DataOpNo))
115  continue;
116  }
117 
118  // Lifetime intrinsics can be handled by the caller.
119  if (I->isLifetimeStartOrEnd()) {
120  assert(I->use_empty() && "Lifetime markers have no result to use!");
121  ToDelete.push_back(I);
122  continue;
123  }
124 
125  // If this isn't our memcpy/memmove, reject it as something we can't
126  // handle.
127  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
128  if (!MI)
129  return false;
130 
131  // If the transfer is using the alloca as a source of the transfer, then
132  // ignore it since it is a load (unless the transfer is volatile).
133  if (U.getOperandNo() == 1) {
134  if (MI->isVolatile()) return false;
135  continue;
136  }
137 
138  // If we already have seen a copy, reject the second one.
139  if (TheCopy) return false;
140 
141  // If the pointer has been offset from the start of the alloca, we can't
142  // safely handle this.
143  if (IsOffset) return false;
144 
145  // If the memintrinsic isn't using the alloca as the dest, reject it.
146  if (U.getOperandNo() != 0) return false;
147 
148  // If the source of the memcpy/move is not a constant global, reject it.
149  if (!pointsToConstantGlobal(MI->getSource()))
150  return false;
151 
152  // Otherwise, the transform is safe. Remember the copy instruction.
153  TheCopy = MI;
154  }
155  }
156  return true;
157 }
158 
159 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
160 /// modified by a copy from a constant global. If we can prove this, we can
161 /// replace any uses of the alloca with uses of the global directly.
162 static MemTransferInst *
163 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
164  SmallVectorImpl<Instruction *> &ToDelete) {
165  MemTransferInst *TheCopy = nullptr;
166  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
167  return TheCopy;
168  return nullptr;
169 }
170 
171 /// Returns true if V is dereferenceable for the size of the alloca.
172 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
173  const DataLayout &DL) {
174  if (AI->isArrayAllocation())
175  return false;
176  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
177  if (!AllocaSize)
178  return false;
179  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
180  APInt(64, AllocaSize), DL);
181 }
182 
183 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
184  // Check for array size of 1 (scalar allocation).
185  if (!AI.isArrayAllocation()) {
186  // i32 1 is the canonical array size for scalar allocations.
187  if (AI.getArraySize()->getType()->isIntegerTy(32))
188  return nullptr;
189 
190  // Canonicalize it.
191  Value *V = IC.Builder.getInt32(1);
192  AI.setOperand(0, V);
193  return &AI;
194  }
195 
196  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
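  // For illustration, a hypothetical rewrite performed here (index type
  // assumed to be i64):
  //
  //   %a = alloca i32, i32 8
  // ==>
  //   %1 = alloca [8 x i32]
  //   %a.sub = getelementptr inbounds [8 x i32], [8 x i32]* %1, i64 0, i64 0
  //
  // with every use of %a replaced by %a.sub.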
197  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
198  if (C->getValue().getActiveBits() <= 64) {
199  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
200  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
201  New->setAlignment(AI.getAlignment());
202 
203  // Scan to the end of the allocation instructions, to skip over a block of
204  // allocas if possible...also skip interleaved debug info
205  //
206  BasicBlock::iterator It(New);
207  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
208  ++It;
209 
210  // Now that It is pointing to the first non-allocation-inst in the block,
211  // insert our getelementptr instruction...
212  //
213  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
214  Value *NullIdx = Constant::getNullValue(IdxTy);
215  Value *Idx[2] = {NullIdx, NullIdx};
216  Instruction *GEP =
217  GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
218  IC.InsertNewInstBefore(GEP, *It);
219 
220  // Now make everything use the getelementptr instead of the original
221  // allocation.
222  return IC.replaceInstUsesWith(AI, GEP);
223  }
224  }
225 
226  if (isa<UndefValue>(AI.getArraySize()))
227  return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
228 
229  // Ensure that the alloca array size argument has type intptr_t, so that
230  // any casting is exposed early.
231  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
232  if (AI.getArraySize()->getType() != IntPtrTy) {
233  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
234  AI.setOperand(0, V);
235  return &AI;
236  }
237 
238  return nullptr;
239 }
240 
241 namespace {
242 // If I and V are pointers in different address spaces, it is not allowed to
243 // use replaceAllUsesWith since I and V have different types. A
244 // non-target-specific transformation should not use addrspacecast on V since
245 // the two address spaces may be disjoint depending on the target.
246 //
247 // This class chases down uses of the old pointer until reaching the load
248 // instructions, then replaces the old pointer in the load instructions with
249 // the new pointer. If during the chasing it sees bitcast or GEP, it will
250 // create new bitcast or GEP with the new pointer and use them in the load
251 // instruction.
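// For illustration (hypothetical IR): if %alloca is to be replaced by a
// constant global @g that lives in a different address space, a chain like
//
//   %p = bitcast [4 x i32]* %alloca to i32*
//   %v = load i32, i32* %p
//
// is handled by recreating the bitcast on @g and rewriting only the load,
// rather than calling replaceAllUsesWith on %alloca itself.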
252 class PointerReplacer {
253 public:
254  PointerReplacer(InstCombiner &IC) : IC(IC) {}
255  void replacePointer(Instruction &I, Value *V);
256 
257 private:
258  void findLoadAndReplace(Instruction &I);
259  void replace(Instruction *I);
260  Value *getReplacement(Value *I);
261 
262  SmallVector<Instruction *, 4> Path;
263  MapVector<Value *, Value *> WorkMap;
264  InstCombiner &IC;
265 };
266 } // end anonymous namespace
267 
268 void PointerReplacer::findLoadAndReplace(Instruction &I) {
269  for (auto U : I.users()) {
270  auto *Inst = dyn_cast<Instruction>(&*U);
271  if (!Inst)
272  return;
273  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
274  if (isa<LoadInst>(Inst)) {
275  for (auto P : Path)
276  replace(P);
277  replace(Inst);
278  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
279  Path.push_back(Inst);
280  findLoadAndReplace(*Inst);
281  Path.pop_back();
282  } else {
283  return;
284  }
285  }
286 }
287 
288 Value *PointerReplacer::getReplacement(Value *V) {
289  auto Loc = WorkMap.find(V);
290  if (Loc != WorkMap.end())
291  return Loc->second;
292  return nullptr;
293 }
294 
295 void PointerReplacer::replace(Instruction *I) {
296  if (getReplacement(I))
297  return;
298 
299  if (auto *LT = dyn_cast<LoadInst>(I)) {
300  auto *V = getReplacement(LT->getPointerOperand());
301  assert(V && "Operand not replaced");
302  auto *NewI = new LoadInst(V);
303  NewI->takeName(LT);
304  IC.InsertNewInstWith(NewI, *LT);
305  IC.replaceInstUsesWith(*LT, NewI);
306  WorkMap[LT] = NewI;
307  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
308  auto *V = getReplacement(GEP->getPointerOperand());
309  assert(V && "Operand not replaced");
310  SmallVector<Value *, 8> Indices;
311  Indices.append(GEP->idx_begin(), GEP->idx_end());
312  auto *NewI = GetElementPtrInst::Create(
313  V->getType()->getPointerElementType(), V, Indices);
314  IC.InsertNewInstWith(NewI, *GEP);
315  NewI->takeName(GEP);
316  WorkMap[GEP] = NewI;
317  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
318  auto *V = getReplacement(BC->getOperand(0));
319  assert(V && "Operand not replaced");
320  auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
321  V->getType()->getPointerAddressSpace());
322  auto *NewI = new BitCastInst(V, NewT);
323  IC.InsertNewInstWith(NewI, *BC);
324  NewI->takeName(BC);
325  WorkMap[BC] = NewI;
326  } else {
327  llvm_unreachable("should never reach here");
328  }
329 }
330 
331 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
332 #ifndef NDEBUG
333  auto *PT = cast<PointerType>(I.getType());
334  auto *NT = cast<PointerType>(V->getType());
335  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
336  "Invalid usage");
337 #endif
338  WorkMap[&I] = V;
339  findLoadAndReplace(I);
340 }
341 
342 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
343  if (auto *I = simplifyAllocaArraySize(*this, AI))
344  return I;
345 
346  if (AI.getAllocatedType()->isSized()) {
347  // If the alignment is 0 (unspecified), assign it the preferred alignment.
348  if (AI.getAlignment() == 0)
349  AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
350 
351  // Move all allocas of zero-byte objects to the entry block and merge them
352  // together. Note that we only do this for allocas, because malloc should
353  // allocate and return a unique pointer, even for a zero byte allocation.
354  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
355  // For a zero sized alloca there is no point in doing an array allocation.
356  // This is helpful if the array size is a complicated expression not used
357  // elsewhere.
358  if (AI.isArrayAllocation()) {
359  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
360  return &AI;
361  }
362 
363  // Get the first instruction in the entry block.
364  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
365  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
366  if (FirstInst != &AI) {
367  // If the entry block doesn't start with a zero-size alloca then move
368  // this one to the start of the entry block. There is no problem with
369  // dominance as the array size was forced to a constant earlier already.
370  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
371  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
372  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
373  AI.moveBefore(FirstInst);
374  return &AI;
375  }
376 
377  // If the alignment of the entry block alloca is 0 (unspecified),
378  // assign it the preferred alignment.
379  if (EntryAI->getAlignment() == 0)
380  EntryAI->setAlignment(
381  DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
382  // Replace this zero-sized alloca with the one at the start of the entry
383  // block after ensuring that the address will be aligned enough for both
384  // types.
385  unsigned MaxAlign = std::max(EntryAI->getAlignment(),
386  AI.getAlignment());
387  EntryAI->setAlignment(MaxAlign);
388  if (AI.getType() != EntryAI->getType())
389  return new BitCastInst(EntryAI, AI.getType());
390  return replaceInstUsesWith(AI, EntryAI);
391  }
392  }
393  }
394 
395  if (AI.getAlignment()) {
396  // Check to see if this allocation is only modified by a memcpy/memmove from
397  // a constant global whose alignment is equal to or exceeds that of the
398  // allocation. If this is the case, we can change all users to use
399  // the constant global instead. This is commonly produced by the CFE by
400  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
401  // is only subsequently read.
402  SmallVector<Instruction *, 4> ToDelete;
403  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
404  unsigned SourceAlign = getOrEnforceKnownAlignment(
405  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
406  if (AI.getAlignment() <= SourceAlign &&
407  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
408  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
409  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
410  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
411  eraseInstFromFunction(*ToDelete[i]);
412  Constant *TheSrc = cast<Constant>(Copy->getSource());
413  auto *SrcTy = TheSrc->getType();
414  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
415  SrcTy->getPointerAddressSpace());
416  Constant *Cast =
417  ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
418  if (AI.getType()->getPointerAddressSpace() ==
419  SrcTy->getPointerAddressSpace()) {
420  Instruction *NewI = replaceInstUsesWith(AI, Cast);
421  eraseInstFromFunction(*Copy);
422  ++NumGlobalCopies;
423  return NewI;
424  } else {
425  PointerReplacer PtrReplacer(*this);
426  PtrReplacer.replacePointer(AI, Cast);
427  ++NumGlobalCopies;
428  }
429  }
430  }
431  }
432 
433  // At last, use the generic allocation site handler to aggressively remove
434  // unused allocas.
435  return visitAllocSite(AI);
436 }
437 
438 // Are we allowed to form an atomic load or store of this type?
439 static bool isSupportedAtomicType(Type *Ty) {
440  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
441 }
442 
443 /// Helper to combine a load to a new type.
444 ///
445 /// This just does the work of combining a load to a new type. It handles
446 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
447 /// loaded *value* type. This will convert it to a pointer, cast the operand to
448 /// that pointer type, load it, etc.
449 ///
450 /// Note that this will create all of the instructions with whatever insert
451 /// point the \c InstCombiner currently is using.
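/// For illustration, a hypothetical use of this helper, converting a load
/// of i32 into a load of float:
///
///   %v = load i32, i32* %p
/// ==>
///   %p.cast = bitcast i32* %p to float*
///   %v = load float, float* %p.cast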
452 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
453  const Twine &Suffix = "") {
454  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
455  "can't fold an atomic load to requested type");
456 
457  Value *Ptr = LI.getPointerOperand();
458  unsigned AS = LI.getPointerAddressSpace();
459  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
460  LI.getAllMetadata(MD);
461 
462  Value *NewPtr = nullptr;
463  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
464  NewPtr->getType()->getPointerElementType() == NewTy &&
465  NewPtr->getType()->getPointerAddressSpace() == AS))
466  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
467 
468  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
469  NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
470  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
471  MDBuilder MDB(NewLoad->getContext());
472  for (const auto &MDPair : MD) {
473  unsigned ID = MDPair.first;
474  MDNode *N = MDPair.second;
475  // Note, essentially every kind of metadata should be preserved here! This
476  // routine is supposed to clone a load instruction changing *only its type*.
477  // The only metadata it makes sense to drop is metadata which is invalidated
478  // when the pointer type changes. This should essentially never be the case
479  // in LLVM, but we explicitly switch over only known metadata to be
480  // conservatively correct. If you are adding metadata to LLVM which pertains
481  // to loads, you almost certainly want to add it here.
482  switch (ID) {
483  case LLVMContext::MD_dbg:
484  case LLVMContext::MD_tbaa:
485  case LLVMContext::MD_prof:
486  case LLVMContext::MD_fpmath:
487  case LLVMContext::MD_tbaa_struct:
488  case LLVMContext::MD_invariant_load:
489  case LLVMContext::MD_alias_scope:
490  case LLVMContext::MD_noalias:
491  case LLVMContext::MD_nontemporal:
492  case LLVMContext::MD_mem_parallel_loop_access:
493  case LLVMContext::MD_access_group:
494  // All of these directly apply.
495  NewLoad->setMetadata(ID, N);
496  break;
497 
498  case LLVMContext::MD_nonnull:
499  copyNonnullMetadata(LI, N, *NewLoad);
500  break;
501  case LLVMContext::MD_align:
502  case LLVMContext::MD_dereferenceable:
503  case LLVMContext::MD_dereferenceable_or_null:
504  // These only directly apply if the new type is also a pointer.
505  if (NewTy->isPointerTy())
506  NewLoad->setMetadata(ID, N);
507  break;
508  case LLVMContext::MD_range:
509  copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
510  break;
511  }
512  }
513  return NewLoad;
514 }
515 
516 /// Combine a store to a new type.
517 ///
518 /// Returns the newly created store instruction.
519 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
520  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
521  "can't fold an atomic store of requested type");
522 
523  Value *Ptr = SI.getPointerOperand();
524  unsigned AS = SI.getPointerAddressSpace();
525  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
526  SI.getAllMetadata(MD);
527 
528  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
529  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
530  SI.getAlignment(), SI.isVolatile());
531  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
532  for (const auto &MDPair : MD) {
533  unsigned ID = MDPair.first;
534  MDNode *N = MDPair.second;
535  // Note, essentially every kind of metadata should be preserved here! This
536  // routine is supposed to clone a store instruction changing *only its
537  // type*. The only metadata it makes sense to drop is metadata which is
538  // invalidated when the pointer type changes. This should essentially
539  // never be the case in LLVM, but we explicitly switch over only known
540  // metadata to be conservatively correct. If you are adding metadata to
541  // LLVM which pertains to stores, you almost certainly want to add it
542  // here.
543  switch (ID) {
544  case LLVMContext::MD_dbg:
545  case LLVMContext::MD_tbaa:
546  case LLVMContext::MD_prof:
547  case LLVMContext::MD_fpmath:
548  case LLVMContext::MD_tbaa_struct:
549  case LLVMContext::MD_alias_scope:
550  case LLVMContext::MD_noalias:
551  case LLVMContext::MD_nontemporal:
552  case LLVMContext::MD_mem_parallel_loop_access:
553  case LLVMContext::MD_access_group:
554  // All of these directly apply.
555  NewStore->setMetadata(ID, N);
556  break;
557  case LLVMContext::MD_invariant_load:
558  case LLVMContext::MD_nonnull:
559  case LLVMContext::MD_range:
560  case LLVMContext::MD_align:
561  case LLVMContext::MD_dereferenceable:
562  case LLVMContext::MD_dereferenceable_or_null:
563  // These don't apply for stores.
564  break;
565  }
566  }
567 
568  return NewStore;
569 }
570 
571 /// Returns true if instruction represent minmax pattern like:
572 /// select ((cmp load V1, load V2), V1, V2).
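/// For illustration, a hypothetical pointer V that this recognizes:
///
///   %v1 = load i32, i32* %p1
///   %v2 = load i32, i32* %p2
///   %cmp = icmp slt i32 %v1, %v2
///   %min = select i1 %cmp, i32* %p1, i32* %p2   ; V, possibly behind a bitcast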
573 static bool isMinMaxWithLoads(Value *V) {
574  assert(V->getType()->isPointerTy() && "Expected pointer type.");
575  // Ignore possible ty* to ixx* bitcast.
576  V = peekThroughBitcast(V);
577  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
578  // pattern.
579  CmpInst::Predicate Pred;
580  Instruction *L1;
581  Instruction *L2;
582  Value *LHS;
583  Value *RHS;
584  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
585  m_Value(LHS), m_Value(RHS))))
586  return false;
587  return (match(L1, m_Load(m_Specific(LHS))) &&
588  match(L2, m_Load(m_Specific(RHS)))) ||
589  (match(L1, m_Load(m_Specific(RHS))) &&
590  match(L2, m_Load(m_Specific(LHS))));
591 }
592 
593 /// Combine loads to match the type of their uses' value after looking
594 /// through intervening bitcasts.
595 ///
596 /// The core idea here is that if the result of a load is used in an operation,
597 /// we should load the type most conducive to that operation. For example, when
598 /// loading an integer and converting that immediately to a pointer, we should
599 /// instead directly load a pointer.
600 ///
601 /// However, this routine must never change the width of a load or the number of
602 /// loads as that would introduce a semantic change. This combine is expected to
603 /// be a semantic no-op which just allows loads to more closely model the types
604 /// of their consuming operations.
605 ///
606 /// Currently, we also refuse to change the precise type used for an atomic load
607 /// or a volatile load. This is debatable, and might be reasonable to change
608 /// later. However, it is risky in case some backend or other part of LLVM is
609 /// relying on the exact type loaded to select appropriate atomic operations.
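/// For illustration, a hypothetical load whose only use is a no-op bitcast:
///
///   %v = load float, float* %p
///   %i = bitcast float %v to i32
/// ==>
///   %p.cast = bitcast float* %p to i32*
///   %i = load i32, i32* %p.cast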
610 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
611  // FIXME: We could probably with some care handle both volatile and ordered
612  // atomic loads here but it isn't clear that this is important.
613  if (!LI.isUnordered())
614  return nullptr;
615 
616  if (LI.use_empty())
617  return nullptr;
618 
619  // swifterror values can't be bitcasted.
620  if (LI.getPointerOperand()->isSwiftError())
621  return nullptr;
622 
623  Type *Ty = LI.getType();
624  const DataLayout &DL = IC.getDataLayout();
625 
626  // Try to canonicalize loads which are only ever stored to operate over
627  // integers instead of any other type. We only do this when the loaded type
628  // is sized and has a size exactly the same as its store size and the store
629  // size is a legal integer type.
630  // Do not perform canonicalization if minmax pattern is found (to avoid
631  // infinite loop).
632  if (!Ty->isIntegerTy() && Ty->isSized() &&
633  DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
634  DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
635  !DL.isNonIntegralPointerType(Ty) &&
636  !isMinMaxWithLoads(
637  peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
638  if (all_of(LI.users(), [&LI](User *U) {
639  auto *SI = dyn_cast<StoreInst>(U);
640  return SI && SI->getPointerOperand() != &LI &&
641  !SI->getPointerOperand()->isSwiftError();
642  })) {
643  LoadInst *NewLoad = combineLoadToNewType(
644  IC, LI,
645  Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
646  // Replace all the stores with stores of the newly loaded value.
647  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
648  auto *SI = cast<StoreInst>(*UI++);
649  IC.Builder.SetInsertPoint(SI);
650  combineStoreToNewValue(IC, *SI, NewLoad);
651  IC.eraseInstFromFunction(*SI);
652  }
653  assert(LI.use_empty() && "Failed to remove all users of the load!");
654  // Return the old load so the combiner can delete it safely.
655  return &LI;
656  }
657  }
658 
659  // Fold away bit casts of the loaded value by loading the desired type.
660  // We can do this for BitCastInsts as well as casts from and to pointer types,
661  // as long as those are noops (i.e., the source or dest type have the same
662  // bitwidth as the target's pointers).
663  if (LI.hasOneUse())
664  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
665  if (CI->isNoopCast(DL))
666  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
667  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
668  CI->replaceAllUsesWith(NewLoad);
669  IC.eraseInstFromFunction(*CI);
670  return &LI;
671  }
672 
673  // FIXME: We should also canonicalize loads of vectors when their elements are
674  // cast to other types.
675  return nullptr;
676 }
677 
678 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
679  // FIXME: We could probably with some care handle both volatile and atomic
680  // stores here but it isn't clear that this is important.
681  if (!LI.isSimple())
682  return nullptr;
683 
684  Type *T = LI.getType();
685  if (!T->isAggregateType())
686  return nullptr;
687 
688  StringRef Name = LI.getName();
689  assert(LI.getAlignment() && "Alignment must be set at this point");
690 
691  if (auto *ST = dyn_cast<StructType>(T)) {
692  // If the struct has only one element, we unpack.
693  auto NumElements = ST->getNumElements();
694  if (NumElements == 1) {
695  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
696  ".unpack");
697  AAMDNodes AAMD;
698  LI.getAAMetadata(AAMD);
699  NewLoad->setAAMetadata(AAMD);
700  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
701  UndefValue::get(T), NewLoad, 0, Name));
702  }
703 
704  // We don't want to break loads with padding here as we'd lose
705  // the knowledge that padding exists for the rest of the pipeline.
706  const DataLayout &DL = IC.getDataLayout();
707  auto *SL = DL.getStructLayout(ST);
708  if (SL->hasPadding())
709  return nullptr;
710 
711  auto Align = LI.getAlignment();
712  if (!Align)
713  Align = DL.getABITypeAlignment(ST);
714 
715  auto *Addr = LI.getPointerOperand();
716  auto *IdxType = Type::getInt32Ty(T->getContext());
717  auto *Zero = ConstantInt::get(IdxType, 0);
718 
719  Value *V = UndefValue::get(T);
720  for (unsigned i = 0; i < NumElements; i++) {
721  Value *Indices[2] = {
722  Zero,
723  ConstantInt::get(IdxType, i),
724  };
725  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
726  Name + ".elt");
727  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
728  auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
729  // Propagate AA metadata. It'll still be valid on the narrowed load.
730  AAMDNodes AAMD;
731  LI.getAAMetadata(AAMD);
732  L->setAAMetadata(AAMD);
733  V = IC.Builder.CreateInsertValue(V, L, i);
734  }
735 
736  V->setName(Name);
737  return IC.replaceInstUsesWith(LI, V);
738  }
739 
740  if (auto *AT = dyn_cast<ArrayType>(T)) {
741  auto *ET = AT->getElementType();
742  auto NumElements = AT->getNumElements();
743  if (NumElements == 1) {
744  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
745  AAMDNodes AAMD;
746  LI.getAAMetadata(AAMD);
747  NewLoad->setAAMetadata(AAMD);
748  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
749  UndefValue::get(T), NewLoad, 0, Name));
750  }
751 
752  // Bail out if the array is too large. Ideally we would like to optimize
753  // arrays of arbitrary size but this has a terrible impact on compile time.
754  // The threshold here is chosen arbitrarily, maybe needs a little bit of
755  // tuning.
756  if (NumElements > IC.MaxArraySizeForCombine)
757  return nullptr;
758 
759  const DataLayout &DL = IC.getDataLayout();
760  auto EltSize = DL.getTypeAllocSize(ET);
761  auto Align = LI.getAlignment();
762  if (!Align)
763  Align = DL.getABITypeAlignment(T);
764 
765  auto *Addr = LI.getPointerOperand();
766  auto *IdxType = Type::getInt64Ty(T->getContext());
767  auto *Zero = ConstantInt::get(IdxType, 0);
768 
769  Value *V = UndefValue::get(T);
770  uint64_t Offset = 0;
771  for (uint64_t i = 0; i < NumElements; i++) {
772  Value *Indices[2] = {
773  Zero,
774  ConstantInt::get(IdxType, i),
775  };
776  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
777  Name + ".elt");
778  auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
779  Name + ".unpack");
780  AAMDNodes AAMD;
781  LI.getAAMetadata(AAMD);
782  L->setAAMetadata(AAMD);
783  V = IC.Builder.CreateInsertValue(V, L, i);
784  Offset += EltSize;
785  }
786 
787  V->setName(Name);
788  return IC.replaceInstUsesWith(LI, V);
789  }
790 
791  return nullptr;
792 }
793 
794 // If we can determine that all possible objects pointed to by the provided
795 // pointer value are, not only dereferenceable, but also definitively less than
796 // or equal to the provided maximum size, then return true. Otherwise, return
797 // false. (Constant global values and allocas are the kinds of objects whose
798 // size we can bound this way.)
799 // FIXME: This should probably live in ValueTracking (or similar).
800 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
801  const DataLayout &DL) {
802  SmallPtrSet<Value *, 4> Visited;
803  SmallVector<Value *, 4> Worklist(1, V);
804 
805  do {
806  Value *P = Worklist.pop_back_val();
807  P = P->stripPointerCasts();
808 
809  if (!Visited.insert(P).second)
810  continue;
811 
812  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
813  Worklist.push_back(SI->getTrueValue());
814  Worklist.push_back(SI->getFalseValue());
815  continue;
816  }
817 
818  if (PHINode *PN = dyn_cast<PHINode>(P)) {
819  for (Value *IncValue : PN->incoming_values())
820  Worklist.push_back(IncValue);
821  continue;
822  }
823 
824  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
825  if (GA->isInterposable())
826  return false;
827  Worklist.push_back(GA->getAliasee());
828  continue;
829  }
830 
831  // If we know how big this object is, and it is less than MaxSize, continue
832  // searching. Otherwise, return false.
833  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
834  if (!AI->getAllocatedType()->isSized())
835  return false;
836 
837  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
838  if (!CS)
839  return false;
840 
841  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
842  // Make sure that, even if the multiplication below would wrap as an
843  // uint64_t, we still do the right thing.
844  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
845  return false;
846  continue;
847  }
848 
849  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
850  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
851  return false;
852 
853  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
854  if (InitSize > MaxSize)
855  return false;
856  continue;
857  }
858 
859  return false;
860  } while (!Worklist.empty());
861 
862  return true;
863 }
864 
865 // If we're indexing into an object of a known size, and the outer index is
866 // not a constant, but having any value but zero would lead to undefined
867 // behavior, replace it with zero.
868 //
869 // For example, if we have:
870 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
871 // ...
872 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
873 // ... = load i32* %arrayidx, align 4
874 // Then we know that we can replace %x in the GEP with i64 0.
875 //
876 // FIXME: We could fold any GEP index to zero that would cause UB if it were
877 // not zero. Currently, we only handle the first such index. Also, we could
878 // also search through non-zero constant indices if we kept track of the
879 // offsets those indices implied.
880 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
881  Instruction *MemI, unsigned &Idx) {
882  if (GEPI->getNumOperands() < 2)
883  return false;
884 
885  // Find the first non-zero index of a GEP. If all indices are zero, return
886  // one past the last index.
887  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
888  unsigned I = 1;
889  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
890  Value *V = GEPI->getOperand(I);
891  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
892  if (CI->isZero())
893  continue;
894 
895  break;
896  }
897 
898  return I;
899  };
900 
901  // Skip through initial 'zero' indices, and find the corresponding pointer
902  // type. See if the next index is not a constant.
903  Idx = FirstNZIdx(GEPI);
904  if (Idx == GEPI->getNumOperands())
905  return false;
906  if (isa<Constant>(GEPI->getOperand(Idx)))
907  return false;
908 
909  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
910  Type *AllocTy =
911  GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
912  if (!AllocTy || !AllocTy->isSized())
913  return false;
914  const DataLayout &DL = IC.getDataLayout();
915  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
916 
917  // If there are more indices after the one we might replace with a zero, make
918  // sure they're all non-negative. If any of them are negative, the overall
919  // address being computed might be before the base address determined by the
920  // first non-zero index.
921  auto IsAllNonNegative = [&]() {
922  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
923  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
924  if (Known.isNonNegative())
925  continue;
926  return false;
927  }
928 
929  return true;
930  };
931 
932  // FIXME: If the GEP is not inbounds, and there are extra indices after the
933  // one we'll replace, those could cause the address computation to wrap
934  // (rendering the IsAllNonNegative() check below insufficient). We can do
935  // better, ignoring zero indices (and other indices we can prove small
936  // enough not to wrap).
937  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
938  return false;
939 
940  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
941  // also known to be dereferenceable.
942  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
943  IsAllNonNegative();
944 }
945 
946 // If we're indexing into an object with a variable index for the memory
947 // access, but the object has only one element, we can assume that the index
948 // will always be zero. If we replace the GEP, return it.
949 template <typename T>
950 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
951  T &MemI) {
952  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
953  unsigned Idx;
954  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
955  Instruction *NewGEPI = GEPI->clone();
956  NewGEPI->setOperand(Idx,
957  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
958  NewGEPI->insertBefore(GEPI);
959  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
960  return NewGEPI;
961  }
962  }
963 
964  return nullptr;
965 }
966 
967 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
968  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
969  return false;
970 
971  auto *Ptr = SI.getPointerOperand();
972  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
973  Ptr = GEPI->getOperand(0);
974  return (isa<ConstantPointerNull>(Ptr) &&
975  !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
976 }
977 
978 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
979  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
980  const Value *GEPI0 = GEPI->getOperand(0);
981  if (isa<ConstantPointerNull>(GEPI0) &&
982  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
983  return true;
984  }
985  if (isa<UndefValue>(Op) ||
986  (isa<ConstantPointerNull>(Op) &&
987  !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
988  return true;
989  return false;
990 }
991 
992 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
993  Value *Op = LI.getOperand(0);
994 
995  // Try to canonicalize the loaded type.
996  if (Instruction *Res = combineLoadToOperationType(*this, LI))
997  return Res;
998 
999  // Attempt to improve the alignment.
1000  unsigned KnownAlign = getOrEnforceKnownAlignment(
1001  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
1002  unsigned LoadAlign = LI.getAlignment();
1003  unsigned EffectiveLoadAlign =
1004  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
1005 
1006  if (KnownAlign > EffectiveLoadAlign)
1007  LI.setAlignment(KnownAlign);
1008  else if (LoadAlign == 0)
1009  LI.setAlignment(EffectiveLoadAlign);
1010 
1011  // Replace GEP indices if possible.
1012  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1013  Worklist.Add(NewGEPI);
1014  return &LI;
1015  }
1016 
1017  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1018  return Res;
1019 
1020  // Do really simple store-to-load forwarding and load CSE, to catch cases
1021  // where there are several consecutive memory accesses to the same location,
1022  // separated by a few arithmetic operations.
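  // For illustration, a hypothetical sequence this catches:
  //
  //   store i32 %x, i32* %p
  //   %t = add i32 %y, 1
  //   %v = load i32, i32* %p   ; forwarded: %v is replaced by %x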
1023  BasicBlock::iterator BBI(LI);
1024  bool IsLoadCSE = false;
1025  if (Value *AvailableVal = FindAvailableLoadedValue(
1026  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1027  if (IsLoadCSE)
1028  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1029 
1030  return replaceInstUsesWith(
1031  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1032  LI.getName() + ".cast"));
1033  }
1034 
1035  // None of the following transforms are legal for volatile/ordered atomic
1036  // loads. Most of them do apply for unordered atomics.
1037  if (!LI.isUnordered()) return nullptr;
1038 
1039  // load(gep null, ...) -> unreachable
1040  // load null/undef -> unreachable
1041  // TODO: Consider a target hook for valid address spaces for this xforms.
1042  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1043  // Insert a new store to null instruction before the load to indicate
1044  // that this code is not reachable. We do this instead of inserting
1045  // an unreachable instruction directly because we cannot modify the
1046  // CFG.
1047  StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1048  Constant::getNullValue(Op->getType()), &LI);
1049  SI->setDebugLoc(LI.getDebugLoc());
1050  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1051  }
1052 
1053  if (Op->hasOneUse()) {
1054  // Change select and PHI nodes to select values instead of addresses: this
1055  // helps alias analysis out a lot, allows many other simplifications, and
1056  // exposes redundancy in the code.
1057  //
1058  // Note that we cannot do the transformation unless we know that the
1059  // introduced loads cannot trap! Something like this is valid as long as
1060  // the condition is always false: load (select bool %C, int* null, int* %G),
1061  // but it would not be valid if we transformed it to load from null
1062  // unconditionally.
1063  //
1064  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1065  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1066  unsigned Align = LI.getAlignment();
1067  if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
1068  isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
1069  LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
1070  SI->getOperand(1)->getName()+".val");
1071  LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
1072  SI->getOperand(2)->getName()+".val");
1073  assert(LI.isUnordered() && "implied by above");
1074  V1->setAlignment(Align);
1075  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1076  V2->setAlignment(Align);
1077  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1078  return SelectInst::Create(SI->getCondition(), V1, V2);
1079  }
1080 
1081  // load (select (cond, null, P)) -> load P
1082  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1083  !NullPointerIsDefined(SI->getFunction(),
1084  LI.getPointerAddressSpace())) {
1085  LI.setOperand(0, SI->getOperand(2));
1086  return &LI;
1087  }
1088 
1089  // load (select (cond, P, null)) -> load P
1090  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1091  !NullPointerIsDefined(SI->getFunction(),
1092  LI.getPointerAddressSpace())) {
1093  LI.setOperand(0, SI->getOperand(1));
1094  return &LI;
1095  }
1096  }
1097  }
1098  return nullptr;
1099 }
1100 
1101 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1102 ///
1103 /// \returns underlying value that was "cast", or nullptr otherwise.
1104 ///
1105 /// For example, if we have:
1106 ///
1107 /// %E0 = extractelement <2 x double> %U, i32 0
1108 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1109 /// %E1 = extractelement <2 x double> %U, i32 1
1110 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1111 ///
1112 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1113 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1114 /// Note that %U may contain non-undef values where %V1 has undef.
1115 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1116  Value *U = nullptr;
1117  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1118  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1119  if (!E)
1120  return nullptr;
1121  auto *W = E->getVectorOperand();
1122  if (!U)
1123  U = W;
1124  else if (U != W)
1125  return nullptr;
1126  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1127  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1128  return nullptr;
1129  V = IV->getAggregateOperand();
1130  }
1131  if (!isa<UndefValue>(V) ||!U)
1132  return nullptr;
1133 
1134  auto *UT = cast<VectorType>(U->getType());
1135  auto *VT = V->getType();
1136  // Check that types UT and VT are bitwise isomorphic.
1137  const auto &DL = IC.getDataLayout();
1138  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1139  return nullptr;
1140  }
1141  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1142  if (AT->getNumElements() != UT->getNumElements())
1143  return nullptr;
1144  } else {
1145  auto *ST = cast<StructType>(VT);
1146  if (ST->getNumElements() != UT->getNumElements())
1147  return nullptr;
1148  for (const auto *EltT : ST->elements()) {
1149  if (EltT != UT->getElementType())
1150  return nullptr;
1151  }
1152  }
1153  return U;
1154 }
1155 
1156 /// Combine stores to match the type of value being stored.
1157 ///
1158 /// The core idea here is that the memory does not have any intrinsic type and
1159 /// where we can we should match the type of a store to the type of value being
1160 /// stored.
1161 ///
1162 /// However, this routine must never change the width of a store or the number of
1163 /// stores as that would introduce a semantic change. This combine is expected to
1164 /// be a semantic no-op which just allows stores to more closely model the types
1165 /// of their incoming values.
1166 ///
1167 /// Currently, we also refuse to change the precise type used for an atomic or
1168 /// volatile store. This is debatable, and might be reasonable to change later.
1169 /// However, it is risky in case some backend or other part of LLVM is relying
1170 /// on the exact type stored to select appropriate atomic operations.
1171 ///
1172 /// \returns true if the store was successfully combined away. This indicates
1173 /// the caller must erase the store instruction. We have to let the caller erase
1174 /// the store instruction as otherwise there is no way to signal whether it was
1175 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
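/// For illustration, a hypothetical store through a value bitcast:
///
///   %i = bitcast float %v to i32
///   store i32 %i, i32* %p
/// ==>
///   %p.cast = bitcast i32* %p to float*
///   store float %v, float* %p.cast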
1176 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1177  // FIXME: We could probably with some care handle both volatile and ordered
1178  // atomic stores here but it isn't clear that this is important.
1179  if (!SI.isUnordered())
1180  return false;
1181 
1182  // swifterror values can't be bitcasted.
1183  if (SI.getPointerOperand()->isSwiftError())
1184  return false;
1185 
1186  Value *V = SI.getValueOperand();
1187 
1188  // Fold away bit casts of the stored value by storing the original type.
1189  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1190  V = BC->getOperand(0);
1191  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1192  combineStoreToNewValue(IC, SI, V);
1193  return true;
1194  }
1195  }
1196 
1197  if (Value *U = likeBitCastFromVector(IC, V))
1198  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1199  combineStoreToNewValue(IC, SI, U);
1200  return true;
1201  }
1202 
1203  // FIXME: We should also canonicalize stores of vectors when their elements
1204  // are cast to other types.
1205  return false;
1206 }
1207 
1208 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1209  // FIXME: We could probably with some care handle both volatile and atomic
1210  // stores here but it isn't clear that this is important.
1211  if (!SI.isSimple())
1212  return false;
1213 
1214  Value *V = SI.getValueOperand();
1215  Type *T = V->getType();
1216 
1217  if (!T->isAggregateType())
1218  return false;
1219 
1220  if (auto *ST = dyn_cast<StructType>(T)) {
1221  // If the struct has only one element, we unpack.
1222  unsigned Count = ST->getNumElements();
1223  if (Count == 1) {
1224  V = IC.Builder.CreateExtractValue(V, 0);
1225  combineStoreToNewValue(IC, SI, V);
1226  return true;
1227  }
1228 
1229  // We don't want to break stores with padding here as we'd lose
1230  // the knowledge that padding exists for the rest of the pipeline.
1231  const DataLayout &DL = IC.getDataLayout();
1232  auto *SL = DL.getStructLayout(ST);
1233  if (SL->hasPadding())
1234  return false;
1235 
1236  auto Align = SI.getAlignment();
1237  if (!Align)
1238  Align = DL.getABITypeAlignment(ST);
1239 
1240  SmallString<16> EltName = V->getName();
1241  EltName += ".elt";
1242  auto *Addr = SI.getPointerOperand();
1243  SmallString<16> AddrName = Addr->getName();
1244  AddrName += ".repack";
1245 
1246  auto *IdxType = Type::getInt32Ty(ST->getContext());
1247  auto *Zero = ConstantInt::get(IdxType, 0);
1248  for (unsigned i = 0; i < Count; i++) {
1249  Value *Indices[2] = {
1250  Zero,
1251  ConstantInt::get(IdxType, i),
1252  };
1253  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1254  AddrName);
1255  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1256  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1257  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1258  AAMDNodes AAMD;
1259  SI.getAAMetadata(AAMD);
1260  NS->setAAMetadata(AAMD);
1261  }
1262 
1263  return true;
1264  }
1265 
1266  if (auto *AT = dyn_cast<ArrayType>(T)) {
1267  // If the array has only one element, we unpack.
1268  auto NumElements = AT->getNumElements();
1269  if (NumElements == 1) {
1270  V = IC.Builder.CreateExtractValue(V, 0);
1271  combineStoreToNewValue(IC, SI, V);
1272  return true;
1273  }
1274 
1275  // Bail out if the array is too large. Ideally we would like to optimize
1276  // arrays of arbitrary size but this has a terrible impact on compile time.
1277  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1278  // tuning.
1279  if (NumElements > IC.MaxArraySizeForCombine)
1280  return false;
1281 
1282  const DataLayout &DL = IC.getDataLayout();
1283  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1284  auto Align = SI.getAlignment();
1285  if (!Align)
1286  Align = DL.getABITypeAlignment(T);
1287 
1288  SmallString<16> EltName = V->getName();
1289  EltName += ".elt";
1290  auto *Addr = SI.getPointerOperand();
1291  SmallString<16> AddrName = Addr->getName();
1292  AddrName += ".repack";
1293 
1294  auto *IdxType = Type::getInt64Ty(T->getContext());
1295  auto *Zero = ConstantInt::get(IdxType, 0);
1296 
1297  uint64_t Offset = 0;
1298  for (uint64_t i = 0; i < NumElements; i++) {
1299  Value *Indices[2] = {
1300  Zero,
1301  ConstantInt::get(IdxType, i),
1302  };
1303  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1304  AddrName);
1305  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1306  auto EltAlign = MinAlign(Align, Offset);
1307  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1308  AAMDNodes AAMD;
1309  SI.getAAMetadata(AAMD);
1310  NS->setAAMetadata(AAMD);
1311  Offset += EltSize;
1312  }
1313 
1314  return true;
1315  }
1316 
1317  return false;
1318 }
1319 
1320 /// equivalentAddressValues - Test if A and B will obviously have the same
1321 /// value. This includes recognizing that %t0 and %t1 will have the same
1322 /// value in code like this:
1323 /// %t0 = getelementptr \@a, 0, 3
1324 /// store i32 0, i32* %t0
1325 /// %t1 = getelementptr \@a, 0, 3
1326 /// %t2 = load i32* %t1
1327 ///
1328 static bool equivalentAddressValues(Value *A, Value *B) {
1329  // Test if the values are trivially equivalent.
1330  if (A == B) return true;
1331 
1332  // Test if the values come from identical arithmetic instructions.
1333  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1334  // it's only used to compare two uses within the same basic block, which
1335  // means that they'll always either have the same value or one of them
1336  // will have an undefined value.
1337  if (isa<BinaryOperator>(A) ||
1338  isa<CastInst>(A) ||
1339  isa<PHINode>(A) ||
1340  isa<GetElementPtrInst>(A))
1341  if (Instruction *BI = dyn_cast<Instruction>(B))
1342  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1343  return true;
1344 
1345  // Otherwise they may not be equivalent.
1346  return false;
1347 }
1348 
1349 /// Converts store (bitcast (load (bitcast (select ...)))) to
1350 /// store (load (select ...)), where select is minmax:
1351 /// select ((cmp load V1, load V2), V1, V2).
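/// For illustration (hypothetical IR): with %min being such a select over
/// two float pointers, the integer round-trip
///
///   %min.i = bitcast float* %min to i32*
///   %v = load i32, i32* %min.i
///   %dst.i = bitcast float* %dst to i32*
///   store i32 %v, i32* %dst.i
///
/// is rewritten into a float load of %min and a float store to %dst.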
1352 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1353  StoreInst &SI) {
1354  // bitcast?
1355  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1356  return false;
1357  // load? integer?
1358  Value *LoadAddr;
1359  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1360  return false;
1361  auto *LI = cast<LoadInst>(SI.getValueOperand());
1362  if (!LI->getType()->isIntegerTy())
1363  return false;
1364  if (!isMinMaxWithLoads(LoadAddr))
1365  return false;
1366 
1367  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1368  auto *SI = dyn_cast<StoreInst>(U);
1369  return SI && SI->getPointerOperand() != LI &&
1370  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1371  !SI->getPointerOperand()->isSwiftError();
1372  }))
1373  return false;
1374 
1375  IC.Builder.SetInsertPoint(LI);
1376  LoadInst *NewLI = combineLoadToNewType(
1377  IC, *LI, LoadAddr->getType()->getPointerElementType());
1378  // Replace all the stores with stores of the newly loaded value.
1379  for (auto *UI : LI->users()) {
1380  auto *USI = cast<StoreInst>(UI);
1381  IC.Builder.SetInsertPoint(USI);
1382  combineStoreToNewValue(IC, *USI, NewLI);
1383  }
1384  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1385  IC.eraseInstFromFunction(*LI);
1386  return true;
1387 }
1388 
1389 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1390  Value *Val = SI.getOperand(0);
1391  Value *Ptr = SI.getOperand(1);
1392 
1393  // Try to canonicalize the stored type.
1394  if (combineStoreToValueType(*this, SI))
1395  return eraseInstFromFunction(SI);
1396 
1397  // Attempt to improve the alignment.
1398  unsigned KnownAlign = getOrEnforceKnownAlignment(
1399  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1400  unsigned StoreAlign = SI.getAlignment();
1401  unsigned EffectiveStoreAlign =
1402  StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1403 
1404  if (KnownAlign > EffectiveStoreAlign)
1405  SI.setAlignment(KnownAlign);
1406  else if (StoreAlign == 0)
1407  SI.setAlignment(EffectiveStoreAlign);
1408 
1409  // Try to canonicalize the stored type.
1410  if (unpackStoreToAggregate(*this, SI))
1411  return eraseInstFromFunction(SI);
1412 
1413  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1414  return eraseInstFromFunction(SI);
1415 
1416  // Replace GEP indices if possible.
1417  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1418  Worklist.Add(NewGEPI);
1419  return &SI;
1420  }
1421 
1422  // Don't hack volatile/ordered stores.
1423  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1424  if (!SI.isUnordered()) return nullptr;
1425 
1426  // If the RHS is an alloca with a single use, zapify the store, making the
1427  // alloca dead.
1428  if (Ptr->hasOneUse()) {
1429  if (isa<AllocaInst>(Ptr))
1430  return eraseInstFromFunction(SI);
1431  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1432  if (isa<AllocaInst>(GEP->getOperand(0))) {
1433  if (GEP->getOperand(0)->hasOneUse())
1434  return eraseInstFromFunction(SI);
1435  }
1436  }
1437  }
1438 
1439  // Do really simple DSE, to catch cases where there are several consecutive
1440  // stores to the same location, separated by a few arithmetic operations. This
1441  // situation often occurs with bitfield accesses.
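  // For illustration, a hypothetical sequence where the first store dies:
  //
  //   store i32 %a, i32* %p
  //   %b = or i32 %a, 255
  //   store i32 %b, i32* %p   ; the store above is eliminated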
1442  BasicBlock::iterator BBI(SI);
1443  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1444  --ScanInsts) {
1445  --BBI;
1446  // Don't count debug info directives, lest they affect codegen,
1447  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1448  if (isa<DbgInfoIntrinsic>(BBI) ||
1449  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1450  ScanInsts++;
1451  continue;
1452  }
1453 
1454  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1455  // Prev store isn't volatile, and stores to the same location?
1456  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1457  SI.getOperand(1))) {
1458  ++NumDeadStore;
1459  ++BBI;
1460  eraseInstFromFunction(*PrevSI);
1461  continue;
1462  }
1463  break;
1464  }
1465 
1466  // If this is a load, we have to stop. However, if the loaded value is from
1467  // the pointer we're loading and is producing the pointer we're storing,
1468  // then *this* store is dead (X = load P; store X -> P).
1469  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1470  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1471  assert(SI.isUnordered() && "can't eliminate ordering operation");
1472  return eraseInstFromFunction(SI);
1473  }
1474 
1475  // Otherwise, this is a load from some other location. Stores before it
1476  // may not be dead.
1477  break;
1478  }
1479 
1480  // Don't skip over loads, throws or things that can modify memory.
1481  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1482  break;
1483  }
1484 
1485  // store X, null -> turns into 'unreachable' in SimplifyCFG
1486  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1487  if (canSimplifyNullStoreOrGEP(SI)) {
1488  if (!isa<UndefValue>(Val)) {
1489  SI.setOperand(0, UndefValue::get(Val->getType()));
1490  if (Instruction *U = dyn_cast<Instruction>(Val))
1491  Worklist.Add(U); // Dropped a use.
1492  }
1493  return nullptr; // Do not modify these!
1494  }
1495 
1496  // store undef, Ptr -> noop
1497  if (isa<UndefValue>(Val))
1498  return eraseInstFromFunction(SI);
1499 
1500  // If this store is the second-to-last instruction in the basic block
1501  // (excluding debug info and bitcasts of pointers) and if the block ends with
1502  // an unconditional branch, try to move the store to the successor block.
1503  BBI = SI.getIterator();
1504  do {
1505  ++BBI;
1506  } while (isa<DbgInfoIntrinsic>(BBI) ||
1507  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1508 
1509  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1510  if (BI->isUnconditional())
1511  mergeStoreIntoSuccessor(SI);
1512 
1513  return nullptr;
1514 }
1515 
1516 /// Try to transform:
1517 /// if () { *P = v1; } else { *P = v2 }
1518 /// or:
1519 /// *P = v1; if () { *P = v2; }
1520 /// into a phi node with a store in the successor.
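/// For illustration (hypothetical IR), the two stores in
///
///   then:
///     store i32 1, i32* %p
///     br label %merge
///   else:
///     store i32 2, i32* %p
///     br label %merge
///
/// are replaced by a phi of 1 and 2 in %merge followed by a single store.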
1521 bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
1522  assert(SI.isUnordered() &&
1523  "This code has not been audited for volatile or ordered store case.");
1524 
1525  // Check if the successor block has exactly 2 incoming edges.
1526  BasicBlock *StoreBB = SI.getParent();
1527  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1528  if (!DestBB->hasNPredecessors(2))
1529  return false;
1530 
1531  // Capture the other block (the block that doesn't contain our store).
1532  pred_iterator PredIter = pred_begin(DestBB);
1533  if (*PredIter == StoreBB)
1534  ++PredIter;
1535  BasicBlock *OtherBB = *PredIter;
1536 
1537  // Bail out if all of the relevant blocks aren't distinct. This can happen,
1538  // for example, if SI is in an infinite loop.
1539  if (StoreBB == DestBB || OtherBB == DestBB)
1540  return false;
1541 
1542  // Verify that the other block ends in a branch and is not otherwise empty.
1543  BasicBlock::iterator BBI(OtherBB->getTerminator());
1544  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1545  if (!OtherBr || BBI == OtherBB->begin())
1546  return false;
1547 
1548  // If the other block ends in an unconditional branch, check for the 'if then
1549  // else' case. There is an instruction before the branch.
1550  StoreInst *OtherStore = nullptr;
1551  if (OtherBr->isUnconditional()) {
1552  --BBI;
1553  // Skip over debugging info.
1554  while (isa<DbgInfoIntrinsic>(BBI) ||
1555  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1556  if (BBI==OtherBB->begin())
1557  return false;
1558  --BBI;
1559  }
1560  // If this isn't a store, isn't a store to the same location, or is not the
1561  // right kind of store, bail out.
1562  OtherStore = dyn_cast<StoreInst>(BBI);
1563  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1564  !SI.isSameOperationAs(OtherStore))
1565  return false;
1566  } else {
1567  // Otherwise, the other block ended with a conditional branch. If one of the
1568  // destinations is StoreBB, then we have the if/then case.
1569  if (OtherBr->getSuccessor(0) != StoreBB &&
1570  OtherBr->getSuccessor(1) != StoreBB)
1571  return false;
1572 
1573  // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1574  // if/then triangle. See if there is a store to the same ptr as SI that
1575  // lives in OtherBB.
1576  for (;; --BBI) {
1577  // Check to see if we find the matching store.
1578  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1579  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1580  !SI.isSameOperationAs(OtherStore))
1581  return false;
1582  break;
1583  }
1584  // If we find something that may be using or overwriting the stored
1585  // value, or if we run out of instructions, we can't do the transform.
1586  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1587  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1588  return false;
1589  }
1590 
1591  // In order to eliminate the store in OtherBB, we have to make sure nothing
1592  // reads or overwrites the stored value in StoreBB.
1593  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1594  // FIXME: This should really be AA driven.
1595  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1596  return false;
1597  }
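    // For example, a call or any other instruction that may read or write
    // memory between the store in StoreBB and its terminator defeats the
    // transform here, since no alias information is consulted (see the FIXME
    // above).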
1598  }
1599 
1600  // Insert a PHI node now if we need it.
1601  Value *MergedVal = OtherStore->getOperand(0);
1602  // The debug locations of the original instructions might differ. Merge them.
1603  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1604  OtherStore->getDebugLoc());
1605  if (MergedVal != SI.getOperand(0)) {
1606  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1607  PN->addIncoming(SI.getOperand(0), SI.getParent());
1608  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1609  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1610  PN->setDebugLoc(MergedLoc);
1611  }
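  // At this point MergedVal is either the value both blocks stored or the
  // freshly created "storemerge" phi; a single store of it is materialized
  // at DestBB's first insertion point below.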
1612 
1613  // Advance to a place where it is safe to insert the new store and insert it.
1614  BBI = DestBB->getFirstInsertionPt();
1615  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1616  SI.isVolatile(), SI.getAlignment(),
1617  SI.getOrdering(), SI.getSyncScopeID());
1618  InsertNewInstBefore(NewSI, *BBI);
1619  NewSI->setDebugLoc(MergedLoc);
1620 
1621  // If the two stores had AA tags, merge them.
1622  AAMDNodes AATags;
1623  SI.getAAMetadata(AATags);
1624  if (AATags) {
1625  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1626  NewSI->setAAMetadata(AATags);
1627  }
1628 
1629  // Nuke the old stores.
1630  eraseInstFromFunction(SI);
1631  eraseInstFromFunction(*OtherStore);
1632  return true;
1633 }