LLVM 8.0.1
MemoryDependenceAnalysis.cpp
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an analysis that determines, for a given memory
11 // operation, what preceding memory operations it depends on. It builds on
12 // alias analysis information, and tries to provide a lazy, caching interface to
13 // a common kind of alias information query.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/MemoryBuiltins.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/OrderedBasicBlock.h"
28 #include "llvm/Analysis/PHITransAddr.h"
29 #include "llvm/Analysis/PhiValues.h"
30 #include "llvm/Analysis/TargetLibraryInfo.h"
31 #include "llvm/Analysis/ValueTracking.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PredIteratorCache.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Use.h"
49 #include "llvm/IR/User.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/AtomicOrdering.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/MathExtras.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "memdep"
67 
68 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
69 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
70 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
71 
72 STATISTIC(NumCacheNonLocalPtr,
73  "Number of fully cached non-local ptr responses");
74 STATISTIC(NumCacheDirtyNonLocalPtr,
75  "Number of cached, but dirty, non-local ptr responses");
76 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
77 STATISTIC(NumCacheCompleteNonLocalPtr,
78  "Number of block queries that were completely cached");
79 
80 // Limit for the number of instructions to scan in a block.
81 
83  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
84  cl::desc("The number of instructions to scan in a block in memory "
85  "dependency analysis (default = 100)"));
86 
87 static cl::opt<unsigned>
88  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
89  cl::desc("The number of blocks to scan during memory "
90  "dependency analysis (default = 1000)"));
91 
92 // Limit on the number of memdep results to process.
93 static const unsigned int NumResultsLimit = 100;
94 
95 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
96 ///
97 /// If the set becomes empty, remove Inst's entry.
98 template <typename KeyTy>
99 static void
100 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
101  Instruction *Inst, KeyTy Val) {
102  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
103  ReverseMap.find(Inst);
104  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
105  bool Found = InstIt->second.erase(Val);
106  assert(Found && "Invalid reverse map!");
107  (void)Found;
108  if (InstIt->second.empty())
109  ReverseMap.erase(InstIt);
110 }
111 
112 /// If the given instruction references a specific memory location, fill in Loc
113 /// with the details, otherwise set Loc.Ptr to null.
114 ///
115 /// Returns a ModRefInfo value describing the general behavior of the
116 /// instruction.
117 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
118  const TargetLibraryInfo &TLI) {
119  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
120  if (LI->isUnordered()) {
121  Loc = MemoryLocation::get(LI);
122  return ModRefInfo::Ref;
123  }
124  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
125  Loc = MemoryLocation::get(LI);
126  return ModRefInfo::ModRef;
127  }
128  Loc = MemoryLocation();
129  return ModRefInfo::ModRef;
130  }
131 
132  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
133  if (SI->isUnordered()) {
134  Loc = MemoryLocation::get(SI);
135  return ModRefInfo::Mod;
136  }
137  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
138  Loc = MemoryLocation::get(SI);
139  return ModRefInfo::ModRef;
140  }
141  Loc = MemoryLocation();
142  return ModRefInfo::ModRef;
143  }
144 
145  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
146  Loc = MemoryLocation::get(V);
147  return ModRefInfo::ModRef;
148  }
149 
150  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
151  // calls to free() deallocate the entire structure
152  Loc = MemoryLocation(CI->getArgOperand(0));
153  return ModRefInfo::Mod;
154  }
155 
156  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
157  switch (II->getIntrinsicID()) {
158  case Intrinsic::lifetime_start:
159  case Intrinsic::lifetime_end:
160  case Intrinsic::invariant_start:
161  Loc = MemoryLocation::getForArgument(II, 1, TLI);
162  // These intrinsics don't really modify the memory, but returning Mod
163  // will allow them to be handled conservatively.
164  return ModRefInfo::Mod;
165  case Intrinsic::invariant_end:
166  Loc = MemoryLocation::getForArgument(II, 2, TLI);
167  // These intrinsics don't really modify the memory, but returning Mod
168  // will allow them to be handled conservatively.
169  return ModRefInfo::Mod;
170  default:
171  break;
172  }
173  }
174 
175  // Otherwise, just do the coarse-grained thing that always works.
176  if (Inst->mayWriteToMemory())
177  return ModRefInfo::ModRef;
178  if (Inst->mayReadFromMemory())
179  return ModRefInfo::Ref;
180  return ModRefInfo::NoModRef;
181 }
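// The pair that GetLocation returns (a ModRefInfo summary plus a possibly-null
// MemoryLocation) is what the scanners below key off of. A minimal sketch of
// how a caller might combine the two; mayWriteToQueryLoc is a hypothetical
// helper name, not part of this file:
#if 0
static bool mayWriteToQueryLoc(Instruction *Inst, const MemoryLocation &QueryLoc,
                               AliasAnalysis &AA, const TargetLibraryInfo &TLI) {
  MemoryLocation Loc;
  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
  if (Loc.Ptr)
    // Inst touches one specific location: ask alias analysis whether that
    // location can overlap the one we care about.
    return isModSet(MR) && !AA.isNoAlias(Loc, QueryLoc);
  // No specific location (e.g. an arbitrary call): fall back to the coarse
  // mod/ref summary.
  return isModSet(MR);
}
#endif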
182 
183 /// Private helper for finding the local dependencies of a call site.
184 MemDepResult MemoryDependenceResults::getCallDependencyFrom(
185  CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
186  BasicBlock *BB) {
187  unsigned Limit = BlockScanLimit;
188 
189  // Walk backwards through the block, looking for dependencies.
190  while (ScanIt != BB->begin()) {
191  Instruction *Inst = &*--ScanIt;
192  // Debug intrinsics don't cause dependences and should not affect Limit
193  if (isa<DbgInfoIntrinsic>(Inst))
194  continue;
195 
196  // Limit the amount of scanning we do so we don't end up with quadratic
197  // running time on extreme testcases.
198  --Limit;
199  if (!Limit)
200  return MemDepResult::getUnknown();
201 
202  // If this inst is a memory op, get the pointer it accessed
203  MemoryLocation Loc;
204  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
205  if (Loc.Ptr) {
206  // A simple instruction.
207  if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
208  return MemDepResult::getClobber(Inst);
209  continue;
210  }
211 
212  if (auto *CallB = dyn_cast<CallBase>(Inst)) {
213  // If these two calls do not interfere, look past it.
214  if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
215  // If the two calls are the same, return Inst as a Def, so that
216  // Call can be found redundant and eliminated.
217  if (isReadOnlyCall && !isModSet(MR) &&
218  Call->isIdenticalToWhenDefined(CallB))
219  return MemDepResult::getDef(Inst);
220 
221  // Otherwise if the two calls don't interact (e.g. CallB is readnone)
222  // keep scanning.
223  continue;
224  } else
225  return MemDepResult::getClobber(Inst);
226  }
227 
228  // If we could not obtain a pointer for the instruction and the instruction
229  // touches memory then assume that this is a dependency.
230  if (isModOrRefSet(MR))
231  return MemDepResult::getClobber(Inst);
232  }
233 
234  // No dependence found. If this is the entry block of the function, it is
235  // unknown, otherwise it is non-local.
236  if (BB != &BB->getParent()->getEntryBlock())
237  return MemDepResult::getNonLocal();
238  return MemDepResult::getNonFuncLocal();
239 }
240 
241 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
242  const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
243  const LoadInst *LI) {
244  // We can only extend simple integer loads.
245  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
246  return 0;
247 
248  // Load widening is hostile to ThreadSanitizer: it may cause false positives
249  // or make the reports more cryptic (access sizes are wrong).
250  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
251  return 0;
252 
253  const DataLayout &DL = LI->getModule()->getDataLayout();
254 
255  // Get the base of this load.
256  int64_t LIOffs = 0;
257  const Value *LIBase =
258  GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
259 
260  // If the two pointers are not based on the same pointer, we can't tell that
261  // they are related.
262  if (LIBase != MemLocBase)
263  return 0;
264 
265  // Okay, the two values are based on the same pointer, but returned as
266  // no-alias. This happens when we have things like two byte loads at "P+1"
267  // and "P+3". Check to see if increasing the size of the "LI" load up to its
268  // alignment (or the largest native integer type) will allow us to load all
269  // the bits required by MemLoc.
270 
271  // If MemLoc is before LI, then no widening of LI will help us out.
272  if (MemLocOffs < LIOffs)
273  return 0;
274 
275  // Get the alignment of the load in bytes. We assume that it is safe to load
276  // any legal integer up to this size without a problem. For example, if we're
277  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
278  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
279  // to i16.
280  unsigned LoadAlign = LI->getAlignment();
281 
282  int64_t MemLocEnd = MemLocOffs + MemLocSize;
283 
284  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
285  if (LIOffs + LoadAlign < MemLocEnd)
286  return 0;
287 
288  // This is the size of the load to try. Start with the next larger power of
289  // two.
290  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
291  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
292 
293  while (true) {
294  // If this load size is bigger than our known alignment or would not fit
295  // into a native integer register, then we fail.
296  if (NewLoadByteSize > LoadAlign ||
297  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
298  return 0;
299 
300  if (LIOffs + NewLoadByteSize > MemLocEnd &&
301  (LI->getParent()->getParent()->hasFnAttribute(
302  Attribute::SanitizeAddress) ||
303  LI->getParent()->getParent()->hasFnAttribute(
304  Attribute::SanitizeHWAddress)))
305  // We will be reading past the location accessed by the original program.
306  // While this is safe in a regular build, Address Safety analysis tools
307  // may start reporting false warnings. So, don't do widening.
308  return 0;
309 
310  // If a load of this width would include all of MemLoc, then we succeed.
311  if (LIOffs + NewLoadByteSize >= MemLocEnd)
312  return NewLoadByteSize;
313 
314  NewLoadByteSize <<= 1;
315  }
316 }
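// Worked example (illustrative, not from the original source): an i8 load of
// base P at offset 0, known 4-byte aligned, queried against a 2-byte MemLoc at
// offset 1 (so MemLocEnd = 3). The loop first tries 2 bytes (0 + 2 < 3, too
// small), then 4 bytes (within the alignment, fits a legal i32, and 0 + 4 >= 3),
// so the function returns 4: widening the i8 load to an i32 covers the bytes
// that MemLoc needs.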
317 
318 static bool isVolatile(Instruction *Inst) {
319  if (auto *LI = dyn_cast<LoadInst>(Inst))
320  return LI->isVolatile();
321  if (auto *SI = dyn_cast<StoreInst>(Inst))
322  return SI->isVolatile();
323  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
324  return AI->isVolatile();
325  return false;
326 }
327 
328 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
329  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
330  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
331  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
332  if (QueryInst != nullptr) {
333  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
334  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
335 
336  if (InvariantGroupDependency.isDef())
337  return InvariantGroupDependency;
338  }
339  }
340  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
341  MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
342  if (SimpleDep.isDef())
343  return SimpleDep;
344  // Non-local invariant group dependency indicates there is a non-local Def
345  // (it only returns nonLocal if it finds nonLocal def), which is better than
346  // local clobber and everything else.
347  if (InvariantGroupDependency.isNonLocal())
348  return InvariantGroupDependency;
349 
350  assert(InvariantGroupDependency.isUnknown() &&
351  "InvariantGroupDependency should be only unknown at this point");
352  return SimpleDep;
353 }
354 
355 MemDepResult
356 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
357  BasicBlock *BB) {
358 
359  if (!LI->getMetadata(LLVMContext::MD_invariant_group))
360  return MemDepResult::getUnknown();
361 
362  // Take the ptr operand after all casts and geps 0. This way we can search
363  // cast graph down only.
364  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
365 
366  // It is not safe to walk the use list of a global value, because function
367  // passes aren't allowed to look outside their functions.
368  // FIXME: this could be fixed by filtering instructions from outside
369  // of current function.
370  if (isa<GlobalValue>(LoadOperand))
371  return MemDepResult::getUnknown();
372 
373  // Queue to process all pointers that are equivalent to load operand.
374  SmallVector<const Value *, 8> LoadOperandsQueue;
375  LoadOperandsQueue.push_back(LoadOperand);
376 
377  Instruction *ClosestDependency = nullptr;
378  // Order of instructions in the use list is unpredictable. In order to always
379  // get the same result, we will look for the closest dominance.
380  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
381  assert(Other && "Must call it with not null instruction");
382  if (Best == nullptr || DT.dominates(Best, Other))
383  return Other;
384  return Best;
385  };
386 
387  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
388  // we will see all the instructions. This should be fixed in MSSA.
389  while (!LoadOperandsQueue.empty()) {
390  const Value *Ptr = LoadOperandsQueue.pop_back_val();
391  assert(Ptr && !isa<GlobalValue>(Ptr) &&
392  "Null or GlobalValue should not be inserted");
393 
394  for (const Use &Us : Ptr->uses()) {
395  auto *U = dyn_cast<Instruction>(Us.getUser());
396  if (!U || U == LI || !DT.dominates(U, LI))
397  continue;
398 
399  // Bitcast or gep with zeros are using Ptr. Add to queue to check its
400  // users. U = bitcast Ptr
401  if (isa<BitCastInst>(U)) {
402  LoadOperandsQueue.push_back(U);
403  continue;
404  }
405  // Gep with zeros is equivalent to bitcast.
406  // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
407  // or gep 0 to bitcast because of SROA, so there are 2 forms. When
408  // typeless pointers are ready, both cases will be gone
409  // (and this BFS also won't be needed).
410  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
411  if (GEP->hasAllZeroIndices()) {
412  LoadOperandsQueue.push_back(U);
413  continue;
414  }
415 
416  // If we hit load/store with the same invariant.group metadata (and the
417  // same pointer operand) we can assume that the value pointed to by the pointer
418  // operand didn't change.
419  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
420  U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
421  ClosestDependency = GetClosestDependency(ClosestDependency, U);
422  }
423  }
424 
425  if (!ClosestDependency)
426  return MemDepResult::getUnknown();
427  if (ClosestDependency->getParent() == BB)
428  return MemDepResult::getDef(ClosestDependency);
429  // Def(U) can't be returned here because it is non-local. If local
430  // dependency is not found then return nonLocal, counting on the fact that
431  // the user will call getNonLocalPointerDependency, which will return the
432  // cached result.
433  NonLocalDefsCache.try_emplace(
434  LI, NonLocalDepResult(ClosestDependency->getParent(),
435  MemDepResult::getDef(ClosestDependency), nullptr));
436  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
437  return MemDepResult::getNonLocal();
438 }
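// Example (illustrative sketch) of the IR this handles:
//   %a = load i8, i8* %p, !invariant.group !0
//   call void @may_write(i8* %p)
//   %b = load i8, i8* %p, !invariant.group !0
// Querying %b reports a Def on %a: the invariant.group metadata promises the
// value at %p is unchanged along this path (as long as %p is not laundered),
// so the intervening call, which may otherwise write memory, does not break
// the dependency.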
439 
440 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
441  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
442  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
443  bool isInvariantLoad = false;
444 
445  if (!Limit) {
446  unsigned DefaultLimit = BlockScanLimit;
447  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
448  &DefaultLimit);
449  }
450 
451  // We must be careful with atomic accesses, as they may allow another thread
452  // to touch this location, clobbering it. We are conservative: if the
453  // QueryInst is not a simple (non-atomic) memory access, we automatically
454  // return getClobber.
455  // If it is simple, we know based on the results of
456  // "Compiler testing via a theory of sound optimisations in the C11/C++11
457  // memory model" in PLDI 2013, that a non-atomic location can only be
458  // clobbered between a pair of a release and an acquire action, with no
459  // access to the location in between.
460  // Here is an example for giving the general intuition behind this rule.
461  // In the following code:
462  // store x 0;
463  // release action; [1]
464  // acquire action; [4]
465  // %val = load x;
466  // It is unsafe to replace %val by 0 because another thread may be running:
467  // acquire action; [2]
468  // store x 42;
469  // release action; [3]
470  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
471  // being 42. A key property of this program however is that if either
472  // 1 or 4 were missing, there would be a race between the store of 42 and
473  // either the store of 0 or the load (making the whole program racy).
474  // The paper mentioned above shows that the same property is respected
475  // by every program that can detect any optimization of that kind: either
476  // it is racy (undefined) or there is a release followed by an acquire
477  // between the pair of accesses under consideration.
478 
479  // If the load is invariant, we "know" that it doesn't alias *any* write. We
480  // do want to respect mustalias results since defs are useful for value
481  // forwarding, but any mayalias write can be assumed to be noalias.
482  // Arguably, this logic should be pushed inside AliasAnalysis itself.
483  if (isLoad && QueryInst) {
484  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
485  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
486  isInvariantLoad = true;
487  }
488 
489  const DataLayout &DL = BB->getModule()->getDataLayout();
490 
491  // Create a numbered basic block to lazily compute and cache instruction
492  // positions inside a BB. This is used to provide fast queries for relative
493  // position between two instructions in a BB and can be used by
494  // AliasAnalysis::callCapturesBefore.
495  OrderedBasicBlock OBB(BB);
496 
497  // Return "true" if and only if the instruction I is either a non-simple
498  // load or a non-simple store.
499  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
500  if (auto *LI = dyn_cast<LoadInst>(I))
501  return !LI->isSimple();
502  if (auto *SI = dyn_cast<StoreInst>(I))
503  return !SI->isSimple();
504  return false;
505  };
506 
507  // Return "true" if I is not a load and not a store, but it does access
508  // memory.
509  auto isOtherMemAccess = [](Instruction *I) -> bool {
510  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
511  };
512 
513  // Walk backwards through the basic block, looking for dependencies.
514  while (ScanIt != BB->begin()) {
515  Instruction *Inst = &*--ScanIt;
516 
517  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
518  // Debug intrinsics don't (and can't) cause dependencies.
519  if (isa<DbgInfoIntrinsic>(II))
520  continue;
521 
522  // Limit the amount of scanning we do so we don't end up with quadratic
523  // running time on extreme testcases.
524  --*Limit;
525  if (!*Limit)
526  return MemDepResult::getUnknown();
527 
528  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
529  // If we reach a lifetime begin or end marker, then the query ends here
530  // because the value is undefined.
531  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
532  // FIXME: This only considers queries directly on the invariant-tagged
533  // pointer, not on query pointers that are indexed off of them. It'd
534  // be nice to handle that at some point (the right approach is to use
535  // GetPointerBaseWithConstantOffset).
536  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
537  return MemDepResult::getDef(II);
538  continue;
539  }
540  }
541 
542  // Values depend on loads if the pointers are must aliased. This means
543  // that a load depends on another must aliased load from the same value.
544  // One exception is atomic loads: a value can depend on an atomic load that
545  // it does not alias with when this atomic load indicates that another
546  // thread may be accessing the location.
547  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
548  // While volatile accesses cannot be eliminated, they do not have to clobber
549  // non-aliasing locations, as normal accesses, for example, can be safely
550  // reordered with volatile accesses.
551  if (LI->isVolatile()) {
552  if (!QueryInst)
553  // Original QueryInst *may* be volatile
554  return MemDepResult::getClobber(LI);
555  if (isVolatile(QueryInst))
556  // Ordering required if QueryInst is itself volatile
557  return MemDepResult::getClobber(LI);
558  // Otherwise, volatile doesn't imply any special ordering
559  }
560 
561  // Atomic loads have complications involved.
562  // A Monotonic (or higher) load is OK if the query inst is itself not
563  // atomic.
564  // FIXME: This is overly conservative.
565  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
566  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
567  isOtherMemAccess(QueryInst))
568  return MemDepResult::getClobber(LI);
569  if (LI->getOrdering() != AtomicOrdering::Monotonic)
570  return MemDepResult::getClobber(LI);
571  }
572 
573  MemoryLocation LoadLoc = MemoryLocation::get(LI);
574 
575  // If we found a pointer, check if it could be the same as our pointer.
576  AliasResult R = AA.alias(LoadLoc, MemLoc);
577 
578  if (isLoad) {
579  if (R == NoAlias)
580  continue;
581 
582  // Must aliased loads are defs of each other.
583  if (R == MustAlias)
584  return MemDepResult::getDef(Inst);
585 
586 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
587  // in terms of clobbering loads, but since it does this by looking
588  // at the clobbering load directly, it doesn't know about any
589  // phi translation that may have happened along the way.
590 
591  // If we have a partial alias, then return this as a clobber for the
592  // client to handle.
593  if (R == PartialAlias)
594  return MemDepResult::getClobber(Inst);
595 #endif
596 
597  // May-alias loads don't clobber each other, so keep scanning for a
598  // real dependence.
599  continue;
600  }
601 
602  // Stores don't depend on other no-aliased accesses.
603  if (R == NoAlias)
604  continue;
605 
606  // Stores don't alias loads from read-only memory.
607  if (AA.pointsToConstantMemory(LoadLoc))
608  continue;
609 
610  // Stores depend on may/must aliased loads.
611  return MemDepResult::getDef(Inst);
612  }
613 
614  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
615  // Atomic stores have complications involved.
616  // A Monotonic store is OK if the query inst is itself not atomic.
617  // FIXME: This is overly conservative.
618  if (!SI->isUnordered() && SI->isAtomic()) {
619  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
620  isOtherMemAccess(QueryInst))
621  return MemDepResult::getClobber(SI);
622  if (SI->getOrdering() != AtomicOrdering::Monotonic)
623  return MemDepResult::getClobber(SI);
624  }
625 
626  // FIXME: this is overly conservative.
627  // While volatile accesses cannot be eliminated, they do not have to clobber
628  // non-aliasing locations, as normal accesses can for example be reordered
629  // with volatile accesses.
630  if (SI->isVolatile())
631  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
632  isOtherMemAccess(QueryInst))
633  return MemDepResult::getClobber(SI);
634 
635  // If alias analysis can tell that this store is guaranteed to not modify
636  // the query pointer, ignore it. Use getModRefInfo to handle cases where
637  // the query pointer points to constant memory etc.
638  if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
639  continue;
640 
641  // Ok, this store might clobber the query pointer. Check to see if it is
642  // a must alias: in this case, we want to return this as a def.
643  // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
644  MemoryLocation StoreLoc = MemoryLocation::get(SI);
645 
646  // If we found a pointer, check if it could be the same as our pointer.
647  AliasResult R = AA.alias(StoreLoc, MemLoc);
648 
649  if (R == NoAlias)
650  continue;
651  if (R == MustAlias)
652  return MemDepResult::getDef(Inst);
653  if (isInvariantLoad)
654  continue;
655  return MemDepResult::getClobber(Inst);
656  }
657 
658  // If this is an allocation, and if we know that the accessed pointer is to
659  // the allocation, return Def. This means that there is no dependence and
660  // the access can be optimized based on that. For example, a load could
661  // turn into undef. Note that we can bypass the allocation itself when
662  // looking for a clobber in many cases; that's an alias property and is
663  // handled by BasicAA.
664  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
665  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
666  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
667  return MemDepResult::getDef(Inst);
668  }
669 
670  if (isInvariantLoad)
671  continue;
672 
673  // A release fence requires that all stores complete before it, but does
674  // not prevent the reordering of following loads or stores 'before' the
675  // fence. As a result, we look past it when finding a dependency for
676  // loads. DSE uses this to find preceding stores to delete and thus we
677  // can't bypass the fence if the query instruction is a store.
678  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
679  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
680  continue;
681 
682  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
683  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
684  // If necessary, perform additional analysis.
685  if (isModAndRefSet(MR))
686  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
687  switch (clearMust(MR)) {
688  case ModRefInfo::NoModRef:
689  // If the call has no effect on the queried pointer, just ignore it.
690  continue;
691  case ModRefInfo::Mod:
692  return MemDepResult::getClobber(Inst);
693  case ModRefInfo::Ref:
694  // If the call is known to never store to the pointer, and if this is a
695  // load query, we can safely ignore it (scan past it).
696  if (isLoad)
697  continue;
698  LLVM_FALLTHROUGH;
699  default:
700  // Otherwise, there is a potential dependence. Return a clobber.
701  return MemDepResult::getClobber(Inst);
702  }
703  }
704 
705  // No dependence found. If this is the entry block of the function, it is
706  // unknown, otherwise it is non-local.
707  if (BB != &BB->getParent()->getEntryBlock())
708  return MemDepResult::getNonLocal();
709  return MemDepResult::getNonFuncLocal();
710 }
711 
712 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
713  Instruction *ScanPos = QueryInst;
714 
715  // Check for a cached result
716  MemDepResult &LocalCache = LocalDeps[QueryInst];
717 
718  // If the cached entry is non-dirty, just return it. Note that this depends
719  // on MemDepResult's default constructing to 'dirty'.
720  if (!LocalCache.isDirty())
721  return LocalCache;
722 
723  // Otherwise, if we have a dirty entry, we know we can start the scan at that
724  // instruction, which may save us some work.
725  if (Instruction *Inst = LocalCache.getInst()) {
726  ScanPos = Inst;
727 
728  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
729  }
730 
731  BasicBlock *QueryParent = QueryInst->getParent();
732 
733  // Do the scan.
734  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
735  // No dependence found. If this is the entry block of the function, it is
736  // unknown, otherwise it is non-local.
737  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
738  LocalCache = MemDepResult::getNonLocal();
739  else
740  LocalCache = MemDepResult::getNonFuncLocal();
741  } else {
742  MemoryLocation MemLoc;
743  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
744  if (MemLoc.Ptr) {
745  // If we can do a pointer scan, make it happen.
746  bool isLoad = !isModSet(MR);
747  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
748  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
749 
750  LocalCache = getPointerDependencyFrom(
751  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
752  } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
753  bool isReadOnly = AA.onlyReadsMemory(QueryCall);
754  LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
755  ScanPos->getIterator(), QueryParent);
756  } else
757  // Non-memory instruction.
758  LocalCache = MemDepResult::getUnknown();
759  }
760 
761  // Remember the result!
762  if (Instruction *I = LocalCache.getInst())
763  ReverseLocalDeps[I].insert(QueryInst);
764 
765  return LocalCache;
766 }
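// A minimal sketch (not part of the original file) of how a client pass might
// drive this query; handleLocalDef and handleNonLocalDep are hypothetical
// helpers standing in for whatever the client does with the answer:
#if 0
static void processLoad(LoadInst *LI, MemoryDependenceResults &MD) {
  MemDepResult Dep = MD.getDependency(LI);
  if (Dep.isDef()) {
    // A local instruction (e.g. a store to the same address) produces the
    // value; a candidate for forwarding/CSE.
    handleLocalDef(LI, Dep.getInst());
  } else if (Dep.isNonLocal()) {
    // The answer lives in predecessor blocks; switch to the non-local API.
    SmallVector<NonLocalDepResult, 16> Deps;
    MD.getNonLocalPointerDependency(LI, Deps);
    for (const NonLocalDepResult &D : Deps)
      handleNonLocalDep(LI, D.getBB(), D.getResult());
  }
}
#endif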
767 
768 #ifndef NDEBUG
769 /// This method is used when -debug is specified to verify that cache arrays
770 /// are properly kept sorted.
771 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
772  int Count = -1) {
773  if (Count == -1)
774  Count = Cache.size();
775  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
776  "Cache isn't sorted!");
777 }
778 #endif
779 
780 const MemoryDependenceResults::NonLocalDepInfo &
781 MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
782  assert(getDependency(QueryCall).isNonLocal() &&
783  "getNonLocalCallDependency should only be used on calls with "
784  "non-local deps!");
785  PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
786  NonLocalDepInfo &Cache = CacheP.first;
787 
788  // This is the set of blocks that need to be recomputed. In the cached case,
789  // this can happen due to instructions being deleted etc. In the uncached
790  // case, this starts out as the set of predecessors we care about.
791  SmallVector<BasicBlock *, 32> DirtyBlocks;
792 
793  if (!Cache.empty()) {
794  // Okay, we have a cache entry. If we know it is not dirty, just return it
795  // with no computation.
796  if (!CacheP.second) {
797  ++NumCacheNonLocal;
798  return Cache;
799  }
800 
801  // If we already have a partially computed set of results, scan them to
802  // determine what is dirty, seeding our initial DirtyBlocks worklist.
803  for (auto &Entry : Cache)
804  if (Entry.getResult().isDirty())
805  DirtyBlocks.push_back(Entry.getBB());
806 
807  // Sort the cache so that we can do fast binary search lookups below.
808  llvm::sort(Cache);
809 
810  ++NumCacheDirtyNonLocal;
811  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
812  // << Cache.size() << " cached: " << *QueryInst;
813  } else {
814  // Seed DirtyBlocks with each of the preds of QueryInst's block.
815  BasicBlock *QueryBB = QueryCall->getParent();
816  for (BasicBlock *Pred : PredCache.get(QueryBB))
817  DirtyBlocks.push_back(Pred);
818  ++NumUncacheNonLocal;
819  }
820 
821  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
822  bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
823 
824  SmallPtrSet<BasicBlock *, 32> Visited;
825 
826  unsigned NumSortedEntries = Cache.size();
827  LLVM_DEBUG(AssertSorted(Cache));
828 
829  // Iterate while we still have blocks to update.
830  while (!DirtyBlocks.empty()) {
831  BasicBlock *DirtyBB = DirtyBlocks.back();
832  DirtyBlocks.pop_back();
833 
834  // Already processed this block?
835  if (!Visited.insert(DirtyBB).second)
836  continue;
837 
838  // Do a binary search to see if we already have an entry for this block in
839  // the cache set. If so, find it.
840  LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
841  NonLocalDepInfo::iterator Entry =
842  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
843  NonLocalDepEntry(DirtyBB));
844  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
845  --Entry;
846 
847  NonLocalDepEntry *ExistingResult = nullptr;
848  if (Entry != Cache.begin() + NumSortedEntries &&
849  Entry->getBB() == DirtyBB) {
850  // If we already have an entry, and if it isn't already dirty, the block
851  // is done.
852  if (!Entry->getResult().isDirty())
853  continue;
854 
855  // Otherwise, remember this slot so we can update the value.
856  ExistingResult = &*Entry;
857  }
858 
859  // If the dirty entry has a pointer, start scanning from it so we don't have
860  // to rescan the entire block.
861  BasicBlock::iterator ScanPos = DirtyBB->end();
862  if (ExistingResult) {
863  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
864  ScanPos = Inst->getIterator();
865  // We're removing QueryInst's use of Inst.
866  RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
867  QueryCall);
868  }
869  }
870 
871  // Find out if this block has a local dependency for QueryInst.
872  MemDepResult Dep;
873 
874  if (ScanPos != DirtyBB->begin()) {
875  Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
876  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
877  // No dependence found. If this is the entry block of the function, it is
878  // unknown, otherwise it is non-local.
879  Dep = MemDepResult::getNonLocal();
880  } else {
881  Dep = MemDepResult::getNonFuncLocal();
882  }
883 
884  // If we had a dirty entry for the block, update it. Otherwise, just add
885  // a new entry.
886  if (ExistingResult)
887  ExistingResult->setResult(Dep);
888  else
889  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
890 
891  // If the block has a dependency (i.e. it isn't completely transparent to
892  // the value), remember the association!
893  if (!Dep.isNonLocal()) {
894  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
895  // update this when we remove instructions.
896  if (Instruction *Inst = Dep.getInst())
897  ReverseNonLocalDeps[Inst].insert(QueryCall);
898  } else {
899 
900  // If the block *is* completely transparent to the load, we need to check
901  // the predecessors of this block. Add them to our worklist.
902  for (BasicBlock *Pred : PredCache.get(DirtyBB))
903  DirtyBlocks.push_back(Pred);
904  }
905  }
906 
907  return Cache;
908 }
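// A minimal sketch (not part of the original file) of consuming the result;
// considerCallCSE is a hypothetical helper:
#if 0
static void findRedundantCall(CallBase *Call, MemoryDependenceResults &MD) {
  if (!MD.getDependency(Call).isNonLocal())
    return;
  for (const NonLocalDepEntry &E : MD.getNonLocalCallDependency(Call))
    if (E.getResult().isDef())
      // E.getResult().getInst() is an identical call that reaches this one
      // from block E.getBB(), a candidate for elimination.
      considerCallCSE(Call, E.getResult().getInst());
}
#endif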
909 
910 void MemoryDependenceResults::getNonLocalPointerDependency(
911  Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
912  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
913  bool isLoad = isa<LoadInst>(QueryInst);
914  BasicBlock *FromBB = QueryInst->getParent();
915  assert(FromBB);
916 
917  assert(Loc.Ptr->getType()->isPointerTy() &&
918  "Can't get pointer deps of a non-pointer!");
919  Result.clear();
920  {
921  // Check if there is cached Def with invariant.group.
922  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
923  if (NonLocalDefIt != NonLocalDefsCache.end()) {
924  Result.push_back(NonLocalDefIt->second);
925  ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
926  .erase(QueryInst);
927  NonLocalDefsCache.erase(NonLocalDefIt);
928  return;
929  }
930  }
931  // This routine does not expect to deal with volatile instructions.
932  // Doing so would require piping through the QueryInst all the way through.
933  // TODO: volatiles can't be elided, but they can be reordered with other
934  // non-volatile accesses.
935 
936  // We currently give up on any instruction which is ordered, but we do handle
937  // atomic instructions which are unordered.
938  // TODO: Handle ordered instructions
939  auto isOrdered = [](Instruction *Inst) {
940  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
941  return !LI->isUnordered();
942  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
943  return !SI->isUnordered();
944  }
945  return false;
946  };
947  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
948  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
949  const_cast<Value *>(Loc.Ptr)));
950  return;
951  }
952  const DataLayout &DL = FromBB->getModule()->getDataLayout();
953  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
954 
955  // This is the set of blocks we've inspected, and the pointer we consider in
956  // each block. Because of critical edges, we currently bail out if querying
957  // a block with multiple different pointers. This can happen during PHI
958  // translation.
959  DenseMap<BasicBlock *, Value *> Visited;
960  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
961  Result, Visited, true))
962  return;
963  Result.clear();
964  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
965  const_cast<Value *>(Loc.Ptr)));
966 }
967 
968 /// Compute the memdep value for BB with Pointer/PointeeSize using either
969 /// cached information in Cache or by doing a lookup (which may use dirty cache
970 /// info if available).
971 ///
972 /// If we do a lookup, add the result to the cache.
973 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
974  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
975  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
976 
977  // Do a binary search to see if we already have an entry for this block in
978  // the cache set. If so, find it.
979  NonLocalDepInfo::iterator Entry = std::upper_bound(
980  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
981  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
982  --Entry;
983 
984  NonLocalDepEntry *ExistingResult = nullptr;
985  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
986  ExistingResult = &*Entry;
987 
988  // If we have a cached entry, and it is non-dirty, use it as the value for
989  // this dependency.
990  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
991  ++NumCacheNonLocalPtr;
992  return ExistingResult->getResult();
993  }
994 
995  // Otherwise, we have to scan for the value. If we have a dirty cache
996  // entry, start scanning from its position, otherwise we scan from the end
997  // of the block.
998  BasicBlock::iterator ScanPos = BB->end();
999  if (ExistingResult && ExistingResult->getResult().getInst()) {
1000  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1001  "Instruction invalidated?");
1002  ++NumCacheDirtyNonLocalPtr;
1003  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1004 
1005  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1006  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1007  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1008  } else {
1009  ++NumUncacheNonLocalPtr;
1010  }
1011 
1012  // Scan the block for the dependency.
1013  MemDepResult Dep =
1014  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1015 
1016  // If we had a dirty entry for the block, update it. Otherwise, just add
1017  // a new entry.
1018  if (ExistingResult)
1019  ExistingResult->setResult(Dep);
1020  else
1021  Cache->push_back(NonLocalDepEntry(BB, Dep));
1022 
1023  // If the block has a dependency (i.e. it isn't completely transparent to
1024  // the value), remember the reverse association because we just added it
1025  // to Cache!
1026  if (!Dep.isDef() && !Dep.isClobber())
1027  return Dep;
1028 
1029  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1030  // update MemDep when we remove instructions.
1031  Instruction *Inst = Dep.getInst();
1032  assert(Inst && "Didn't depend on anything?");
1033  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1034  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1035  return Dep;
1036 }
1037 
1038 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1039 /// array that are already properly ordered.
1040 ///
1041 /// This is optimized for the case when only a few entries are added.
1042 static void
1043 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1044  unsigned NumSortedEntries) {
1045  switch (Cache.size() - NumSortedEntries) {
1046  case 0:
1047  // done, no new entries.
1048  break;
1049  case 2: {
1050  // Two new entries, insert the last one into place.
1051  NonLocalDepEntry Val = Cache.back();
1052  Cache.pop_back();
1053  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1054  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1055  Cache.insert(Entry, Val);
1056  LLVM_FALLTHROUGH;
1057  }
1058  case 1:
1059  // One new entry, Just insert the new value at the appropriate position.
1060  if (Cache.size() != 1) {
1061  NonLocalDepEntry Val = Cache.back();
1062  Cache.pop_back();
1063  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1064  std::upper_bound(Cache.begin(), Cache.end(), Val);
1065  Cache.insert(Entry, Val);
1066  }
1067  break;
1068  default:
1069  // Added many values, do a full scale sort.
1070  llvm::sort(Cache);
1071  break;
1072  }
1073 }
1074 
1075 /// Perform a dependency query based on pointer/pointeesize starting at the end
1076 /// of StartBB.
1077 ///
1078 /// Add any clobber/def results to the results vector and keep track of which
1079 /// blocks are visited in 'Visited'.
1080 ///
1081 /// This has special behavior for the first block queries (when SkipFirstBlock
1082 /// is true). In this special case, it ignores the contents of the specified
1083 /// block and starts returning dependence info for its predecessors.
1084 ///
1085 /// This function returns true on success, or false to indicate that it could
1086 /// not compute dependence information for some reason. This should be treated
1087 /// as a clobber dependence on the first instruction in the predecessor block.
1088 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1089  Instruction *QueryInst, const PHITransAddr &Pointer,
1090  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1091  SmallVectorImpl<NonLocalDepResult> &Result,
1092  DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1093  // Look up the cached info for Pointer.
1094  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1095 
1096  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1097  // CacheKey, this value will be inserted as the associated value. Otherwise,
1098  // it'll be ignored, and we'll have to check to see if the cached size and
1099  // aa tags are consistent with the current query.
1100  NonLocalPointerInfo InitialNLPI;
1101  InitialNLPI.Size = Loc.Size;
1102  InitialNLPI.AATags = Loc.AATags;
1103 
1104  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1105  // already have one.
1106  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1107  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1108  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1109 
1110  // If we already have a cache entry for this CacheKey, we may need to do some
1111  // work to reconcile the cache entry and the current query.
1112  if (!Pair.second) {
1113  if (CacheInfo->Size != Loc.Size) {
1114  bool ThrowOutEverything;
1115  if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1116  // FIXME: We may be able to do better in the face of results with mixed
1117  // precision. We don't appear to get them in practice, though, so just
1118  // be conservative.
1119  ThrowOutEverything =
1120  CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1121  CacheInfo->Size.getValue() < Loc.Size.getValue();
1122  } else {
1123  // For our purposes, unknown size > all others.
1124  ThrowOutEverything = !Loc.Size.hasValue();
1125  }
1126 
1127  if (ThrowOutEverything) {
1128  // The query's Size is greater than the cached one. Throw out the
1129  // cached data and proceed with the query at the greater size.
1130  CacheInfo->Pair = BBSkipFirstBlockPair();
1131  CacheInfo->Size = Loc.Size;
1132  for (auto &Entry : CacheInfo->NonLocalDeps)
1133  if (Instruction *Inst = Entry.getResult().getInst())
1134  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1135  CacheInfo->NonLocalDeps.clear();
1136  } else {
1137  // This query's Size is less than the cached one. Conservatively restart
1138  // the query using the greater size.
1139  return getNonLocalPointerDepFromBB(
1140  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1141  StartBB, Result, Visited, SkipFirstBlock);
1142  }
1143  }
1144 
1145  // If the query's AATags are inconsistent with the cached one,
1146  // conservatively throw out the cached data and restart the query with
1147  // no tag if needed.
1148  if (CacheInfo->AATags != Loc.AATags) {
1149  if (CacheInfo->AATags) {
1150  CacheInfo->Pair = BBSkipFirstBlockPair();
1151  CacheInfo->AATags = AAMDNodes();
1152  for (auto &Entry : CacheInfo->NonLocalDeps)
1153  if (Instruction *Inst = Entry.getResult().getInst())
1154  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1155  CacheInfo->NonLocalDeps.clear();
1156  }
1157  if (Loc.AATags)
1158  return getNonLocalPointerDepFromBB(
1159  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1160  Visited, SkipFirstBlock);
1161  }
1162  }
1163 
1164  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1165 
1166  // If we have valid cached information for exactly the block we are
1167  // investigating, just return it with no recomputation.
1168  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1169  // We have a fully cached result for this query, so we can just return the
1170  // cached results and populate the visited set. However, we have to verify
1171  // that we don't already have conflicting results for these blocks. Check
1172  // to ensure that if a block in the results set is in the visited set that
1173  // it was for the same pointer query.
1174  if (!Visited.empty()) {
1175  for (auto &Entry : *Cache) {
1176  DenseMap<BasicBlock *, Value *>::iterator VI =
1177  Visited.find(Entry.getBB());
1178  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1179  continue;
1180 
1181  // We have a pointer mismatch in a block. Just return false, saying
1182  // that something was clobbered in this result. We could also do a
1183  // non-fully cached query, but there is little point in doing this.
1184  return false;
1185  }
1186  }
1187 
1188  Value *Addr = Pointer.getAddr();
1189  for (auto &Entry : *Cache) {
1190  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1191  if (Entry.getResult().isNonLocal()) {
1192  continue;
1193  }
1194 
1195  if (DT.isReachableFromEntry(Entry.getBB())) {
1196  Result.push_back(
1197  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1198  }
1199  }
1200  ++NumCacheCompleteNonLocalPtr;
1201  return true;
1202  }
1203 
1204  // Otherwise, either this is a new block, a block with an invalid cache
1205  // pointer or one that we're about to invalidate by putting more info into it
1206  // than its valid cache info. If empty, the result will be valid cache info,
1207  // otherwise it isn't.
1208  if (Cache->empty())
1209  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1210  else
1211  CacheInfo->Pair = BBSkipFirstBlockPair();
1212 
1213  SmallVector<BasicBlock *, 32> Worklist;
1214  Worklist.push_back(StartBB);
1215 
1216  // PredList used inside loop.
1217  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1218 
1219  // Keep track of the entries that we know are sorted. Previously cached
1220  // entries will all be sorted. The entries we add we only sort on demand (we
1221  // don't insert every element into its sorted position). We know that we
1222  // won't get any reuse from currently inserted values, because we don't
1223  // revisit blocks after we insert info for them.
1224  unsigned NumSortedEntries = Cache->size();
1225  unsigned WorklistEntries = BlockNumberLimit;
1226  bool GotWorklistLimit = false;
1227  LLVM_DEBUG(AssertSorted(*Cache));
1228 
1229  while (!Worklist.empty()) {
1230  BasicBlock *BB = Worklist.pop_back_val();
1231 
1232  // If we do process a large number of blocks it becomes very expensive and
1233  // likely it isn't worth worrying about
1234  if (Result.size() > NumResultsLimit) {
1235  Worklist.clear();
1236  // Sort it now (if needed) so that recursive invocations of
1237  // getNonLocalPointerDepFromBB and other routines that could reuse the
1238  // cache value will only see properly sorted cache arrays.
1239  if (Cache && NumSortedEntries != Cache->size()) {
1240  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1241  }
1242  // Since we bail out, the "Cache" set won't contain all of the
1243  // results for the query. This is ok (we can still use it to accelerate
1244  // specific block queries) but we can't do the fastpath "return all
1245  // results from the set". Clear out the indicator for this.
1246  CacheInfo->Pair = BBSkipFirstBlockPair();
1247  return false;
1248  }
1249 
1250  // Skip the first block if we have it.
1251  if (!SkipFirstBlock) {
1252  // Analyze the dependency of *Pointer in FromBB. See if we already have
1253  // been here.
1254  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1255 
1256  // Get the dependency info for Pointer in BB. If we have cached
1257  // information, we will use it, otherwise we compute it.
1258  LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1259  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1260  Cache, NumSortedEntries);
1261 
1262  // If we got a Def or Clobber, add this to the list of results.
1263  if (!Dep.isNonLocal()) {
1264  if (DT.isReachableFromEntry(BB)) {
1265  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1266  continue;
1267  }
1268  }
1269  }
1270 
1271  // If 'Pointer' is an instruction defined in this block, then we need to do
1272  // phi translation to change it into a value live in the predecessor block.
1273  // If not, we just add the predecessors to the worklist and scan them with
1274  // the same Pointer.
1275  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1276  SkipFirstBlock = false;
1277  SmallVector<BasicBlock *, 16> NewBlocks;
1278  for (BasicBlock *Pred : PredCache.get(BB)) {
1279  // Verify that we haven't looked at this block yet.
1280  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1281  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1282  if (InsertRes.second) {
1283  // First time we've looked at *PI.
1284  NewBlocks.push_back(Pred);
1285  continue;
1286  }
1287 
1288  // If we have seen this block before, but it was with a different
1289  // pointer then we have a phi translation failure and we have to treat
1290  // this as a clobber.
1291  if (InsertRes.first->second != Pointer.getAddr()) {
1292  // Make sure to clean up the Visited map before continuing on to
1293  // PredTranslationFailure.
1294  for (unsigned i = 0; i < NewBlocks.size(); i++)
1295  Visited.erase(NewBlocks[i]);
1296  goto PredTranslationFailure;
1297  }
1298  }
1299  if (NewBlocks.size() > WorklistEntries) {
1300  // Make sure to clean up the Visited map before continuing on to
1301  // PredTranslationFailure.
1302  for (unsigned i = 0; i < NewBlocks.size(); i++)
1303  Visited.erase(NewBlocks[i]);
1304  GotWorklistLimit = true;
1305  goto PredTranslationFailure;
1306  }
1307  WorklistEntries -= NewBlocks.size();
1308  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1309  continue;
1310  }
1311 
1312  // We do need to do phi translation. If we know ahead of time we can't phi
1313  // translate this value, don't even try.
1314  if (!Pointer.IsPotentiallyPHITranslatable())
1315  goto PredTranslationFailure;
1316 
1317  // We may have added values to the cache list before this PHI translation.
1318  // If so, we haven't done anything to ensure that the cache remains sorted.
1319  // Sort it now (if needed) so that recursive invocations of
1320  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1321  // value will only see properly sorted cache arrays.
1322  if (Cache && NumSortedEntries != Cache->size()) {
1323  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1324  NumSortedEntries = Cache->size();
1325  }
1326  Cache = nullptr;
1327 
1328  PredList.clear();
1329  for (BasicBlock *Pred : PredCache.get(BB)) {
1330  PredList.push_back(std::make_pair(Pred, Pointer));
1331 
1332  // Get the PHI translated pointer in this predecessor. This can fail if
1333  // not translatable, in which case the getAddr() returns null.
1334  PHITransAddr &PredPointer = PredList.back().second;
1335  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1336  Value *PredPtrVal = PredPointer.getAddr();
1337 
1338  // Check to see if we have already visited this pred block with another
1339  // pointer. If so, we can't do this lookup. This failure can occur
1340  // with PHI translation when a critical edge exists and the PHI node in
1341  // the successor translates to a pointer value different than the
1342  // pointer the block was first analyzed with.
1343  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1344  Visited.insert(std::make_pair(Pred, PredPtrVal));
1345 
1346  if (!InsertRes.second) {
1347  // We found the pred; take it off the list of preds to visit.
1348  PredList.pop_back();
1349 
1350  // If the predecessor was visited with PredPtr, then we already did
1351  // the analysis and can ignore it.
1352  if (InsertRes.first->second == PredPtrVal)
1353  continue;
1354 
1355  // Otherwise, the block was previously analyzed with a different
1356  // pointer. We can't represent the result of this case, so we just
1357  // treat this as a phi translation failure.
1358 
1359  // Make sure to clean up the Visited map before continuing on to
1360  // PredTranslationFailure.
1361  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1362  Visited.erase(PredList[i].first);
1363 
1364  goto PredTranslationFailure;
1365  }
1366  }
1367 
1368  // Actually process results here; this needs to be a separate loop to avoid
1369  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1370  // any results for. (getNonLocalPointerDepFromBB will modify our
1371  // datastructures in ways the code after the PredTranslationFailure label
1372  // doesn't expect.)
1373  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1374  BasicBlock *Pred = PredList[i].first;
1375  PHITransAddr &PredPointer = PredList[i].second;
1376  Value *PredPtrVal = PredPointer.getAddr();
1377 
1378  bool CanTranslate = true;
1379  // If PHI translation was unable to find an available pointer in this
1380  // predecessor, then we have to assume that the pointer is clobbered in
1381  // that predecessor. We can still do PRE of the load, which would insert
1382  // a computation of the pointer in this predecessor.
1383  if (!PredPtrVal)
1384  CanTranslate = false;
1385 
1386  // FIXME: it is entirely possible that PHI translating will end up with
1387  // the same value. Consider PHI translating something like:
1388  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1389  // to recurse here, pedantically speaking.
1390 
1391  // If getNonLocalPointerDepFromBB fails here, that means the cached
1392  // result conflicted with the Visited list; we have to conservatively
1393  // assume it is unknown, but this also does not block PRE of the load.
1394  if (!CanTranslate ||
1395  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1396  Loc.getWithNewPtr(PredPtrVal), isLoad,
1397  Pred, Result, Visited)) {
1398  // Add the entry to the Result list.
1399  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1400  Result.push_back(Entry);
1401 
1402  // Since we had a phi translation failure, the cache for CacheKey won't
1403  // include all of the entries that we need to immediately satisfy future
1404  // queries. Mark this in NonLocalPointerDeps by setting the
1405  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1406  // cached value to do more work but not miss the phi trans failure.
1407  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1408  NLPI.Pair = BBSkipFirstBlockPair();
1409  continue;
1410  }
1411  }
1412 
1413  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1414  CacheInfo = &NonLocalPointerDeps[CacheKey];
1415  Cache = &CacheInfo->NonLocalDeps;
1416  NumSortedEntries = Cache->size();
1417 
1418  // Since we did phi translation, the "Cache" set won't contain all of the
1419  // results for the query. This is ok (we can still use it to accelerate
1420  // specific block queries) but we can't do the fastpath "return all
1421  // results from the set". Clear out the indicator for this.
1422  CacheInfo->Pair = BBSkipFirstBlockPair();
1423  SkipFirstBlock = false;
1424  continue;
1425 
1426  PredTranslationFailure:
1427  // The following code is "failure"; we can't produce a sane translation
1428  // for the given block. It assumes that we haven't modified any of
1429  // our datastructures while processing the current block.
1430 
1431  if (!Cache) {
1432  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1433  CacheInfo = &NonLocalPointerDeps[CacheKey];
1434  Cache = &CacheInfo->NonLocalDeps;
1435  NumSortedEntries = Cache->size();
1436  }
1437 
1438  // Since we failed phi translation, the "Cache" set won't contain all of the
1439  // results for the query. This is ok (we can still use it to accelerate
1440  // specific block queries) but we can't do the fastpath "return all
1441  // results from the set". Clear out the indicator for this.
1442  CacheInfo->Pair = BBSkipFirstBlockPair();
1443 
1444  // If *nothing* works, mark the pointer as unknown.
1445  //
1446  // If this is the magic first block, return this as a clobber of the whole
1447  // incoming value. Since we can't phi translate to one of the predecessors,
1448  // we have to bail out.
1449  if (SkipFirstBlock)
1450  return false;
1451 
1452  bool foundBlock = false;
1453  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1454  if (I.getBB() != BB)
1455  continue;
1456 
1457  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1458  !DT.isReachableFromEntry(BB)) &&
1459  "Should only be here with transparent block");
1460  foundBlock = true;
1461  I.setResult(MemDepResult::getUnknown());
1462  Result.push_back(
1463  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1464  break;
1465  }
1466  (void)foundBlock; (void)GotWorklistLimit;
1467  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1468  }
1469 
1470  // Okay, we're done now. If we added new values to the cache, re-sort it.
1471  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1472  LLVM_DEBUG(AssertSorted(*Cache));
1473  return true;
1474 }
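For orientation: a client such as GVN reaches this machinery through getNonLocalPointerDependency() and then walks the per-block answers. The following is a minimal sketch only (MD is an already-obtained MemoryDependenceResults; QueryLoad and the handle* helpers are hypothetical, not part of this file):

    SmallVector<NonLocalDepResult, 16> Deps;
    MD.getNonLocalPointerDependency(QueryLoad, Deps);
    for (const NonLocalDepResult &NLD : Deps) {
      const MemDepResult &R = NLD.getResult();
      if (R.isDef())          // the value is produced by R.getInst() in NLD.getBB()
        handleDef(NLD.getBB(), R.getInst());
      else if (R.isClobber()) // the location may be written by R.getInst()
        handleClobber(NLD.getBB(), R.getInst());
      // isNonLocal()/isUnknown() entries carry no instruction to act on.
    }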
1475 
1476 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1477 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1478  ValueIsLoadPair P) {
1479 
1480  // Most of the time this cache is empty.
1481  if (!NonLocalDefsCache.empty()) {
1482  auto it = NonLocalDefsCache.find(P.getPointer());
1483  if (it != NonLocalDefsCache.end()) {
1484  RemoveFromReverseMap(ReverseNonLocalDefsCache,
1485  it->second.getResult().getInst(), P.getPointer());
1486  NonLocalDefsCache.erase(it);
1487  }
1488 
1489  if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1490  auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1491  if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1492  for (const auto &entry : toRemoveIt->second)
1493  NonLocalDefsCache.erase(entry);
1494  ReverseNonLocalDefsCache.erase(toRemoveIt);
1495  }
1496  }
1497  }
1498 
1499  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1500  if (It == NonLocalPointerDeps.end())
1501  return;
1502 
1503  // Remove all of the entries in the BB->val map. This involves removing
1504  // instructions from the reverse map.
1505  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1506 
1507  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1508  Instruction *Target = PInfo[i].getResult().getInst();
1509  if (!Target)
1510  continue; // Ignore non-local dep results.
1511  assert(Target->getParent() == PInfo[i].getBB());
1512 
1513  // We are eliminating the dirty entry from 'Cache', so update the reverse info.
1514  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1515  }
1516 
1517  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1518  NonLocalPointerDeps.erase(It);
1519 }
1520 
1521 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1522  // If Ptr isn't really a pointer, just ignore it.
1523  if (!Ptr->getType()->isPointerTy())
1524  return;
1525  // Flush store info for the pointer.
1526  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1527  // Flush load info for the pointer.
1528  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1529  // Invalidate phis that use the pointer.
1530  PV.invalidateValue(Ptr);
1531 }
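Usage note, as a sketch under assumed client code (Old and New are pointer-typed Values, MD is a MemoryDependenceResults): a pass that redirects uses of one pointer to another typically flushes the cache for the surviving pointer, since results cached for it may now be too conservative:

    Old->replaceAllUsesWith(New);
    if (New->getType()->isPointerTy())
      MD.invalidateCachedPointerInfo(New); // drop cached deps keyed on New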
1532 
1533 void MemoryDependenceResults::invalidateCachedPredecessors() {
1534  PredCache.clear();
1535 }
1536 
1537 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1538  // Walk through the Non-local dependencies, removing this one as the value
1539  // for any cached queries.
1540  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1541  if (NLDI != NonLocalDeps.end()) {
1542  NonLocalDepInfo &BlockMap = NLDI->second.first;
1543  for (auto &Entry : BlockMap)
1544  if (Instruction *Inst = Entry.getResult().getInst())
1545  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1546  NonLocalDeps.erase(NLDI);
1547  }
1548 
1549  // If we have a cached local dependence query for this instruction, remove it.
1550  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1551  if (LocalDepEntry != LocalDeps.end()) {
1552  // Remove us from DepInst's reverse set now that the local dep info is gone.
1553  if (Instruction *Inst = LocalDepEntry->second.getInst())
1554  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1555 
1556  // Remove this local dependency info.
1557  LocalDeps.erase(LocalDepEntry);
1558  }
1559 
1560  // If we have any cached pointer dependencies on this instruction, remove
1561  // them. If the instruction has non-pointer type, then it can't be a pointer
1562  // base.
1563 
1564  // Remove it from both the load info and the store info. The instruction
1565  // can't be in either of these maps if it is non-pointer.
1566  if (RemInst->getType()->isPointerTy()) {
1567  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1568  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1569  }
1570 
1571  // Loop over all of the things that depend on the instruction we're removing.
1572  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1573 
1574  // If we find RemInst as a clobber or Def in any of the maps for other values,
1575  // we need to replace its entry with a dirty version of the instruction after
1576  // it. If RemInst is a terminator, we use a null dirty value.
1577  //
1578  // Using a dirty version of the instruction after RemInst saves having to scan
1579  // the entire block to get to this point.
1580  MemDepResult NewDirtyVal;
1581  if (!RemInst->isTerminator())
1582  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1583 
1584  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1585  if (ReverseDepIt != ReverseLocalDeps.end()) {
1586  // RemInst can't be the terminator if it has local stuff depending on it.
1587  assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1588  "Nothing can locally depend on a terminator");
1589 
1590  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1591  assert(InstDependingOnRemInst != RemInst &&
1592  "Already removed our local dep info");
1593 
1594  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1595 
1596  // Make sure to remember that new things depend on NewDirtyVal's instruction.
1597  assert(NewDirtyVal.getInst() &&
1598  "There is no way something else can have "
1599  "a local dep on this if it is a terminator!");
1600  ReverseDepsToAdd.push_back(
1601  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1602  }
1603 
1604  ReverseLocalDeps.erase(ReverseDepIt);
1605 
1606  // Add new reverse deps after scanning the set, to avoid invalidating the
1607  // 'ReverseDeps' reference.
1608  while (!ReverseDepsToAdd.empty()) {
1609  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1610  ReverseDepsToAdd.back().second);
1611  ReverseDepsToAdd.pop_back();
1612  }
1613  }
1614 
1615  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1616  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1617  for (Instruction *I : ReverseDepIt->second) {
1618  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1619 
1620  PerInstNLInfo &INLD = NonLocalDeps[I];
1621  // The information is now dirty!
1622  INLD.second = true;
1623 
1624  for (auto &Entry : INLD.first) {
1625  if (Entry.getResult().getInst() != RemInst)
1626  continue;
1627 
1628  // Convert to a dirty entry for the subsequent instruction.
1629  Entry.setResult(NewDirtyVal);
1630 
1631  if (Instruction *NextI = NewDirtyVal.getInst())
1632  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1633  }
1634  }
1635 
1636  ReverseNonLocalDeps.erase(ReverseDepIt);
1637 
1638  // Add new reverse deps after scanning the set, to avoid invalidating it.
1639  while (!ReverseDepsToAdd.empty()) {
1640  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1641  ReverseDepsToAdd.back().second);
1642  ReverseDepsToAdd.pop_back();
1643  }
1644  }
1645 
1646  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1647  // value in the NonLocalPointerDeps info.
1648  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1649  ReverseNonLocalPtrDeps.find(RemInst);
1650  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1651  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1652  ReversePtrDepsToAdd;
1653 
1654  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1655  assert(P.getPointer() != RemInst &&
1656  "Already removed NonLocalPointerDeps info for RemInst");
1657 
1658  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1659 
1660  // The cache is not valid for any specific block anymore.
1661  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1662 
1663  // Update any entries for RemInst to use the instruction after it.
1664  for (auto &Entry : NLPDI) {
1665  if (Entry.getResult().getInst() != RemInst)
1666  continue;
1667 
1668  // Convert to a dirty entry for the subsequent instruction.
1669  Entry.setResult(NewDirtyVal);
1670 
1671  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1672  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1673  }
1674 
1675  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1676  // subsequent value may invalidate the sortedness.
1677  llvm::sort(NLPDI);
1678  }
1679 
1680  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1681 
1682  while (!ReversePtrDepsToAdd.empty()) {
1683  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1684  ReversePtrDepsToAdd.back().second);
1685  ReversePtrDepsToAdd.pop_back();
1686  }
1687  }
1688 
1689  // Invalidate phis that use the removed instruction.
1690  PV.invalidateValue(RemInst);
1691 
1692  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1693  LLVM_DEBUG(verifyRemoved(RemInst));
1694 }
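Client-side usage, as a sketch (DeadInst and the enclosing pass are assumed, not taken from this file): a transformation that deletes an instruction should update this analysis before erasing the IR, so the reverse maps walked above never hold a dangling pointer:

    MD.removeInstruction(DeadInst); // fix up local/non-local/reverse caches first
    DeadInst->eraseFromParent();    // only now is it safe to delete the IR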
1695 
1696 /// Verify that the specified instruction does not occur in our internal data
1697 /// structures.
1698 ///
1699 /// This function verifies by asserting in debug builds.
1700 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1701 #ifndef NDEBUG
1702  for (const auto &DepKV : LocalDeps) {
1703  assert(DepKV.first != D && "Inst occurs in data structures");
1704  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1705  }
1706 
1707  for (const auto &DepKV : NonLocalPointerDeps) {
1708  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1709  for (const auto &Entry : DepKV.second.NonLocalDeps)
1710  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1711  }
1712 
1713  for (const auto &DepKV : NonLocalDeps) {
1714  assert(DepKV.first != D && "Inst occurs in data structures");
1715  const PerInstNLInfo &INLD = DepKV.second;
1716  for (const auto &Entry : INLD.first)
1717  assert(Entry.getResult().getInst() != D &&
1718  "Inst occurs in data structures");
1719  }
1720 
1721  for (const auto &DepKV : ReverseLocalDeps) {
1722  assert(DepKV.first != D && "Inst occurs in data structures");
1723  for (Instruction *Inst : DepKV.second)
1724  assert(Inst != D && "Inst occurs in data structures");
1725  }
1726 
1727  for (const auto &DepKV : ReverseNonLocalDeps) {
1728  assert(DepKV.first != D && "Inst occurs in data structures");
1729  for (Instruction *Inst : DepKV.second)
1730  assert(Inst != D && "Inst occurs in data structures");
1731  }
1732 
1733  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1734  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1735 
1736  for (ValueIsLoadPair P : DepKV.second)
1737  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1738  "Inst occurs in ReverseNonLocalPtrDeps map");
1739  }
1740 #endif
1741 }
1742 
1743 AnalysisKey MemoryDependenceAnalysis::Key;
1744 
1745 MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F,
1746                                                       FunctionAnalysisManager &AM) {
1747  auto &AA = AM.getResult<AAManager>(F);
1748  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1749  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1750  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1751  auto &PV = AM.getResult<PhiValuesAnalysis>(F);
1752  return MemoryDependenceResults(AA, AC, TLI, DT, PV);
1753 }
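A minimal new-pass-manager client might look like the sketch below (MyPass and the examine() helper are hypothetical, not part of LLVM):

    PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
      auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
      for (BasicBlock &BB : F)
        for (Instruction &I : BB)
          if (I.mayReadFromMemory() || I.mayWriteToMemory()) {
            MemDepResult Dep = MD.getDependency(&I); // intra-block query
            if (Dep.isDef())
              examine(Dep.getInst());
          }
      return PreservedAnalyses::all();
    }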
1754 
1755 char MemoryDependenceWrapperPass::ID = 0;
1756 
1757 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1758                       "Memory Dependence Analysis", false, true)
1759 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1760 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1761 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1762 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
1763 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1764 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1765                     "Memory Dependence Analysis", false, true)
1766 
1767 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1768  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1769 }
1770 
1771 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1772 
1773 void MemoryDependenceWrapperPass::releaseMemory() {
1774  MemDep.reset();
1775 }
1776 
1777 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1778  AU.setPreservesAll();
1779  AU.addRequired<AssumptionCacheTracker>();
1780  AU.addRequired<DominatorTreeWrapperPass>();
1781  AU.addRequired<PhiValuesWrapperPass>();
1782  AU.addRequiredTransitive<AAResultsWrapperPass>();
1783  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1784 }
1785 
1786 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1787                                          FunctionAnalysisManager::Invalidator &Inv) {
1788  // Check whether our analysis is preserved.
1789  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1790  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1791  // If not, give up now.
1792  return true;
1793 
1794  // Check whether the analyses we depend on became invalid for any reason.
1795  if (Inv.invalidate<AAManager>(F, PA) ||
1796  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1797  Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
1798  Inv.invalidate<PhiValuesAnalysis>(F, PA))
1799  return true;
1800 
1801  // Otherwise this analysis result remains valid.
1802  return false;
1803 }
1804 
1805 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1806  return BlockScanLimit;
1807 }
1808 
1809 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1810  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1811  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1812  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1813  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1814  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
1815  MemDep.emplace(AA, AC, TLI, DT, PV);
1816  return false;
1817 }
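Under the legacy pass manager, a client declares the dependency in getAnalysisUsage() and fetches the results through the wrapper pass. A sketch (MyLegacyPass is hypothetical):

    void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MemoryDependenceWrapperPass>();
    }

    bool MyLegacyPass::runOnFunction(Function &F) {
      MemoryDependenceResults &MD =
          getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
      // ... query MD.getDependency(...) or MD.getNonLocalPointerDependency(...)
      return false;
    }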