1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass munges the code in the input function to better prepare it for
11 // SelectionDAG-based code generation. This works around limitations in its
12 // basic-block-at-a-time approach. It should eventually be removed.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/LoopInfo.h"
35 #include "llvm/CodeGen/Analysis.h"
42 #include "llvm/Config/llvm-config.h"
43 #include "llvm/IR/Argument.h"
44 #include "llvm/IR/Attributes.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/CallSite.h"
47 #include "llvm/IR/Constant.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
54 #include "llvm/IR/GlobalValue.h"
55 #include "llvm/IR/GlobalVariable.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InlineAsm.h"
58 #include "llvm/IR/InstrTypes.h"
59 #include "llvm/IR/Instruction.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/Intrinsics.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/MDBuilder.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/IR/PatternMatch.h"
68 #include "llvm/IR/Statepoint.h"
69 #include "llvm/IR/Type.h"
70 #include "llvm/IR/Use.h"
71 #include "llvm/IR/User.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/IR/ValueHandle.h"
74 #include "llvm/IR/ValueMap.h"
75 #include "llvm/Pass.h"
78 #include "llvm/Support/Casting.h"
80 #include "llvm/Support/Compiler.h"
81 #include "llvm/Support/Debug.h"
91 #include <algorithm>
92 #include <cassert>
93 #include <cstdint>
94 #include <iterator>
95 #include <limits>
96 #include <memory>
97 #include <utility>
98 #include <vector>
99 
100 using namespace llvm;
101 using namespace llvm::PatternMatch;
102 
103 #define DEBUG_TYPE "codegenprepare"
104 
105 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
106 STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
107 STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
108 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
109  "sunken Cmps");
110 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
111  "of sunken Casts");
112 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
113  "computations were sunk");
114 STATISTIC(NumMemoryInstsPhiCreated,
115  "Number of phis created when address "
116  "computations were sunk to memory instructions");
117 STATISTIC(NumMemoryInstsSelectCreated,
118  "Number of selects created when address "
119  "computations were sunk to memory instructions");
120 STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
121 STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
122 STATISTIC(NumAndsAdded,
123  "Number of and mask instructions added to form ext loads");
124 STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
125 STATISTIC(NumRetsDup, "Number of return instructions duplicated");
126 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
127 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
128 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
129 
130 static cl::opt<bool> DisableBranchOpts(
131  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
132  cl::desc("Disable branch optimizations in CodeGenPrepare"));
133 
134 static cl::opt<bool>
135  DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
136  cl::desc("Disable GC optimizations in CodeGenPrepare"));
137 
138 static cl::opt<bool> DisableSelectToBranch(
139  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
140  cl::desc("Disable select to branch conversion."));
141 
142 static cl::opt<bool> AddrSinkUsingGEPs(
143  "addr-sink-using-gep", cl::Hidden, cl::init(true),
144  cl::desc("Address sinking in CGP using GEPs."));
145 
147  "enable-andcmp-sinking", cl::Hidden, cl::init(true),
148  cl::desc("Enable sinking and/cmp into branches."));
149 
150 static cl::opt<bool> DisableStoreExtract(
151  "disable-cgp-store-extract", cl::Hidden, cl::init(false),
152  cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
153 
154 static cl::opt<bool> StressStoreExtract(
155  "stress-cgp-store-extract", cl::Hidden, cl::init(false),
156  cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
157 
158 static cl::opt<bool> DisableExtLdPromotion(
159  "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
160  cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
161  "CodeGenPrepare"));
162 
163 static cl::opt<bool> StressExtLdPromotion(
164  "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
165  cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
166  "optimization in CodeGenPrepare"));
167 
168 static cl::opt<bool> DisablePreheaderProtect(
169  "disable-preheader-prot", cl::Hidden, cl::init(false),
170  cl::desc("Disable protection against removing loop preheaders"));
171 
172 static cl::opt<bool> ProfileGuidedSectionPrefix(
173  "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
174  cl::desc("Use profile info to add section prefix for hot/cold functions"));
175 
176 static cl::opt<unsigned> FreqRatioToSkipMerge(
177  "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
178  cl::desc("Skip merging empty blocks if (frequency of empty block) / "
179  "(frequency of destination block) is greater than this ratio"));
180 
181 static cl::opt<bool> ForceSplitStore(
182  "force-split-store", cl::Hidden, cl::init(false),
183  cl::desc("Force store splitting no matter what the target query says."));
184 
185 static cl::opt<bool>
186 EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
187  cl::desc("Enable merging of redundant sexts when one is dominating"
188  " the other."), cl::init(true));
189 
190 static cl::opt<bool> DisableComplexAddrModes(
191  "disable-complex-addr-modes", cl::Hidden, cl::init(false),
192  cl::desc("Disables combining addressing modes with different parts "
193  "in optimizeMemoryInst."));
194 
195 static cl::opt<bool>
196 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
197  cl::desc("Allow creation of Phis in Address sinking."));
198 
199 static cl::opt<bool>
200 AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
201  cl::desc("Allow creation of selects in Address sinking."));
202 
203 static cl::opt<bool> AddrSinkCombineBaseReg(
204  "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
205  cl::desc("Allow combining of BaseReg field in Address sinking."));
206 
207 static cl::opt<bool> AddrSinkCombineBaseGV(
208  "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
209  cl::desc("Allow combining of BaseGV field in Address sinking."));
210 
211 static cl::opt<bool> AddrSinkCombineBaseOffs(
212  "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
213  cl::desc("Allow combining of BaseOffs field in Address sinking."));
214 
215 static cl::opt<bool> AddrSinkCombineScaledReg(
216  "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
217  cl::desc("Allow combining of ScaledReg field in Address sinking."));
218 
219 static cl::opt<bool>
220  EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
221  cl::init(true),
222  cl::desc("Enable splitting large offset of GEP."));
223 
224 namespace {
225 
226 enum ExtType {
227  ZeroExtension, // Zero extension has been seen.
228  SignExtension, // Sign extension has been seen.
229  BothExtension // This extension type is used if we saw sext after
230  // ZeroExtension had been set, or if we saw zext after
231  // SignExtension had been set. It makes the type
232  // information of a promoted instruction invalid.
233 };
234 
235 using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
236 using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
237 using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
238 using SExts = SmallVector<Instruction *, 16>;
239 using ValueToSExts = DenseMap<Value *, SExts>;
240 
241 class TypePromotionTransaction;
242 
243  class CodeGenPrepare : public FunctionPass {
244  const TargetMachine *TM = nullptr;
245  const TargetSubtargetInfo *SubtargetInfo;
246  const TargetLowering *TLI = nullptr;
247  const TargetRegisterInfo *TRI;
248  const TargetTransformInfo *TTI = nullptr;
249  const TargetLibraryInfo *TLInfo;
250  const LoopInfo *LI;
251  std::unique_ptr<BlockFrequencyInfo> BFI;
252  std::unique_ptr<BranchProbabilityInfo> BPI;
253 
254  /// As we scan instructions optimizing them, this is the next instruction
255  /// to optimize. Transforms that can invalidate this should update it.
256  BasicBlock::iterator CurInstIterator;
257 
258  /// Keeps track of non-local addresses that have been sunk into a block.
259  /// This allows us to avoid inserting duplicate code for blocks with
260  /// multiple load/stores of the same address. The usage of WeakTrackingVH
261  /// enables SunkAddrs to be treated as a cache whose entries can be
262  /// invalidated if a sunken address computation has been erased.
263  ValueMap<Value*, WeakTrackingVH> SunkAddrs;
264 
265  /// Keeps track of all instructions inserted for the current function.
266  SetOfInstrs InsertedInsts;
267 
268  /// Keeps track of the original types of instructions before their
269  /// promotion, for the current function.
270  InstrToOrigTy PromotedInsts;
271 
272  /// Keep track of instructions removed during promotion.
273  SetOfInstrs RemovedInsts;
274 
275  /// Keep track of sext chains based on their initial value.
276  DenseMap<Value *, Instruction *> SeenChainsForSExt;
277 
278  /// Keep track of GEPs accessing the same data structures such as structs or
279  /// arrays that are candidates to be split later because of their large
280  /// size.
281  MapVector<
282  AssertingVH<Value>,
283  SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
284  LargeOffsetGEPMap;
285 
286  /// Keep track of new GEP base after splitting the GEPs having large offset.
287  SmallSet<AssertingVH<Value>, 2> NewGEPBases;
288 
289  /// Map serial numbers to Large offset GEPs.
290  DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
291 
292  /// Keep track of promoted SExt instructions.
293  ValueToSExts ValToSExtendedUses;
294 
295  /// True if CFG is modified in any way.
296  bool ModifiedDT;
297 
298  /// True if optimizing for size.
299  bool OptSize;
300 
301  /// DataLayout for the Function being processed.
302  const DataLayout *DL = nullptr;
303 
304  public:
305  static char ID; // Pass identification, replacement for typeid
306 
307  CodeGenPrepare() : FunctionPass(ID) {
308  initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
309  }
310 
311  bool runOnFunction(Function &F) override;
312 
313  StringRef getPassName() const override { return "CodeGen Prepare"; }
314 
315  void getAnalysisUsage(AnalysisUsage &AU) const override {
316  // FIXME: When we can selectively preserve passes, preserve the domtree.
317  AU.addRequired<ProfileSummaryInfoWrapperPass>();
318  AU.addRequired<TargetLibraryInfoWrapperPass>();
319  AU.addRequired<TargetTransformInfoWrapperPass>();
320  AU.addRequired<LoopInfoWrapperPass>();
321  }
322 
323  private:
324  template <typename F>
325  void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
326  // Substituting can cause recursive simplifications, which can invalidate
327  // our iterator. Use a WeakTrackingVH to hold onto it in case this
328  // happens.
329  Value *CurValue = &*CurInstIterator;
330  WeakTrackingVH IterHandle(CurValue);
331 
332  f();
333 
334  // If the iterator instruction was recursively deleted, start over at the
335  // start of the block.
336  if (IterHandle != CurValue) {
337  CurInstIterator = BB->begin();
338  SunkAddrs.clear();
339  }
340  }
341 
342  bool eliminateFallThrough(Function &F);
343  bool eliminateMostlyEmptyBlocks(Function &F);
344  BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
345  bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
346  void eliminateMostlyEmptyBlock(BasicBlock *BB);
347  bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
348  bool isPreheader);
349  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
350  bool optimizeInst(Instruction *I, bool &ModifiedDT);
351  bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
352  Type *AccessTy, unsigned AddrSpace);
353  bool optimizeInlineAsmInst(CallInst *CS);
354  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
355  bool optimizeExt(Instruction *&I);
356  bool optimizeExtUses(Instruction *I);
357  bool optimizeLoadExt(LoadInst *Load);
358  bool optimizeSelectInst(SelectInst *SI);
359  bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
360  bool optimizeSwitchInst(SwitchInst *SI);
361  bool optimizeExtractElementInst(Instruction *Inst);
362  bool dupRetToEnableTailCallOpts(BasicBlock *BB);
363  bool placeDbgValues(Function &F);
364  bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
365  LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
366  bool tryToPromoteExts(TypePromotionTransaction &TPT,
367  const SmallVectorImpl<Instruction *> &Exts,
368  SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
369  unsigned CreatedInstsCost = 0);
370  bool mergeSExts(Function &F);
371  bool splitLargeGEPOffsets();
372  bool performAddressTypePromotion(
373  Instruction *&Inst,
374  bool AllowPromotionWithoutCommonHeader,
375  bool HasPromoted, TypePromotionTransaction &TPT,
376  SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
377  bool splitBranchCondition(Function &F);
378  bool simplifyOffsetableRelocate(Instruction &I);
379  };
380 
381 } // end anonymous namespace
382 
383 char CodeGenPrepare::ID = 0;
384 
385 INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
386  "Optimize for code generation", false, false)
387 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
388 INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
389  "Optimize for code generation", false, false)
390 
391 FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
392 
393 bool CodeGenPrepare::runOnFunction(Function &F) {
394  if (skipFunction(F))
395  return false;
396 
397  DL = &F.getParent()->getDataLayout();
398 
399  bool EverMadeChange = false;
400  // Clear per function information.
401  InsertedInsts.clear();
402  PromotedInsts.clear();
403 
404  ModifiedDT = false;
405  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
406  TM = &TPC->getTM<TargetMachine>();
407  SubtargetInfo = TM->getSubtargetImpl(F);
408  TLI = SubtargetInfo->getTargetLowering();
409  TRI = SubtargetInfo->getRegisterInfo();
410  }
411  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
412  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
413  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
414  BPI.reset(new BranchProbabilityInfo(F, *LI));
415  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
416  OptSize = F.optForSize();
417 
418  ProfileSummaryInfo *PSI =
419  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
420  if (ProfileGuidedSectionPrefix) {
421  if (PSI->isFunctionHotInCallGraph(&F, *BFI))
422  F.setSectionPrefix(".hot");
423  else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
424  F.setSectionPrefix(".unlikely");
425  }
426 
427  /// This optimization identifies DIV instructions that can be
428  /// profitably bypassed and carried out with a shorter, faster divide.
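 // For example (illustrative sketch only; the exact IR produced by
 // bypassSlowDivision and the widths involved depend on the target):
 //
 //   %res = udiv i64 %a, %b
 // becomes, roughly:
 //   %or   = or i64 %a, %b
 //   %hi   = lshr i64 %or, 32
 //   %fits = icmp eq i64 %hi, 0
 //   br i1 %fits, label %fast, label %slow   ; fast path does a 32-bit udiv,
 //                                           ; slow path keeps the i64 udiv,
 //                                           ; results merged with a phi.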
429  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
430  TLI->isSlowDivBypassed()) {
431  const DenseMap<unsigned int, unsigned int> &BypassWidths =
432  TLI->getBypassSlowDivWidths();
433  BasicBlock* BB = &*F.begin();
434  while (BB != nullptr) {
435  // bypassSlowDivision may create new BBs, but we don't want to reapply the
436  // optimization to those blocks.
437  BasicBlock* Next = BB->getNextNode();
438  EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
439  BB = Next;
440  }
441  }
442 
443  // Eliminate blocks that contain only PHI nodes and an
444  // unconditional branch.
445  EverMadeChange |= eliminateMostlyEmptyBlocks(F);
446 
447  if (!DisableBranchOpts)
448  EverMadeChange |= splitBranchCondition(F);
449 
450  // Split some critical edges where one of the sources is an indirect branch,
451  // to help generate sane code for PHIs involving such edges.
452  EverMadeChange |= SplitIndirectBrCriticalEdges(F);
453 
454  bool MadeChange = true;
455  while (MadeChange) {
456  MadeChange = false;
457  for (Function::iterator I = F.begin(); I != F.end(); ) {
458  BasicBlock *BB = &*I++;
459  bool ModifiedDTOnIteration = false;
460  MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
461 
462  // Restart BB iteration if the dominator tree of the Function was changed
463  if (ModifiedDTOnIteration)
464  break;
465  }
466  if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
467  MadeChange |= mergeSExts(F);
468  if (!LargeOffsetGEPMap.empty())
469  MadeChange |= splitLargeGEPOffsets();
470 
471  // Really free removed instructions during promotion.
472  for (Instruction *I : RemovedInsts)
473  I->deleteValue();
474 
475  EverMadeChange |= MadeChange;
476  SeenChainsForSExt.clear();
477  ValToSExtendedUses.clear();
478  RemovedInsts.clear();
479  LargeOffsetGEPMap.clear();
480  LargeOffsetGEPID.clear();
481  }
482 
483  SunkAddrs.clear();
484 
485  if (!DisableBranchOpts) {
486  MadeChange = false;
487  // Use a set vector to get deterministic iteration order. The order the
488  // blocks are removed may affect whether or not PHI nodes in successors
489  // are removed.
490  SmallSetVector<BasicBlock*, 8> WorkList;
491  for (BasicBlock &BB : F) {
492  SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
493  MadeChange |= ConstantFoldTerminator(&BB, true);
494  if (!MadeChange) continue;
495 
496  for (SmallVectorImpl<BasicBlock*>::iterator
497  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
498  if (pred_begin(*II) == pred_end(*II))
499  WorkList.insert(*II);
500  }
501 
502  // Delete the dead blocks and any of their dead successors.
503  MadeChange |= !WorkList.empty();
504  while (!WorkList.empty()) {
505  BasicBlock *BB = WorkList.pop_back_val();
506  SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
507 
508  DeleteDeadBlock(BB);
509 
510  for (SmallVectorImpl<BasicBlock*>::iterator
511  II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
512  if (pred_begin(*II) == pred_end(*II))
513  WorkList.insert(*II);
514  }
515 
516  // Merge pairs of basic blocks with unconditional branches, connected by
517  // a single edge.
518  if (EverMadeChange || MadeChange)
519  MadeChange |= eliminateFallThrough(F);
520 
521  EverMadeChange |= MadeChange;
522  }
523 
524  if (!DisableGCOpts) {
525  SmallVector<Instruction *, 2> Statepoints;
526  for (BasicBlock &BB : F)
527  for (Instruction &I : BB)
528  if (isStatepoint(I))
529  Statepoints.push_back(&I);
530  for (auto &I : Statepoints)
531  EverMadeChange |= simplifyOffsetableRelocate(*I);
532  }
533 
534  // Do this last to clean up use-before-def scenarios introduced by other
535  // preparatory transforms.
536  EverMadeChange |= placeDbgValues(F);
537 
538  return EverMadeChange;
539 }
540 
541 /// Merge basic blocks which are connected by a single edge, where one of the
542 /// basic blocks has a single successor pointing to the other basic block,
543 /// which has a single predecessor.
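/// For example (CFG sketch; assuming BB has no other predecessors and its
/// address is not taken):
///
///   Pred:                        Pred:
///     ...                          ...
///     br label %BB         =>      <BB's instructions>
///   BB:                            ...
///     ...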
544 bool CodeGenPrepare::eliminateFallThrough(Function &F) {
545  bool Changed = false;
546  // Scan all of the blocks in the function, except for the entry block.
547  // Use a temporary array to avoid iterator being invalidated when
548  // deleting blocks.
549  SmallVector<WeakTrackingVH, 16> Blocks;
550  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
551  Blocks.push_back(&Block);
552 
553  for (auto &Block : Blocks) {
554  auto *BB = cast_or_null<BasicBlock>(Block);
555  if (!BB)
556  continue;
557  // If the destination block has a single pred, then this is a trivial
558  // edge, just collapse it.
559  BasicBlock *SinglePred = BB->getSinglePredecessor();
560 
561  // Don't merge if BB's address is taken.
562  if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
563 
564  BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
565  if (Term && !Term->isConditional()) {
566  Changed = true;
567  LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
568 
569  // Merge BB into SinglePred and delete it.
571  }
572  }
573  return Changed;
574 }
575 
576 /// Find a destination block from BB if BB is mergeable empty block.
577 BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
578  // If this block doesn't end with an uncond branch, ignore it.
579  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
580  if (!BI || !BI->isUnconditional())
581  return nullptr;
582 
583  // If the instruction before the branch (skipping debug info) isn't a phi
584  // node, then other stuff is happening here.
585  BasicBlock::iterator BBI = BI->getIterator();
586  if (BBI != BB->begin()) {
587  --BBI;
588  while (isa<DbgInfoIntrinsic>(BBI)) {
589  if (BBI == BB->begin())
590  break;
591  --BBI;
592  }
593  if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
594  return nullptr;
595  }
596 
597  // Do not break infinite loops.
598  BasicBlock *DestBB = BI->getSuccessor(0);
599  if (DestBB == BB)
600  return nullptr;
601 
602  if (!canMergeBlocks(BB, DestBB))
603  DestBB = nullptr;
604 
605  return DestBB;
606 }
607 
608 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
609 /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
610 /// edges in ways that are non-optimal for isel. Start by eliminating these
611 /// blocks so we can split them the way we want them.
612 bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
613  SmallPtrSet<BasicBlock *, 16> Preheaders;
614  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
615  while (!LoopList.empty()) {
616  Loop *L = LoopList.pop_back_val();
617  LoopList.insert(LoopList.end(), L->begin(), L->end());
618  if (BasicBlock *Preheader = L->getLoopPreheader())
619  Preheaders.insert(Preheader);
620  }
621 
622  bool MadeChange = false;
623  // Copy blocks into a temporary array to avoid iterator invalidation issues
624  // as we remove them.
625  // Note that this intentionally skips the entry block.
626  SmallVector<WeakTrackingVH, 16> Blocks;
627  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
628  Blocks.push_back(&Block);
629 
630  for (auto &Block : Blocks) {
631  BasicBlock *BB = cast_or_null<BasicBlock>(Block);
632  if (!BB)
633  continue;
634  BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
635  if (!DestBB ||
636  !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
637  continue;
638 
639  eliminateMostlyEmptyBlock(BB);
640  MadeChange = true;
641  }
642  return MadeChange;
643 }
644 
645 bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
646  BasicBlock *DestBB,
647  bool isPreheader) {
648  // Do not delete loop preheaders if doing so would create a critical edge.
649  // Loop preheaders can be good locations to spill registers. If the
650  // preheader is deleted and we create a critical edge, registers may be
651  // spilled in the loop body instead.
652  if (!DisablePreheaderProtect && isPreheader &&
653  !(BB->getSinglePredecessor() &&
654  BB->getSinglePredecessor()->getSinglePredecessor()))
655  return false;
656 
657  // Try to skip merging if the unique predecessor of BB is terminated by a
658  // switch or indirect branch instruction, and BB is used as an incoming block
659  // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
660  // add COPY instructions in the predecessor of BB instead of BB (if it is not
661  // merged). Note that the critical edge created by merging such blocks won't be
662  // split in MachineSink because the jump table is not analyzable. By keeping
663  // such empty block (BB), ISel will place COPY instructions in BB, not in the
664  // predecessor of BB.
665  BasicBlock *Pred = BB->getUniquePredecessor();
666  if (!Pred ||
667  !(isa<SwitchInst>(Pred->getTerminator()) ||
668  isa<IndirectBrInst>(Pred->getTerminator())))
669  return true;
670 
671  if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
672  return true;
673 
674  // We use a simple cost heuristic which determines that skipping merging is
675  // profitable if the cost of skipping merging is less than the cost of
676  // merging: Cost(skipping merging) < Cost(merging BB), where the
677  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
678  // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
679  // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
680  // Freq(Pred) / Freq(BB) > 2.
681  // Note that if there are multiple empty blocks sharing the same incoming
682  // value for the PHIs in the DestBB, we consider them together. In such
683  // case, Cost(merging BB) will be the sum of their frequencies.
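 // For example (illustrative numbers only): with the default
 // cgp-freq-ratio-to-skip-merge of 2, if Freq(Pred) = 300 and Freq(BB) = 100
 // then 300 > 2 * 100, so skipping the merge is considered cheaper and this
 // function returns false; with Freq(Pred) = 150 the merge goes ahead.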
684 
685  if (!isa<PHINode>(DestBB->begin()))
686  return true;
687 
688  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
689 
690  // Find all other incoming blocks from which incoming values of all PHIs in
691  // DestBB are the same as the ones from BB.
692  for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
693  ++PI) {
694  BasicBlock *DestBBPred = *PI;
695  if (DestBBPred == BB)
696  continue;
697 
698  if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
699  return DestPN.getIncomingValueForBlock(BB) ==
700  DestPN.getIncomingValueForBlock(DestBBPred);
701  }))
702  SameIncomingValueBBs.insert(DestBBPred);
703  }
704 
705  // See if all of BB's incoming values are the same as the value from Pred. In this
706  // case, there is no reason to skip merging because COPYs are expected to be placed in
707  // Pred already.
708  if (SameIncomingValueBBs.count(Pred))
709  return true;
710 
711  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
712  BlockFrequency BBFreq = BFI->getBlockFreq(BB);
713 
714  for (auto SameValueBB : SameIncomingValueBBs)
715  if (SameValueBB->getUniquePredecessor() == Pred &&
716  DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
717  BBFreq += BFI->getBlockFreq(SameValueBB);
718 
719  return PredFreq.getFrequency() <=
720  BBFreq.getFrequency() * FreqRatioToSkipMerge;
721 }
722 
723 /// Return true if we can merge BB into DestBB if there is a single
724 /// unconditional branch between them, and BB contains no other non-phi
725 /// instructions.
726 bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
727  const BasicBlock *DestBB) const {
728  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
729  // the successor. If there are more complex conditions (e.g. preheaders),
730  // don't mess around with them.
731  for (const PHINode &PN : BB->phis()) {
732  for (const User *U : PN.users()) {
733  const Instruction *UI = cast<Instruction>(U);
734  if (UI->getParent() != DestBB || !isa<PHINode>(UI))
735  return false;
736  // If User is inside DestBB block and it is a PHINode then check
737  // incoming value. If incoming value is not from BB then this is
738  // a complex condition (e.g. preheaders) we want to avoid here.
739  if (UI->getParent() == DestBB) {
740  if (const PHINode *UPN = dyn_cast<PHINode>(UI))
741  for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
742  Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
743  if (Insn && Insn->getParent() == BB &&
744  Insn->getParent() != UPN->getIncomingBlock(I))
745  return false;
746  }
747  }
748  }
749  }
750 
751  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
752  // and DestBB may have conflicting incoming values for the block. If so, we
753  // can't merge the block.
754  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
755  if (!DestBBPN) return true; // no conflict.
756 
757  // Collect the preds of BB.
758  SmallPtrSet<const BasicBlock *, 16> BBPreds;
759  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
760  // It is faster to get preds from a PHI than with pred_iterator.
761  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
762  BBPreds.insert(BBPN->getIncomingBlock(i));
763  } else {
764  BBPreds.insert(pred_begin(BB), pred_end(BB));
765  }
766 
767  // Walk the preds of DestBB.
768  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
769  BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
770  if (BBPreds.count(Pred)) { // Common predecessor?
771  for (const PHINode &PN : DestBB->phis()) {
772  const Value *V1 = PN.getIncomingValueForBlock(Pred);
773  const Value *V2 = PN.getIncomingValueForBlock(BB);
774 
775  // If V2 is a phi node in BB, look up what the mapped value will be.
776  if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
777  if (V2PN->getParent() == BB)
778  V2 = V2PN->getIncomingValueForBlock(Pred);
779 
780  // If there is a conflict, bail out.
781  if (V1 != V2) return false;
782  }
783  }
784  }
785 
786  return true;
787 }
788 
789 /// Eliminate a basic block that has only phi's and an unconditional branch in
790 /// it.
791 void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
792  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
793  BasicBlock *DestBB = BI->getSuccessor(0);
794 
795  LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
796  << *BB << *DestBB);
797 
798  // If the destination block has a single pred, then this is a trivial edge,
799  // just collapse it.
800  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
801  if (SinglePred != DestBB) {
802  assert(SinglePred == BB &&
803  "Single predecessor not the same as predecessor");
804  // Merge DestBB into SinglePred/BB and delete it.
806  // Note: BB(=SinglePred) will not be deleted on this path.
807  // DestBB(=its single successor) is the one that was deleted.
808  LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
809  return;
810  }
811  }
812 
813  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
814  // to handle the new incoming edges it is about to have.
815  for (PHINode &PN : DestBB->phis()) {
816  // Remove the incoming value for BB, and remember it.
817  Value *InVal = PN.removeIncomingValue(BB, false);
818 
819  // Two options: either the InVal is a phi node defined in BB or it is some
820  // value that dominates BB.
821  PHINode *InValPhi = dyn_cast<PHINode>(InVal);
822  if (InValPhi && InValPhi->getParent() == BB) {
823  // Add all of the input values of the input PHI as inputs of this phi.
824  for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
825  PN.addIncoming(InValPhi->getIncomingValue(i),
826  InValPhi->getIncomingBlock(i));
827  } else {
828  // Otherwise, add one instance of the dominating value for each edge that
829  // we will be adding.
830  if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
831  for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
832  PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
833  } else {
834  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
835  PN.addIncoming(InVal, *PI);
836  }
837  }
838  }
839 
840  // The PHIs are now updated, change everything that refers to BB to use
841  // DestBB and remove BB.
842  BB->replaceAllUsesWith(DestBB);
843  BB->eraseFromParent();
844  ++NumBlocksElim;
845 
846  LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
847 }
848 
849 // Computes a map of base pointer relocation instructions to corresponding
850 // derived pointer relocation instructions given a vector of all relocate calls
851 static void computeBaseDerivedRelocateMap(
852  const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
853  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
854  &RelocateInstMap) {
855  // Collect information in two maps: one primarily for locating the base object
856  // while filling the second map; the second map is the final structure holding
857  // a mapping between Base and corresponding Derived relocate calls
858  MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
859  for (auto *ThisRelocate : AllRelocateCalls) {
860  auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
861  ThisRelocate->getDerivedPtrIndex());
862  RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
863  }
864  for (auto &Item : RelocateIdxMap) {
865  std::pair<unsigned, unsigned> Key = Item.first;
866  if (Key.first == Key.second)
867  // Base relocation: nothing to insert
868  continue;
869 
870  GCRelocateInst *I = Item.second;
871  auto BaseKey = std::make_pair(Key.first, Key.first);
872 
873  // We're iterating over RelocateIdxMap so we cannot modify it.
874  auto MaybeBase = RelocateIdxMap.find(BaseKey);
875  if (MaybeBase == RelocateIdxMap.end())
876  // TODO: We might want to insert a new base object relocate and gep off
877  // that, if there are enough derived object relocates.
878  continue;
879 
880  RelocateInstMap[MaybeBase->second].push_back(I);
881  }
882 }
883 
884 // Accepts a GEP and extracts the operands into a vector provided they're all
885 // small integer constants
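// For example (sketch): in
//   %p = getelementptr %struct.S, %struct.S* %base, i32 0, i32 3
// both indices are ConstantInts no larger than 20, so they are collected into
// OffsetV; a GEP with a variable index, or a constant index such as 100, is
// rejected.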
886 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
887  SmallVectorImpl<Value *> &OffsetV) {
888  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
889  // Only accept small constant integer operands
890  auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
891  if (!Op || Op->getZExtValue() > 20)
892  return false;
893  }
894 
895  for (unsigned i = 1; i < GEP->getNumOperands(); i++)
896  OffsetV.push_back(GEP->getOperand(i));
897  return true;
898 }
899 
900 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
901 // replace, computes a replacement, and affects it.
902 static bool
903 simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
904  const SmallVectorImpl<GCRelocateInst *> &Targets) {
905  bool MadeChange = false;
906  // We must ensure the relocation of derived pointer is defined after
907  // relocation of base pointer. If we find a relocation corresponding to base
908  // defined earlier than relocation of base then we move relocation of base
909  // right before found relocation. We consider only relocation in the same
910  // basic block as relocation of base. Relocations from other basic block will
911  // be skipped by optimization and we do not care about them.
912  for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
913  &*R != RelocatedBase; ++R)
914  if (auto RI = dyn_cast<GCRelocateInst>(R))
915  if (RI->getStatepoint() == RelocatedBase->getStatepoint())
916  if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
917  RelocatedBase->moveBefore(RI);
918  break;
919  }
920 
921  for (GCRelocateInst *ToReplace : Targets) {
922  assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
923  "Not relocating a derived object of the original base object");
924  if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
925  // A duplicate relocate call. TODO: coalesce duplicates.
926  continue;
927  }
928 
929  if (RelocatedBase->getParent() != ToReplace->getParent()) {
930  // Base and derived relocates are in different basic blocks.
931  // In this case transform is only valid when base dominates derived
932  // relocate. However it would be too expensive to check dominance
933  // for each such relocate, so we skip the whole transformation.
934  continue;
935  }
936 
937  Value *Base = ToReplace->getBasePtr();
938  auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
939  if (!Derived || Derived->getPointerOperand() != Base)
940  continue;
941 
942  SmallVector<Value *, 2> OffsetV;
943  if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
944  continue;
945 
946  // Create a Builder and replace the target callsite with a gep
947  assert(RelocatedBase->getNextNode() &&
948  "Should always have one since it's not a terminator");
949 
950  // Insert after RelocatedBase
951  IRBuilder<> Builder(RelocatedBase->getNextNode());
952  Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
953 
954  // If gc_relocate does not match the actual type, cast it to the right type.
955  // In theory, there must be a bitcast after gc_relocate if the type does not
956  // match, and we should reuse it to get the derived pointer. But it could be
957  // cases like this:
958  // bb1:
959  // ...
960  // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
961  // br label %merge
962  //
963  // bb2:
964  // ...
965  // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
966  // br label %merge
967  //
968  // merge:
969  // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
970  // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
971  //
972  // In this case, we cannot find the bitcast any more. So we insert a new bitcast
973  // no matter whether there is already one or not. In this way, we can handle all
974  // cases, and the extra bitcast should be optimized away in later passes.
975  Value *ActualRelocatedBase = RelocatedBase;
976  if (RelocatedBase->getType() != Base->getType()) {
977  ActualRelocatedBase =
978  Builder.CreateBitCast(RelocatedBase, Base->getType());
979  }
980  Value *Replacement = Builder.CreateGEP(
981  Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
982  Replacement->takeName(ToReplace);
983  // If the newly generated derived pointer's type does not match the original derived
984  // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
985  Value *ActualReplacement = Replacement;
986  if (Replacement->getType() != ToReplace->getType()) {
987  ActualReplacement =
988  Builder.CreateBitCast(Replacement, ToReplace->getType());
989  }
990  ToReplace->replaceAllUsesWith(ActualReplacement);
991  ToReplace->eraseFromParent();
992 
993  MadeChange = true;
994  }
995  return MadeChange;
996 }
997 
998 // Turns this:
999 //
1000 // %base = ...
1001 // %ptr = gep %base + 15
1002 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1003 // %base' = relocate(%tok, i32 4, i32 4)
1004 // %ptr' = relocate(%tok, i32 4, i32 5)
1005 // %val = load %ptr'
1006 //
1007 // into this:
1008 //
1009 // %base = ...
1010 // %ptr = gep %base + 15
1011 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1012 // %base' = gc.relocate(%tok, i32 4, i32 4)
1013 // %ptr' = gep %base' + 15
1014 // %val = load %ptr'
1015 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1016  bool MadeChange = false;
1017  SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1018 
1019  for (auto *U : I.users())
1020  if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1021  // Collect all the relocate calls associated with a statepoint
1022  AllRelocateCalls.push_back(Relocate);
1023 
1024  // We need at least one base pointer relocation + one derived pointer
1025  // relocation to mangle
1026  if (AllRelocateCalls.size() < 2)
1027  return false;
1028 
1029  // RelocateInstMap is a mapping from the base relocate instruction to the
1030  // corresponding derived relocate instructions
1031  DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1032  computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1033  if (RelocateInstMap.empty())
1034  return false;
1035 
1036  for (auto &Item : RelocateInstMap)
1037  // Item.first is the RelocatedBase to offset against
1038  // Item.second is the vector of Targets to replace
1039  MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1040  return MadeChange;
1041 }
1042 
1043 /// SinkCast - Sink the specified cast instruction into its user blocks
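///
/// For example (sketch; the sunk cast created here is actually unnamed, the
/// names below are just for illustration):
///
///   DefBB:
///     %c = bitcast i8* %p to i32*
///   UseBB:
///     %v = load i32, i32* %c
/// =>
///   UseBB:
///     %c.sunk = bitcast i8* %p to i32*
///     %v = load i32, i32* %c.sunk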
1044 static bool SinkCast(CastInst *CI) {
1045  BasicBlock *DefBB = CI->getParent();
1046 
1047  /// InsertedCasts - Only insert a cast in each block once.
1048  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1049 
1050  bool MadeChange = false;
1051  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1052  UI != E; ) {
1053  Use &TheUse = UI.getUse();
1054  Instruction *User = cast<Instruction>(*UI);
1055 
1056  // Figure out which BB this cast is used in. For PHI's this is the
1057  // appropriate predecessor block.
1058  BasicBlock *UserBB = User->getParent();
1059  if (PHINode *PN = dyn_cast<PHINode>(User)) {
1060  UserBB = PN->getIncomingBlock(TheUse);
1061  }
1062 
1063  // Preincrement use iterator so we don't invalidate it.
1064  ++UI;
1065 
1066  // The first insertion point of a block containing an EH pad is after the
1067  // pad. If the pad is the user, we cannot sink the cast past the pad.
1068  if (User->isEHPad())
1069  continue;
1070 
1071  // If the block selected to receive the cast is an EH pad that does not
1072  // allow non-PHI instructions before the terminator, we can't sink the
1073  // cast.
1074  if (UserBB->getTerminator()->isEHPad())
1075  continue;
1076 
1077  // If this user is in the same block as the cast, don't change the cast.
1078  if (UserBB == DefBB) continue;
1079 
1080  // If we have already inserted a cast into this block, use it.
1081  CastInst *&InsertedCast = InsertedCasts[UserBB];
1082 
1083  if (!InsertedCast) {
1084  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1085  assert(InsertPt != UserBB->end());
1086  InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1087  CI->getType(), "", &*InsertPt);
1088  InsertedCast->setDebugLoc(CI->getDebugLoc());
1089  }
1090 
1091  // Replace a use of the cast with a use of the new cast.
1092  TheUse = InsertedCast;
1093  MadeChange = true;
1094  ++NumCastUses;
1095  }
1096 
1097  // If we removed all uses, nuke the cast.
1098  if (CI->use_empty()) {
1099  salvageDebugInfo(*CI);
1100  CI->eraseFromParent();
1101  MadeChange = true;
1102  }
1103 
1104  return MadeChange;
1105 }
1106 
1107 /// If the specified cast instruction is a noop copy (e.g. it's casting from
1108 /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1109 /// reduce the number of virtual registers that must be created and coalesced.
1110 ///
1111 /// Return true if any changes are made.
1112 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1113  const DataLayout &DL) {
1114  // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1115  // than sinking only nop casts, but is helpful on some platforms.
1116  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1117  if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
1118  ASC->getDestAddressSpace()))
1119  return false;
1120  }
1121 
1122  // If this is a noop copy,
1123  EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1124  EVT DstVT = TLI.getValueType(DL, CI->getType());
1125 
1126  // This is an fp<->int conversion?
1127  if (SrcVT.isInteger() != DstVT.isInteger())
1128  return false;
1129 
1130  // If this is an extension, it will be a zero or sign extension, which
1131  // isn't a noop.
1132  if (SrcVT.bitsLT(DstVT)) return false;
1133 
1134  // If these values will be promoted, find out what they will be promoted
1135  // to. This helps us consider truncates on PPC as noop copies when they
1136  // are.
1137  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1138  TargetLowering::TypePromoteInteger)
1139  SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1140  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1141  TargetLowering::TypePromoteInteger)
1142  DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1143 
1144  // If, after promotion, these are the same types, this is a noop copy.
1145  if (SrcVT != DstVT)
1146  return false;
1147 
1148  return SinkCast(CI);
1149 }
1150 
1151 /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
1152 /// possible.
1153 ///
1154 /// Return true if any changes were made.
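///
/// For example (sketch; value names are illustrative):
///
///   %add = add i32 %a, %b
///   %cmp = icmp ult i32 %add, %a        ; unsigned overflow check
/// =>
///   %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %add  = extractvalue { i32, i1 } %uadd, 0
///   %cmp  = extractvalue { i32, i1 } %uadd, 1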
1155 static bool CombineUAddWithOverflow(CmpInst *CI) {
1156  Value *A, *B;
1157  Instruction *AddI;
1158  if (!match(CI,
1159  m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
1160  return false;
1161 
1162  Type *Ty = AddI->getType();
1163  if (!isa<IntegerType>(Ty))
1164  return false;
1165 
1166  // We don't want to move around uses of condition values this late, so we
1167  // check if it is legal to create the call to the intrinsic in the basic
1168  // block containing the icmp:
1169 
1170  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
1171  return false;
1172 
1173 #ifndef NDEBUG
1174  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
1175  // for now:
1176  if (AddI->hasOneUse())
1177  assert(*AddI->user_begin() == CI && "expected!");
1178 #endif
1179 
1180  Module *M = CI->getModule();
1181  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
1182 
1183  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
1184 
1185  DebugLoc Loc = CI->getDebugLoc();
1186  auto *UAddWithOverflow =
1187  CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
1188  UAddWithOverflow->setDebugLoc(Loc);
1189  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
1190  UAdd->setDebugLoc(Loc);
1191  auto *Overflow =
1192  ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
1193  Overflow->setDebugLoc(Loc);
1194 
1195  CI->replaceAllUsesWith(Overflow);
1196  AddI->replaceAllUsesWith(UAdd);
1197  CI->eraseFromParent();
1198  AddI->eraseFromParent();
1199  return true;
1200 }
1201 
1202 /// Sink the given CmpInst into user blocks to reduce the number of virtual
1203 /// registers that must be created and coalesced. This is a clear win except on
1204 /// targets with multiple condition code registers (PowerPC), where it might
1205 /// lose; some adjustment may be wanted there.
1206 ///
1207 /// Return true if any changes are made.
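///
/// For example (sketch; names are illustrative):
///
///   DefBB:
///     %c = icmp eq i32 %x, 0
///   UseBB:
///     br i1 %c, label %t, label %f
/// =>
///   UseBB:
///     %c.sunk = icmp eq i32 %x, 0
///     br i1 %c.sunk, label %t, label %f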
1208 static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1209  BasicBlock *DefBB = CI->getParent();
1210 
1211  // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1212  if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
1213  return false;
1214 
1215  // Only insert a cmp in each block once.
1216  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1217 
1218  bool MadeChange = false;
1219  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1220  UI != E; ) {
1221  Use &TheUse = UI.getUse();
1222  Instruction *User = cast<Instruction>(*UI);
1223 
1224  // Preincrement use iterator so we don't invalidate it.
1225  ++UI;
1226 
1227  // Don't bother for PHI nodes.
1228  if (isa<PHINode>(User))
1229  continue;
1230 
1231  // Figure out which BB this cmp is used in.
1232  BasicBlock *UserBB = User->getParent();
1233 
1234  // If this user is in the same block as the cmp, don't change the cmp.
1235  if (UserBB == DefBB) continue;
1236 
1237  // If we have already inserted a cmp into this block, use it.
1238  CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1239 
1240  if (!InsertedCmp) {
1241  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1242  assert(InsertPt != UserBB->end());
1243  InsertedCmp =
1244  CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
1245  CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
1246  // Propagate the debug info.
1247  InsertedCmp->setDebugLoc(CI->getDebugLoc());
1248  }
1249 
1250  // Replace a use of the cmp with a use of the new cmp.
1251  TheUse = InsertedCmp;
1252  MadeChange = true;
1253  ++NumCmpUses;
1254  }
1255 
1256  // If we removed all uses, nuke the cmp.
1257  if (CI->use_empty()) {
1258  CI->eraseFromParent();
1259  MadeChange = true;
1260  }
1261 
1262  return MadeChange;
1263 }
1264 
1265 static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1266  if (SinkCmpExpression(CI, TLI))
1267  return true;
1268 
1269  if (CombineUAddWithOverflow(CI))
1270  return true;
1271 
1272  return false;
1273 }
1274 
1275 /// Duplicate and sink the given 'and' instruction into user blocks where it is
1276 /// used in a compare to allow isel to generate better code for targets where
1277 /// this operation can be combined.
1278 ///
1279 /// Return true if any changes are made.
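///
/// For example (sketch; names are illustrative):
///
///   DefBB:
///     %and = and i32 %x, 255
///   UseBB:
///     %cmp = icmp eq i32 %and, 0
/// =>
///   UseBB:
///     %and.sunk = and i32 %x, 255
///     %cmp = icmp eq i32 %and.sunk, 0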
1280 static bool sinkAndCmp0Expression(Instruction *AndI,
1281  const TargetLowering &TLI,
1282  SetOfInstrs &InsertedInsts) {
1283  // Double-check that we're not trying to optimize an instruction that was
1284  // already optimized by some other part of this pass.
1285  assert(!InsertedInsts.count(AndI) &&
1286  "Attempting to optimize already optimized and instruction");
1287  (void) InsertedInsts;
1288 
1289  // Nothing to do for single use in same basic block.
1290  if (AndI->hasOneUse() &&
1291  AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1292  return false;
1293 
1294  // Try to avoid cases where sinking/duplicating is likely to increase register
1295  // pressure.
1296  if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1297  !isa<ConstantInt>(AndI->getOperand(1)) &&
1298  AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1299  return false;
1300 
1301  for (auto *U : AndI->users()) {
1302  Instruction *User = cast<Instruction>(U);
1303 
1304  // Only sink for and mask feeding icmp with 0.
1305  if (!isa<ICmpInst>(User))
1306  return false;
1307 
1308  auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1309  if (!CmpC || !CmpC->isZero())
1310  return false;
1311  }
1312 
1313  if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1314  return false;
1315 
1316  LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1317  LLVM_DEBUG(AndI->getParent()->dump());
1318 
1319  // Push the 'and' into the same block as the icmp 0. There should only be
1320  // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1321  // others, so we don't need to keep track of which BBs we insert into.
1322  for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1323  UI != E; ) {
1324  Use &TheUse = UI.getUse();
1325  Instruction *User = cast<Instruction>(*UI);
1326 
1327  // Preincrement use iterator so we don't invalidate it.
1328  ++UI;
1329 
1330  LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1331 
1332  // Keep the 'and' in the same place if the use is already in the same block.
1333  Instruction *InsertPt =
1334  User->getParent() == AndI->getParent() ? AndI : User;
1335  Instruction *InsertedAnd =
1336  BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1337  AndI->getOperand(1), "", InsertPt);
1338  // Propagate the debug info.
1339  InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1340 
1341  // Replace a use of the 'and' with a use of the new 'and'.
1342  TheUse = InsertedAnd;
1343  ++NumAndUses;
1344  LLVM_DEBUG(User->getParent()->dump());
1345  }
1346 
1347  // We removed all uses, nuke the and.
1348  AndI->eraseFromParent();
1349  return true;
1350 }
1351 
1352 /// Check if the candidates could be combined with a shift instruction, which
1353 /// includes:
1354 /// 1. Truncate instruction
1355 /// 2. And instruction and the imm is a mask of the low bits:
1356 /// imm & (imm+1) == 0
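///
/// For example, 0x0000ffff is such a mask (0xffff & 0x10000 == 0), so
/// 'and %x, 65535' is a candidate, whereas 0x0000ff00 is not
/// (0xff00 & 0xff01 == 0xff00 != 0).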
1357 static bool isExtractBitsCandidateUse(Instruction *User) {
1358  if (!isa<TruncInst>(User)) {
1359  if (User->getOpcode() != Instruction::And ||
1360  !isa<ConstantInt>(User->getOperand(1)))
1361  return false;
1362 
1363  const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1364 
1365  if ((Cimm & (Cimm + 1)).getBoolValue())
1366  return false;
1367  }
1368  return true;
1369 }
1370 
1371 /// Sink both shift and truncate instruction to the use of truncate's BB.
1372 static bool
1373 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1374  DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1375  const TargetLowering &TLI, const DataLayout &DL) {
1376  BasicBlock *UserBB = User->getParent();
1377  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1378  TruncInst *TruncI = dyn_cast<TruncInst>(User);
1379  bool MadeChange = false;
1380 
1381  for (Value::user_iterator TruncUI = TruncI->user_begin(),
1382  TruncE = TruncI->user_end();
1383  TruncUI != TruncE;) {
1384 
1385  Use &TruncTheUse = TruncUI.getUse();
1386  Instruction *TruncUser = cast<Instruction>(*TruncUI);
1387  // Preincrement use iterator so we don't invalidate it.
1388 
1389  ++TruncUI;
1390 
1391  int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1392  if (!ISDOpcode)
1393  continue;
1394 
1395  // If the use is actually a legal node, there will not be an
1396  // implicit truncate.
1397  // FIXME: always querying the result type is just an
1398  // approximation; some nodes' legality is determined by the
1399  // operand or other means. There's no good way to find out though.
1400  if (TLI.isOperationLegalOrCustom(
1401  ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1402  continue;
1403 
1404  // Don't bother for PHI nodes.
1405  if (isa<PHINode>(TruncUser))
1406  continue;
1407 
1408  BasicBlock *TruncUserBB = TruncUser->getParent();
1409 
1410  if (UserBB == TruncUserBB)
1411  continue;
1412 
1413  BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1414  CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1415 
1416  if (!InsertedShift && !InsertedTrunc) {
1417  BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1418  assert(InsertPt != TruncUserBB->end());
1419  // Sink the shift
1420  if (ShiftI->getOpcode() == Instruction::AShr)
1421  InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1422  "", &*InsertPt);
1423  else
1424  InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1425  "", &*InsertPt);
1426  InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1427 
1428  // Sink the trunc
1429  BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1430  TruncInsertPt++;
1431  assert(TruncInsertPt != TruncUserBB->end());
1432 
1433  InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1434  TruncI->getType(), "", &*TruncInsertPt);
1435  InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1436 
1437  MadeChange = true;
1438 
1439  TruncTheUse = InsertedTrunc;
1440  }
1441  }
1442  return MadeChange;
1443 }
1444 
1445 /// Sink the shift *right* instruction into user blocks if the uses could
1446 /// potentially be combined with this shift instruction and generate BitExtract
1447 /// instruction. It will only be applied if the architecture supports BitExtract
1448 /// instruction. Here is an example:
1449 /// BB1:
1450 /// %x.extract.shift = lshr i64 %arg1, 32
1451 /// BB2:
1452 /// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1453 /// ==>
1454 ///
1455 /// BB2:
1456 /// %x.extract.shift.1 = lshr i64 %arg1, 32
1457 /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1458 ///
1459 /// CodeGen will recognize the pattern in BB2 and generate BitExtract
1460 /// instruction.
1461 /// Return true if any changes are made.
1462 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1463  const TargetLowering &TLI,
1464  const DataLayout &DL) {
1465  BasicBlock *DefBB = ShiftI->getParent();
1466 
1467  /// Only insert instructions in each block once.
1468  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1469 
1470  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1471 
1472  bool MadeChange = false;
1473  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1474  UI != E;) {
1475  Use &TheUse = UI.getUse();
1476  Instruction *User = cast<Instruction>(*UI);
1477  // Preincrement use iterator so we don't invalidate it.
1478  ++UI;
1479 
1480  // Don't bother for PHI nodes.
1481  if (isa<PHINode>(User))
1482  continue;
1483 
1484  if (!isExtractBitsCandidateUse(User))
1485  continue;
1486 
1487  BasicBlock *UserBB = User->getParent();
1488 
1489  if (UserBB == DefBB) {
1490  // If the shift and truncate instruction are in the same BB. The use of
1491  // the truncate(TruncUse) may still introduce another truncate if not
1492  // legal. In this case, we would like to sink both shift and truncate
1493  // instruction to the BB of TruncUse.
1494  // for example:
1495  // BB1:
1496  // i64 shift.result = lshr i64 opnd, imm
1497  // trunc.result = trunc shift.result to i16
1498  //
1499  // BB2:
1500  // ----> We will have an implicit truncate here if the architecture does
1501  // not have i16 compare.
1502  // cmp i16 trunc.result, opnd2
1503  //
1504  if (isa<TruncInst>(User) && shiftIsLegal
1505  // If the type of the truncate is legal, no truncate will be
1506  // introduced in other basic blocks.
1507  &&
1508  (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1509  MadeChange =
1510  SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1511 
1512  continue;
1513  }
1514  // If we have already inserted a shift into this block, use it.
1515  BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1516 
1517  if (!InsertedShift) {
1518  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1519  assert(InsertPt != UserBB->end());
1520 
1521  if (ShiftI->getOpcode() == Instruction::AShr)
1522  InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1523  "", &*InsertPt);
1524  else
1525  InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1526  "", &*InsertPt);
1527  InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1528 
1529  MadeChange = true;
1530  }
1531 
1532  // Replace a use of the shift with a use of the new shift.
1533  TheUse = InsertedShift;
1534  }
1535 
1536  // If we removed all uses, nuke the shift.
1537  if (ShiftI->use_empty()) {
1538  salvageDebugInfo(*ShiftI);
1539  ShiftI->eraseFromParent();
1540  }
1541 
1542  return MadeChange;
1543 }
1544 
1545 /// If counting leading or trailing zeros is an expensive operation and a zero
1546 /// input is defined, add a check for zero to avoid calling the intrinsic.
1547 ///
1548 /// We want to transform:
1549 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1550 ///
1551 /// into:
1552 /// entry:
1553 /// %cmpz = icmp eq i64 %A, 0
1554 /// br i1 %cmpz, label %cond.end, label %cond.false
1555 /// cond.false:
1556 /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1557 /// br label %cond.end
1558 /// cond.end:
1559 /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1560 ///
1561 /// If the transform is performed, return true and set ModifiedDT to true.
1562 static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1563  const TargetLowering *TLI,
1564  const DataLayout *DL,
1565  bool &ModifiedDT) {
1566  if (!TLI || !DL)
1567  return false;
1568 
1569  // If a zero input is undefined, it doesn't make sense to despeculate that.
1570  if (match(CountZeros->getOperand(1), m_One()))
1571  return false;
1572 
1573  // If it's cheap to speculate, there's nothing to do.
1574  auto IntrinsicID = CountZeros->getIntrinsicID();
1575  if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1576  (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1577  return false;
1578 
1579  // Only handle legal scalar cases. Anything else requires too much work.
1580  Type *Ty = CountZeros->getType();
1581  unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1582  if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1583  return false;
1584 
1585  // The intrinsic will be sunk behind a compare against zero and branch.
1586  BasicBlock *StartBlock = CountZeros->getParent();
1587  BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1588 
1589  // Create another block after the count zero intrinsic. A PHI will be added
1590  // in this block to select the result of the intrinsic or the bit-width
1591  // constant if the input to the intrinsic is zero.
1592  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1593  BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1594 
1595  // Set up a builder to create a compare, conditional branch, and PHI.
1596  IRBuilder<> Builder(CountZeros->getContext());
1597  Builder.SetInsertPoint(StartBlock->getTerminator());
1598  Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1599 
1600  // Replace the unconditional branch that was created by the first split with
1601  // a compare against zero and a conditional branch.
1602  Value *Zero = Constant::getNullValue(Ty);
1603  Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1604  Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1605  StartBlock->getTerminator()->eraseFromParent();
1606 
1607  // Create a PHI in the end block to select either the output of the intrinsic
1608  // or the bit width of the operand.
1609  Builder.SetInsertPoint(&EndBlock->front());
1610  PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1611  CountZeros->replaceAllUsesWith(PN);
1612  Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1613  PN->addIncoming(BitWidth, StartBlock);
1614  PN->addIncoming(CountZeros, CallBlock);
1615 
1616  // We are explicitly handling the zero case, so we can set the intrinsic's
1617  // undefined zero argument to 'true'. This will also prevent reprocessing the
1618  // intrinsic; we only despeculate when a zero input is defined.
1619  CountZeros->setArgOperand(1, Builder.getTrue());
1620  ModifiedDT = true;
1621  return true;
1622 }
1623 
1624 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1625  BasicBlock *BB = CI->getParent();
1626 
1627  // Lower inline assembly if we can.
1628  // If we found an inline asm expression, and if the target knows how to
1629  // lower it to normal LLVM code, do so now.
1630  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1631  if (TLI->ExpandInlineAsm(CI)) {
1632  // Avoid invalidating the iterator.
1633  CurInstIterator = BB->begin();
1634  // Avoid processing instructions out of order, which could cause
1635  // reuse before a value is defined.
1636  SunkAddrs.clear();
1637  return true;
1638  }
1639  // Sink address computing for memory operands into the block.
1640  if (optimizeInlineAsmInst(CI))
1641  return true;
1642  }
1643 
1644  // Align the pointer arguments to this call if the target thinks it's a good
1645  // idea
1646  unsigned MinSize, PrefAlign;
1647  if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1648  for (auto &Arg : CI->arg_operands()) {
1649  // We want to align both objects whose address is used directly and
1650  // objects whose address is used in casts and GEPs, though it only makes
1651  // sense for GEPs if the offset is a multiple of the desired alignment and
1652  // if size - offset meets the size threshold.
1653  if (!Arg->getType()->isPointerTy())
1654  continue;
1655  APInt Offset(DL->getIndexSizeInBits(
1656  cast<PointerType>(Arg->getType())->getAddressSpace()),
1657  0);
1658  Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1659  uint64_t Offset2 = Offset.getLimitedValue();
1660  if ((Offset2 & (PrefAlign-1)) != 0)
1661  continue;
1662  AllocaInst *AI;
1663  if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1664  DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1665  AI->setAlignment(PrefAlign);
1666  // Global variables can only be aligned if they are defined in this
1667  // object (i.e. they are uniquely initialized in this object), and
1668  // over-aligning global variables that have an explicit section is
1669  // forbidden.
1670  GlobalVariable *GV;
1671  if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1672  GV->getPointerAlignment(*DL) < PrefAlign &&
1673  DL->getTypeAllocSize(GV->getValueType()) >=
1674  MinSize + Offset2)
1675  GV->setAlignment(PrefAlign);
1676  }
1677  // If this is a memcpy (or similar) then we may be able to improve the
1678  // alignment
1679  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1680  unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1681  if (DestAlign > MI->getDestAlignment())
1682  MI->setDestAlignment(DestAlign);
1683  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1684  unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1685  if (SrcAlign > MTI->getSourceAlignment())
1686  MTI->setSourceAlignment(SrcAlign);
1687  }
1688  }
1689  }
1690 
1691  // If we have a cold call site, try to sink addressing computation into the
1692  // cold block. This interacts with our handling for loads and stores to
1693  // ensure that we can fold all uses of a potential addressing computation
1694  // into their uses. TODO: generalize this to work over profiling data
1695  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1696  for (auto &Arg : CI->arg_operands()) {
1697  if (!Arg->getType()->isPointerTy())
1698  continue;
1699  unsigned AS = Arg->getType()->getPointerAddressSpace();
1700  return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1701  }
1702 
1703  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1704  if (II) {
1705  switch (II->getIntrinsicID()) {
1706  default: break;
1707  case Intrinsic::objectsize: {
1708  // Lower all uses of llvm.objectsize.*
1709  ConstantInt *RetVal =
1710  lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
1711 
1712  resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1713  replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1714  });
1715  return true;
1716  }
1717  case Intrinsic::is_constant: {
1718  // If is_constant hasn't folded away yet, lower it to false now.
1719  Constant *RetVal = ConstantInt::get(II->getType(), 0);
1720  resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1721  replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1722  });
1723  return true;
1724  }
1725  case Intrinsic::aarch64_stlxr:
1726  case Intrinsic::aarch64_stxr: {
1727  ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1728  if (!ExtVal || !ExtVal->hasOneUse() ||
1729  ExtVal->getParent() == CI->getParent())
1730  return false;
1731  // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1732  ExtVal->moveBefore(CI);
1733  // Mark this instruction as "inserted by CGP", so that other
1734  // optimizations don't touch it.
1735  InsertedInsts.insert(ExtVal);
1736  return true;
1737  }
1738  case Intrinsic::launder_invariant_group:
1739  case Intrinsic::strip_invariant_group: {
1740  Value *ArgVal = II->getArgOperand(0);
1741  auto it = LargeOffsetGEPMap.find(II);
1742  if (it != LargeOffsetGEPMap.end()) {
1743  // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
1744  // Make sure not to have to deal with iterator invalidation
1745  // after possibly adding ArgVal to LargeOffsetGEPMap.
1746  auto GEPs = std::move(it->second);
1747  LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
1748  LargeOffsetGEPMap.erase(II);
1749  }
1750 
1751  II->replaceAllUsesWith(ArgVal);
1752  II->eraseFromParent();
1753  return true;
1754  }
1755  case Intrinsic::cttz:
1756  case Intrinsic::ctlz:
1757  // If counting zeros is expensive, try to avoid it.
1758  return despeculateCountZeros(II, TLI, DL, ModifiedDT);
1759  }
1760 
1761  if (TLI) {
1762  SmallVector<Value*, 2> PtrOps;
1763  Type *AccessTy;
1764  if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
1765  while (!PtrOps.empty()) {
1766  Value *PtrVal = PtrOps.pop_back_val();
1767  unsigned AS = PtrVal->getType()->getPointerAddressSpace();
1768  if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
1769  return true;
1770  }
1771  }
1772  }
1773 
1774  // From here on out we're working with named functions.
1775  if (!CI->getCalledFunction()) return false;
1776 
1777  // Lower all default uses of _chk calls. This is very similar
1778  // to what InstCombineCalls does, but here we are only lowering calls
1779  // to fortified library functions (e.g. __memcpy_chk) that have the default
1780  // "don't know" as the objectsize. Anything else should be left alone.
1781  FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1782  if (Value *V = Simplifier.optimizeCall(CI)) {
1783  CI->replaceAllUsesWith(V);
1784  CI->eraseFromParent();
1785  return true;
1786  }
1787 
1788  return false;
1789 }
1790 
1791 /// Look for opportunities to duplicate return instructions to the predecessor
1792 /// to enable tail call optimizations. The case it is currently looking for is:
1793 /// @code
1794 /// bb0:
1795 /// %tmp0 = tail call i32 @f0()
1796 /// br label %return
1797 /// bb1:
1798 /// %tmp1 = tail call i32 @f1()
1799 /// br label %return
1800 /// bb2:
1801 /// %tmp2 = tail call i32 @f2()
1802 /// br label %return
1803 /// return:
1804 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1805 /// ret i32 %retval
1806 /// @endcode
1807 ///
1808 /// =>
1809 ///
1810 /// @code
1811 /// bb0:
1812 /// %tmp0 = tail call i32 @f0()
1813 /// ret i32 %tmp0
1814 /// bb1:
1815 /// %tmp1 = tail call i32 @f1()
1816 /// ret i32 %tmp1
1817 /// bb2:
1818 /// %tmp2 = tail call i32 @f2()
1819 /// ret i32 %tmp2
1820 /// @endcode
1821 bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
1822  if (!TLI)
1823  return false;
1824 
1825  ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
1826  if (!RetI)
1827  return false;
1828 
1829  PHINode *PN = nullptr;
1830  BitCastInst *BCI = nullptr;
1831  Value *V = RetI->getReturnValue();
1832  if (V) {
1833  BCI = dyn_cast<BitCastInst>(V);
1834  if (BCI)
1835  V = BCI->getOperand(0);
1836 
1837  PN = dyn_cast<PHINode>(V);
1838  if (!PN)
1839  return false;
1840  }
1841 
1842  if (PN && PN->getParent() != BB)
1843  return false;
1844 
1845  // Make sure there are no instructions between the PHI and return, or that the
1846  // return is the first instruction in the block.
1847  if (PN) {
1848  BasicBlock::iterator BI = BB->begin();
1849  do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
1850  if (&*BI == BCI)
1851  // Also skip over the bitcast.
1852  ++BI;
1853  if (&*BI != RetI)
1854  return false;
1855  } else {
1856  BasicBlock::iterator BI = BB->begin();
1857  while (isa<DbgInfoIntrinsic>(BI)) ++BI;
1858  if (&*BI != RetI)
1859  return false;
1860  }
1861 
1862  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
1863  /// call.
1864  const Function *F = BB->getParent();
1865  SmallVector<CallInst*, 4> TailCalls;
1866  if (PN) {
1867  for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
1868  CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
1869  // Make sure the phi value is indeed produced by the tail call.
1870  if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
1871  TLI->mayBeEmittedAsTailCall(CI) &&
1872  attributesPermitTailCall(F, CI, RetI, *TLI))
1873  TailCalls.push_back(CI);
1874  }
1875  } else {
1876  SmallPtrSet<BasicBlock*, 4> VisitedBBs;
1877  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
1878  if (!VisitedBBs.insert(*PI).second)
1879  continue;
1880 
1881  BasicBlock::InstListType &InstList = (*PI)->getInstList();
1882  BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
1883  BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
1884  do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
1885  if (RI == RE)
1886  continue;
1887 
1888  CallInst *CI = dyn_cast<CallInst>(&*RI);
1889  if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
1890  attributesPermitTailCall(F, CI, RetI, *TLI))
1891  TailCalls.push_back(CI);
1892  }
1893  }
1894 
1895  bool Changed = false;
1896  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
1897  CallInst *CI = TailCalls[i];
1898  CallSite CS(CI);
1899 
1900  // Make sure the call instruction is followed by an unconditional branch to
1901  // the return block.
1902  BasicBlock *CallBB = CI->getParent();
1903  BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
1904  if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
1905  continue;
1906 
1907  // Duplicate the return into CallBB.
1908  (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
1909  ModifiedDT = Changed = true;
1910  ++NumRetsDup;
1911  }
1912 
1913  // If we eliminated all predecessors of the block, delete the block now.
1914  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
1915  BB->eraseFromParent();
1916 
1917  return Changed;
1918 }
1919 
1920 //===----------------------------------------------------------------------===//
1921 // Memory Optimization
1922 //===----------------------------------------------------------------------===//
1923 
1924 namespace {
1925 
1926 /// This is an extended version of TargetLowering::AddrMode
1927 /// which holds actual Value*'s for register values.
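///
/// For illustration only (this example is not part of the original source):
/// given IR along the lines of
/// @code
///   %addr = getelementptr inbounds i8, i8* %base, i64 40
///   %val  = load i8, i8* %addr
/// @endcode
/// the matcher could describe the address as an ExtAddrMode with
/// BaseReg = %base and BaseOffs = 40 (BaseGV, ScaledReg and Scale unused),
/// i.e. the general form BaseGV + BaseReg + BaseOffs + ScaledReg * Scale.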
1928 struct ExtAddrMode : public TargetLowering::AddrMode {
1929  Value *BaseReg = nullptr;
1930  Value *ScaledReg = nullptr;
1931  Value *OriginalValue = nullptr;
1932 
1933  enum FieldName {
1934  NoField = 0x00,
1935  BaseRegField = 0x01,
1936  BaseGVField = 0x02,
1937  BaseOffsField = 0x04,
1938  ScaledRegField = 0x08,
1939  ScaleField = 0x10,
1940  MultipleFields = 0xff
1941  };
1942 
1943  ExtAddrMode() = default;
1944 
1945  void print(raw_ostream &OS) const;
1946  void dump() const;
1947 
1948  FieldName compare(const ExtAddrMode &other) {
1949  // First check that the types are the same on each field, as differing
1950  // types are something we can't cope with later on.
1951  if (BaseReg && other.BaseReg &&
1952  BaseReg->getType() != other.BaseReg->getType())
1953  return MultipleFields;
1954  if (BaseGV && other.BaseGV &&
1955  BaseGV->getType() != other.BaseGV->getType())
1956  return MultipleFields;
1957  if (ScaledReg && other.ScaledReg &&
1958  ScaledReg->getType() != other.ScaledReg->getType())
1959  return MultipleFields;
1960 
1961  // Check each field to see if it differs.
1962  unsigned Result = NoField;
1963  if (BaseReg != other.BaseReg)
1964  Result |= BaseRegField;
1965  if (BaseGV != other.BaseGV)
1966  Result |= BaseGVField;
1967  if (BaseOffs != other.BaseOffs)
1968  Result |= BaseOffsField;
1969  if (ScaledReg != other.ScaledReg)
1970  Result |= ScaledRegField;
1971  // Don't count 0 as being a different scale, because that actually means
1972  // unscaled (which will already be counted by having no ScaledReg).
1973  if (Scale && other.Scale && Scale != other.Scale)
1974  Result |= ScaleField;
1975 
1976  if (countPopulation(Result) > 1)
1977  return MultipleFields;
1978  else
1979  return static_cast<FieldName>(Result);
1980  }
1981 
1982  // An AddrMode is trivial if it involves no calculation i.e. it is just a base
1983  // with no offset.
1984  bool isTrivial() {
1985  // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
1986  // trivial if at most one of these terms is nonzero, except that BaseGV and
1987  // BaseReg both being zero actually means a null pointer value, which we
1988  // consider to be 'non-zero' here.
1989  return !BaseOffs && !Scale && !(BaseGV && BaseReg);
1990  }
1991 
1992  Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
1993  switch (Field) {
1994  default:
1995  return nullptr;
1996  case BaseRegField:
1997  return BaseReg;
1998  case BaseGVField:
1999  return BaseGV;
2000  case ScaledRegField:
2001  return ScaledReg;
2002  case BaseOffsField:
2003  return ConstantInt::get(IntPtrTy, BaseOffs);
2004  }
2005  }
2006 
2007  void SetCombinedField(FieldName Field, Value *V,
2008  const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2009  switch (Field) {
2010  default:
2011  llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2012  break;
2013  case ExtAddrMode::BaseRegField:
2014  BaseReg = V;
2015  break;
2016  case ExtAddrMode::BaseGVField:
2017  // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2018  // in the BaseReg field.
2019  assert(BaseReg == nullptr);
2020  BaseReg = V;
2021  BaseGV = nullptr;
2022  break;
2023  case ExtAddrMode::ScaledRegField:
2024  ScaledReg = V;
2025  // If we have a mix of scaled and unscaled addrmodes then we want scale
2026  // to be the scale and not zero.
2027  if (!Scale)
2028  for (const ExtAddrMode &AM : AddrModes)
2029  if (AM.Scale) {
2030  Scale = AM.Scale;
2031  break;
2032  }
2033  break;
2034  case ExtAddrMode::BaseOffsField:
2035  // The offset is no longer a constant, so it goes in ScaledReg with a
2036  // scale of 1.
2037  assert(ScaledReg == nullptr);
2038  ScaledReg = V;
2039  Scale = 1;
2040  BaseOffs = 0;
2041  break;
2042  }
2043  }
2044 };
2045 
2046 } // end anonymous namespace
2047 
2048 #ifndef NDEBUG
2049 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2050  AM.print(OS);
2051  return OS;
2052 }
2053 #endif
2054 
2055 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2056 void ExtAddrMode::print(raw_ostream &OS) const {
2057  bool NeedPlus = false;
2058  OS << "[";
2059  if (BaseGV) {
2060  OS << (NeedPlus ? " + " : "")
2061  << "GV:";
2062  BaseGV->printAsOperand(OS, /*PrintType=*/false);
2063  NeedPlus = true;
2064  }
2065 
2066  if (BaseOffs) {
2067  OS << (NeedPlus ? " + " : "")
2068  << BaseOffs;
2069  NeedPlus = true;
2070  }
2071 
2072  if (BaseReg) {
2073  OS << (NeedPlus ? " + " : "")
2074  << "Base:";
2075  BaseReg->printAsOperand(OS, /*PrintType=*/false);
2076  NeedPlus = true;
2077  }
2078  if (Scale) {
2079  OS << (NeedPlus ? " + " : "")
2080  << Scale << "*";
2081  ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2082  }
2083 
2084  OS << ']';
2085 }
2086 
2087 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2088  print(dbgs());
2089  dbgs() << '\n';
2090 }
2091 #endif
2092 
2093 namespace {
2094 
2095 /// This class provides transaction based operation on the IR.
2096 /// Every change made through this class is recorded in the internal state and
2097 /// can be undone (rollback) until commit is called.
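///
/// A minimal usage sketch, for illustration only (not part of the original
/// source; RemovedInsts, Inst, NewTy and the profitability flag are assumed
/// to be in scope):
/// @code
///   TypePromotionTransaction TPT(RemovedInsts);
///   TypePromotionTransaction::ConstRestorationPt Start =
///       TPT.getRestorationPoint();
///   TPT.mutateType(Inst, NewTy);   // recorded, can still be undone
///   if (ProfitableToKeep)
///     TPT.commit();                // make the changes permanent
///   else
///     TPT.rollback(Start);         // undo everything made after Start
/// @endcode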
2098 class TypePromotionTransaction {
2099  /// This represents the common interface of the individual transaction.
2100  /// Each class implements the logic for doing one specific modification on
2101  /// the IR via the TypePromotionTransaction.
2102  class TypePromotionAction {
2103  protected:
2104  /// The Instruction modified.
2105  Instruction *Inst;
2106 
2107  public:
2108  /// Constructor of the action.
2109  /// The constructor performs the related action on the IR.
2110  TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2111 
2112  virtual ~TypePromotionAction() = default;
2113 
2114  /// Undo the modification done by this action.
2115  /// When this method is called, the IR must be in the same state as it was
2116  /// before this action was applied.
2117  /// \pre Undoing the action works if and only if the IR is in the exact same
2118  /// state as it was directly after this action was applied.
2119  virtual void undo() = 0;
2120 
2121  /// Advocate every change made by this action.
2122  /// When the results on the IR of the action are to be kept, it is important
2123  /// to call this function, otherwise hidden information may be kept forever.
2124  virtual void commit() {
2125  // Nothing to be done, this action is not doing anything.
2126  }
2127  };
2128 
2129  /// Utility to remember the position of an instruction.
2130  class InsertionHandler {
2131  /// Position of an instruction.
2132  /// Either an instruction:
2133  /// - Is the first in a basic block: BB is used.
2134  /// - Has a previous instruction: PrevInst is used.
2135  union {
2136  Instruction *PrevInst;
2137  BasicBlock *BB;
2138  } Point;
2139 
2140  /// Remember whether or not the instruction had a previous instruction.
2141  bool HasPrevInstruction;
2142 
2143  public:
2144  /// Record the position of \p Inst.
2145  InsertionHandler(Instruction *Inst) {
2146  BasicBlock::iterator It = Inst->getIterator();
2147  HasPrevInstruction = (It != (Inst->getParent()->begin()));
2148  if (HasPrevInstruction)
2149  Point.PrevInst = &*--It;
2150  else
2151  Point.BB = Inst->getParent();
2152  }
2153 
2154  /// Insert \p Inst at the recorded position.
2155  void insert(Instruction *Inst) {
2156  if (HasPrevInstruction) {
2157  if (Inst->getParent())
2158  Inst->removeFromParent();
2159  Inst->insertAfter(Point.PrevInst);
2160  } else {
2161  Instruction *Position = &*Point.BB->getFirstInsertionPt();
2162  if (Inst->getParent())
2163  Inst->moveBefore(Position);
2164  else
2165  Inst->insertBefore(Position);
2166  }
2167  }
2168  };
2169 
2170  /// Move an instruction before another.
2171  class InstructionMoveBefore : public TypePromotionAction {
2172  /// Original position of the instruction.
2173  InsertionHandler Position;
2174 
2175  public:
2176  /// Move \p Inst before \p Before.
2177  InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2178  : TypePromotionAction(Inst), Position(Inst) {
2179  LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2180  << "\n");
2181  Inst->moveBefore(Before);
2182  }
2183 
2184  /// Move the instruction back to its original position.
2185  void undo() override {
2186  LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2187  Position.insert(Inst);
2188  }
2189  };
2190 
2191  /// Set the operand of an instruction with a new value.
2192  class OperandSetter : public TypePromotionAction {
2193  /// Original operand of the instruction.
2194  Value *Origin;
2195 
2196  /// Index of the modified instruction.
2197  unsigned Idx;
2198 
2199  public:
2200  /// Set \p Idx operand of \p Inst with \p NewVal.
2201  OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2202  : TypePromotionAction(Inst), Idx(Idx) {
2203  LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2204  << "for:" << *Inst << "\n"
2205  << "with:" << *NewVal << "\n");
2206  Origin = Inst->getOperand(Idx);
2207  Inst->setOperand(Idx, NewVal);
2208  }
2209 
2210  /// Restore the original value of the instruction.
2211  void undo() override {
2212  LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2213  << "for: " << *Inst << "\n"
2214  << "with: " << *Origin << "\n");
2215  Inst->setOperand(Idx, Origin);
2216  }
2217  };
2218 
2219  /// Hide the operands of an instruction.
2220  /// Act as if this instruction was not using any of its operands.
2221  class OperandsHider : public TypePromotionAction {
2222  /// The list of original operands.
2223  SmallVector<Value *, 4> OriginalValues;
2224 
2225  public:
2226  /// Remove \p Inst from the uses of the operands of \p Inst.
2227  OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2228  LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2229  unsigned NumOpnds = Inst->getNumOperands();
2230  OriginalValues.reserve(NumOpnds);
2231  for (unsigned It = 0; It < NumOpnds; ++It) {
2232  // Save the current operand.
2233  Value *Val = Inst->getOperand(It);
2234  OriginalValues.push_back(Val);
2235  // Set a dummy one.
2236  // We could use OperandSetter here, but that would imply an overhead
2237  // that we are not willing to pay.
2238  Inst->setOperand(It, UndefValue::get(Val->getType()));
2239  }
2240  }
2241 
2242  /// Restore the original list of uses.
2243  void undo() override {
2244  LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2245  for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2246  Inst->setOperand(It, OriginalValues[It]);
2247  }
2248  };
2249 
2250  /// Build a truncate instruction.
2251  class TruncBuilder : public TypePromotionAction {
2252  Value *Val;
2253 
2254  public:
2255  /// Build a truncate instruction of \p Opnd producing a \p Ty
2256  /// result.
2257  /// trunc Opnd to Ty.
2258  TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2259  IRBuilder<> Builder(Opnd);
2260  Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2261  LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2262  }
2263 
2264  /// Get the built value.
2265  Value *getBuiltValue() { return Val; }
2266 
2267  /// Remove the built instruction.
2268  void undo() override {
2269  LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2270  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2271  IVal->eraseFromParent();
2272  }
2273  };
2274 
2275  /// Build a sign extension instruction.
2276  class SExtBuilder : public TypePromotionAction {
2277  Value *Val;
2278 
2279  public:
2280  /// Build a sign extension instruction of \p Opnd producing a \p Ty
2281  /// result.
2282  /// sext Opnd to Ty.
2283  SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2284  : TypePromotionAction(InsertPt) {
2285  IRBuilder<> Builder(InsertPt);
2286  Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2287  LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2288  }
2289 
2290  /// Get the built value.
2291  Value *getBuiltValue() { return Val; }
2292 
2293  /// Remove the built instruction.
2294  void undo() override {
2295  LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2296  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2297  IVal->eraseFromParent();
2298  }
2299  };
2300 
2301  /// Build a zero extension instruction.
2302  class ZExtBuilder : public TypePromotionAction {
2303  Value *Val;
2304 
2305  public:
2306  /// Build a zero extension instruction of \p Opnd producing a \p Ty
2307  /// result.
2308  /// zext Opnd to Ty.
2309  ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2310  : TypePromotionAction(InsertPt) {
2311  IRBuilder<> Builder(InsertPt);
2312  Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2313  LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2314  }
2315 
2316  /// Get the built value.
2317  Value *getBuiltValue() { return Val; }
2318 
2319  /// Remove the built instruction.
2320  void undo() override {
2321  LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2322  if (Instruction *IVal = dyn_cast<Instruction>(Val))
2323  IVal->eraseFromParent();
2324  }
2325  };
2326 
2327  /// Mutate an instruction to another type.
2328  class TypeMutator : public TypePromotionAction {
2329  /// Record the original type.
2330  Type *OrigTy;
2331 
2332  public:
2333  /// Mutate the type of \p Inst into \p NewTy.
2334  TypeMutator(Instruction *Inst, Type *NewTy)
2335  : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2336  LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2337  << "\n");
2338  Inst->mutateType(NewTy);
2339  }
2340 
2341  /// Mutate the instruction back to its original type.
2342  void undo() override {
2343  LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2344  << "\n");
2345  Inst->mutateType(OrigTy);
2346  }
2347  };
2348 
2349  /// Replace the uses of an instruction by another instruction.
2350  class UsesReplacer : public TypePromotionAction {
2351  /// Helper structure to keep track of the replaced uses.
2352  struct InstructionAndIdx {
2353  /// The instruction using the instruction.
2354  Instruction *Inst;
2355 
2356  /// The index where this instruction is used for Inst.
2357  unsigned Idx;
2358 
2359  InstructionAndIdx(Instruction *Inst, unsigned Idx)
2360  : Inst(Inst), Idx(Idx) {}
2361  };
2362 
2363  /// Keep track of the original uses (pair Instruction, Index).
2364  SmallVector<InstructionAndIdx, 4> OriginalUses;
2365  /// Keep track of the debug users.
2366  SmallVector<DbgValueInst *, 1> DbgValues;
2367 
2368  using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2369 
2370  public:
2371  /// Replace all the uses of \p Inst by \p New.
2372  UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2373  LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2374  << "\n");
2375  // Record the original uses.
2376  for (Use &U : Inst->uses()) {
2377  Instruction *UserI = cast<Instruction>(U.getUser());
2378  OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2379  }
2380  // Record the debug uses separately. They are not in the instruction's
2381  // use list, but they are replaced by RAUW.
2382  findDbgValues(DbgValues, Inst);
2383 
2384  // Now, we can replace the uses.
2385  Inst->replaceAllUsesWith(New);
2386  }
2387 
2388  /// Reassign the original uses of Inst to Inst.
2389  void undo() override {
2390  LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2391  for (use_iterator UseIt = OriginalUses.begin(),
2392  EndIt = OriginalUses.end();
2393  UseIt != EndIt; ++UseIt) {
2394  UseIt->Inst->setOperand(UseIt->Idx, Inst);
2395  }
2396  // RAUW has replaced all original uses with references to the new value,
2397  // including the debug uses. Since we are undoing the replacements,
2398  // the original debug uses must also be reinstated to maintain the
2399  // correctness and utility of debug value instructions.
2400  for (auto *DVI: DbgValues) {
2401  LLVMContext &Ctx = Inst->getType()->getContext();
2402  auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2403  DVI->setOperand(0, MV);
2404  }
2405  }
2406  };
2407 
2408  /// Remove an instruction from the IR.
2409  class InstructionRemover : public TypePromotionAction {
2410  /// Original position of the instruction.
2411  InsertionHandler Inserter;
2412 
2413  /// Helper structure to hide all the links to the instruction. In other
2414  /// words, this helps to act as if the instruction was removed.
2415  OperandsHider Hider;
2416 
2417  /// Keep track of the uses replaced, if any.
2418  UsesReplacer *Replacer = nullptr;
2419 
2420  /// Keep track of instructions removed.
2421  SetOfInstrs &RemovedInsts;
2422 
2423  public:
2424  /// Remove all references to \p Inst and optionally replace all its
2425  /// uses with New.
2426  /// \p RemovedInsts Keep track of the instructions removed by this Action.
2427  /// \pre If !Inst->use_empty(), then New != nullptr
2428  InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2429  Value *New = nullptr)
2430  : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2431  RemovedInsts(RemovedInsts) {
2432  if (New)
2433  Replacer = new UsesReplacer(Inst, New);
2434  LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2435  RemovedInsts.insert(Inst);
2436  /// The instructions removed here will be freed after completing
2437  /// optimizeBlock() for all blocks as we need to keep track of the
2438  /// removed instructions during promotion.
2439  Inst->removeFromParent();
2440  }
2441 
2442  ~InstructionRemover() override { delete Replacer; }
2443 
2444  /// Resurrect the instruction and reassign it to the proper uses if
2445  /// a new value was provided when building this action.
2446  void undo() override {
2447  LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2448  Inserter.insert(Inst);
2449  if (Replacer)
2450  Replacer->undo();
2451  Hider.undo();
2452  RemovedInsts.erase(Inst);
2453  }
2454  };
2455 
2456 public:
2457  /// Restoration point.
2458  /// The restoration point is a pointer to an action instead of an iterator
2459  /// because the iterator may be invalidated but not the pointer.
2460  using ConstRestorationPt = const TypePromotionAction *;
2461 
2462  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2463  : RemovedInsts(RemovedInsts) {}
2464 
2465  /// Advocate every change made in that transaction.
2466  void commit();
2467 
2468  /// Undo all the changes made after the given point.
2469  void rollback(ConstRestorationPt Point);
2470 
2471  /// Get the current restoration point.
2472  ConstRestorationPt getRestorationPoint() const;
2473 
2474  /// \name API for IR modification with state keeping to support rollback.
2475  /// @{
2476  /// Same as Instruction::setOperand.
2477  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2478 
2479  /// Same as Instruction::eraseFromParent.
2480  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2481 
2482  /// Same as Value::replaceAllUsesWith.
2483  void replaceAllUsesWith(Instruction *Inst, Value *New);
2484 
2485  /// Same as Value::mutateType.
2486  void mutateType(Instruction *Inst, Type *NewTy);
2487 
2488  /// Same as IRBuilder::createTrunc.
2489  Value *createTrunc(Instruction *Opnd, Type *Ty);
2490 
2491  /// Same as IRBuilder::createSExt.
2492  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2493 
2494  /// Same as IRBuilder::createZExt.
2495  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2496 
2497  /// Same as Instruction::moveBefore.
2498  void moveBefore(Instruction *Inst, Instruction *Before);
2499  /// @}
2500 
2501 private:
2502  /// The ordered list of actions made so far.
2503  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2504 
2505  using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2506 
2507  SetOfInstrs &RemovedInsts;
2508 };
2509 
2510 } // end anonymous namespace
2511 
2512 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2513  Value *NewVal) {
2514  Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
2515  Inst, Idx, NewVal));
2516 }
2517 
2518 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2519  Value *NewVal) {
2520  Actions.push_back(
2521  llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
2522  Inst, RemovedInsts, NewVal));
2523 }
2524 
2525 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2526  Value *New) {
2527  Actions.push_back(
2528  llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2529 }
2530 
2531 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2532  Actions.push_back(
2533  llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2534 }
2535 
2536 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2537  Type *Ty) {
2538  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2539  Value *Val = Ptr->getBuiltValue();
2540  Actions.push_back(std::move(Ptr));
2541  return Val;
2542 }
2543 
2544 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2545  Value *Opnd, Type *Ty) {
2546  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2547  Value *Val = Ptr->getBuiltValue();
2548  Actions.push_back(std::move(Ptr));
2549  return Val;
2550 }
2551 
2552 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2553  Value *Opnd, Type *Ty) {
2554  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2555  Value *Val = Ptr->getBuiltValue();
2556  Actions.push_back(std::move(Ptr));
2557  return Val;
2558 }
2559 
2560 void TypePromotionTransaction::moveBefore(Instruction *Inst,
2561  Instruction *Before) {
2562  Actions.push_back(
2563  llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2564  Inst, Before));
2565 }
2566 
2567 TypePromotionTransaction::ConstRestorationPt
2568 TypePromotionTransaction::getRestorationPoint() const {
2569  return !Actions.empty() ? Actions.back().get() : nullptr;
2570 }
2571 
2572 void TypePromotionTransaction::commit() {
2573  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2574  ++It)
2575  (*It)->commit();
2576  Actions.clear();
2577 }
2578 
2579 void TypePromotionTransaction::rollback(
2580  TypePromotionTransaction::ConstRestorationPt Point) {
2581  while (!Actions.empty() && Point != Actions.back().get()) {
2582  std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2583  Curr->undo();
2584  }
2585 }
2586 
2587 namespace {
2588 
2589 /// A helper class for matching addressing modes.
2590 ///
2591 /// This encapsulates the logic for matching the target-legal addressing modes.
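///
/// For illustration only (not part of the original source): the matcher is
/// normally driven through the static Match() entry point, e.g.
/// @code
///   ExtAddrMode AM = AddressingModeMatcher::Match(
///       Addr, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, TLI, TRI,
///       InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
/// @endcode
/// where Addr is the pointer operand of the memory instruction and the other
/// arguments carry the surrounding pass state.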
2592 class AddressingModeMatcher {
2593  SmallVectorImpl<Instruction*> &AddrModeInsts;
2594  const TargetLowering &TLI;
2595  const TargetRegisterInfo &TRI;
2596  const DataLayout &DL;
2597 
2598  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2599  /// the memory instruction that we're computing this address for.
2600  Type *AccessTy;
2601  unsigned AddrSpace;
2602  Instruction *MemoryInst;
2603 
2604  /// This is the addressing mode that we're building up. This is
2605  /// part of the return value of this addressing mode matching stuff.
2606  ExtAddrMode &AddrMode;
2607 
2608  /// The instructions inserted by other CodeGenPrepare optimizations.
2609  const SetOfInstrs &InsertedInsts;
2610 
2611  /// A map from the instructions to their type before promotion.
2612  InstrToOrigTy &PromotedInsts;
2613 
2614  /// The ongoing transaction where every action should be registered.
2615  TypePromotionTransaction &TPT;
2616 
2617  // A GEP which has too large offset to be folded into the addressing mode.
2618  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2619 
2620  /// This is set to true when we should not do profitability checks.
2621  /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2622  bool IgnoreProfitability;
2623 
2624  AddressingModeMatcher(
2625  SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2626  const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2627  ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2628  InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2629  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
2630  : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2631  DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2632  MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2633  PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
2634  IgnoreProfitability = false;
2635  }
2636 
2637 public:
2638  /// Find the maximal addressing mode that a load/store of V can fold,
2639  /// given an access type of AccessTy. This returns a list of involved
2640  /// instructions in AddrModeInsts.
2641  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2642  /// optimizations.
2643  /// \p PromotedInsts maps the instructions to their type before promotion.
2644  /// \p TPT The ongoing transaction where every action should be registered.
2645  static ExtAddrMode
2646  Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2647  SmallVectorImpl<Instruction *> &AddrModeInsts,
2648  const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2649  const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2650  TypePromotionTransaction &TPT,
2651  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
2652  ExtAddrMode Result;
2653 
2654  bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2655  MemoryInst, Result, InsertedInsts,
2656  PromotedInsts, TPT, LargeOffsetGEP)
2657  .matchAddr(V, 0);
2658  (void)Success; assert(Success && "Couldn't select *anything*?");
2659  return Result;
2660  }
2661 
2662 private:
2663  bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2664  bool matchAddr(Value *Addr, unsigned Depth);
2665  bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2666  bool *MovedAway = nullptr);
2667  bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2668  ExtAddrMode &AMBefore,
2669  ExtAddrMode &AMAfter);
2670  bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2671  bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2672  Value *PromotedOperand) const;
2673 };
2674 
2675 class PhiNodeSet;
2676 
2677 /// An iterator for PhiNodeSet.
2678 class PhiNodeSetIterator {
2679  PhiNodeSet * const Set;
2680  size_t CurrentIndex = 0;
2681 
2682 public:
2683  /// The constructor. Start should point to either a valid element, or be equal
2684  /// to the size of the underlying SmallVector of the PhiNodeSet.
2685  PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2686  PHINode * operator*() const;
2687  PhiNodeSetIterator& operator++();
2688  bool operator==(const PhiNodeSetIterator &RHS) const;
2689  bool operator!=(const PhiNodeSetIterator &RHS) const;
2690 };
2691 
2692 /// Keeps a set of PHINodes.
2693 ///
2694 /// This is a minimal set implementation for a specific use case:
2695 /// It is very fast when there are very few elements, but also provides good
2696 /// performance when there are many. It is similar to SmallPtrSet, but also
2697 /// provides iteration by insertion order, which is deterministic and stable
2698 /// across runs. It is also similar to SmallSetVector, but provides removing
2699 /// elements in O(1) time. This is achieved by not actually removing the element
2700 /// from the underlying vector, so comes at the cost of using more memory, but
2701 /// that is fine, since PhiNodeSets are used as short lived objects.
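///
/// A small illustrative example (not part of the original source; P1 and P2
/// are assumed to be PHINode pointers):
/// @code
///   PhiNodeSet Set;
///   Set.insert(P1);            // true: newly added
///   Set.insert(P2);
///   Set.insert(P1);            // false: already present
///   Set.erase(P1);             // constant-time removal
///   for (PHINode *P : Set)     // visits remaining nodes in insertion order
///     P->dump();
/// @endcode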
2702 class PhiNodeSet {
2703  friend class PhiNodeSetIterator;
2704 
2705  using MapType = SmallDenseMap<PHINode *, size_t, 32>;
2706  using iterator = PhiNodeSetIterator;
2707 
2708  /// Keeps the elements in the order of their insertion in the underlying
2709  /// vector. To achieve constant time removal, it never deletes any element.
2710  SmallVector<PHINode *, 32> NodeList;
2711 
2712  /// Keeps the elements in the underlying set implementation. This (and not the
2713  /// NodeList defined above) is the source of truth on whether an element
2714  /// is actually in the collection.
2715  MapType NodeMap;
2716 
2717  /// Points to the first valid (not deleted) element when the set is not empty
2718  /// and the value is not zero. Equals to the size of the underlying vector
2719  /// when the set is empty. When the value is 0, as in the beginning, the
2720  /// first element may or may not be valid.
2721  size_t FirstValidElement = 0;
2722 
2723 public:
2724  /// Inserts a new element to the collection.
2725  /// \returns true if the element is actually added, i.e. was not in the
2726  /// collection before the operation.
2727  bool insert(PHINode *Ptr) {
2728  if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
2729  NodeList.push_back(Ptr);
2730  return true;
2731  }
2732  return false;
2733  }
2734 
2735  /// Removes the element from the collection.
2736  /// \returns whether the element is actually removed, i.e. was in the
2737  /// collection before the operation.
2738  bool erase(PHINode *Ptr) {
2739  auto it = NodeMap.find(Ptr);
2740  if (it != NodeMap.end()) {
2741  NodeMap.erase(Ptr);
2742  SkipRemovedElements(FirstValidElement);
2743  return true;
2744  }
2745  return false;
2746  }
2747 
2748  /// Removes all elements and clears the collection.
2749  void clear() {
2750  NodeMap.clear();
2751  NodeList.clear();
2752  FirstValidElement = 0;
2753  }
2754 
2755  /// \returns an iterator that will iterate the elements in the order of
2756  /// insertion.
2757  iterator begin() {
2758  if (FirstValidElement == 0)
2759  SkipRemovedElements(FirstValidElement);
2760  return PhiNodeSetIterator(this, FirstValidElement);
2761  }
2762 
2763  /// \returns an iterator that points to the end of the collection.
2764  iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
2765 
2766  /// Returns the number of elements in the collection.
2767  size_t size() const {
2768  return NodeMap.size();
2769  }
2770 
2771  /// \returns 1 if the given element is in the collection, and 0 otherwise.
2772  size_t count(PHINode *Ptr) const {
2773  return NodeMap.count(Ptr);
2774  }
2775 
2776 private:
2777  /// Updates the CurrentIndex so that it will point to a valid element.
2778  ///
2779  /// If the element of NodeList at CurrentIndex is valid, it does not
2780  /// change it. If there are no more valid elements, it updates CurrentIndex
2781  /// to point to the end of the NodeList.
2782  void SkipRemovedElements(size_t &CurrentIndex) {
2783  while (CurrentIndex < NodeList.size()) {
2784  auto it = NodeMap.find(NodeList[CurrentIndex]);
2785  // If the element has been deleted and added again later, NodeMap will
2786  // point to a different index, so CurrentIndex will still be invalid.
2787  if (it != NodeMap.end() && it->second == CurrentIndex)
2788  break;
2789  ++CurrentIndex;
2790  }
2791  }
2792 };
2793 
2794 PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
2795  : Set(Set), CurrentIndex(Start) {}
2796 
2797 PHINode * PhiNodeSetIterator::operator*() const {
2798  assert(CurrentIndex < Set->NodeList.size() &&
2799  "PhiNodeSet access out of range");
2800  return Set->NodeList[CurrentIndex];
2801 }
2802 
2803 PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
2804  assert(CurrentIndex < Set->NodeList.size() &&
2805  "PhiNodeSet access out of range");
2806  ++CurrentIndex;
2807  Set->SkipRemovedElements(CurrentIndex);
2808  return *this;
2809 }
2810 
2811 bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
2812  return CurrentIndex == RHS.CurrentIndex;
2813 }
2814 
2815 bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
2816  return !((*this) == RHS);
2817 }
2818 
2819 /// Keep track of simplification of Phi nodes.
2820  /// Accept the set of all phi nodes and erase a phi node from this set
2821 /// if it is simplified.
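///
/// Illustrative usage sketch (not part of the original source; NewPHI is an
/// assumed newly created PHI node and SQ a SimplifyQuery):
/// @code
///   SimplificationTracker ST(SQ);
///   ST.insertNewPhi(NewPHI);        // track the node we just created
///   Value *V = ST.Simplify(NewPHI); // may fold it and return a replacement
/// @endcode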
2822 class SimplificationTracker {
2823  DenseMap<Value *, Value *> Storage;
2824  const SimplifyQuery &SQ;
2825  // Tracks newly created Phi nodes. The elements are iterated by insertion
2826  // order.
2827  PhiNodeSet AllPhiNodes;
2828  // Tracks newly created Select nodes.
2829  SmallPtrSet<SelectInst *, 32> AllSelectNodes;
2830 
2831 public:
2832  SimplificationTracker(const SimplifyQuery &sq)
2833  : SQ(sq) {}
2834 
2835  Value *Get(Value *V) {
2836  do {
2837  auto SV = Storage.find(V);
2838  if (SV == Storage.end())
2839  return V;
2840  V = SV->second;
2841  } while (true);
2842  }
2843 
2844  Value *Simplify(Value *Val) {
2845  SmallVector<Value *, 32> WorkList;
2846  SmallPtrSet<Value *, 32> Visited;
2847  WorkList.push_back(Val);
2848  while (!WorkList.empty()) {
2849  auto P = WorkList.pop_back_val();
2850  if (!Visited.insert(P).second)
2851  continue;
2852  if (auto *PI = dyn_cast<Instruction>(P))
2853  if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
2854  for (auto *U : PI->users())
2855  WorkList.push_back(cast<Value>(U));
2856  Put(PI, V);
2857  PI->replaceAllUsesWith(V);
2858  if (auto *PHI = dyn_cast<PHINode>(PI))
2859  AllPhiNodes.erase(PHI);
2860  if (auto *Select = dyn_cast<SelectInst>(PI))
2861  AllSelectNodes.erase(Select);
2862  PI->eraseFromParent();
2863  }
2864  }
2865  return Get(Val);
2866  }
2867 
2868  void Put(Value *From, Value *To) {
2869  Storage.insert({ From, To });
2870  }
2871 
2872  void ReplacePhi(PHINode *From, PHINode *To) {
2873  Value* OldReplacement = Get(From);
2874  while (OldReplacement != From) {
2875  From = To;
2876  To = dyn_cast<PHINode>(OldReplacement);
2877  OldReplacement = Get(From);
2878  }
2879  assert(Get(To) == To && "Replacement PHI node is already replaced.");
2880  Put(From, To);
2881  From->replaceAllUsesWith(To);
2882  AllPhiNodes.erase(From);
2883  From->eraseFromParent();
2884  }
2885 
2886  PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
2887 
2888  void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
2889 
2890  void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
2891 
2892  unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
2893 
2894  unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
2895 
2896  void destroyNewNodes(Type *CommonType) {
2897  // For safe erasing, replace the uses with dummy value first.
2898  auto Dummy = UndefValue::get(CommonType);
2899  for (auto I : AllPhiNodes) {
2900  I->replaceAllUsesWith(Dummy);
2901  I->eraseFromParent();
2902  }
2903  AllPhiNodes.clear();
2904  for (auto I : AllSelectNodes) {
2905  I->replaceAllUsesWith(Dummy);
2906  I->eraseFromParent();
2907  }
2908  AllSelectNodes.clear();
2909  }
2910 };
2911 
2912 /// A helper class for combining addressing modes.
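///
/// For illustration only (not part of the original source): the expected call
/// pattern, assuming a SimplifyQuery SQ, the original address value Addr, and
/// some collection of candidate ExtAddrModes:
/// @code
///   AddressingModeCombiner AddrModes(SQ, Addr);
///   for (ExtAddrMode &AM : CandidateModes)
///     if (!AddrModes.addNewAddrMode(AM))
///       break;                          // modes differ in too many fields
///   if (AddrModes.combineAddrModes()) {
///     const ExtAddrMode &Combined = AddrModes.getAddrMode();
///     // use Combined to rewrite the address computation
///   }
/// @endcode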
2913 class AddressingModeCombiner {
2914  typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
2915  typedef std::pair<PHINode *, PHINode *> PHIPair;
2916 
2917 private:
2918  /// The addressing modes we've collected.
2919  SmallVector<ExtAddrMode, 16> AddrModes;
2920 
2921  /// The field in which the AddrModes differ, when we have more than one.
2922  ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
2923 
2924  /// Are the AddrModes that we have all just equal to their original values?
2925  bool AllAddrModesTrivial = true;
2926 
2927  /// Common Type for all different fields in addressing modes.
2928  Type *CommonType;
2929 
2930  /// SimplifyQuery for simplifyInstruction utility.
2931  const SimplifyQuery &SQ;
2932 
2933  /// Original Address.
2934  Value *Original;
2935 
2936 public:
2937  AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
2938  : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
2939 
2940  /// Get the combined AddrMode
2941  const ExtAddrMode &getAddrMode() const {
2942  return AddrModes[0];
2943  }
2944 
2945  /// Add a new AddrMode if it's compatible with the AddrModes we already
2946  /// have.
2947  /// \return True iff we succeeded in doing so.
2948  bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
2949  // Take note of whether we have any non-trivial AddrModes, as we need to detect
2950  // when all AddrModes are trivial as then we would introduce a phi or select
2951  // which just duplicates what's already there.
2952  AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
2953 
2954  // If this is the first addrmode then everything is fine.
2955  if (AddrModes.empty()) {
2956  AddrModes.emplace_back(NewAddrMode);
2957  return true;
2958  }
2959 
2960  // Figure out how different this is from the other address modes, which we
2961  // can do just by comparing against the first one given that we only care
2962  // about the cumulative difference.
2963  ExtAddrMode::FieldName ThisDifferentField =
2964  AddrModes[0].compare(NewAddrMode);
2965  if (DifferentField == ExtAddrMode::NoField)
2966  DifferentField = ThisDifferentField;
2967  else if (DifferentField != ThisDifferentField)
2968  DifferentField = ExtAddrMode::MultipleFields;
2969 
2970  // If NewAddrMode differs in more than one dimension we cannot handle it.
2971  bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
2972 
2973  // If Scale Field is different then we reject.
2974  CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
2975 
2976  // We also must reject the case when the base offset is different and the
2977  // scale reg is not null; we cannot handle this case because the merge of
2978  // different offsets would have to be used as the ScaledReg.
2979  CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
2980  !NewAddrMode.ScaledReg);
2981 
2982  // We also must reject the case when the GV is different and a BaseReg is
2983  // installed, because we want to use the base reg as a merge of GV values.
2984  CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
2985  !NewAddrMode.HasBaseReg);
2986 
2987  // Even if NewAddrMode is the same we still need to collect it, because the
2988  // original value is different. And later we will need all original values
2989  // as anchors when finding the common Phi node.
2990  if (CanHandle)
2991  AddrModes.emplace_back(NewAddrMode);
2992  else
2993  AddrModes.clear();
2994 
2995  return CanHandle;
2996  }
2997 
2998  /// Combine the addressing modes we've collected into a single
2999  /// addressing mode.
3000  /// \return True iff we successfully combined them or we only had one so
3001  /// didn't need to combine them anyway.
3002  bool combineAddrModes() {
3003  // If we have no AddrModes then they can't be combined.
3004  if (AddrModes.size() == 0)
3005  return false;
3006 
3007  // A single AddrMode can trivially be combined.
3008  if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
3009  return true;
3010 
3011  // If the AddrModes we collected are all just equal to the value they are
3012  // derived from then combining them wouldn't do anything useful.
3013  if (AllAddrModesTrivial)
3014  return false;
3015 
3016  if (!addrModeCombiningAllowed())
3017  return false;
3018 
3019  // Build a map from <original value, basic block where we saw it> to the
3020  // value of the base register.
3021  // Bail out if there is no common type.
3022  FoldAddrToValueMapping Map;
3023  if (!initializeMap(Map))
3024  return false;
3025 
3026  Value *CommonValue = findCommon(Map);
3027  if (CommonValue)
3028  AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3029  return CommonValue != nullptr;
3030  }
3031 
3032 private:
3033  /// Initialize Map with anchor values. For each address seen we set the
3034  /// value of the differing field in that address.
3035  /// At the same time we find a common type for the differing fields that we
3036  /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3037  /// Return false if there is no common type found.
3038  bool initializeMap(FoldAddrToValueMapping &Map) {
3039  // Keep track of keys where the value is null. We will need to replace it
3040  // with constant null when we know the common type.
3041  SmallVector<Value *, 2> NullValue;
3042  Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3043  for (auto &AM : AddrModes) {
3044  Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3045  if (DV) {
3046  auto *Type = DV->getType();
3047  if (CommonType && CommonType != Type)
3048  return false;
3049  CommonType = Type;
3050  Map[AM.OriginalValue] = DV;
3051  } else {
3052  NullValue.push_back(AM.OriginalValue);
3053  }
3054  }
3055  assert(CommonType && "At least one non-null value must be!");
3056  for (auto *V : NullValue)
3057  Map[V] = Constant::getNullValue(CommonType);
3058  return true;
3059  }
3060 
3061  /// We have a mapping between value A and another value B, where B was a field
3062  /// in the addressing mode represented by A. We also have an original value C
3063  /// representing the address we start with. Traversing from C through phis and
3064  /// selects we ended up with the A's in the map. This utility function tries to
3065  /// find a value V which is a field in addressing mode C such that, traversing
3066  /// through phi nodes and selects, we end up at the corresponding values B in
3067  /// the map. The utility will create new Phis/Selects if needed.
3068  // The simple example looks as follows:
3069  // BB1:
3070  // p1 = b1 + 40
3071  // br cond BB2, BB3
3072  // BB2:
3073  // p2 = b2 + 40
3074  // br BB3
3075  // BB3:
3076  // p = phi [p1, BB1], [p2, BB2]
3077  // v = load p
3078  // Map is
3079  // p1 -> b1
3080  // p2 -> b2
3081  // Request is
3082  // p -> ?
3083  // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3084  Value *findCommon(FoldAddrToValueMapping &Map) {
3085  // Tracks the simplification of newly created phi nodes. The reason we use
3086  // this mapping is because we will add newly created Phi nodes to AddrToBase.
3087  // Simplification of Phi nodes is recursive, so some Phi node may
3088  // be simplified after we added it to AddrToBase. In reality this
3089  // simplification is possible only if original phi/selects were not
3090  // simplified yet.
3091  // Using this mapping we can find the current value in AddrToBase.
3092  SimplificationTracker ST(SQ);
3093 
3094  // First step, DFS to create PHI nodes for all intermediate blocks.
3095  // Also fill traverse order for the second step.
3096  SmallVector<Value *, 32> TraverseOrder;
3097  InsertPlaceholders(Map, TraverseOrder, ST);
3098 
3099  // Second Step, fill new nodes by merged values and simplify if possible.
3100  FillPlaceholders(Map, TraverseOrder, ST);
3101 
3102  if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
3103  ST.destroyNewNodes(CommonType);
3104  return nullptr;
3105  }
3106 
3107  // Now we'd like to match new Phi nodes to existing ones.
3108  unsigned PhiNotMatchedCount = 0;
3109  if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
3110  ST.destroyNewNodes(CommonType);
3111  return nullptr;
3112  }
3113 
3114  auto *Result = ST.Get(Map.find(Original)->second);
3115  if (Result) {
3116  NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3117  NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3118  }
3119  return Result;
3120  }
3121 
3122  /// Try to match PHI node to Candidate.
3123  /// Matcher tracks the matched Phi nodes.
3124  bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3125  SmallSetVector<PHIPair, 8> &Matcher,
3126  PhiNodeSet &PhiNodesToMatch) {
3127  SmallVector<PHIPair, 8> WorkList;
3128  Matcher.insert({ PHI, Candidate });
3129  WorkList.push_back({ PHI, Candidate });
3130  SmallSet<PHIPair, 8> Visited;
3131  while (!WorkList.empty()) {
3132  auto Item = WorkList.pop_back_val();
3133  if (!Visited.insert(Item).second)
3134  continue;
3135  // We iterate over all incoming values of the Phi nodes to compare them.
3136  // If two incoming values differ, they can only match if both are Phi
3137  // nodes, the first one is a Phi we added (i.e. subject to matching), and
3138  // both live in the same basic block. In that case we tentatively state
3139  // that the pair matches and add it to the worklist to verify it.
3140  for (auto B : Item.first->blocks()) {
3141  Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3142  Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3143  if (FirstValue == SecondValue)
3144  continue;
3145 
3146  PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3147  PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3148 
3149  // If one of them is not a Phi, or
3150  // the first one is not a Phi node from the set we'd like to match, or
3151  // the Phi nodes are in different basic blocks, then
3152  // we will not be able to match.
3153  if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3154  FirstPhi->getParent() != SecondPhi->getParent())
3155  return false;
3156 
3157  // If we already matched them then continue.
3158  if (Matcher.count({ FirstPhi, SecondPhi }))
3159  continue;
3160  // The values are different, so for the overall match to succeed these
3161  // Phis must match each other.
3162  Matcher.insert({ FirstPhi, SecondPhi });
3163  // But we must still verify it.
3164  WorkList.push_back({ FirstPhi, SecondPhi });
3165  }
3166  }
3167  return true;
3168  }
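// A small illustration of the matching above (all names are hypothetical):
//   %a = phi i64 [ %x, %BB1 ], [ %c, %BB2 ]   ; newly created phi (PHI)
//   %b = phi i64 [ %x, %BB1 ], [ %d, %BB2 ]   ; existing phi (Candidate)
//   %c = phi i64 [ %y, %BB3 ], [ %z, %BB4 ]   ; newly created phi
//   %d = phi i64 [ %y, %BB3 ], [ %z, %BB4 ]   ; existing phi
// MatchPhiNode(%a, %b, ...) pairs (%a, %b); the differing incoming values
// %c and %d are both phis in the same block and %c is in PhiNodesToMatch,
// so (%c, %d) is queued, verified, and the whole set is reported as a match.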
3169 
3170  /// For the given set of PHI nodes (in the SimplificationTracker) try
3171  /// to find their equivalents.
3172  /// Returns false if this matching fails and creation of new Phi is disabled.
3173  bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3174  unsigned &PhiNotMatchedCount) {
3175  // Matched and PhiNodesToMatch iterate their elements in a deterministic
3176  // order, so the replacements (ReplacePhi) are also done in a deterministic
3177  // order.
3178  SmallSetVector<PHIPair, 8> Matched;
3179  SmallPtrSet<PHINode *, 8> WillNotMatch;
3180  PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3181  while (PhiNodesToMatch.size()) {
3182  PHINode *PHI = *PhiNodesToMatch.begin();
3183 
3184  // Add ourselves; if no Phi node in the basic block matches, we do not match.
3185  WillNotMatch.clear();
3186  WillNotMatch.insert(PHI);
3187 
3188  // Traverse all Phis until we find an equivalent one or fail to do so.
3189  bool IsMatched = false;
3190  for (auto &P : PHI->getParent()->phis()) {
3191  if (&P == PHI)
3192  continue;
3193  if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
3194  break;
3195  // If it does not match, collect all Phi nodes from the matcher;
3196  // if we end up with no match, then all these Phi nodes will not match
3197  // later either.
3198  for (auto M : Matched)
3199  WillNotMatch.insert(M.first);
3200  Matched.clear();
3201  }
3202  if (IsMatched) {
3203  // Replace all matched values and erase them.
3204  for (auto MV : Matched)
3205  ST.ReplacePhi(MV.first, MV.second);
3206  Matched.clear();
3207  continue;
3208  }
3209  // If we are not allowed to create new nodes then bail out.
3210  if (!AllowNewPhiNodes)
3211  return false;
3212  // Just remove all seen values in matcher. They will not match anything.
3213  PhiNotMatchedCount += WillNotMatch.size();
3214  for (auto *P : WillNotMatch)
3215  PhiNodesToMatch.erase(P);
3216  }
3217  return true;
3218  }
3219  /// Fill the placeholders with values from predecessors and simplify them.
3220  void FillPlaceholders(FoldAddrToValueMapping &Map,
3221  SmallVectorImpl<Value *> &TraverseOrder,
3222  SimplificationTracker &ST) {
3223  while (!TraverseOrder.empty()) {
3224  Value *Current = TraverseOrder.pop_back_val();
3225  assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3226  Value *V = Map[Current];
3227 
3228  if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3229  // CurrentValue also must be Select.
3230  auto *CurrentSelect = cast<SelectInst>(Current);
3231  auto *TrueValue = CurrentSelect->getTrueValue();
3232  assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3233  Select->setTrueValue(ST.Get(Map[TrueValue]));
3234  auto *FalseValue = CurrentSelect->getFalseValue();
3235  assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3236  Select->setFalseValue(ST.Get(Map[FalseValue]));
3237  } else {
3238  // Must be a Phi node then.
3239  PHINode *PHI = cast<PHINode>(V);
3240  auto *CurrentPhi = dyn_cast<PHINode>(Current);
3241  // Fill the Phi node with values from predecessors.
3242  for (auto B : predecessors(PHI->getParent())) {
3243  Value *PV = CurrentPhi->getIncomingValueForBlock(B);
3244  assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3245  PHI->addIncoming(ST.Get(Map[PV]), B);
3246  }
3247  }
3248  Map[Current] = ST.Simplify(V);
3249  }
3250  }
3251 
3252  /// Starting from the original value, recursively iterates over the def-use
3253  /// chain up to the known ending values represented in the map. For each
3254  /// traversed phi/select, inserts a placeholder Phi or Select.
3255  /// Reports all newly created Phi/Select nodes by adding them to the set.
3256  /// Also records the order in which the values have been traversed.
3257  void InsertPlaceholders(FoldAddrToValueMapping &Map,
3258  SmallVectorImpl<Value *> &TraverseOrder,
3259  SimplificationTracker &ST) {
3260  SmallVector<Value *, 32> Worklist;
3261  assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3262  "Address must be a Phi or Select node");
3263  auto *Dummy = UndefValue::get(CommonType);
3264  Worklist.push_back(Original);
3265  while (!Worklist.empty()) {
3266  Value *Current = Worklist.pop_back_val();
3267  // If it has already been visited or it is an ending value, skip it.
3268  if (Map.find(Current) != Map.end())
3269  continue;
3270  TraverseOrder.push_back(Current);
3271 
3272  // CurrentValue must be a Phi node or select. All others must be covered
3273  // by anchors.
3274  if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3275  // Is it OK to get metadata from OrigSelect?!
3276  // Create a Select placeholder with dummy value.
3277  SelectInst *Select = SelectInst::Create(
3278  CurrentSelect->getCondition(), Dummy, Dummy,
3279  CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3280  Map[Current] = Select;
3281  ST.insertNewSelect(Select);
3282  // We are interested in True and False values.
3283  Worklist.push_back(CurrentSelect->getTrueValue());
3284  Worklist.push_back(CurrentSelect->getFalseValue());
3285  } else {
3286  // It must be a Phi node then.
3287  PHINode *CurrentPhi = cast<PHINode>(Current);
3288  unsigned PredCount = CurrentPhi->getNumIncomingValues();
3289  PHINode *PHI =
3290  PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3291  Map[Current] = PHI;
3292  ST.insertNewPhi(PHI);
3293  for (Value *P : CurrentPhi->incoming_values())
3294  Worklist.push_back(P);
3295  }
3296  }
3297  }
3298 
3299  bool addrModeCombiningAllowed() {
3300  if (DisableComplexAddrModes)
3301  return false;
3302  switch (DifferentField) {
3303  default:
3304  return false;
3305  case ExtAddrMode::BaseRegField:
3306  return AddrSinkCombineBaseReg;
3307  case ExtAddrMode::BaseGVField:
3308  return AddrSinkCombineBaseGV;
3309  case ExtAddrMode::BaseOffsField:
3310  return AddrSinkCombineBaseOffs;
3311  case ExtAddrMode::ScaledRegField:
3312  return AddrSinkCombineScaledReg;
3313  }
3314  }
3315 };
3316 } // end anonymous namespace
3317 
3318 /// Try adding ScaleReg*Scale to the current addressing mode.
3319 /// Return true and update AddrMode if this addr mode is legal for the target,
3320 /// false if not.
3321 bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3322  unsigned Depth) {
3323  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3324  // mode. Just process that directly.
3325  if (Scale == 1)
3326  return matchAddr(ScaleReg, Depth);
3327 
3328  // If the scale is 0, it takes nothing to add this.
3329  if (Scale == 0)
3330  return true;
3331 
3332  // If we already have a scale of this value, we can add to it, otherwise, we
3333  // need an available scale field.
3334  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3335  return false;
3336 
3337  ExtAddrMode TestAddrMode = AddrMode;
3338 
3339  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3340  // [A+B + A*7] -> [B+A*8].
3341  TestAddrMode.Scale += Scale;
3342  TestAddrMode.ScaledReg = ScaleReg;
3343 
3344  // If the new address isn't legal, bail out.
3345  if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3346  return false;
3347 
3348  // It was legal, so commit it.
3349  AddrMode = TestAddrMode;
3350 
3351  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3352  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3353  // X*Scale + C*Scale to addr mode.
3354  ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3355  if (isa<Instruction>(ScaleReg) && // not a constant expr.
3356  match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3357  TestAddrMode.ScaledReg = AddLHS;
3358  TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3359 
3360  // If this addressing mode is legal, commit it and remember that we folded
3361  // this instruction.
3362  if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3363  AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3364  AddrMode = TestAddrMode;
3365  return true;
3366  }
3367  }
3368 
3369  // Otherwise, not (x+c)*scale, just return what we have.
3370  return true;
3371 }
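// For illustration (hypothetical values): matching %idx as the scaled register
// with Scale == 4 when %idx is itself "add i64 %x, 3" folds the constant into
// the offset, yielding ScaledReg = %x, Scale = 4 and BaseOffs += 12, provided
// the target reports the resulting mode as legal via isLegalAddressingMode.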
3372 
3373 /// This is a little filter, which returns true if an addressing computation
3374 /// involving I might be folded into a load/store accessing it.
3375 /// This doesn't need to be perfect, but needs to accept at least
3376 /// the set of instructions that MatchOperationAddr can.
3377 static bool MightBeFoldableInst(Instruction *I) {
3378  switch (I->getOpcode()) {
3379  case Instruction::BitCast:
3380  case Instruction::AddrSpaceCast:
3381  // Don't touch identity bitcasts.
3382  if (I->getType() == I->getOperand(0)->getType())
3383  return false;
3384  return I->getType()->isIntOrPtrTy();
3385  case Instruction::PtrToInt:
3386  // PtrToInt is always a noop, as we know that the int type is pointer sized.
3387  return true;
3388  case Instruction::IntToPtr:
3389  // We know the input is intptr_t, so this is foldable.
3390  return true;
3391  case Instruction::Add:
3392  return true;
3393  case Instruction::Mul:
3394  case Instruction::Shl:
3395  // Can only handle X*C and X << C.
3396  return isa<ConstantInt>(I->getOperand(1));
3397  case Instruction::GetElementPtr:
3398  return true;
3399  default:
3400  return false;
3401  }
3402 }
3403 
3404 /// Check whether or not \p Val is a legal instruction for \p TLI.
3405 /// \note \p Val is assumed to be the product of some type promotion.
3406 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3407 /// to be legal, as the non-promoted value would have had the same state.
3408 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3409  const DataLayout &DL, Value *Val) {
3410  Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3411  if (!PromotedInst)
3412  return false;
3413  int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3414  // If the ISDOpcode is undefined, it was undefined before the promotion.
3415  if (!ISDOpcode)
3416  return true;
3417  // Otherwise, check if the promoted instruction is legal or not.
3418  return TLI.isOperationLegalOrCustom(
3419  ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3420 }
3421 
3422 namespace {
3423 
3424 /// Helper class to perform type promotion.
3425 class TypePromotionHelper {
3426  /// Utility function to add a promoted instruction \p ExtOpnd to
3427  /// \p PromotedInsts and record the type of extension we have seen.
3428  static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3429  Instruction *ExtOpnd,
3430  bool IsSExt) {
3431  ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3432  InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3433  if (It != PromotedInsts.end()) {
3434  // If the new extension is same as original, the information in
3435  // PromotedInsts[ExtOpnd] is still correct.
3436  if (It->second.getInt() == ExtTy)
3437  return;
3438 
3439  // Now the new extension is different from old extension, we make
3440  // the type information invalid by setting extension type to
3441  // BothExtension.
3442  ExtTy = BothExtension;
3443  }
3444  PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3445  }
3446 
3447  /// Utility function to query the original type of instruction \p Opnd
3448  /// with a matched extension type. If the extension doesn't match, we
3449  /// cannot use the information we had on the original type.
3450  /// BothExtension doesn't match any extension type.
3451  static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3452  Instruction *Opnd,
3453  bool IsSExt) {
3454  ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3455  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3456  if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3457  return It->second.getPointer();
3458  return nullptr;
3459  }
3460 
3461  /// Utility function to check whether or not a sign or zero extension
3462  /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3463  /// either using the operands of \p Inst or promoting \p Inst.
3464  /// The type of the extension is defined by \p IsSExt.
3465  /// In other words, check if:
3466  /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3467  /// #1 Promotion applies:
3468  /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3469  /// #2 Operand reuses:
3470  /// ext opnd1 to ConsideredExtType.
3471  /// \p PromotedInsts maps the instructions to their type before promotion.
3472  static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3473  const InstrToOrigTy &PromotedInsts, bool IsSExt);
3474 
3475  /// Utility function to determine if \p OpIdx should be promoted when
3476  /// promoting \p Inst.
3477  static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3478  return !(isa<SelectInst>(Inst) && OpIdx == 0);
3479  }
3480 
3481  /// Utility function to promote the operand of \p Ext when this
3482  /// operand is a promotable trunc or sext or zext.
3483  /// \p PromotedInsts maps the instructions to their type before promotion.
3484  /// \p CreatedInstsCost[out] contains the cost of all instructions
3485  /// created to promote the operand of Ext.
3486  /// Newly added extensions are inserted in \p Exts.
3487  /// Newly added truncates are inserted in \p Truncs.
3488  /// Should never be called directly.
3489  /// \return The promoted value which is used instead of Ext.
3490  static Value *promoteOperandForTruncAndAnyExt(
3491  Instruction *Ext, TypePromotionTransaction &TPT,
3492  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3493  SmallVectorImpl<Instruction *> *Exts,
3494  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3495 
3496  /// Utility function to promote the operand of \p Ext when this
3497  /// operand is promotable and is not a supported trunc or sext.
3498  /// \p PromotedInsts maps the instructions to their type before promotion.
3499  /// \p CreatedInstsCost[out] contains the cost of all the instructions
3500  /// created to promote the operand of Ext.
3501  /// Newly added extensions are inserted in \p Exts.
3502  /// Newly added truncates are inserted in \p Truncs.
3503  /// Should never be called directly.
3504  /// \return The promoted value which is used instead of Ext.
3505  static Value *promoteOperandForOther(Instruction *Ext,
3506  TypePromotionTransaction &TPT,
3507  InstrToOrigTy &PromotedInsts,
3508  unsigned &CreatedInstsCost,
3509  SmallVectorImpl<Instruction *> *Exts,
3510  SmallVectorImpl<Instruction *> *Truncs,
3511  const TargetLowering &TLI, bool IsSExt);
3512 
3513  /// \see promoteOperandForOther.
3514  static Value *signExtendOperandForOther(
3515  Instruction *Ext, TypePromotionTransaction &TPT,
3516  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3517  SmallVectorImpl<Instruction *> *Exts,
3518  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3519  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3520  Exts, Truncs, TLI, true);
3521  }
3522 
3523  /// \see promoteOperandForOther.
3524  static Value *zeroExtendOperandForOther(
3525  Instruction *Ext, TypePromotionTransaction &TPT,
3526  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3527  SmallVectorImpl<Instruction *> *Exts,
3528  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3529  return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3530  Exts, Truncs, TLI, false);
3531  }
3532 
3533 public:
3534  /// Type for the utility function that promotes the operand of Ext.
3535  using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3536  InstrToOrigTy &PromotedInsts,
3537  unsigned &CreatedInstsCost,
3538  SmallVectorImpl<Instruction *> *Exts,
3539  SmallVectorImpl<Instruction *> *Truncs,
3540  const TargetLowering &TLI);
3541 
3542  /// Given a sign/zero extend instruction \p Ext, return the appropriate
3543  /// action to promote the operand of \p Ext instead of using Ext.
3544  /// \return NULL if no promotable action is possible with the current
3545  /// sign extension.
3546  /// \p InsertedInsts keeps track of all the instructions inserted by the
3547  /// other CodeGenPrepare optimizations. This information is important
3548  /// because we do not want to promote these instructions as CodeGenPrepare
3549  /// will reinsert them later. Thus creating an infinite loop: create/remove.
3550  /// \p PromotedInsts maps the instructions to their type before promotion.
3551  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3552  const TargetLowering &TLI,
3553  const InstrToOrigTy &PromotedInsts);
3554 };
3555 
3556 } // end anonymous namespace
3557 
3558 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3559  Type *ConsideredExtType,
3560  const InstrToOrigTy &PromotedInsts,
3561  bool IsSExt) {
3562  // The promotion helper does not know how to deal with vector types yet.
3563  // To be able to fix that, we would need to fix the places where we
3564  // statically extend, e.g., constants and such.
3565  if (Inst->getType()->isVectorTy())
3566  return false;
3567 
3568  // We can always get through zext.
3569  if (isa<ZExtInst>(Inst))
3570  return true;
3571 
3572  // sext(sext) is ok too.
3573  if (IsSExt && isa<SExtInst>(Inst))
3574  return true;
3575 
3576  // We can get through a binary operator if the promotion is legal. In other
3577  // words, the binary operator must have a nuw or nsw flag.
3578  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3579  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3580  ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3581  (IsSExt && BinOp->hasNoSignedWrap())))
3582  return true;
3583 
3584  // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3585  if ((Inst->getOpcode() == Instruction::And ||
3586  Inst->getOpcode() == Instruction::Or))
3587  return true;
3588 
3589  // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3590  if (Inst->getOpcode() == Instruction::Xor) {
3591  const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3592  // Make sure it is not a NOT.
3593  if (Cst && !Cst->getValue().isAllOnesValue())
3594  return true;
3595  }
3596 
3597  // zext(lshr(opnd, cst)) --> lshr(zext(opnd), zext(cst))
3598  // It may change a poisoned value into a regular value, like
3599  // zext i32 (lshr i8 %val, 12) --> lshr i32 (zext i8 %val), 12
3600  //          poisoned value               regular value
3601  // It should be OK since undef covers a valid value.
3602  if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3603  return true;
3604 
3605  // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3606  // It may change a poisoned value into a regular value, like
3607  // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3608  //          poisoned value               regular value
3609  // It should be OK since undef covers a valid value.
3610  if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3611  const Instruction *ExtInst =
3612  dyn_cast<const Instruction>(*Inst->user_begin());
3613  if (ExtInst->hasOneUse()) {
3614  const Instruction *AndInst =
3615  dyn_cast<const Instruction>(*ExtInst->user_begin());
3616  if (AndInst && AndInst->getOpcode() == Instruction::And) {
3617  const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3618  if (Cst &&
3619  Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3620  return true;
3621  }
3622  }
3623  }
3624 
3625  // Check if we can do the following simplification.
3626  // ext(trunc(opnd)) --> ext(opnd)
3627  if (!isa<TruncInst>(Inst))
3628  return false;
3629 
3630  Value *OpndVal = Inst->getOperand(0);
3631  // Check if we can use this operand in the extension.
3632  // If the type is larger than the result type of the extension, we cannot.
3633  if (!OpndVal->getType()->isIntegerTy() ||
3634  OpndVal->getType()->getIntegerBitWidth() >
3635  ConsideredExtType->getIntegerBitWidth())
3636  return false;
3637 
3638  // If the operand of the truncate is not an instruction, we will not have
3639  // any information on the dropped bits.
3640  // (Actually we could for constant but it is not worth the extra logic).
3641  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3642  if (!Opnd)
3643  return false;
3644 
3645  // Check if the source type is narrow enough.
3646  // I.e., check that the trunc just drops bits that were extended by the same
3647  // kind of extension.
3648  // #1 get the type of the operand and check the kind of the extended bits.
3649  const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3650  if (OpndType)
3651  ;
3652  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3653  OpndType = Opnd->getOperand(0)->getType();
3654  else
3655  return false;
3656 
3657  // #2 check that the truncate just drops extended bits.
3658  return Inst->getType()->getIntegerBitWidth() >=
3659  OpndType->getIntegerBitWidth();
3660 }
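// Two illustrative cases for canGetThrough (hypothetical IR, simplified):
//   %t = add nsw i32 %a, %b
//   %e = sext i32 %t to i64      ; OK: an nsw add can be promoted for sext
//   %t = trunc i64 %w to i32
//   %e = zext i32 %t to i64      ; OK only if %w is itself a zext (or was
//                                ; promoted by one) from a narrow enough type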
3661 
3662 TypePromotionHelper::Action TypePromotionHelper::getAction(
3663  Instruction *Ext, const SetOfInstrs &InsertedInsts,
3664  const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3665  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3666  "Unexpected instruction type");
3667  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3668  Type *ExtTy = Ext->getType();
3669  bool IsSExt = isa<SExtInst>(Ext);
3670  // If the operand of the extension is not an instruction, we cannot
3671  // get through.
3672  // If it is, check whether we can get through.
3673  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3674  return nullptr;
3675 
3676  // Do not promote if the operand has been added by codegenprepare.
3677  // Otherwise, it means we are undoing an optimization that is likely to be
3678  // redone, thus causing potential infinite loop.
3679  if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3680  return nullptr;
3681 
3682  // SExt or Trunc instructions.
3683  // Return the related handler.
3684  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3685  isa<ZExtInst>(ExtOpnd))
3686  return promoteOperandForTruncAndAnyExt;
3687 
3688  // Regular instruction.
3689  // Abort early if we will have to insert non-free instructions.
3690  if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3691  return nullptr;
3692  return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3693 }
3694 
3695 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3696  Instruction *SExt, TypePromotionTransaction &TPT,
3697  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3698  SmallVectorImpl<Instruction *> *Exts,
3699  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3700  // By construction, the operand of SExt is an instruction. Otherwise we cannot
3701  // get through it and this method should not be called.
3702  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3703  Value *ExtVal = SExt;
3704  bool HasMergedNonFreeExt = false;
3705  if (isa<ZExtInst>(SExtOpnd)) {
3706  // Replace s|zext(zext(opnd))
3707  // => zext(opnd).
3708  HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3709  Value *ZExt =
3710  TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3711  TPT.replaceAllUsesWith(SExt, ZExt);
3712  TPT.eraseInstruction(SExt);
3713  ExtVal = ZExt;
3714  } else {
3715  // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3716  // => z|sext(opnd).
3717  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3718  }
3719  CreatedInstsCost = 0;
3720 
3721  // Remove dead code.
3722  if (SExtOpnd->use_empty())
3723  TPT.eraseInstruction(SExtOpnd);
3724 
3725  // Check if the extension is still needed.
3726  Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3727  if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3728  if (ExtInst) {
3729  if (Exts)
3730  Exts->push_back(ExtInst);
3731  CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3732  }
3733  return ExtVal;
3734  }
3735 
3736  // At this point we have: ext ty opnd to ty.
3737  // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3738  Value *NextVal = ExtInst->getOperand(0);
3739  TPT.eraseInstruction(ExtInst, NextVal);
3740  return NextVal;
3741 }
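// A minimal illustration of the rewrite above (hypothetical values):
//   %z = zext i8 %v to i32
//   %s = sext i32 %z to i64
// becomes
//   %z2 = zext i8 %v to i64
// with uses of %s redirected to %z2 and the now-dead instructions erased.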
3742 
3743 Value *TypePromotionHelper::promoteOperandForOther(
3744  Instruction *Ext, TypePromotionTransaction &TPT,
3745  InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3746  SmallVectorImpl<Instruction *> *Exts,
3747  SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3748  bool IsSExt) {
3749  // By construction, the operand of Ext is an instruction. Otherwise we cannot
3750  // get through it and this method should not be called.
3751  Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3752  CreatedInstsCost = 0;
3753  if (!ExtOpnd->hasOneUse()) {
3754  // ExtOpnd will be promoted.
3755  // All its uses, but Ext, will need to use a truncated value of the
3756  // promoted version.
3757  // Create the truncate now.
3758  Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3759  if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3760  // Insert it just after the definition.
3761  ITrunc->moveAfter(ExtOpnd);
3762  if (Truncs)
3763  Truncs->push_back(ITrunc);
3764  }
3765 
3766  TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3767  // Restore the operand of Ext (which has been replaced by the previous call
3768  // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3769  TPT.setOperand(Ext, 0, ExtOpnd);
3770  }
3771 
3772  // Get through the Instruction:
3773  // 1. Update its type.
3774  // 2. Replace the uses of Ext by Inst.
3775  // 3. Extend each operand that needs to be extended.
3776 
3777  // Remember the original type of the instruction before promotion.
3778  // This is useful to know that the high bits are sign extended bits.
3779  addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
3780  // Step #1.
3781  TPT.mutateType(ExtOpnd, Ext->getType());
3782  // Step #2.
3783  TPT.replaceAllUsesWith(Ext, ExtOpnd);
3784  // Step #3.
3785  Instruction *ExtForOpnd = Ext;
3786 
3787  LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
3788  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3789  ++OpIdx) {
3790  LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3791  if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3792  !shouldExtOperand(ExtOpnd, OpIdx)) {
3793  LLVM_DEBUG(dbgs() << "No need to propagate\n");
3794  continue;
3795  }
3796  // Check if we can statically extend the operand.
3797  Value *Opnd = ExtOpnd->getOperand(OpIdx);
3798  if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3799  LLVM_DEBUG(dbgs() << "Statically extend\n");
3800  unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3801  APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3802  : Cst->getValue().zext(BitWidth);
3803  TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3804  continue;
3805  }
3806  // UndefValues are typed, so we have to statically extend them.
3807  if (isa<UndefValue>(Opnd)) {
3808  LLVM_DEBUG(dbgs() << "Statically extend\n");
3809  TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3810  continue;
3811  }
3812 
3813  // Otherwise we have to explicitly sign or zero extend the operand.
3814  // Check if Ext was reused to extend an operand.
3815  if (!ExtForOpnd) {
3816  // If yes, create a new one.
3817  LLVM_DEBUG(dbgs() << "More operands to ext\n");
3818  Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3819  : TPT.createZExt(Ext, Opnd, Ext->getType());
3820  if (!isa<Instruction>(ValForExtOpnd)) {
3821  TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3822  continue;
3823  }
3824  ExtForOpnd = cast<Instruction>(ValForExtOpnd);
3825  }
3826  if (Exts)
3827  Exts->push_back(ExtForOpnd);
3828  TPT.setOperand(ExtForOpnd, 0, Opnd);
3829 
3830  // Move the sign extension before the insertion point.
3831  TPT.moveBefore(ExtForOpnd, ExtOpnd);
3832  TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
3833  CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
3834  // If more sext are required, new instructions will have to be created.
3835  ExtForOpnd = nullptr;
3836  }
3837  if (ExtForOpnd == Ext) {
3838  LLVM_DEBUG(dbgs() << "Extension is useless now\n");
3839  TPT.eraseInstruction(Ext);
3840  }
3841  return ExtOpnd;
3842 }
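// A rough sketch of the "other" promotion path (names are hypothetical). Given
//   %t = add nsw i32 %a, 1
//   %e = sext i32 %t to i64
//   %u = mul i32 %t, 3            ; extra use of %t
// the add is mutated to i64, uses of %e are redirected to it, the constant is
// statically sign extended, %a gets an explicit sext, and the extra use is fed
// through a trunc of the promoted value:
//   %a64 = sext i32 %a to i64
//   %t   = add nsw i64 %a64, 1
//   %tr  = trunc i64 %t to i32
//   %u   = mul i32 %tr, 3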
3843 
3844 /// Check whether or not promoting an instruction to a wider type is profitable.
3845 /// \p NewCost gives the cost of extension instructions created by the
3846 /// promotion.
3847 /// \p OldCost gives the cost of the extension instructions before the promotion
3848 /// plus the number of instructions that have been
3849 /// matched in the addressing mode by the promotion.
3850 /// \p PromotedOperand is the value that has been promoted.
3851 /// \return True if the promotion is profitable, false otherwise.
3852 bool AddressingModeMatcher::isPromotionProfitable(
3853  unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
3854  LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
3855  << '\n');
3856  // The cost of the new extensions is greater than the cost of the
3857  // old extension plus what we folded.
3858  // This is not profitable.
3859  if (NewCost > OldCost)
3860  return false;
3861  if (NewCost < OldCost)
3862  return true;
3863  // The promotion is neutral but it may help folding the sign extension in
3864  // loads for instance.
3865  // Check that we did not create an illegal instruction.
3866  return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
3867 }
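// For example (hypothetical counts): if the promotion created one non-free
// extension (NewCost = 1) but removed one extension and let one extra
// instruction be folded into the addressing mode (OldCost = 1 + 1 = 2), it is
// profitable. With NewCost == OldCost it is accepted only if the promoted
// instruction is still legal for the target.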
3868 
3869 /// Given an instruction or constant expr, see if we can fold the operation
3870 /// into the addressing mode. If so, update the addressing mode and return
3871 /// true, otherwise return false without modifying AddrMode.
3872 /// If \p MovedAway is not NULL, it contains the information of whether or
3873 /// not AddrInst has to be folded into the addressing mode on success.
3874 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
3875 /// because it has been moved away.
3876 /// Thus AddrInst must not be added to the matched instructions.
3877 /// This state can happen when AddrInst is a sext, since it may be moved away.
3878 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
3879 /// not be referenced anymore.
3880 bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
3881  unsigned Depth,
3882  bool *MovedAway) {
3883  // Avoid exponential behavior on extremely deep expression trees.
3884  if (Depth >= 5) return false;
3885 
3886  // By default, all matched instructions stay in place.
3887  if (MovedAway)
3888  *MovedAway = false;
3889 
3890  switch (Opcode) {
3891  case Instruction::PtrToInt:
3892  // PtrToInt is always a noop, as we know that the int type is pointer sized.
3893  return matchAddr(AddrInst->getOperand(0), Depth);
3894  case Instruction::IntToPtr: {
3895  auto AS = AddrInst->getType()->getPointerAddressSpace();
3896  auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
3897  // This inttoptr is a no-op if the integer type is pointer sized.
3898  if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
3899  return matchAddr(AddrInst->getOperand(0), Depth);
3900  return false;
3901  }
3902  case Instruction::BitCast:
3903  // BitCast is always a noop, and we can handle it as long as it is
3904  // int->int or pointer->pointer (we don't want int<->fp or something).
3905  if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
3906  // Don't touch identity bitcasts. These were probably put here by LSR,
3907  // and we don't want to mess around with them. Assume it knows what it
3908  // is doing.
3909  AddrInst->getOperand(0)->getType() != AddrInst->getType())
3910  return matchAddr(AddrInst->getOperand(0), Depth);
3911  return false;
3912  case Instruction::AddrSpaceCast: {
3913  unsigned SrcAS
3914  = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
3915  unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
3916  if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3917  return matchAddr(AddrInst->getOperand(0), Depth);
3918  return false;
3919  }
3920  case Instruction::Add: {
3921  // Check to see if we can merge in the RHS then the LHS. If so, we win.
3922  ExtAddrMode BackupAddrMode = AddrMode;
3923  unsigned OldSize = AddrModeInsts.size();
3924  // Start a transaction at this point.
3925  // The LHS may match but not the RHS.
3926  // Therefore, we need a higher level restoration point to undo partially
3927  // matched operation.
3928  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3929  TPT.getRestorationPoint();
3930 
3931  if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
3932  matchAddr(AddrInst->getOperand(0), Depth+1))
3933  return true;
3934 
3935  // Restore the old addr mode info.
3936  AddrMode = BackupAddrMode;
3937  AddrModeInsts.resize(OldSize);
3938  TPT.rollback(LastKnownGood);
3939 
3940  // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
3941  if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
3942  matchAddr(AddrInst->getOperand(1), Depth+1))
3943  return true;
3944 
3945  // Otherwise we definitely can't merge the ADD in.
3946  AddrMode = BackupAddrMode;
3947  AddrModeInsts.resize(OldSize);
3948  TPT.rollback(LastKnownGood);
3949  break;
3950  }
3951  //case Instruction::Or:
3952  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
3953  //break;
3954  case Instruction::Mul:
3955  case Instruction::Shl: {
3956  // Can only handle X*C and X << C.
3957  ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
3958  if (!RHS || RHS->getBitWidth() > 64)
3959  return false;
3960  int64_t Scale = RHS->getSExtValue();
3961  if (Opcode == Instruction::Shl)
3962  Scale = 1LL << Scale;
3963 
3964  return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
3965  }
3966  case Instruction::GetElementPtr: {
3967  // Scan the GEP. We check whether it contains only constant offsets and at
3968  // most one variable offset.
3969  int VariableOperand = -1;
3970  unsigned VariableScale = 0;
3971 
3972  int64_t ConstantOffset = 0;
3973  gep_type_iterator GTI = gep_type_begin(AddrInst);
3974  for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
3975  if (StructType *STy = GTI.getStructTypeOrNull()) {
3976  const StructLayout *SL = DL.getStructLayout(STy);
3977  unsigned Idx =
3978  cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
3979  ConstantOffset += SL->getElementOffset(Idx);
3980  } else {
3981  uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
3982  if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
3983  const APInt &CVal = CI->getValue();
3984  if (CVal.getMinSignedBits() <= 64) {
3985  ConstantOffset += CVal.getSExtValue() * TypeSize;
3986  continue;
3987  }
3988  }
3989  if (TypeSize) { // Scales of zero don't do anything.
3990  // We only allow one variable index at the moment.
3991  if (VariableOperand != -1)
3992  return false;
3993 
3994  // Remember the variable index.
3995  VariableOperand = i;
3996  VariableScale = TypeSize;
3997  }
3998  }
3999  }
4000 
4001  // A common case is for the GEP to only do a constant offset. In this case,
4002  // just add it to the disp field and check validity.
4003  if (VariableOperand == -1) {
4004  AddrMode.BaseOffs += ConstantOffset;
4005  if (ConstantOffset == 0 ||
4006  TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4007  // Check to see if we can fold the base pointer in too.
4008  if (matchAddr(AddrInst->getOperand(0), Depth+1))
4009  return true;
4010  } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4011  TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4012  ConstantOffset > 0) {
4013  // Record GEPs with non-zero offsets as candidates for splitting in the
4014  // event that the offset cannot fit into the r+i addressing mode.
4015  // Simple and common case that only one GEP is used in calculating the
4016  // address for the memory access.
4017  Value *Base = AddrInst->getOperand(0);
4018  auto *BaseI = dyn_cast<Instruction>(Base);
4019  auto *GEP = cast<GetElementPtrInst>(AddrInst);
4020  if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4021  (BaseI && !isa<CastInst>(BaseI) &&
4022  !isa<GetElementPtrInst>(BaseI))) {
4023  // If the base is an instruction, make sure the GEP is not in the same
4024  // basic block as the base. If the base is an argument or global
4025  // value, make sure the GEP is not in the entry block. Otherwise,
4026  // instruction selection can undo the split. Also make sure the
4027  // parent block allows inserting non-PHI instructions before the
4028  // terminator.
4029  BasicBlock *Parent =
4030  BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4031  if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
4032  LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4033  }
4034  }
4035  AddrMode.BaseOffs -= ConstantOffset;
4036  return false;
4037  }
4038 
4039  // Save the valid addressing mode in case we can't match.
4040  ExtAddrMode BackupAddrMode = AddrMode;
4041  unsigned OldSize = AddrModeInsts.size();
4042 
4043  // See if the scale and offset amount is valid for this target.
4044  AddrMode.BaseOffs += ConstantOffset;
4045 
4046  // Match the base operand of the GEP.
4047  if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4048  // If it couldn't be matched, just stuff the value in a register.
4049  if (AddrMode.HasBaseReg) {
4050  AddrMode = BackupAddrMode;
4051  AddrModeInsts.resize(OldSize);
4052  return false;
4053  }
4054  AddrMode.HasBaseReg = true;
4055  AddrMode.BaseReg = AddrInst->getOperand(0);
4056  }
4057 
4058  // Match the remaining variable portion of the GEP.
4059  if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4060  Depth)) {
4061  // If it couldn't be matched, try stuffing the base into a register
4062  // instead of matching it, and retrying the match of the scale.
4063  AddrMode = BackupAddrMode;
4064  AddrModeInsts.resize(OldSize);
4065  if (AddrMode.HasBaseReg)
4066  return false;
4067  AddrMode.HasBaseReg = true;
4068  AddrMode.BaseReg = AddrInst->getOperand(0);
4069  AddrMode.BaseOffs += ConstantOffset;
4070  if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4071  VariableScale, Depth)) {
4072  // If even that didn't work, bail.
4073  AddrMode = BackupAddrMode;
4074  AddrModeInsts.resize(OldSize);
4075  return false;
4076  }
4077  }
4078 
4079  return true;
4080  }
4081  case Instruction::SExt:
4082  case Instruction::ZExt: {
4083  Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4084  if (!Ext)
4085  return false;
4086 
4087  // Try to move this ext out of the way of the addressing mode.
4088  // Ask for a method for doing so.
4089  TypePromotionHelper::Action TPH =
4090  TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4091  if (!TPH)
4092  return false;
4093 
4094  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4095  TPT.getRestorationPoint();
4096  unsigned CreatedInstsCost = 0;
4097  unsigned ExtCost = !TLI.isExtFree(Ext);
4098  Value *PromotedOperand =
4099  TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4100  // SExt has been moved away.
4101  // Thus either it will be rematched later in the recursive calls or it is
4102  // gone. Anyway, we must not fold it into the addressing mode at this point.
4103  // E.g.,
4104  // op = add opnd, 1
4105  // idx = ext op
4106  // addr = gep base, idx
4107  // is now:
4108  // promotedOpnd = ext opnd <- no match here
4109  // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4110  // addr = gep base, op <- match
4111  if (MovedAway)
4112  *MovedAway = true;
4113 
4114  assert(PromotedOperand &&
4115  "TypePromotionHelper should have filtered out those cases");
4116 
4117  ExtAddrMode BackupAddrMode = AddrMode;
4118  unsigned OldSize = AddrModeInsts.size();
4119 
4120  if (!matchAddr(PromotedOperand, Depth) ||
4121  // The total of the new cost is equal to the cost of the created
4122  // instructions.
4123  // The total of the old cost is equal to the cost of the extension plus
4124  // what we have saved in the addressing mode.
4125  !isPromotionProfitable(CreatedInstsCost,
4126  ExtCost + (AddrModeInsts.size() - OldSize),
4127  PromotedOperand)) {
4128  AddrMode = BackupAddrMode;
4129  AddrModeInsts.resize(OldSize);
4130  LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4131  TPT.rollback(LastKnownGood);
4132  return false;
4133  }
4134  return true;
4135  }
4136  }
4137  return false;
4138 }
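// An illustrative GEP decomposition (hypothetical types and layout):
//   %p = getelementptr { i32, [10 x i32] }, { i32, [10 x i32] }* %s,
//        i64 0, i32 1, i64 %i
// The constant indices contribute a fixed offset (4 bytes here) and the single
// variable index contributes a scale equal to its element size, so the matcher
// ends up with roughly BaseReg = %s, BaseOffs = 4, ScaledReg = %i, Scale = 4,
// assuming the target accepts that addressing mode.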
4139 
4140 /// If we can, try to add the value of 'Addr' into the current addressing mode.
4141 /// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4142 /// unmodified. This assumes that Addr is either a pointer type or intptr_t
4143 /// for the target.
4144 ///
4145 bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4146  // Start a transaction at this point that we will rollback if the matching
4147  // fails.
4148  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4149  TPT.getRestorationPoint();
4150  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4151  // Fold in immediates if legal for the target.
4152  AddrMode.BaseOffs += CI->getSExtValue();
4153  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4154  return true;
4155  AddrMode.BaseOffs -= CI->getSExtValue();
4156  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4157  // If this is a global variable, try to fold it into the addressing mode.
4158  if (!AddrMode.BaseGV) {
4159  AddrMode.BaseGV = GV;
4160  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4161  return true;
4162  AddrMode.BaseGV = nullptr;
4163  }
4164  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4165  ExtAddrMode BackupAddrMode = AddrMode;
4166  unsigned OldSize = AddrModeInsts.size();
4167 
4168  // Check to see if it is possible to fold this operation.
4169  bool MovedAway = false;
4170  if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4171  // This instruction may have been moved away. If so, there is nothing
4172  // to check here.
4173  if (MovedAway)
4174  return true;
4175  // Okay, it's possible to fold this. Check to see if it is actually
4176  // *profitable* to do so. We use a simple cost model to avoid increasing
4177  // register pressure too much.
4178  if (I->hasOneUse() ||
4179  isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4180  AddrModeInsts.push_back(I);
4181  return true;
4182  }
4183 
4184  // It isn't profitable to do this, roll back.
4185  //cerr << "NOT FOLDING: " << *I;
4186  AddrMode = BackupAddrMode;
4187  AddrModeInsts.resize(OldSize);
4188  TPT.rollback(LastKnownGood);
4189  }
4190  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4191  if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4192  return true;
4193  TPT.rollback(LastKnownGood);
4194  } else if (isa<ConstantPointerNull>(Addr)) {
4195  // Null pointer gets folded without affecting the addressing mode.
4196  return true;
4197  }
4198 
4199  // Worst case, the target should support [reg] addressing modes. :)
4200  if (!AddrMode.HasBaseReg) {
4201  AddrMode.HasBaseReg = true;
4202  AddrMode.BaseReg = Addr;
4203  // Still check for legality in case the target supports [imm] but not [i+r].
4204  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4205  return true;
4206  AddrMode.HasBaseReg = false;
4207  AddrMode.BaseReg = nullptr;
4208  }
4209 
4210  // If the base register is already taken, see if we can do [r+r].
4211  if (AddrMode.Scale == 0) {
4212  AddrMode.Scale = 1;
4213  AddrMode.ScaledReg = Addr;
4214  if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4215  return true;
4216  AddrMode.Scale = 0;
4217  AddrMode.ScaledReg = nullptr;
4218  }
4219  // Couldn't match.
4220  TPT.rollback(LastKnownGood);
4221  return false;
4222 }
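// For example (target dependent): for an address such as
//   %a = getelementptr i8, i8* @g, i64 16
// matchAddr can end up with BaseGV = @g and BaseOffs = 16 and no registers,
// provided isLegalAddressingMode accepts a global plus an immediate for this
// access type and address space; otherwise it falls back to [reg] or [r+r].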
4223 
4224 /// Check to see if all uses of OpVal by the specified inline asm call are due
4225 /// to memory operands. If so, return true, otherwise return false.
4226 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4227  const TargetLowering &TLI,
4228  const TargetRegisterInfo &TRI) {
4229  const Function *F = CI->getFunction();
4230  TargetLowering::AsmOperandInfoVector TargetConstraints =
4231  TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4232  ImmutableCallSite(CI));
4233 
4234  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4235  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4236 
4237  // Compute the constraint code and ConstraintType to use.
4238  TLI.ComputeConstraintToUse(OpInfo, SDValue());
4239 
4240  // If this asm operand is our Value*, and if it isn't an indirect memory
4241  // operand, we can't fold it!
4242  if (OpInfo.CallOperandVal == OpVal &&
4243  (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4244  !OpInfo.isIndirect))
4245  return false;
4246  }
4247 
4248  return true;
4249 }
4250 
4251 // Max number of memory uses to look at before aborting the search to conserve
4252 // compile time.
4253 static constexpr int MaxMemoryUsesToScan = 20;
4254 
4255 /// Recursively walk all the uses of I until we find a memory use.
4256 /// If we find an obviously non-foldable instruction, return true.
4257 /// Add the ultimately found memory instructions to MemoryUses.
4258 static bool FindAllMemoryUses(
4259  Instruction *I,
4260  SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4261  SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4262  const TargetRegisterInfo &TRI, int SeenInsts = 0) {
4263  // If we already considered this instruction, we're done.
4264  if (!ConsideredInsts.insert(I).second)
4265  return false;
4266 
4267  // If this is an obviously unfoldable instruction, bail out.
4268  if (!MightBeFoldableInst(I))
4269  return true;
4270 
4271  const bool OptSize = I->getFunction()->optForSize();
4272 
4273  // Loop over all the uses, recursively processing them.
4274  for (Use &U : I->uses()) {
4275  // Conservatively return true if we're seeing a large number or a deep chain
4276  // of users. This avoids excessive compilation times in pathological cases.
4277  if (SeenInsts++ >= MaxMemoryUsesToScan)
4278  return true;
4279 
4280  Instruction *UserI = cast<Instruction>(U.getUser());
4281  if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4282  MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4283  continue;
4284  }
4285 
4286  if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4287  unsigned opNo = U.getOperandNo();
4288  if (opNo != StoreInst::getPointerOperandIndex())
4289  return true; // Storing addr, not into addr.
4290  MemoryUses.push_back(std::make_pair(SI, opNo));
4291  continue;
4292  }
4293 
4294  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4295  unsigned opNo = U.getOperandNo();
4296  if (opNo != AtomicRMWInst::getPointerOperandIndex())
4297  return true; // Storing addr, not into addr.
4298  MemoryUses.push_back(std::make_pair(RMW, opNo));
4299  continue;
4300  }
4301 
4302  if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4303  unsigned opNo = U.getOperandNo();
4304  if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4305  return true; // Storing addr, not into addr.
4306  MemoryUses.push_back(std::make_pair(CmpX, opNo));
4307  continue;
4308  }
4309 
4310  if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4311  // If this is a cold call, we can sink the addressing calculation into
4312  // the cold path. See optimizeCallInst
4313  if (!OptSize && CI->hasFnAttr(Attribute::Cold))
4314  continue;
4315 
4316  InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4317  if (!IA) return true;
4318 
4319  // If this is a memory operand, we're cool, otherwise bail out.
4320  if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4321  return true;
4322  continue;
4323  }
4324 
4325  if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
4326  SeenInsts))
4327  return true;
4328  }
4329 
4330  return false;
4331 }
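// For illustration: if %p feeds "load i32, i32* %p" and "store i32 0, i32* %p",
// both uses are recorded in MemoryUses. If %p is instead stored as a value
// ("store i32* %p, i32** %q") or reaches a non-foldable instruction, the walk
// returns true and the caller gives up on sinking the address computation.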
4332 
4333 /// Return true if Val is already known to be live at the use site that we're
4334 /// folding it into. If so, there is no cost to include it in the addressing
4335 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4336 /// instruction already.
4337 bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4338  Value *KnownLive2) {
4339  // If Val is either of the known-live values, we know it is live!
4340  if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4341  return true;
4342 
4343  // All values other than instructions and arguments (e.g. constants) are live.
4344  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4345 
4346  // If Val is a constant-sized alloca in the entry block, it is live: it is
4347  // just a reference to the stack/frame pointer, which is live for the whole
4348  // function.
4349  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4350  if (AI->isStaticAlloca())
4351  return true;
4352 
4353  // Check to see if this value is already used in the memory instruction's
4354  // block. If so, it's already live into the block at the very least, so we
4355  // can reasonably fold it.
4356  return Val->isUsedInBasicBlock(MemoryInst->getParent());
4357 }
4358 
4359 /// It is possible for the addressing mode of the machine to fold the specified
4360 /// instruction into a load or store that ultimately uses it.
4361 /// However, the specified instruction has multiple uses.
4362 /// Given this, it may actually increase register pressure to fold it
4363 /// into the load. For example, consider this code:
4364 ///
4365 /// X = ...
4366 /// Y = X+1
4367 /// use(Y) -> nonload/store
4368 /// Z = Y+1
4369 /// load Z
4370 ///
4371 /// In this case, Y has multiple uses, and can be folded into the load of Z
4372 /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4373 /// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4374 /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4375 /// number of computations either.
4376 ///
4377 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4378 /// X was live across 'load Z' for other reasons, we actually *would* want to
4379 /// fold the addressing mode in the Z case. This would make Y die earlier.
4380 bool AddressingModeMatcher::
4381 isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4382  ExtAddrMode &AMAfter) {
4383  if (IgnoreProfitability) return true;
4384 
4385  // AMBefore is the addressing mode before this instruction was folded into it,
4386  // and AMAfter is the addressing mode after the instruction was folded. Get
4387  // the set of registers referenced by AMAfter and subtract out those
4388  // referenced by AMBefore: this is the set of values which folding in this
4389  // address extends the lifetime of.
4390  //
4391  // Note that there are only two potential values being referenced here,
4392  // BaseReg and ScaleReg (global addresses are always available, as are any
4393  // folded immediates).
4394  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4395 
4396  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4397  // lifetime wasn't extended by adding this instruction.
4398  if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4399  BaseReg = nullptr;
4400  if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4401  ScaledReg = nullptr;
4402 
4403  // If folding this instruction (and its subexprs) didn't extend any live
4404  // ranges, we're ok with it.
4405  if (!BaseReg && !ScaledReg)
4406  return true;
4407 
4408  // If all uses of this instruction can have the address mode sunk into them,
4409  // we can remove the addressing mode and effectively trade one live register
4410  // for another (at worst.) In this context, folding an addressing mode into
4411  // the use is just a particularly nice way of sinking it.
4412  SmallVector<std::pair<Instruction *, unsigned>, 16> MemoryUses;
4413  SmallPtrSet<Instruction*, 16> ConsideredInsts;
4414  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4415  return false; // Has a non-memory, non-foldable use!
4416 
4417  // Now that we know that all uses of this instruction are part of a chain of
4418  // computation involving only operations that could theoretically be folded
4419  // into a memory use, loop over each of these memory operation uses and see
4420  // if they could *actually* fold the instruction. The assumption is that
4421  // addressing modes are cheap and that duplicating the computation involved
4422  // many times is worthwhile, even on a fastpath. For sinking candidates
4423  // (i.e. cold call sites), this serves as a way to prevent excessive code
4424  // growth since most architectures have some reasonably small and fast way to
4425  // compute an effective address (e.g. LEA on x86).
4426  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4427  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4428  Instruction *User = MemoryUses[i].first;
4429  unsigned OpNo = MemoryUses[i].second;
4430 
4431  // Get the access type of this use. If the use isn't a pointer, we don't
4432  // know what it accesses.
4433  Value *Address = User->getOperand(OpNo);
4434  PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4435  if (!AddrTy)
4436  return false;
4437  Type *AddressAccessTy = AddrTy->getElementType();
4438  unsigned AS = AddrTy->getAddressSpace();
4439 
4440  // Do a match against the root of this address, ignoring profitability. This
4441  // will tell us if the addressing mode for the memory operation will
4442  // *actually* cover the shared instruction.
4443  ExtAddrMode Result;
4444  std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4445  0);
4446  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4447  TPT.getRestorationPoint();
4448  AddressingModeMatcher Matcher(
4449  MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4450  InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4451  Matcher.IgnoreProfitability = true;
4452  bool Success = Matcher.matchAddr(Address, 0);
4453  (void)Success; assert(Success && "Couldn't select *anything*?");
4454 
4455  // The match was to check the profitability, the changes made are not
4456  // part of the original matcher. Therefore, they should be dropped
4457  // otherwise the original matcher will not present the right state.
4458  TPT.rollback(LastKnownGood);
4459 
4460  // If the match didn't cover I, then it won't be shared by it.
4461  if (!is_contained(MatchedAddrModeInsts, I))
4462  return false;
4463 
4464  MatchedAddrModeInsts.clear();
4465  }
4466 
4467  return true;
4468 }
4469 
4470 /// Return true if the specified values are defined in a
4471 /// different basic block than BB.
4472 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4473  if (Instruction *I = dyn_cast<Instruction>(V))
4474  return I->getParent() != BB;
4475  return false;
4476 }
4477 
4478 /// Sink addressing mode computation immediately before MemoryInst if doing so
4479 /// can be done without increasing register pressure. The need for the
4480 /// register pressure constraint means this can end up being an all or nothing
4481 /// decision for all uses of the same addressing computation.
4482 ///
4483 /// Load and Store Instructions often have addressing modes that can do
4484 /// significant amounts of computation. As such, instruction selection will try
4485 /// to get the load or store to do as much computation as possible for the
4486 /// program. The problem is that isel can only see within a single block. As
4487 /// such, we sink as much legal addressing mode work into the block as possible.
4488 ///
4489 /// This method is used to optimize both load/store and inline asms with memory
4490 /// operands. It's also used to sink addressing computations feeding into cold
4491 /// call sites into their (cold) basic block.
4492 ///
4493 /// The motivation for handling sinking into cold blocks is that doing so can
4494 /// both enable other address mode sinking (by satisfying the register pressure
4495 /// constraint above), and reduce register pressure globally (by removing the
4496 /// addressing mode computation from the fast path entirely).
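/// As a rough illustration (hypothetical IR, names are arbitrary), sinking
/// turns:
/// \code
///   entry:
///     %addr = getelementptr i8, i8* %p, i64 %idx
///     br i1 %c, label %cold, label %hot
///   cold:
///     %v = load i8, i8* %addr
/// \endcode
/// into something like:
/// \code
///   cold:
///     %addr.sunk = getelementptr i8, i8* %p, i64 %idx
///     %v = load i8, i8* %addr.sunk
/// \endcode
/// so that isel in %cold can fold the address computation into the load's
/// addressing mode instead of keeping %addr live across the branch.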
4497 bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4498  Type *AccessTy, unsigned AddrSpace) {
4499  Value *Repl = Addr;
4500 
4501  // Try to collapse single-value PHI nodes. This is necessary to undo
4502  // unprofitable PRE transformations.
4503  SmallVector<Value*, 8> worklist;
4504  SmallPtrSet<Value*, 16> Visited;
4505  worklist.push_back(Addr);
4506 
4507  // Use a worklist to iteratively look through PHI and select nodes, and
4508  // ensure that the addressing modes obtained from the non-PHI/select roots
4509  // of the graph are compatible.
4510  bool PhiOrSelectSeen = false;
4511  SmallVector<Instruction*, 16> AddrModeInsts;
4512  const SimplifyQuery SQ(*DL, TLInfo);
4513  AddressingModeCombiner AddrModes(SQ, Addr);
4514  TypePromotionTransaction TPT(RemovedInsts);
4515  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4516  TPT.getRestorationPoint();
4517  while (!worklist.empty()) {
4518  Value *V = worklist.back();
4519  worklist.pop_back();
4520 
4521  // We allow traversing cyclic Phi nodes.
4522  // In case of success after this loop, we ensure that traversing through
4523  // Phi nodes ends up computing an address of the form
4524  //   BaseGV + Base + Scale * Index + Offset
4525  // where Scale and Offset are constants and BaseGV, Base and Index are
4526  // exactly the same Values in all cases. This means that BaseGV, Base and
4527  // Index dominate our memory instruction and have the same values as they
4528  // had in the address computation represented as a Phi, so we can safely
4529  // sink the address computation to the memory instruction.
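  // E.g. (sketch, hypothetical IR): for
  //   %a1 = getelementptr i8, i8* %b, i64 %i      ; in %pred1
  //   %a2 = getelementptr i8, i8* %b, i64 %i      ; in %pred2
  //   %p  = phi i8* [ %a1, %pred1 ], [ %a2, %pred2 ]
  // both roots match as Base(%b) + 1 * Index(%i), so the address computation
  // can be rebuilt right next to the memory instruction that uses %p.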
4530  if (!Visited.insert(V).second)
4531  continue;
4532 
4533  // For a PHI node, push all of its incoming values.
4534  if (PHINode *P = dyn_cast<PHINode>(V)) {
4535  for (Value *IncValue : P->incoming_values())
4536  worklist.push_back(IncValue);
4537  PhiOrSelectSeen = true;
4538  continue;
4539  }
4540  // Similar for select.
4541  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
4542  worklist.push_back(SI->getFalseValue());
4543  worklist.push_back(SI->getTrueValue());
4544  PhiOrSelectSeen = true;
4545  continue;
4546  }
4547 
4548  // For non-PHIs, determine the addressing mode being computed. Note that
4549  // the result may differ depending on what other uses our candidate
4550  // addressing instructions might have.
4551  AddrModeInsts.clear();
4552  std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4553  0);
4554  ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4555  V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4556  InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4557 
4558  GetElementPtrInst *GEP = LargeOffsetGEP.first;
4559  if (GEP && GEP->getParent() != MemoryInst->getParent() &&
4560  !NewGEPBases.count(GEP)) {
4561  // If splitting the underlying data structure can reduce the offset of a
4562  // GEP, collect the GEP. Skip the GEPs that are the new bases of
4563  // previously split data structures.
4564  LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4565  if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4566  LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4567  }
4568 
4569  NewAddrMode.OriginalValue = V;
4570  if (!AddrModes.addNewAddrMode(NewAddrMode))
4571  break;
4572  }
4573 
4574  // Try to combine the AddrModes we've collected. If we couldn't collect any,
4575  // or we have multiple but either couldn't combine them or combining them
4576  // wouldn't do anything useful, bail out now.
4577  if (!AddrModes.combineAddrModes()) {
4578  TPT.rollback(LastKnownGood);
4579  return false;
4580  }
4581  TPT.commit();
4582 
4583  // Get the combined AddrMode (or the only AddrMode, if we only had one).
4584  ExtAddrMode AddrMode = AddrModes.getAddrMode();
4585 
4586  // If all the instructions matched are already in this BB, don't do anything.
4587  // If we saw a Phi node then it is definitely not local, and if we saw a
4588  // select then we want to push the address calculation past it even if it's
4589  // already in this BB.
4590  if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4591  return IsNonLocalValue(V, MemoryInst->getParent());
4592  })) {
4593  LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4594  << "\n");
4595  return false;
4596  }
4597 
4598  // Insert this computation right after this user. Since our caller is
4599  // scanning from the top of the BB to the bottom, reuses of the expression
4600  // are guaranteed to happen later.
4601  IRBuilder<> Builder(MemoryInst);
4602 
4603  // Now that we have determined the addressing expression we want to use and
4604  // know that we have to sink it into this block, check to see if we have
4605  // already done this for some other load/store instruction in this block. If
4606  // so, reuse the computation. Before attempting reuse, check if the address
4607  // is still valid, as it may have been erased.
4608 
4609  WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4610 
4611  Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4612  if (SunkAddr) {
4613  LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4614  << " for " << *MemoryInst << "\n");
4615  if (SunkAddr->getType() != Addr->getType())
4616  SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4617  } else if (AddrSinkUsingGEPs ||
4618  (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {
4619  // By default, we use the GEP-based method when AA is used later. This
4620  // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4621  LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4622  << " for " << *MemoryInst << "\n");
4623  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4624  Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4625 
4626  // First, find the pointer.
4627  if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4628  ResultPtr = AddrMode.BaseReg;
4629  AddrMode.BaseReg = nullptr;
4630  }
4631 
4632  if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4633  // We can't add more than one pointer together, nor can we scale a
4634  // pointer (both of which seem meaningless).
4635  if (ResultPtr || AddrMode.Scale != 1)
4636  return false;
4637 
4638  ResultPtr = AddrMode.ScaledReg;
4639  AddrMode.Scale = 0;
4640  }
4641 
4642  // It is only safe to sign extend the BaseReg if we know that the math
4643  // required to create it did not overflow before we extend it. Since
4644  // the original IR value was tossed in favor of a constant back when
4645  // the AddrMode was created we need to bail out gracefully if widths
4646  // do not match instead of extending it.
4647  //
4648  // (See below for code to add the scale.)
4649  if (AddrMode.Scale) {
4650  Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4651  if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4652  cast<IntegerType>(ScaledRegTy)->getBitWidth())
4653  return false;
4654  }
4655 
4656  if (AddrMode.BaseGV) {
4657  if (ResultPtr)
4658  return false;
4659 
4660  ResultPtr = AddrMode.BaseGV;
4661  }
4662 
4663  // If the real base value actually came from an inttoptr, then the matcher
4664  // will look through it and provide only the integer value. In that case,
4665  // use it here.
4666  if (!DL->isNonIntegralPointerType(Addr->getType())) {
4667  if (!ResultPtr && AddrMode.BaseReg) {
4668  ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4669  "sunkaddr");
4670  AddrMode.BaseReg = nullptr;
4671  } else if (!ResultPtr && AddrMode.Scale == 1) {
4672  ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4673  "sunkaddr");
4674  AddrMode.Scale = 0;
4675  }
4676  }
4677 
4678  if (!ResultPtr &&
4679  !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4680  SunkAddr = Constant::getNullValue(Addr->getType());
4681  } else if (!ResultPtr) {
4682  return false;
4683  } else {
4684  Type *I8PtrTy =
4685  Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4686  Type *I8Ty = Builder.getInt8Ty();
4687 
4688  // Start with the base register. Do this first so that subsequent address
4689  // matching finds it last, which will prevent it from trying to match it
4690  // as the scaled value in case it happens to be a mul. That would be
4691  // problematic if we've sunk a different mul for the scale, because then
4692  // we'd end up sinking both muls.
4693  if (AddrMode.BaseReg) {
4694  Value *V = AddrMode.BaseReg;
4695  if (V->getType() != IntPtrTy)
4696  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4697 
4698  ResultIndex = V;
4699  }
4700 
4701  // Add the scale value.
4702  if (AddrMode.Scale) {
4703  Value *V = AddrMode.ScaledReg;
4704  if (V->getType() == IntPtrTy) {
4705  // done.
4706  } else {
4707  assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4708  cast<IntegerType>(V->getType())->getBitWidth() &&
4709  "We can't transform if ScaledReg is too narrow");
4710  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4711  }
4712 
4713  if (AddrMode.Scale != 1)
4714  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4715  "sunkaddr");
4716  if (ResultIndex)
4717  ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4718  else
4719  ResultIndex = V;
4720  }
4721 
4722  // Add in the Base Offset if present.
4723  if (AddrMode.BaseOffs) {
4724  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4725  if (ResultIndex) {
4726  // We need to add this separately from the scale above to help with
4727  // SDAG consecutive load/store merging.
4728  if (ResultPtr->getType() != I8PtrTy)
4729  ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4730  ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4731  }
4732 
4733  ResultIndex = V;
4734  }
4735 
4736  if (!ResultIndex) {
4737  SunkAddr = ResultPtr;
4738  } else {
4739  if (ResultPtr->getType() != I8PtrTy)
4740  ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4741  SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4742  }
4743 
4744  if (SunkAddr->getType() != Addr->getType())
4745  SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4746  }
4747  } else {
4748  // We'd require a ptrtoint/inttoptr down the line, which we can't do for
4749  // non-integral pointers, so in that case bail out now.
4750  Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
4751  Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
4752  PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
4753  PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
4754  if (DL->isNonIntegralPointerType(Addr->getType()) ||
4755  (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
4756  (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
4757  (AddrMode.BaseGV &&
4758  DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
4759  return false;
4760 
4761  LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4762  << " for " << *MemoryInst << "\n");
4763  Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4764  Value *Result = nullptr;
4765 
4766  // Start with the base register. Do this first so that subsequent address
4767  // matching finds it last, which will prevent it from trying to match it
4768  // as the scaled value in case it happens to be a mul. That would be
4769  // problematic if we've sunk a different mul for the scale, because then
4770  // we'd end up sinking both muls.
4771  if (AddrMode.BaseReg) {
4772  Value *V = AddrMode.BaseReg;
4773  if (V->getType()->isPointerTy())
4774  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4775  if (V->getType() != IntPtrTy)
4776  V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4777  Result = V;
4778  }
4779 
4780  // Add the scale value.
4781  if (AddrMode.Scale) {
4782  Value *V = AddrMode.ScaledReg;
4783  if (V->getType() == IntPtrTy) {
4784  // done.
4785  } else if (V->getType()->isPointerTy()) {
4786  V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4787  } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4788  cast<IntegerType>(V->getType())->getBitWidth()) {
4789  V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4790  } else {
4791  // It is only safe to sign extend the BaseReg if we know that the math
4792  // required to create it did not overflow before we extend it. Since
4793  // the original IR value was tossed in favor of a constant back when
4794  // the AddrMode was created we need to bail out gracefully if widths
4795  // do not match instead of extending it.
4796  Instruction *I = dyn_cast_or_null<Instruction>(Result);
4797  if (I && (Result != AddrMode.BaseReg))
4798  I->eraseFromParent();
4799  return false;
4800  }
4801  if (AddrMode.Scale != 1)
4802  V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4803  "sunkaddr");
4804  if (Result)
4805  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4806  else
4807  Result = V;
4808  }
4809 
4810  // Add in the BaseGV if present.
4811  if (AddrMode.BaseGV) {
4812  Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4813  if (Result)
4814  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4815  else
4816  Result = V;
4817  }
4818 
4819  // Add in the Base Offset if present.
4820  if (AddrMode.BaseOffs) {
4821  Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4822  if (Result)
4823  Result = Builder.CreateAdd(Result, V, "sunkaddr");
4824  else
4825  Result = V;
4826  }
4827 
4828  if (!Result)
4829  SunkAddr = Constant::getNullValue(Addr->getType());
4830  else
4831  SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
4832  }
4833 
4834  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
4835  // Store the newly computed address into the cache. In the case where we
4836  // reused a value, this should be idempotent.
4837  SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
4838 
4839  // If we have no uses, recursively delete the value and all dead instructions
4840  // using it.
4841  if (Repl->use_empty()) {
4842  // This can cause recursive deletion, which can invalidate our iterator.
4843  // Use a WeakTrackingVH to hold onto it in case this happens.
4844  Value *CurValue = &*CurInstIterator;
4845  WeakTrackingVH IterHandle(CurValue);
4846  BasicBlock *BB = CurInstIterator->getParent();
4847 
4848  RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
4849 
4850  if (IterHandle != CurValue) {
4851  // If the iterator instruction was recursively deleted, start over at the
4852  // start of the block.
4853  CurInstIterator = BB->begin();
4854  SunkAddrs.clear();
4855  }
4856  }
4857  ++NumMemoryInsts;
4858  return true;
4859 }
4860 
4861 /// If there are any memory operands, use optimizeMemoryInst to sink their
4862 /// address computation into the block when possible / profitable.
4863 bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
4864  bool MadeChange = false;
4865 
4866  const TargetRegisterInfo *TRI =
4867  TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
4868  TargetLowering::AsmOperandInfoVector TargetConstraints =
4869  TLI->ParseConstraints(*DL, TRI, CS);
4870  unsigned ArgNo = 0;
4871  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4872  TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4873 
4874  // Compute the constraint code and ConstraintType to use.
4875  TLI->ComputeConstraintToUse(OpInfo, SDValue());
4876 
4877  if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
4878  OpInfo.isIndirect) {
4879  Value *OpVal = CS->getArgOperand(ArgNo++);
4880  MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
4881  } else if (OpInfo.Type == InlineAsm::isInput)
4882  ArgNo++;
4883  }
4884 
4885  return MadeChange;
4886 }
4887 
4888 /// Check if all the uses of \p Val are equivalent (or free) zero or
4889 /// sign extensions.
4890 static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
4891  assert(!Val->use_empty() && "Input must have at least one use");
4892  const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
4893  bool IsSExt = isa<SExtInst>(FirstUser);
4894  Type *ExtTy = FirstUser->getType();
4895  for (const User *U : Val->users()) {
4896  const Instruction *UI = cast<Instruction>(U);
4897  if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
4898  return false;
4899  Type *CurTy = UI->getType();
4900  // Same input and output types: Same instruction after CSE.
4901  if (CurTy == ExtTy)
4902  continue;
4903 
4904  // If IsSExt is true, we are in this situation:
4905  // a = Val
4906  // b = sext ty1 a to ty2
4907  // c = sext ty1 a to ty3
4908  // Assuming ty2 is shorter than ty3, this could be turned into:
4909  // a = Val
4910  // b = sext ty1 a to ty2
4911  // c = sext ty2 b to ty3
4912  // However, the last sext is not free.
4913  if (IsSExt)
4914  return false;
4915 
4916  // This is a ZExt; maybe it is free to extend from one type to another.
4917  // In that case, we would not account for a different use.
4918  Type *NarrowTy;
4919  Type *LargeTy;
4920  if (ExtTy->getScalarType()->getIntegerBitWidth() >
4921  CurTy->getScalarType()->getIntegerBitWidth()) {
4922  NarrowTy = CurTy;
4923  LargeTy = ExtTy;
4924  } else {
4925  NarrowTy = ExtTy;
4926  LargeTy = CurTy;
4927  }
4928 
4929  if (!TLI.isZExtFree(NarrowTy, LargeTy))
4930  return false;
4931  }
4932  // All uses are the same or can be derived from one another for free.
4933  return true;
4934 }
4935 
4936 /// Try to speculatively promote extensions in \p Exts and continue
4937 /// promoting through newly promoted operands recursively as far as doing so is
4938 /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
4939 /// When some promotion happened, \p TPT contains the proper state to revert
4940 /// them.
4941 ///
4942 /// \return true if some promotion happened, false otherwise.
4943 bool CodeGenPrepare::tryToPromoteExts(
4944  TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
4945  SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
4946  unsigned CreatedInstsCost) {
4947  bool Promoted = false;
4948 
4949  // Iterate over all the extensions to try to promote them.
4950  for (auto I : Exts) {
4951  // Early check if we directly have ext(load).
4952  if (isa<LoadInst>(I->getOperand(0))) {
4953  ProfitablyMovedExts.push_back(I);
4954  continue;
4955  }
4956 
4957  // Check whether or not we want to do any promotion. The reason we have
4958  // this check inside the for loop is to catch the case where an extension
4959  // is directly fed by a load, because in such a case the extension can be
4960  // moved up without any promotion on its operands.
4961  if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
4962  return false;
4963 
4964  // Get the action to perform the promotion.
4965  TypePromotionHelper::Action TPH =
4966  TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
4967  // Check if we can promote.
4968  if (!TPH) {
4969  // Save the current extension as we cannot move up through its operand.
4970  ProfitablyMovedExts.push_back(I);
4971  continue;
4972  }
4973 
4974  // Save the current state.
4975  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4976  TPT.getRestorationPoint();
4977  SmallVector<Instruction *, 4> NewExts;
4978  unsigned NewCreatedInstsCost = 0;
4979  unsigned ExtCost = !TLI->isExtFree(I);
4980  // Promote.
4981  Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
4982  &NewExts, nullptr, *TLI);
4983  assert(PromotedVal &&
4984  "TypePromotionHelper should have filtered out those cases");
4985 
4986  // We can merge at most one extension into a load.
4987  // Therefore, if we have more than 1 new extension we heuristically
4988  // cut this search path, because it means we degrade the code quality.
4989  // With exactly 2, the transformation is neutral, because we will merge
4990  // one extension but leave one. However, we optimistically keep going,
4991  // because the new extension may be removed too.
4992  long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
4993  // FIXME: It would be possible to propagate a negative value instead of
4994  // conservatively ceiling it to 0.
4995  TotalCreatedInstsCost =
4996  std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
4997  if (!StressExtLdPromotion &&
4998  (TotalCreatedInstsCost > 1 ||
4999  !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5000  // This promotion is not profitable, rollback to the previous state, and
5001  // save the current extension in ProfitablyMovedExts as the latest
5002  // speculative promotion turned out to be unprofitable.
5003  TPT.rollback(LastKnownGood);
5004  ProfitablyMovedExts.push_back(I);
5005  continue;
5006  }
5007  // Continue promoting NewExts as far as doing so is profitable.
5008  SmallVector<Instruction *, 2> NewlyMovedExts;
5009  (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5010  bool NewPromoted = false;
5011  for (auto ExtInst : NewlyMovedExts) {
5012  Instruction *MovedExt = cast<Instruction>(ExtInst);
5013  Value *ExtOperand = MovedExt->getOperand(0);
5014  // If we have reached a load, we need this extra profitability check
5015  // as it could potentially be merged into an ext(load).
5016  if (isa<LoadInst>(ExtOperand) &&
5017  !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5018  (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5019  continue;
5020 
5021  ProfitablyMovedExts.push_back(MovedExt);
5022  NewPromoted = true;
5023  }
5024 
5025  // If none of speculative promotions for NewExts is profitable, rollback
5026  // and save the current extension (I) as the last profitable extension.
5027  if (!NewPromoted) {
5028  TPT.rollback(LastKnownGood);
5029  ProfitablyMovedExts.push_back(I);
5030  continue;
5031  }
5032  // The promotion is profitable.
5033  Promoted = true;
5034  }
5035  return Promoted;
5036 }
5037 
5038 /// Merge redundant sexts when one dominates the other.
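/// E.g. (sketch, hypothetical IR), with %s1 dominating %s2:
/// \code
///   %s1 = sext i32 %a to i64
///   ...
///   %s2 = sext i32 %a to i64
/// \endcode
/// all uses of %s2 are rewritten to use %s1 and %s2 is removed.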
5039 bool CodeGenPrepare::mergeSExts(Function &F) {
5040  DominatorTree DT(F);
5041  bool Changed = false;
5042  for (auto &Entry : ValToSExtendedUses) {
5043  SExts &Insts = Entry.second;
5044  SExts CurPts;
5045  for (Instruction *Inst : Insts) {
5046  if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5047  Inst->getOperand(0) != Entry.first)
5048  continue;
5049  bool inserted = false;
5050  for (auto &Pt : CurPts) {
5051  if (DT.dominates(Inst, Pt)) {
5052  Pt->replaceAllUsesWith(Inst);
5053  RemovedInsts.insert(Pt);
5054  Pt->removeFromParent();
5055  Pt = Inst;
5056  inserted = true;
5057  Changed = true;
5058  break;
5059  }
5060  if (!DT.dominates(Pt, Inst))
5061  // Give up if we need to merge in a common dominator as the
5062  // experiments show it is not profitable.
5063  continue;
5064  Inst->replaceAllUsesWith(Pt);
5065  RemovedInsts.insert(Inst);
5066  Inst->removeFromParent();
5067  inserted = true;
5068  Changed = true;
5069  break;
5070  }
5071  if (!inserted)
5072  CurPts.push_back(Inst);
5073  }
5074  }
5075  return Changed;
5076 }
5077 
5078 // Split large data structures so that the GEPs accessing them can have
5079 // smaller offsets, allowing them to be sunk to the same blocks as their users.
5080 // For example, a large struct starting from %base is split into two parts
5081 // where the second part starts from %new_base.
5082 //
5083 // Before:
5084 // BB0:
5085 // %base =
5086 //
5087 // BB1:
5088 // %gep0 = gep %base, off0
5089 // %gep1 = gep %base, off1
5090 // %gep2 = gep %base, off2
5091 //
5092 // BB2:
5093 // %load1 = load %gep0
5094 // %load2 = load %gep1
5095 // %load3 = load %gep2
5096 //
5097 // After:
5098 // BB0:
5099 // %base =
5100 // %new_base = gep %base, off0
5101 //
5102 // BB1:
5103 // %new_gep0 = %new_base
5104 // %new_gep1 = gep %new_base, off1 - off0
5105 // %new_gep2 = gep %new_base, off2 - off0
5106 //
5107 // BB2:
5108 // %load1 = load i32, i32* %new_gep0
5109 // %load2 = load i32, i32* %new_gep1
5110 // %load3 = load i32, i32* %new_gep2
5111 //
5112 // %new_gep1 and %new_gep2 can now be sunk to BB2 after the splitting because
5113 // their offsets are small enough to fit into the addressing mode.
5114 bool CodeGenPrepare::splitLargeGEPOffsets() {
5115  bool Changed = false;
5116  for (auto &Entry : LargeOffsetGEPMap) {
5117  Value *OldBase = Entry.first;
5118  SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5119  &LargeOffsetGEPs = Entry.second;
5120  auto compareGEPOffset =
5121  [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5122  const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5123  if (LHS.first == RHS.first)
5124  return false;
5125  if (LHS.second != RHS.second)
5126  return LHS.second < RHS.second;
5127  return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5128  };
5129  // Sort all the GEPs of the same data structure based on their offsets.
5130  llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5131  LargeOffsetGEPs.erase(
5132  std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5133  LargeOffsetGEPs.end());
5134  // Skip if all the GEPs have the same offsets.
5135  if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5136  continue;
5137  GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5138  int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5139  Value *NewBaseGEP = nullptr;
5140 
5141  auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5142  while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5143  GetElementPtrInst *GEP = LargeOffsetGEP->first;
5144  int64_t Offset = LargeOffsetGEP->second;
5145  if (Offset != BaseOffset) {
5146  TargetLowering::AddrMode AddrMode;
5147  AddrMode.BaseOffs = Offset - BaseOffset;
5148  // The result type of the GEP might not be the type of the memory
5149  // access.
5150  if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5151  GEP->getResultElementType(),
5152  GEP->getAddressSpace())) {
5153  // We need to create a new base if the offset to the current base is
5154  // too large to fit into the addressing mode. So, a very large struct
5155  // may be split into several parts.
5156  BaseGEP = GEP;
5157  BaseOffset = Offset;
5158  NewBaseGEP = nullptr;
5159  }
5160  }
5161 
5162  // Generate a new GEP to replace the current one.
5163  LLVMContext &Ctx = GEP->getContext();
5164  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5165  Type *I8PtrTy =
5166  Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5167  Type *I8Ty = Type::getInt8Ty(Ctx);
5168 
5169  if (!NewBaseGEP) {
5170  // Create a new base if we don't have one yet. Find the insertion
5171  // pointer for the new base first.
5172  BasicBlock::iterator NewBaseInsertPt;
5173  BasicBlock *NewBaseInsertBB;
5174  if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5175  // If the base of the struct is an instruction, the new base will be
5176  // inserted close to it.
5177  NewBaseInsertBB = BaseI->getParent();
5178  if (isa<PHINode>(BaseI))
5179  NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5180  else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5181  NewBaseInsertBB =
5182  SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5183  NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5184  } else
5185  NewBaseInsertPt = std::next(BaseI->getIterator());
5186  } else {
5187  // If the current base is an argument or global value, the new base
5188  // will be inserted to the entry block.
5189  NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5190  NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5191  }
5192  IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5193  // Create a new base.
5194  Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5195  NewBaseGEP = OldBase;
5196  if (NewBaseGEP->getType() != I8PtrTy)
5197  NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5198  NewBaseGEP =
5199  NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5200  NewGEPBases.insert(NewBaseGEP);
5201  }
5202 
5203  IRBuilder<> Builder(GEP);
5204  Value *NewGEP = NewBaseGEP;
5205  if (Offset == BaseOffset) {
5206  if (GEP->getType() != I8PtrTy)
5207  NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5208  } else {
5209  // Calculate the new offset for the new GEP.
5210  Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5211  NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5212 
5213  if (GEP->getType() != I8PtrTy)
5214  NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5215  }
5216  GEP->replaceAllUsesWith(NewGEP);
5217  LargeOffsetGEPID.erase(GEP);
5218  LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5219  GEP->eraseFromParent();
5220  Changed = true;
5221  }
5222  }
5223  return Changed;
5224 }
5225 
5226 /// Return true if an ext(load) can be formed from an extension in
5227 /// \p MovedExts.
5228 bool CodeGenPrepare::canFormExtLd(
5229  const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5230  Instruction *&Inst, bool HasPromoted) {
5231  for (auto *MovedExtInst : MovedExts) {
5232  if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5233  LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5234  Inst = MovedExtInst;
5235  break;
5236  }
5237  }
5238  if (!LI)
5239  return false;
5240 
5241  // If they're already in the same block, there's nothing to do.
5242  // Make the cheap checks first if we did not promote.
5243  // If we promoted, we need to check if it is indeed profitable.
5244  if (!HasPromoted && LI->getParent() == Inst->getParent())
5245  return false;
5246 
5247  return TLI->isExtLoad(LI, Inst, *DL);
5248 }
5249 
5250 /// Move a zext or sext fed by a load into the same basic block as the load,
5251 /// unless conditions are unfavorable. This allows SelectionDAG to fold the
5252 /// extend into the load.
5253 ///
5254 /// E.g.,
5255 /// \code
5256 /// %ld = load i32* %addr
5257 /// %add = add nuw i32 %ld, 4
5258 /// %zext = zext i32 %add to i64
5259 /// \endcode
5260 /// =>
5261 /// \code
5262 /// %ld = load i32* %addr
5263 /// %zext = zext i32 %ld to i64
5264 /// %add = add nuw i64 %zext, 4
5265 /// \endcode
5266 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5267 /// allows us to match zext(load i32*) to i64.
5268 ///
5269 /// Also, try to promote the computations used to obtain a sign extended
5270 /// value used into memory accesses.
5271 /// E.g.,
5272 /// \code
5273 /// a = add nsw i32 b, 3
5274 /// d = sext i32 a to i64
5275 /// e = getelementptr ..., i64 d
5276 /// \endcode
5277 /// =>
5278 /// \code
5279 /// f = sext i32 b to i64
5280 /// a = add nsw i64 f, 3
5281 /// e = getelementptr ..., i64 a
5282 /// \endcode
5283 ///
5284 /// \p Inst[in/out] the extension may be modified during the process if some
5285 /// promotions apply.
5286 bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5287  // ExtLoad formation and address type promotion infrastructure requires TLI to
5288  // be effective.
5289  if (!TLI)
5290  return false;
5291 
5292  bool AllowPromotionWithoutCommonHeader = false;
5293  /// See if it is an interesting sext operation for address type promotion
5294  /// before trying to promote it, e.g., one with the right type that is used
5295  /// in memory accesses.
5296  bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5297  *Inst, AllowPromotionWithoutCommonHeader);
5298  TypePromotionTransaction TPT(RemovedInsts);
5299  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5300  TPT.getRestorationPoint();
5301  SmallVector<Instruction *, 1> Exts;
5302  SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5303  Exts.push_back(Inst);
5304 
5305  bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5306 
5307  // Look for a load being extended.
5308  LoadInst *LI = nullptr;
5309  Instruction *ExtFedByLoad;
5310 
5311  // Try to promote a chain of computation if it allows to form an extended
5312  // load.
5313  if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5314  assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5315  TPT.commit();
5316  // Move the extend into the same block as the load
5317  ExtFedByLoad->moveAfter(LI);
5318  // CGP does not check if the zext would be speculatively executed when moved
5319  // to the same basic block as the load. Preserving its original location
5320  // would pessimize the debugging experience, as well as negatively impact
5321  // the quality of sample pgo. We don't want to use "line 0" as that has a
5322  // size cost in the line-table section and logically the zext can be seen as
5323  // part of the load. Therefore we conservatively reuse the same debug
5324  // location for the load and the zext.
5325  ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5326  ++NumExtsMoved;
5327  Inst = ExtFedByLoad;
5328  return true;
5329  }
5330 
5331  // Continue promoting SExts if known as considerable depending on targets.
5332  if (ATPConsiderable &&
5333  performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5334  HasPromoted, TPT, SpeculativelyMovedExts))
5335  return true;
5336 
5337  TPT.rollback(LastKnownGood);
5338  return false;
5339 }
5340 
5341 // Perform address type promotion if doing so is profitable.
5342 // If AllowPromotionWithoutCommonHeader == false, we should find other sext
5343 // instructions that sign extended the same initial value. However, if
5344 // AllowPromotionWithoutCommonHeader == true, we expect promoting the
5345 // extension to be profitable on its own.
5346 bool CodeGenPrepare::performAddressTypePromotion(
5347  Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5348  bool HasPromoted, TypePromotionTransaction &TPT,
5349  SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5350  bool Promoted = false;
5351  SmallPtrSet<Instruction *, 1> UnhandledExts;
5352  bool AllSeenFirst = true;
5353  for (auto I : SpeculativelyMovedExts) {
5354  Value *HeadOfChain = I->getOperand(0);
5355  DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5356  SeenChainsForSExt.find(HeadOfChain);
5357  // If there is an unhandled SExt which has the same header, try to promote
5358  // it as well.
5359  if (AlreadySeen != SeenChainsForSExt.end()) {
5360  if (AlreadySeen->second != nullptr)
5361  UnhandledExts.insert(AlreadySeen->second);
5362  AllSeenFirst = false;
5363  }
5364  }
5365 
5366  if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5367  SpeculativelyMovedExts.size() == 1)) {
5368  TPT.commit();
5369  if (HasPromoted)
5370  Promoted = true;
5371  for (auto I : SpeculativelyMovedExts) {
5372  Value *HeadOfChain = I->getOperand(0);
5373  SeenChainsForSExt[HeadOfChain] = nullptr;
5374  ValToSExtendedUses[HeadOfChain].push_back(I);
5375  }
5376  // Update Inst as promotion happened.
5377  Inst = SpeculativelyMovedExts.pop_back_val();
5378  } else {
5379  // This is the first chain visited from the header, keep the current chain
5380  // as unhandled. Defer promoting it until we encounter another SExt
5381  // chain derived from the same header.
5382  for (auto I : SpeculativelyMovedExts) {
5383  Value *HeadOfChain = I->getOperand(0);
5384  SeenChainsForSExt[HeadOfChain] = Inst;
5385  }
5386  return false;
5387  }
5388 
5389  if (!AllSeenFirst && !UnhandledExts.empty())
5390  for (auto VisitedSExt : UnhandledExts) {
5391  if (RemovedInsts.count(VisitedSExt))
5392  continue;
5393  TypePromotionTransaction TPT(RemovedInsts);
5394  SmallVector<Instruction *, 1> Exts;
5395  SmallVector<Instruction *, 2> Chains;
5396  Exts.push_back(VisitedSExt);
5397  bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5398  TPT.commit();
5399  if (HasPromoted)
5400  Promoted = true;
5401  for (auto I : Chains) {
5402  Value *HeadOfChain = I->getOperand(0);
5403  // Mark this as handled.
5404  SeenChainsForSExt[HeadOfChain] = nullptr;
5405  ValToSExtendedUses[HeadOfChain].push_back(I);
5406  }
5407  }
5408  return Promoted;
5409 }
5410 
5411 bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5412  BasicBlock *DefBB = I->getParent();
5413 
5414  // If the result of a {s|z}ext and its source are both live out, rewrite all
5415  // other uses of the source with the result of the extension.
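  // E.g. (sketch, hypothetical IR): given, in DefBB,
  //   %x  = ...
  //   %xe = zext i32 %x to i64
  // a use of %x in another block is rewritten to use
  //   %t = trunc i64 %xe to i32
  // inserted in that block, so only %xe has to stay live out of DefBB.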
5416  Value *Src = I->getOperand(0);
5417  if (Src->hasOneUse())
5418  return false;
5419 
5420  // Only do this xform if truncating is free.
5421  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5422  return false;
5423 
5424  // Only safe to perform the optimization if the source is also defined in
5425  // this block.
5426  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5427  return false;
5428 
5429  bool DefIsLiveOut = false;
5430  for (User *U : I->users()) {
5431  Instruction *UI = cast<Instruction>(U);
5432 
5433  // Figure out which BB this ext is used in.
5434  BasicBlock *UserBB = UI->getParent();
5435  if (UserBB == DefBB) continue;
5436  DefIsLiveOut = true;
5437  break;
5438  }
5439  if (!DefIsLiveOut)
5440  return false;
5441 
5442  // Make sure none of the uses are PHI nodes.
5443  for (User *U : Src->users()) {
5444  Instruction *UI = cast<Instruction>(U);
5445  BasicBlock *UserBB = UI->getParent();
5446  if (UserBB == DefBB) continue;
5447  // Be conservative. We don't want this xform to end up introducing
5448  // reloads just before load / store instructions.
5449  if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5450  return false;
5451  }
5452 
5453  // InsertedTruncs - Only insert one trunc in each block.
5454  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5455 
5456  bool MadeChange = false;
5457  for (Use &U : Src->uses()) {
5458  Instruction *User = cast<Instruction>(U.getUser());
5459 
5460  // Figure out which BB this ext is used in.
5461  BasicBlock *UserBB = User->getParent();
5462  if (UserBB == DefBB) continue;
5463 
5464  // Both src and def are live in this block. Rewrite the use.
5465  Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5466 
5467  if (!InsertedTrunc) {
5468  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5469  assert(InsertPt != UserBB->end());
5470  InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5471  InsertedInsts.insert(InsertedTrunc);
5472  }
5473 
5474  // Replace a use of the {s|z}ext source with a use of the result.
5475  U = InsertedTrunc;
5476  ++NumExtUses;
5477  MadeChange = true;
5478  }
5479 
5480  return MadeChange;
5481 }
5482 
5483 // Find loads whose uses only use some of the loaded value's bits. Add an "and"
5484 // just after the load if the target can fold this into one extload instruction,
5485 // with the hope of eliminating some of the other later "and" instructions using
5486 // the loaded value. "and"s that are made trivially redundant by the insertion
5487 // of the new "and" are removed by this function, while others (e.g. those whose
5488 // path from the load goes through a phi) are left for isel to potentially
5489 // remove.
5490 //
5491 // For example:
5492 //
5493 // b0:
5494 // x = load i32
5495 // ...
5496 // b1:
5497 // y = and x, 0xff
5498 // z = use y
5499 //
5500 // becomes:
5501 //
5502 // b0:
5503 // x = load i32
5504 // x' = and x, 0xff
5505 // ...
5506 // b1:
5507 // z = use x'
5508 //
5509 // whereas:
5510 //
5511 // b0:
5512 // x1 = load i32
5513 // ...
5514 // b1:
5515 // x2 = load i32
5516 // ...
5517 // b2:
5518 // x = phi x1, x2
5519 // y = and x, 0xff
5520 //
5521 // becomes (after a call to optimizeLoadExt for each load):
5522 //
5523 // b0:
5524 // x1 = load i32
5525 // x1' = and x1, 0xff
5526 // ...
5527 // b1:
5528 // x2 = load i32
5529 // x2' = and x2, 0xff
5530 // ...
5531 // b2:
5532 // x = phi x1', x2'
5533 // y = and x, 0xff
5534 bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
5535  if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
5536  return false;
5537 
5538  // Skip loads we've already transformed.
5539  if (Load->hasOneUse() &&
5540  InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
5541  return false;
5542 
5543  // Look at all uses of Load, looking through phis, to determine how many bits
5544  // of the loaded value are needed.
5545  SmallVector<Instruction *, 8> WorkList;
5546  SmallPtrSet<Instruction *, 16> Visited;
5547  SmallVector<Instruction *, 8> AndsToMaybeRemove;
5548  for (auto *U : Load->users())
5549  WorkList.push_back(cast<Instruction>(U));
5550 
5551  EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
5552  unsigned BitWidth = LoadResultVT.getSizeInBits();
5553  APInt DemandBits(BitWidth, 0);
5554  APInt WidestAndBits(BitWidth, 0);
5555 
5556  while (!WorkList.empty()) {
5557  Instruction *I = WorkList.back();
5558  WorkList.pop_back();
5559 
5560  // Break use-def graph loops.
5561  if (!Visited.insert(I).second)
5562  continue;
5563 
5564  // For a PHI node, push all of its users.
5565  if (auto *Phi = dyn_cast<PHINode>(I)) {
5566  for (auto *U : Phi->users())
5567  WorkList.push_back(cast<Instruction>(U));
5568  continue;
5569  }
5570 
5571  switch (I->getOpcode()) {
5572  case Instruction::And: {
5573  auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
5574  if (!AndC)
5575  return false;
5576  APInt AndBits = AndC->getValue();
5577  DemandBits |= AndBits;
5578  // Keep track of the widest 'and' mask we see.
5579  if (AndBits.ugt(WidestAndBits))
5580  WidestAndBits = AndBits;
5581  if (AndBits == WidestAndBits && I->getOperand(0) == Load)
5582  AndsToMaybeRemove.push_back(I);
5583  break;
5584  }
5585 
5586  case Instruction::Shl: {
5587  auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
5588  if (!ShlC)
5589  return false;
5590  uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
5591  DemandBits.setLowBits(BitWidth - ShiftAmt);
5592  break;
5593  }
5594 
5595  case Instruction::Trunc: {
5596  EVT TruncVT = TLI->getValueType(*DL, I->getType());
5597  unsigned TruncBitWidth = TruncVT.getSizeInBits();
5598  DemandBits.setLowBits(TruncBitWidth);
5599  break;
5600  }
5601 
5602  default:
5603  return false;
5604  }
5605  }
5606 
5607  uint32_t ActiveBits = DemandBits.getActiveBits();
5608  // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
5609  // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
5610  // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
5611  // (and (load x) 1) is not matched as a single instruction, rather as a LDR
5612  // followed by an AND.
5613  // TODO: Look into removing this restriction by fixing backends to either
5614  // return false for isLoadExtLegal for i1 or have them select this pattern to
5615  // a single instruction.
5616  //
5617  // Also avoid hoisting if we didn't see any ands with the exact DemandBits
5618  // mask, since these are the only ands that will be removed by isel.
5619  if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
5620  WidestAndBits != DemandBits)
5621  return false;
5622 
5623  LLVMContext &Ctx = Load->getType()->getContext();
5624  Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
5625  EVT TruncVT = TLI->getValueType(*DL, TruncTy);
5626 
5627  // Reject cases that won't be matched as extloads.
5628  if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
5629  !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
5630  return false;
5631 
5632  IRBuilder<> Builder(Load->getNextNode());
5633  auto *NewAnd = dyn_cast<Instruction>(
5634  Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
5635  // Mark this instruction as "inserted by CGP", so that other
5636  // optimizations don't touch it.
5637  InsertedInsts.insert(NewAnd);
5638 
5639  // Replace all uses of load with new and (except for the use of load in the
5640  // new and itself).
5641  Load->replaceAllUsesWith(NewAnd);
5642  NewAnd->setOperand(0, Load);
5643 
5644  // Remove any and instructions that are now redundant.
5645  for (auto *And : AndsToMaybeRemove)
5646  // Check that the and mask is the same as the one we decided to put on the
5647  // new and.
5648  if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
5649  And->replaceAllUsesWith(NewAnd);
5650  if (&*CurInstIterator == And)
5651  CurInstIterator = std::next(And->getIterator());
5652  And->eraseFromParent();
5653  ++NumAndUses;
5654  }
5655 
5656  ++NumAndsAdded;
5657  return true;
5658 }
5659 
5660 /// Check if V (an operand of a select instruction) is an expensive instruction
5661 /// that is only used once.
5662 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
5663  auto *I = dyn_cast<Instruction>(V);
5664  // If it's safe to speculatively execute, then it should not have side
5665  // effects; therefore, it's safe to sink and possibly *not* execute.
5666  return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
5667  TTI->getUserCost(I) == TargetTransformInfo::TCC_Expensive;
5668 }
5669 
5670 /// Returns true if a SelectInst should be turned into an explicit branch.
5671 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
5672  const TargetLowering *TLI,
5673  SelectInst *SI) {
5674  // If even a predictable select is cheap, then a branch can't be cheaper.
5675  if (!TLI->isPredictableSelectExpensive())
5676  return false;
5677 
5678  // FIXME: This should use the same heuristics as IfConversion to determine
5679  // whether a select is better represented as a branch.
5680 
5681  // If metadata tells us that the select condition is obviously predictable,
5682  // then we want to replace the select with a branch.
5683  uint64_t TrueWeight, FalseWeight;
5684  if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
5685  uint64_t Max = std::max(TrueWeight, FalseWeight);
5686  uint64_t Sum = TrueWeight + FalseWeight;
5687  if (Sum != 0) {
5688  auto Probability = BranchProbability::getBranchProbability(Max, Sum);
5689  if (Probability > TLI->getPredictableBranchThreshold())
5690  return true;
5691  }
5692  }
5693 
5694  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
5695 
5696  // If a branch is predictable, an out-of-order CPU can avoid blocking on its
5697  // comparison condition. If the compare has more than one use, there's
5698  // probably another cmov or setcc around, so it's not worth emitting a branch.
5699  if (!Cmp || !Cmp->hasOneUse())
5700  return false;
5701 
5702  // If either operand of the select is expensive and only needed on one side
5703  // of the select, we should form a branch.
5704  if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
5705  sinkSelectOperand(TTI, SI->getFalseValue()))
5706  return true;
5707 
5708  return false;
5709 }
5710 
5711 /// If \p isTrue is true, return the true value of \p SI, otherwise return
5712 /// false value of \p SI. If the true/false value of \p SI is defined by any
5713 /// select instructions in \p Selects, look through the defining select
5714 /// instruction until the true/false value is not defined in \p Selects.
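/// E.g. (sketch), with %s1 and %s2 both in \p Selects and sharing %cond:
/// \code
///   %s1 = select i1 %cond, i32 %a, i32 %b
///   %s2 = select i1 %cond, i32 %s1, i32 %c
/// \endcode
/// the true value of %s2 is found by looking through %s1 and is %a, while its
/// false value is simply %c.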
5715 static Value *getTrueOrFalseValue(
5716  SelectInst *SI, bool isTrue,
5717  const SmallPtrSet<const Instruction *, 2> &Selects) {
5718  Value *V;
5719 
5720  for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
5721  DefSI = dyn_cast<SelectInst>(V)) {
5722  assert(DefSI->getCondition() == SI->getCondition() &&
5723  "The condition of DefSI does not match with SI");
5724  V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
5725  }
5726  return V;
5727 }
5728 
5729 /// If we have a SelectInst that will likely profit from branch prediction,
5730 /// turn it into a branch.
5731 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
5732  // If branch conversion isn't desirable, exit early.
5733  if (DisableSelectToBranch || OptSize || !TLI)
5734  return false;
5735 
5736  // Find all consecutive select instructions that share the same condition.
5737  SmallVector<SelectInst *, 2> ASI;
5738  ASI.push_back(SI);
5739  for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
5740  It != SI->getParent()->end(); ++It) {
5741  SelectInst *I = dyn_cast<SelectInst>(&*It);
5742  if (I && SI->getCondition() == I->getCondition()) {
5743  ASI.push_back(I);
5744  } else {
5745  break;
5746  }
5747  }
5748 
5749  SelectInst *LastSI = ASI.back();
5750  // Increment the current iterator to skip the rest of the select instructions
5751  // because they will be either "not lowered" or "all lowered" to branch.
5752  CurInstIterator = std::next(LastSI->getIterator());
5753 
5754  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
5755 
5756  // Can we convert the 'select' to control flow?
5757  if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
5758  return false;
5759 
5760  TargetLowering::SelectSupportKind SelectKind;
5761  if (VectorCond)
5762  SelectKind = TargetLowering::VectorMaskSelect;
5763  else if (SI->getType()->isVectorTy())
5764  SelectKind = TargetLowering::ScalarCondVectorVal;
5765  else
5766  SelectKind = TargetLowering::ScalarValSelect;
5767 
5768  if (TLI->isSelectSupported(SelectKind) &&
5769  !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
5770  return false;
5771 
5772  ModifiedDT = true;
5773 
5774  // Transform a sequence like this:
5775  // start:
5776  // %cmp = cmp uge i32 %a, %b
5777  // %sel = select i1 %cmp, i32 %c, i32 %d
5778  //
5779  // Into:
5780  // start:
5781  // %cmp = cmp uge i32 %a, %b
5782  // br i1 %cmp, label %select.true, label %select.false
5783  // select.true:
5784  // br label %select.end
5785  // select.false:
5786  // br label %select.end
5787  // select.end:
5788  // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
5789  //
5790  // In addition, we may sink instructions that produce %c or %d from
5791  // the entry block into the destination(s) of the new branch.
5792  // If the true or false blocks do not contain a sunken instruction, that
5793  // block and its branch may be optimized away. In that case, one side of the
5794  // first branch will point directly to select.end, and the corresponding PHI
5795  // predecessor block will be the start block.
5796 
5797  // First, we split the block containing the select into 2 blocks.
5798  BasicBlock *StartBlock = SI->getParent();
5799  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
5800  BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
5801 
5802  // Delete the unconditional branch that was just created by the split.
5803  StartBlock->getTerminator()->eraseFromParent();
5804 
5805  // These are the new basic blocks for the conditional branch.
5806  // At least one will become an actual new basic block.
5807  BasicBlock *TrueBlock = nullptr;
5808  BasicBlock *FalseBlock = nullptr;
5809  BranchInst *TrueBranch = nullptr;
5810  BranchInst *FalseBranch = nullptr;
5811 
5812  // Sink expensive instructions into the conditional blocks to avoid executing
5813  // them speculatively.
5814  for (SelectInst *SI : ASI) {
5815  if (sinkSelectOperand(TTI, SI->getTrueValue())) {
5816  if (TrueBlock == nullptr) {
5817  TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
5818  EndBlock->getParent(), EndBlock);
5819  TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
5820  TrueBranch->setDebugLoc(SI->getDebugLoc());
5821  }
5822  auto *TrueInst = cast<Instruction>(SI->getTrueValue());
5823  TrueInst->moveBefore(TrueBranch);
5824  }
5825  if (sinkSelectOperand(TTI, SI->getFalseValue())) {
5826  if (FalseBlock == nullptr) {
5827  FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
5828  EndBlock->getParent(), EndBlock);
5829  FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
5830  FalseBranch->setDebugLoc(SI->getDebugLoc());
5831  }
5832  auto *FalseInst = cast<Instruction>(SI->getFalseValue());
5833  FalseInst->moveBefore(FalseBranch);
5834  }
5835  }
5836 
5837  // If there was nothing to sink, then arbitrarily choose the 'false' side
5838  // for a new input value to the PHI.
5839  if (TrueBlock == FalseBlock) {
5840  assert(TrueBlock == nullptr &&
5841  "Unexpected basic block transform while optimizing select");
5842 
5843  FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
5844  EndBlock->getParent(), EndBlock);
5845  auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
5846  FalseBranch->setDebugLoc(SI->getDebugLoc());
5847  }
5848 
5849  // Insert the real conditional branch based on the original condition.
5850  // If we did not create a new block for one of the 'true' or 'false' paths
5851  // of the condition, it means that side of the branch goes to the end block
5852  // directly and the path originates from the start block from the point of
5853  // view of the new PHI.
5854  BasicBlock *TT, *FT;
5855  if (TrueBlock == nullptr) {
5856  TT = EndBlock;
5857  FT = FalseBlock;
5858  TrueBlock = StartBlock;
5859  } else if (FalseBlock == nullptr) {
5860  TT = TrueBlock;
5861  FT = EndBlock;
5862  FalseBlock = StartBlock;
5863  } else {
5864  TT = TrueBlock;
5865  FT = FalseBlock;
5866  }
5867  IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
5868 
5869  SmallPtrSet<const Instruction *, 2> INS;
5870  INS.insert(ASI.begin(), ASI.end());
5871  // Use a reverse iterator because a later select may use the value of an
5872  // earlier select, and we need to propagate the value through the earlier
5873  // select to get the PHI operand.
5874  for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
5875  SelectInst *SI = *It;
5876  // The select itself is replaced with a PHI Node.
5877  PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
5878  PN->takeName(SI);
5879  PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
5880  PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
5881  PN->setDebugLoc(SI->getDebugLoc());
5882 
5883  SI->replaceAllUsesWith(PN);
5884  SI->eraseFromParent();
5885  INS.erase(SI);
5886  ++NumSelectsExpanded;
5887  }
5888 
5889  // Instruct OptimizeBlock to skip to the next block.
5890  CurInstIterator = StartBlock->end();
5891  return true;
5892 }
5893 
5894 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
5895  SmallVector<int, 16> Mask(SVI->getShuffleMask());
5896  int SplatElem = -1;
5897  for (unsigned i = 0; i < Mask.size(); ++i) {
5898  if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
5899  return false;
5900  SplatElem = Mask[i];
5901  }
5902 
5903  return true;
5904 }
5905 
5906 /// Some targets have expensive vector shifts if the lanes aren't all the same
5907 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
5908 /// it's often worth sinking a shufflevector splat down to its use so that
5909 /// codegen can spot all lanes are identical.
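/// A sketch of the shape this targets (hypothetical IR):
/// \code
///   def:
///     %splat = shufflevector <4 x i32> %v, <4 x i32> undef,
///                            <4 x i32> zeroinitializer
///   use:
///     %sh = shl <4 x i32> %x, %splat
/// \endcode
/// The splat is re-materialized in the use block so isel can see that every
/// lane of the shift amount is identical.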
5910 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
5911  BasicBlock *DefBB = SVI->getParent();
5912 
5913  // Only do this xform if variable vector shifts are particularly expensive.
5914  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
5915  return false;
5916 
5917  // We only expect better codegen by sinking a shuffle if we can recognise a
5918  // constant splat.
5919  if (!isBroadcastShuffle(SVI))
5920  return false;
5921 
5922  // InsertedShuffles - Only insert a shuffle in each block once.
5923  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
5924 
5925  bool MadeChange = false;
5926  for (User *U : SVI->users()) {
5927  Instruction *UI = cast<Instruction>(U);
5928 
5929  // Figure out which BB this shuffle is used in.
5930  BasicBlock *UserBB = UI->getParent();
5931  if (UserBB == DefBB) continue;
5932 
5933  // For now only apply this when the splat is used by a shift instruction.
5934  if (!UI->isShift()) continue;
5935 
5936  // Everything checks out, sink the shuffle if the user's block doesn't
5937  // already have a copy.
5938  Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
5939 
5940  if (!InsertedShuffle) {
5941  BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5942  assert(InsertPt != UserBB->end());
5943  InsertedShuffle =
5944  new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
5945  SVI->getOperand(2), "", &*InsertPt);
5946  }
5947 
5948  UI->replaceUsesOfWith(SVI, InsertedShuffle);
5949  MadeChange = true;
5950  }
5951 
5952  // If we removed all uses, nuke the shuffle.
5953  if (SVI->use_empty()) {
5954  SVI->eraseFromParent();
5955  MadeChange = true;
5956  }
5957 
5958  return MadeChange;
5959 }
5960 
5961 bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
5962  if (!TLI || !DL)
5963  return false;
5964 
5965  Value *Cond = SI->getCondition();
5966  Type *OldType = Cond->getType();
5967  LLVMContext &Context = Cond->getContext();
5968  MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
5969  unsigned RegWidth = RegType.getSizeInBits();
5970 
5971  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
5972  return false;
5973 
5974  // If the register width is greater than the type width, expand the condition
5975  // of the switch instruction and each case constant to the width of the
5976  // register. By widening the type of the switch condition, subsequent
5977  // comparisons (for case comparisons) will not need to be extended to the
5978  // preferred register width, so we will potentially eliminate N-1 extends,
5979  // where N is the number of cases in the switch.
5980  auto *NewType = Type::getIntNTy(Context, RegWidth);
5981 
5982  // Zero-extend the switch condition and case constants unless the switch
5983  // condition is a function argument that is already being sign-extended.
5984  // In that case, we can avoid an unnecessary mask/extension by sign-extending
5985  // everything instead.
5986  Instruction::CastOps ExtType = Instruction::ZExt;
5987  if (auto *Arg = dyn_cast<Argument>(Cond))
5988  if (Arg->hasSExtAttr())
5989  ExtType = Instruction::SExt;
5990 
5991  auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
5992  ExtInst->insertBefore(SI);
5993  ExtInst->setDebugLoc(SI->getDebugLoc());
5994  SI->setCondition(ExtInst);
5995  for (auto Case : SI->cases()) {
5996  APInt NarrowConst = Case.getCaseValue()->getValue();
5997  APInt WideConst = (ExtType == Instruction::ZExt) ?
5998  NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
5999  Case.setValue(ConstantInt::get(Context, WideConst));
6000  }
6001 
6002  return true;
6003 }
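// Illustrative sketch (hand-written IR): on a target whose preferred register
// width is 32 bits,
//   switch i8 %c, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// is rewritten to
//   %c.wide = zext i8 %c to i32
//   switch i32 %c.wide, label %def [ i32 1, label %a
//                                    i32 2, label %b ]
// (sext instead of zext when %c is an argument already carrying the sext
// attribute), so each case comparison is already done at register width.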
6004 
6005 
6006 namespace {
6007 
6008 /// Helper class to promote a scalar operation to a vector one.
6009 /// This class is used to move downward extractelement transition.
6010 /// E.g.,
6011 /// a = vector_op <2 x i32>
6012 /// b = extractelement <2 x i32> a, i32 0
6013 /// c = scalar_op b
6014 /// store c
6015 ///
6016 /// =>
6017 /// a = vector_op <2 x i32>
6018 /// c = vector_op a (equivalent to scalar_op on the related lane)
6019 /// * d = extractelement <2 x i32> c, i32 0
6020 /// * store d
6021 /// Assuming both extractelement and store can be combined, we get rid of the
6022 /// transition.
6023 class VectorPromoteHelper {
6024  /// DataLayout associated with the current module.
6025  const DataLayout &DL;
6026 
6027  /// Used to perform some checks on the legality of vector operations.
6028  const TargetLowering &TLI;
6029 
6030  /// Used to estimate the cost of the promoted chain.
6031  const TargetTransformInfo &TTI;
6032 
6033  /// The transition being moved downwards.
6034  Instruction *Transition;
6035 
6036  /// The sequence of instructions to be promoted.
6037  SmallVector<Instruction *, 4> InstsToBePromoted;
6038 
6039  /// Cost of combining a store and an extract.
6040  unsigned StoreExtractCombineCost;
6041 
6042  /// Instruction that will be combined with the transition.
6043  Instruction *CombineInst = nullptr;
6044 
6045  /// The instruction that represents the current end of the transition.
6046  /// Since we are faking the promotion until we reach the end of the chain
6047  /// of computation, we need a way to get the current end of the transition.
6048  Instruction *getEndOfTransition() const {
6049  if (InstsToBePromoted.empty())
6050  return Transition;
6051  return InstsToBePromoted.back();
6052  }
6053 
6054  /// Return the index of the original value in the transition.
6055  /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
6056  /// c, is at index 0.
6057  unsigned getTransitionOriginalValueIdx() const {
6058  assert(isa<ExtractElementInst>(Transition) &&
6059  "Other kind of transitions are not supported yet");
6060  return 0;
6061  }
6062 
6063  /// Return the index of the index in the transition.
6064  /// E.g., for "extractelement <2 x i32> c, i32 0" the index
6065  /// is at index 1.
6066  unsigned getTransitionIdx() const {
6067  assert(isa<ExtractElementInst>(Transition) &&
6068  "Other kind of transitions are not supported yet");
6069  return 1;
6070  }
6071 
6072  /// Get the type of the transition.
6073  /// This is the type of the original value.
6074  /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
6075  /// transition is <2 x i32>.
6076  Type *getTransitionType() const {
6077  return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
6078  }
6079 
6080  /// Promote \p ToBePromoted by moving \p Def downward through it.
6081  /// I.e., we have the following sequence:
6082  /// Def = Transition <ty1> a to <ty2>
6083  /// b = ToBePromoted <ty2> Def, ...
6084  /// =>
6085  /// b = ToBePromoted <ty1> a, ...
6086  /// Def = Transition <ty1> ToBePromoted to <ty2>
6087  void promoteImpl(Instruction *ToBePromoted);
6088 
6089  /// Check whether or not it is profitable to promote all the
6090  /// instructions enqueued to be promoted.
6091  bool isProfitableToPromote() {
6092  Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
6093  unsigned Index = isa<ConstantInt>(ValIdx)
6094  ? cast<ConstantInt>(ValIdx)->getZExtValue()
6095  : -1;
6096  Type *PromotedType = getTransitionType();
6097 
6098  StoreInst *ST = cast<StoreInst>(CombineInst);
6099  unsigned AS = ST->getPointerAddressSpace();
6100  unsigned Align = ST->getAlignment();
6101  // Check if this store is supported.
6102  if (!TLI.allowsMisalignedMemoryAccesses(
6103  TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
6104  Align)) {
6105  // If this is not supported, there is no way we can combine
6106  // the extract with the store.
6107  return false;
6108  }
6109 
6110  // The scalar chain of computation has to pay for the transition
6111  // scalar to vector.
6112  // The vector chain has to account for the combining cost.
6113  uint64_t ScalarCost =
6114  TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
6115  uint64_t VectorCost = StoreExtractCombineCost;
6116  for (const auto &Inst : InstsToBePromoted) {
6117  // Compute the cost.
6118  // By construction, all instructions being promoted are arithmetic ones.
6119  // Moreover, one argument is a constant that can be viewed as a splat
6120  // constant.
6121  Value *Arg0 = Inst->getOperand(0);
6122  bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
6123  isa<ConstantFP>(Arg0);
6124  TargetTransformInfo::OperandValueKind Arg0OVK =
6125  IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
6126  : TargetTransformInfo::OK_AnyValue;
6127  TargetTransformInfo::OperandValueKind Arg1OVK =
6128  !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
6129  : TargetTransformInfo::OK_AnyValue;
6130  ScalarCost += TTI.getArithmeticInstrCost(
6131  Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
6132  VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
6133  Arg0OVK, Arg1OVK);
6134  }
6135  LLVM_DEBUG(
6136  dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
6137  << ScalarCost << "\nVector: " << VectorCost << '\n');
6138  return ScalarCost > VectorCost;
6139  }
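// Worked example (the costs below are illustrative, not from a real cost
// model): promoting a single add over a <2 x i32> transition with an
// extractelement cost of 5, a scalar add cost of 1, a vector add cost of 2,
// and a StoreExtractCombineCost of 1 gives ScalarCost = 5 + 1 = 6 and
// VectorCost = 1 + 2 = 3, so isProfitableToPromote() returns true.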
6140 
6141  /// Generate a constant vector with \p Val with the same
6142  /// number of elements as the transition.
6143  /// \p UseSplat defines whether or not \p Val should be replicated
6144  /// across the whole vector.
6145  /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
6146  /// otherwise we generate a vector with as many undef as possible:
6147  /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
6148  /// used at the index of the extract.
6149  Value *getConstantVector(Constant *Val, bool UseSplat) const {
6150  unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
6151  if (!UseSplat) {
6152  // If we cannot determine where the constant must be, we have to
6153  // use a splat constant.
6154  Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
6155  if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
6156  ExtractIdx = CstVal->getSExtValue();
6157  else
6158  UseSplat = true;
6159  }
6160 
6161  unsigned End = getTransitionType()->getVectorNumElements();
6162  if (UseSplat)
6163  return ConstantVector::getSplat(End, Val);
6164 
6165  SmallVector<Constant *, 4> ConstVec;
6166  UndefValue *UndefVal = UndefValue::get(Val->getType());
6167  for (unsigned Idx = 0; Idx != End; ++Idx) {
6168  if (Idx == ExtractIdx)
6169  ConstVec.push_back(Val);
6170  else
6171  ConstVec.push_back(UndefVal);
6172  }
6173  return ConstantVector::get(ConstVec);
6174  }
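// Example: for a <4 x i32> transition that extracts lane 2 and Val == 7,
// UseSplat == false produces <i32 undef, i32 undef, i32 7, i32 undef>,
// while UseSplat == true (or an extract index that is not a ConstantInt)
// produces <i32 7, i32 7, i32 7, i32 7>.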
6175 
6176  /// Check if promoting to a vector type an operand at \p OperandIdx
6177  /// in \p Use can trigger undefined behavior.
6178  static bool canCauseUndefinedBehavior(const Instruction *Use,
6179  unsigned OperandIdx) {
6180  // It is not safe to introduce undef when the operand is on
6181  // the right hand side of a division-like instruction.
6182  if (OperandIdx != 1)
6183  return false;
6184  switch (Use->getOpcode()) {
6185  default:
6186  return false;
6187  case Instruction::SDiv:
6188  case Instruction::UDiv:
6189  case Instruction::SRem:
6190  case Instruction::URem:
6191  return true;
6192  case Instruction::FDiv:
6193  case Instruction::FRem:
6194  return !Use->hasNoNaNs();
6195  }
6196  llvm_unreachable(nullptr);
6197  }
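// Example: if the chain contains "%r = udiv i32 %x, %d" and %d is the value
// flowing through the transition, the promotion is rejected in
// shouldPromote() below; if %d is instead a constant being widened, the
// unused lanes are filled with a splat of that constant rather than undef,
// since a vector udiv with undef lanes in the divisor would be undefined
// behavior.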
6198 
6199 public:
6200  VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
6201  const TargetTransformInfo &TTI, Instruction *Transition,
6202  unsigned CombineCost)
6203  : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
6204  StoreExtractCombineCost(CombineCost) {
6205  assert(Transition && "Do not know how to promote null");
6206  }
6207 
6208  /// Check if we can promote \p ToBePromoted to \p Type.
6209  bool canPromote(const Instruction *ToBePromoted) const {
6210  // We could support CastInst too.
6211  return isa<BinaryOperator>(ToBePromoted);
6212  }
6213 
6214  /// Check if it is profitable to promote \p ToBePromoted
6215  /// by moving the transition downward through it.
6216  bool shouldPromote(const Instruction *ToBePromoted) const {
6217  // Promote only if all the operands can be statically expanded.
6218  // Indeed, we do not want to introduce any new kind of transitions.
6219  for (const Use &U : ToBePromoted->operands()) {
6220  const Value *Val = U.get();
6221  if (Val == getEndOfTransition()) {
6222  // If the use is a division and the transition is on the rhs,
6223  // we cannot promote the operation, otherwise we may create a
6224  // division by zero.
6225  if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
6226  return false;
6227  continue;
6228  }
6229  if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
6230  !isa<ConstantFP>(Val))
6231  return false;
6232  }
6233  // Check that the resulting operation is legal.
6234  int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
6235  if (!ISDOpcode)
6236  return false;
6237  return StressStoreExtract ||
6238  TLI.isOperationLegalOrCustom(
6239  ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
6240  }
6241 
6242  /// Check whether or not \p Use can be combined
6243  /// with the transition.
6244  /// I.e., is it possible to do Use(Transition) => AnotherUse?
6245  bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
6246 
6247  /// Record \p ToBePromoted as part of the chain to be promoted.
6248  void enqueueForPromotion(Instruction *ToBePromoted) {
6249  InstsToBePromoted.push_back(ToBePromoted);
6250  }
6251 
6252  /// Set the instruction that will be combined with the transition.
6253  void recordCombineInstruction(Instruction *ToBeCombined) {
6254  assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
6255  CombineInst = ToBeCombined;
6256  }
6257 
6258  /// Promote all the instructions enqueued for promotion if it is
6259  /// profitable.
6260  /// \return True if the promotion happened, false otherwise.
6261  bool promote() {
6262  // Check if there is something to promote.
6263  // Right now, if we do not have anything to combine with,
6264  // we assume the promotion is not profitable.
6265  if (InstsToBePromoted.empty() || !CombineInst)
6266  return false;
6267 
6268  // Check cost.
6269  if (!StressStoreExtract && !isProfitableToPromote())
6270  return false;
6271 
6272  // Promote.
6273  for (auto &ToBePromoted : InstsToBePromoted)
6274  promoteImpl(ToBePromoted);
6275  InstsToBePromoted.clear();
6276  return true;
6277  }
6278 };
6279 
6280 } // end anonymous namespace
6281 
6282 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
6283  // At this point, we know that all the operands of ToBePromoted but Def
6284  // can be statically promoted.
6285  // For Def, we need to use its parameter in ToBePromoted:
6286  // b = ToBePromoted ty1 a
6287  // Def = Transition ty1 b to ty2
6288  // Move the transition down.
6289  // 1. Replace all uses of the promoted operation by the transition.
6290  // = ... b => = ... Def.
6291  assert(ToBePromoted->getType() == Transition->getType() &&
6292  "The type of the result of the transition does not match "
6293  "the final type");
6294  ToBePromoted->replaceAllUsesWith(Transition);
6295  // 2. Update the type of the uses.
6296  // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
6297  Type *TransitionTy = getTransitionType();
6298  ToBePromoted->mutateType(TransitionTy);
6299  // 3. Update all the operands of the promoted operation with promoted
6300  // operands.
6301  // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
6302  for (Use &U : ToBePromoted->operands()) {
6303  Value *Val = U.get();
6304  Value *NewVal = nullptr;
6305  if (Val == Transition)
6306  NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
6307  else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
6308  isa<ConstantFP>(Val)) {
6309  // Use a splat constant if it is not safe to use undef.
6310  NewVal = getConstantVector(
6311  cast<Constant>(Val),
6312  isa<UndefValue>(Val) ||
6313  canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
6314  } else
6315  llvm_unreachable("Did you modify shouldPromote and forget to update "
6316  "this?");
6317  ToBePromoted->setOperand(U.getOperandNo(), NewVal);
6318  }
6319  Transition->moveAfter(ToBePromoted);
6320  Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
6321 }
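// Illustrative sketch (hand-written IR):
//   %a = extractelement <2 x i32> %v, i32 0      ; Transition
//   %b = add i32 %a, 7                           ; ToBePromoted
// becomes, after promoteImpl,
//   %b = add <2 x i32> %v, <i32 7, i32 undef>    ; constant widened per lane
//   %a = extractelement <2 x i32> %b, i32 0      ; transition moved below
// and a later combine can then merge %a with the store it feeds.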
6322 
6323 /// Some targets can do store(extractelement) with one instruction.
6324 /// Try to push the extractelement towards the stores when the target
6325 /// has this feature and this is profitable.
6326 bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
6327  unsigned CombineCost = std::numeric_limits<unsigned>::max();
6328  if (DisableStoreExtract || !TLI ||
6329  (!StressStoreExtract &&
6330  !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
6331  Inst->getOperand(1), CombineCost)))
6332  return false;
6333 
6334  // At this point we know that Inst is a vector to scalar transition.
6335  // Try to move it down the def-use chain, until:
6336  // - We can combine the transition with its single use
6337  // => we got rid of the transition.
6338  // - We escape the current basic block
6339  // => we would need to check that we are moving it at a cheaper place and
6340  // we do not do that for now.
6341  BasicBlock *Parent = Inst->getParent();
6342  LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
6343  VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
6344  // If the transition has more than one use, assume this is not going to be
6345  // beneficial.
6346  while (Inst->hasOneUse()) {
6347  Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
6348  LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
6349 
6350  if (ToBePromoted->getParent() != Parent) {
6351  LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
6352  << ToBePromoted->getParent()->getName()
6353  << ") than the transition (" << Parent->getName()
6354  << ").\n");
6355  return false;
6356  }
6357 
6358  if (VPH.canCombine(ToBePromoted)) {
6359  LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
6360  << "will be combined with: " << *ToBePromoted << '\n');
6361  VPH.recordCombineInstruction(ToBePromoted);
6362  bool Changed = VPH.promote();
6363  NumStoreExtractExposed += Changed;
6364  return Changed;
6365  }
6366 
6367  LLVM_DEBUG(dbgs() << "Try promoting.\n");
6368  if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
6369  return false;
6370 
6371  LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
6372 
6373  VPH.enqueueForPromotion(ToBePromoted);
6374  Inst = ToBePromoted;
6375  }
6376  return false;
6377 }
6378 
6379 /// For the instruction sequence of store below, F and I values
6380 /// are bundled together as an i64 value before being stored into memory.
6381 /// Sometimes it is more efficient to generate separate stores for F and I,
6382 /// which can remove the bitwise instructions or sink them to colder places.
6383 ///
6384 /// (store (or (zext (bitcast F to i32) to i64),
6385 /// (shl (zext I to i64), 32)), addr) -->
6386 /// (store F, addr) and (store I, addr+4)
6387 ///
6388 /// Similarly, splitting for other merged store can also be beneficial, like:
6389 /// For pair of {i32, i32}, i64 store --> two i32 stores.
6390 /// For pair of {i32, i16}, i64 store --> two i32 stores.
6391 /// For pair of {i16, i16}, i32 store --> two i16 stores.
6392 /// For pair of {i16, i8}, i32 store --> two i16 stores.
6393 /// For pair of {i8, i8}, i16 store --> two i8 stores.
6394 ///
6395 /// We allow each target to determine specifically which kind of splitting is
6396 /// supported.
6397 ///
6398 /// The store patterns are commonly seen from the simple code snippet below
6399 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
6400 /// void goo(const std::pair<int, float> &);
6401 /// hoo() {
6402 /// ...
6403 /// goo(std::make_pair(tmp, ftmp));
6404 /// ...
6405 /// }
6406 ///
6407 /// Although we already have similar splitting in DAG Combine, we duplicate
6408 /// it in CodeGenPrepare to catch the case in which the pattern spans
6409 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
6410 /// during code expansion.
6411 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
6412  const TargetLowering &TLI) {
6413  // Handle simple but common cases only.
6414  Type *StoreType = SI.getValueOperand()->getType();
6415  if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
6416  DL.getTypeSizeInBits(StoreType) == 0)
6417  return false;
6418 
6419  unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
6420  Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
6421  if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
6422  DL.getTypeSizeInBits(SplitStoreType))
6423  return false;
6424 
6425  // Match the following patterns:
6426  // (store (or (zext LValue to i64),
6427  // (shl (zext HValue to i64), 32)), HalfValBitSize)
6428  // or
6429  // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
6430  // (zext LValue to i64),
6431  // Expect both operands of OR and the first operand of SHL have only
6432  // one use.
6433  Value *LValue, *HValue;
6434  if (!match(SI.getValueOperand(),
6435  m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
6436  m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
6437  m_SpecificInt(HalfValBitSize))))))
6438  return false;
6439 
6440  // Check that LValue and HValue are integers no wider than HalfValBitSize.
6441  if (!LValue->getType()->isIntegerTy() ||
6442  DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
6443  !HValue->getType()->isIntegerTy() ||
6444  DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
6445  return false;
6446 
6447  // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
6448  // as the input of target query.
6449  auto *LBC = dyn_cast<BitCastInst>(LValue);
6450  auto *HBC = dyn_cast<BitCastInst>(HValue);
6451  EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
6452  : EVT::getEVT(LValue->getType());
6453  EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
6454  : EVT::getEVT(HValue->getType());
6455  if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
6456  return false;
6457 
6458  // Start to split store.
6459  IRBuilder<> Builder(SI.getContext());
6460  Builder.SetInsertPoint(&SI);
6461 
6462  // If LValue/HValue is a bitcast in another BB, create a new one in current
6463  // BB so it may be merged with the split stores by the DAG combiner.
6464  if (LBC && LBC->getParent() != SI.getParent())
6465  LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
6466  if (HBC && HBC->getParent() != SI.getParent())
6467  HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
6468 
6469  bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
6470  auto CreateSplitStore = [&](Value *V, bool Upper) {
6471  V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
6472  Value *Addr = Builder.CreateBitCast(
6473  SI.getOperand(1),
6474  SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
6475  if ((IsLE && Upper) || (!IsLE && !Upper))
6476  Addr = Builder.CreateGEP(
6477  SplitStoreType, Addr,
6478  ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
6479  Builder.CreateAlignedStore(
6480  V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
6481  };
6482 
6483  CreateSplitStore(LValue, false);
6484  CreateSplitStore(HValue, true);
6485 
6486  // Delete the old store.
6487  SI.eraseFromParent();
6488  return true;
6489 }
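// Illustrative sketch, using the pseudo notation of the comment above: on a
// little-endian target,
//   (store (or (zext i32 %F to i64),
//              (shl (zext i32 %I to i64), 32)), i64* %p)
// becomes roughly
//   (store i32 %F, i32* %p)        ; low half at byte offset 0
//   (store i32 %I, i32* %p + 4)    ; high half at byte offset 4
// while on a big-endian target the two halves land at the opposite offsets,
// which is what the (IsLE && Upper) || (!IsLE && !Upper) check above selects.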
6490 
6491 // Return true if the GEP has two operands, the first operand is of a sequential
6492 // type, and the second operand is a constant.
6493 static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
6494  gep_type_iterator I = gep_type_begin(*GEP);
6495  return GEP->getNumOperands() == 2 &&
6496  I.isSequential() &&
6497  isa<ConstantInt>(GEP->getOperand(1));
6498 }
6499 
6500 // Try unmerging GEPs to reduce liveness interference (register pressure) across
6501 // IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
6502 // reducing liveness interference across those edges benefits global register
6503 // allocation. Currently handles only certain cases.
6504 //
6505 // For example, unmerge %GEPI and %UGEPI as below.
6506 //
6507 // ---------- BEFORE ----------
6508 // SrcBlock:
6509 // ...
6510 // %GEPIOp = ...
6511 // ...
6512 // %GEPI = gep %GEPIOp, Idx
6513 // ...
6514 // indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
6515 // (* %GEPI is alive on the indirectbr edges due to other uses ahead)
6516 // (* %GEPIOp is alive on the indirectbr edges only because it's used by
6517 // %UGEPI)
6518 //
6519 // DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
6520 // DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
6521 // ...
6522 //
6523 // DstBi:
6524 // ...
6525 // %UGEPI = gep %GEPIOp, UIdx
6526 // ...
6527 // ---------------------------
6528 //
6529 // ---------- AFTER ----------
6530 // SrcBlock:
6531 // ... (same as above)
6532 // (* %GEPI is still alive on the indirectbr edges)
6533 // (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
6534 // unmerging)
6535 // ...
6536 //
6537 // DstBi:
6538 // ...
6539 // %UGEPI = gep %GEPI, (UIdx-Idx)
6540 // ...
6541 // ---------------------------
6542 //
6543 // The register pressure on the IndirectBr edges is reduced because %GEPIOp is
6544 // no longer alive on them.
6545 //
6546 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
6547 // of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
6548 // not to disable further simplifications and optimizations as a result of GEP
6549 // merging.
6550 //
6551 // Note this unmerging may increase the length of the data flow critical path
6552 // (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
6553 // between the register pressure and the length of data-flow critical
6554 // path. Restricting this to the uncommon IndirectBr case would minimize the
6555 // impact of potentially longer critical path, if any, and the impact on compile
6556 // time.
6557 static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
6558  const TargetTransformInfo *TTI) {
6559  BasicBlock *SrcBlock = GEPI->getParent();
6560  // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
6561  // (non-IndirectBr) cases exit early here.
6562  if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
6563  return false;
6564  // Check that GEPI is a simple gep with a single constant index.
6565  if (!GEPSequentialConstIndexed(GEPI))
6566  return false;
6567  ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
6568  // Check that GEPI is a cheap one.
6569  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
6570  > TargetTransformInfo::TCC_Basic)
6571  return false;
6572  Value *GEPIOp = GEPI->getOperand(0);
6573  // Check that GEPIOp is an instruction that's also defined in SrcBlock.
6574  if (!isa<Instruction>(GEPIOp))
6575  return false;
6576  auto *GEPIOpI = cast<Instruction>(GEPIOp);
6577  if (GEPIOpI->getParent() != SrcBlock)
6578  return false;
6579  // Check that GEPI is used outside the block, meaning it's alive on the
6580  // IndirectBr edge(s).
6581  if (find_if(GEPI->users(), [&](User *Usr) {
6582  if (auto *I = dyn_cast<Instruction>(Usr)) {
6583  if (I->getParent() != SrcBlock) {
6584  return true;
6585  }
6586  }
6587  return false;
6588  }) == GEPI->users().end())
6589  return false;
6590  // The second elements of the GEP chains to be unmerged.
6591  std::vector<GetElementPtrInst *> UGEPIs;
6592  // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
6593  // on IndirectBr edges.
6594  for (User *Usr : GEPIOp->users()) {
6595  if (Usr == GEPI) continue;
6596  // Check if Usr is an Instruction. If not, give up.
6597  if (!isa<Instruction>(Usr))
6598  return false;
6599  auto *UI = cast<Instruction>(Usr);
6600  // If Usr is in the same block as GEPIOp, that's fine; skip it.
6601  if (UI->getParent() == SrcBlock)
6602  continue;
6603  // Check if Usr is a GEP. If not, give up.
6604  if (!isa<GetElementPtrInst>(Usr))
6605  return false;
6606  auto *UGEPI = cast<GetElementPtrInst>(Usr);
6607  // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
6608  // the pointer operand to it. If so, record it in the vector. If not, give
6609  // up.
6610  if (!GEPSequentialConstIndexed(UGEPI))
6611  return false;
6612  if (UGEPI->getOperand(0) != GEPIOp)
6613  return false;
6614  if (GEPIIdx->getType() !=
6615  cast<ConstantInt>(UGEPI->getOperand(1))->getType())
6616  return false;
6617  ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6618  if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
6619  > TargetTransformInfo::TCC_Basic)
6620  return false;
6621  UGEPIs.push_back(UGEPI);
6622  }
6623  if (UGEPIs.size() == 0)
6624  return false;
6625  // Check the materializing cost of (Uidx-Idx).
6626  for (GetElementPtrInst *UGEPI : UGEPIs) {
6627  ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6628  APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
6629  unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
6630  if (ImmCost > TargetTransformInfo::TCC_Basic)
6631  return false;
6632  }
6633  // Now unmerge between GEPI and UGEPIs.
6634  for (GetElementPtrInst *UGEPI : UGEPIs) {
6635  UGEPI->setOperand(0, GEPI);
6636  ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6637  Constant *NewUGEPIIdx =
6638  ConstantInt::get(GEPIIdx->getType(),
6639  UGEPIIdx->getValue() - GEPIIdx->getValue());
6640  UGEPI->setOperand(1, NewUGEPIIdx);
6641  // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
6642  // inbounds to avoid UB.
6643  if (!GEPI->isInBounds()) {
6644  UGEPI->setIsInBounds(false);
6645  }
6646  }
6647  // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
6648  // alive on IndirectBr edges).
6649  assert(find_if(GEPIOp->users(), [&](User *Usr) {
6650  return cast<Instruction>(Usr)->getParent() != SrcBlock;
6651  }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
6652  return true;
6653 }
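// Worked example: with %GEPI = gep %GEPIOp, 3 in SrcBlock and
// %UGEPI = gep %GEPIOp, 7 in a destination block, the loop above rewrites
// %UGEPI to gep %GEPI, 4 (i.e. 7 - 3), so only %GEPI, not %GEPIOp, has to
// stay live across the indirectbr edge.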
6654 
6655 bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
6656  // Bail out if we inserted the instruction to prevent optimizations from
6657  // stepping on each other's toes.
6658  if (InsertedInsts.count(I))
6659  return false;
6660 
6661  if (PHINode *P = dyn_cast<PHINode>(I)) {
6662  // It is possible for very late stage optimizations (such as SimplifyCFG)
6663  // to introduce PHI nodes too late to be cleaned up. If we detect such a
6664  // trivial PHI, go ahead and zap it here.
6665  if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
6666  P->replaceAllUsesWith(V);
6667  P->eraseFromParent();
6668  ++NumPHIsElim;
6669  return true;
6670  }
6671  return false;
6672  }
6673 
6674  if (CastInst *CI = dyn_cast<CastInst>(I)) {
6675  // If the source of the cast is a constant, then this should have
6676  // already been constant folded. The only reason NOT to constant fold
6677  // it is if something (e.g. LSR) was careful to place the constant
6678  // evaluation in a block other than the one that uses it (e.g. to hoist
6679  // the address of globals out of a loop). If this is the case, we don't
6680  // want to forward-subst the cast.
6681  if (isa<Constant>(CI->getOperand(0)))
6682  return false;
6683 
6684  if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
6685  return true;
6686 
6687  if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
6688  /// Sink a zext or sext into its user blocks if the target type doesn't
6689  /// fit in one register
6690  if (TLI &&
6691  TLI->getTypeAction(CI->getContext(),
6692  TLI->getValueType(*DL, CI->getType())) ==
6693  TargetLowering::TypeExpandInteger) {
6694  return SinkCast(CI);
6695  } else {
6696  bool MadeChange = optimizeExt(I);
6697  return MadeChange | optimizeExtUses(I);
6698  }
6699  }
6700  return false;
6701  }
6702 
6703  if (CmpInst *CI = dyn_cast<CmpInst>(I))
6704  if (!TLI || !TLI->hasMultipleConditionRegisters())
6705  return OptimizeCmpExpression(CI, TLI);
6706 
6707  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
6708  LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6709  if (TLI) {
6710  bool Modified = optimizeLoadExt(LI);
6711  unsigned AS = LI->getPointerAddressSpace();
6712  Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
6713  return Modified;
6714  }
6715  return false;
6716  }
6717 
6718  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
6719  if (TLI && splitMergedValStore(*SI, *DL, *TLI))
6720  return true;
6721  SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6722  if (TLI) {
6723  unsigned AS = SI->getPointerAddressSpace();
6724  return optimizeMemoryInst(I, SI->getOperand(1),
6725  SI->getOperand(0)->getType(), AS);
6726  }
6727  return false;
6728  }
6729 
6730  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
6731  unsigned AS = RMW->getPointerAddressSpace();
6732  return optimizeMemoryInst(I, RMW->getPointerOperand(),
6733  RMW->getType(), AS);
6734  }
6735 
6736  if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
6737  unsigned AS = CmpX->getPointerAddressSpace();
6738  return optimizeMemoryInst(I, CmpX->getPointerOperand(),
6739  CmpX->getCompareOperand()->getType(), AS);
6740  }
6741 
6742  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
6743 
6744  if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
6745  EnableAndCmpSinking && TLI)
6746  return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
6747 
6748  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
6749  BinOp->getOpcode() == Instruction::LShr)) {
6750  ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
6751  if (TLI && CI && TLI->hasExtractBitsInsn())
6752  return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
6753 
6754  return false;
6755  }
6756 
6757  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
6758  if (GEPI->hasAllZeroIndices()) {
6759  /// The GEP operand must be a pointer, so must its result -> BitCast
6760  Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
6761  GEPI->getName(), GEPI);
6762  NC->setDebugLoc(GEPI->getDebugLoc());
6763  GEPI->replaceAllUsesWith(NC);
6764  GEPI->eraseFromParent();
6765  ++NumGEPsElim;
6766  optimizeInst(NC, ModifiedDT);
6767  return true;
6768  }
6769  if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
6770  return true;
6771  }
6772  return false;
6773  }
6774 
6775  if (CallInst *CI = dyn_cast<CallInst>(I))
6776  return optimizeCallInst(CI, ModifiedDT);
6777 
6778  if (SelectInst *SI = dyn_cast<SelectInst>(I))
6779  return optimizeSelectInst(SI);
6780 
6781  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
6782  return optimizeShuffleVectorInst(SVI);
6783 
6784  if (auto *Switch = dyn_cast<SwitchInst>(I))
6785  return optimizeSwitchInst(Switch);
6786 
6787  if (isa<ExtractElementInst>(I))
6788  return optimizeExtractElementInst(I);
6789 
6790  return false;
6791 }
6792 
6793 /// Given an OR instruction, check to see if this is a bitreverse
6794 /// idiom. If so, insert the new intrinsic and return true.
6795 static bool makeBitReverse(Instruction &I, const DataLayout &DL,
6796  const TargetLowering &TLI) {
6797  if (!I.getType()->isIntegerTy() ||
6798  !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
6799  TLI.getValueType(DL, I.getType(), true)))
6800  return false;
6801 
6802  SmallVector<Instruction *, 4> Insts;
6803  if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
6804  return false;
6805  Instruction *LastInst = Insts.back();
6806  I.replaceAllUsesWith(LastInst);
6807  RecursivelyDeleteTriviallyDeadInstructions(&I);
6808  return true;
6809 }
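// Illustrative sketch (hand-written IR): an or-rooted chain of shifts and
// masks that ends up mirroring the bits of %x (e.g. the usual swap-halves,
// then nibbles, then pairs, then single bits sequence) is collapsed into a
// single
//   %r = call i8 @llvm.bitreverse.i8(i8 %x)
// which the target can lower directly; the legality check above makes sure
// ISD::BITREVERSE is legal or custom for that type.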
6810 
6811 // In this pass we look for GEP and cast instructions that are used
6812 // across basic blocks and rewrite them to improve basic-block-at-a-time
6813 // selection.
6814 bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
6815  SunkAddrs.clear();
6816  bool MadeChange = false;
6817 
6818  CurInstIterator = BB.begin();
6819  while (CurInstIterator != BB.end()) {
6820  MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
6821  if (ModifiedDT)
6822  return true;
6823  }
6824 
6825  bool MadeBitReverse = true;
6826  while (TLI && MadeBitReverse) {
6827  MadeBitReverse = false;
6828  for (auto &I : reverse(BB)) {
6829  if (makeBitReverse(I, *DL, *TLI)) {
6830  MadeBitReverse = MadeChange = true;
6831  ModifiedDT = true;
6832  break;
6833  }
6834  }
6835  }
6836  MadeChange |= dupRetToEnableTailCallOpts(&BB);
6837 
6838  return MadeChange;
6839 }
6840 
6841 // If llvm.dbg.value is far away from the value, ISel may not be able to
6842 // handle it properly. ISel will drop the llvm.dbg.value if it cannot
6843 // find a node corresponding to the value.
6844 bool CodeGenPrepare::placeDbgValues(Function &F) {
6845  bool MadeChange = false;
6846  for (BasicBlock &BB : F) {
6847  Instruction *PrevNonDbgInst = nullptr;
6848  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
6849  Instruction *Insn = &*BI++;
6850  DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
6851  // Leave dbg.values that refer to an alloca alone. These
6852  // intrinsics describe the address of a variable (= the alloca)
6853  // being taken. They should not be moved next to the alloca
6854  // (and to the beginning of the scope), but rather stay close to
6855  // where said address is used.
6856  if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
6857  PrevNonDbgInst = Insn;
6858  continue;
6859  }
6860 
6861  Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
6862  if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
6863  // If VI is a phi in a block with an EHPad terminator, we can't insert
6864  // after it.
6865  if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
6866  continue;
6867  LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
6868  << *DVI << ' ' << *VI);
6869  DVI->removeFromParent();
6870  if (isa<PHINode>(VI))
6871  DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
6872  else
6873  DVI->insertAfter(VI);
6874  MadeChange = true;
6875  ++NumDbgValueMoved;
6876  }
6877  }
6878  }
6879  return MadeChange;
6880 }
6881 
6882 /// Scale down both weights to fit into uint32_t.
6883 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
6884  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
6885  uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
6886  NewTrue = NewTrue / Scale;
6887  NewFalse = NewFalse / Scale;
6888 }
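// Worked example: NewTrue = 2^33 and NewFalse = 2^31 give
// Scale = 2^33 / (2^32 - 1) + 1 = 2 + 1 = 3, so the weights become
// 2863311530 and 715827882; both fit in uint32_t and their ~4:1 ratio is
// preserved.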
6889 
6890 /// Some targets prefer to split a conditional branch like:
6891 /// \code
6892 /// %0 = icmp ne i32 %a, 0
6893 /// %1 = icmp ne i32 %b, 0
6894 /// %or.cond = or i1 %0, %1
6895 /// br i1 %or.cond, label %TrueBB, label %FalseBB
6896 /// \endcode
6897 /// into multiple branch instructions like:
6898 /// \code
6899 /// bb1:
6900 /// %0 = icmp ne i32 %a, 0
6901 /// br i1 %0, label %TrueBB, label %bb2
6902 /// bb2:
6903 /// %1 = icmp ne i32 %b, 0
6904 /// br i1 %1, label %TrueBB, label %FalseBB
6905 /// \endcode
6906 /// This usually allows instruction selection to do even further optimizations
6907 /// and combine the compare with the branch instruction. Currently this is
6908 /// applied for targets which have "cheap" jump instructions.
6909 ///
6910 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
6911 ///
6912 bool CodeGenPrepare::splitBranchCondition(Function &F) {
6913  if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
6914  return false;
6915 
6916  bool MadeChange = false;
6917  for (auto &BB : F) {
6918  // Does this BB end with the following?
6919  // %cond1 = icmp|fcmp|binary instruction ...
6920  // %cond2 = icmp|fcmp|binary instruction ...
6921  // %cond.or = or|and i1 %cond1, %cond2
6922  // br i1 %cond.or label %dest1, label %dest2
6923  BinaryOperator *LogicOp;
6924  BasicBlock *TBB, *FBB;
6925  if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
6926  continue;
6927 
6928  auto *Br1 = cast<BranchInst>(BB.getTerminator());
6929  if (Br1->getMetadata(LLVMContext::MD_unpredictable))
6930  continue;
6931 
6932  unsigned Opc;
6933  Value *Cond1, *Cond2;
6934  if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
6935  m_OneUse(m_Value(Cond2)))))
6936  Opc = Instruction::And;
6937  else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
6938  m_OneUse(m_Value(Cond2)))))
6939  Opc = Instruction::Or;
6940  else
6941  continue;
6942 
6943  if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
6944  !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
6945  continue;
6946 
6947  LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
6948 
6949  // Create a new BB.
6950  auto TmpBB =
6951  BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
6952  BB.getParent(), BB.getNextNode());
6953 
6954  // Update original basic block by using the first condition directly by the
6955  // branch instruction and removing the no longer needed and/or instruction.
6956  Br1->setCondition(Cond1);
6957  LogicOp->eraseFromParent();
6958 
6959  // Depending on the condition we have to either replace the true or the
6960  // false successor of the original branch instruction.
6961  if (Opc == Instruction::And)
6962  Br1->setSuccessor(0, TmpBB);
6963  else
6964  Br1->setSuccessor(1, TmpBB);
6965 
6966  // Fill in the new basic block.
6967  auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
6968  if (auto *I = dyn_cast<Instruction>(Cond2)) {
6969  I->removeFromParent();
6970  I->insertBefore(Br2);
6971  }
6972 
6973  // Update PHI nodes in both successors. The original BB needs to be
6974  // replaced in one successor's PHI nodes, because the branch now comes from
6975  // the newly generated BB (TmpBB). In the other successor we need to add one
6976  // incoming edge to the PHI nodes, because both branch instructions now
6977  // target the same successor. Depending on the original branch condition
6978  // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
6979  // we perform the correct update for the PHI nodes.
6980  // This doesn't change the successor order of the just created branch
6981  // instruction (or any other instruction).
6982  if (Opc == Instruction::Or)
6983  std::swap(TBB, FBB);
6984 
6985  // Replace the old BB with the new BB.
6986  for (PHINode &PN : TBB->phis()) {
6987  int i;
6988  while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
6989  PN.setIncomingBlock(i, TmpBB);
6990  }
6991 
6992  // Add another incoming edge from the new BB.
6993  for (PHINode &PN : FBB->phis()) {
6994  auto *Val = PN.getIncomingValueForBlock(&BB);
6995  PN.addIncoming(Val, TmpBB);
6996  }
6997 
6998  // Update the branch weights (from SelectionDAGBuilder::
6999  // FindMergedConditions).
7000  if (Opc == Instruction::Or) {
7001  // Codegen X | Y as:
7002  // BB1:
7003  // jmp_if_X TBB
7004  // jmp TmpBB
7005  // TmpBB:
7006  // jmp_if_Y TBB
7007  // jmp FBB
7008  //
7009 
7010  // We have flexibility in setting Prob for BB1 and Prob for NewBB.
7011  // The requirement is that
7012  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
7013  // = TrueProb for original BB.
7014  // Assuming the original weights are A and B, one choice is to set BB1's
7015  // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
7016  // assumes that
7017  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
7018  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
7019  // TmpBB, but the math is more complicated.
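  // Numeric check (A = 3, B = 1): BB1 gets weights (3, 5) and TmpBB gets
  // (3, 2), so TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
  // = 3/8 + (5/8) * (3/5) = 3/4, which matches the original TrueProb
  // A / (A + B).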
7020  uint64_t TrueWeight, FalseWeight;
7021  if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
7022  uint64_t NewTrueWeight = TrueWeight;
7023  uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
7024  scaleWeights(NewTrueWeight, NewFalseWeight);
7025  Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
7026  .createBranchWeights(NewTrueWeight, NewFalseWeight));
7027 
7028  NewTrueWeight = TrueWeight;
7029  NewFalseWeight = 2 * FalseWeight;
7030  scaleWeights(NewTrueWeight, NewFalseWeight);
7031  Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
7032  .createBranchWeights(NewTrueWeight, NewFalseWeight));
7033  }
7034  } else {
7035  // Codegen X & Y as:
7036  // BB1:
7037  // jmp_if_X TmpBB
7038  // jmp FBB
7039  // TmpBB:
7040  // jmp_if_Y TBB
7041  // jmp FBB
7042  //
7043  // This requires creation of TmpBB after CurBB.
7044 
7045  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
7046  // The requirement is that
7047  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
7048  // = FalseProb for original BB.
7049  // Assuming the original weights are A and B, one choice is to set BB1's
7050  // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
7051  // assumes that
7052  // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
7053  uint64_t TrueWeight, FalseWeight;
7054  if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
7055  uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
7056  uint64_t NewFalseWeight = FalseWeight;
7057  scaleWeights(NewTrueWeight, NewFalseWeight);
7058  Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
7059  .createBranchWeights(NewTrueWeight, NewFalseWeight));
7060 
7061  NewTrueWeight = 2 * TrueWeight;
7062  NewFalseWeight = FalseWeight;
7063  scaleWeights(NewTrueWeight, NewFalseWeight);
7064  Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
7065  .createBranchWeights(NewTrueWeight, NewFalseWeight));
7066  }
7067  }
7068 
7069  // Note: No point in getting fancy here, since the DT info is never
7070  // available to CodeGenPrepare.
7071  ModifiedDT = true;
7072 
7073  MadeChange = true;
7074 
7075  LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
7076  TmpBB->dump());
7077  }
7078  return MadeChange;
7079 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
void clear()
Definition: ValueMap.h:152
const Function & getFunction() const
Definition: Function.h:134
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, AliasSetTracker *AST, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1302
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:749
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
Return a value (possibly void), from a function.
Value * getValueOperand()
Definition: Instructions.h:410
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
static MVT getIntegerVT(unsigned BitWidth)
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:427
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:71
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:636
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
bool isSimple() const
Definition: Instructions.h:277
static Constant * getConstantVector(MVT VT, const APInt &SplatValue, unsigned SplatBitSize, LLVMContext &C)
iterator_range< use_iterator > uses()
Definition: Value.h:355
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:79
virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:834
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
This class represents lattice values for constants.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:316
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:106
Value * optimizeCall(CallInst *CI)
Take the given call instruction and return a more optimal value to replace the instruction with or 0 ...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr)
Attempts to merge a block into its predecessor, if possible.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
void setAlignment(unsigned Align)
iterator end()
Definition: Function.h:658
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:529
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This class represents zero extension of integer types.
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
Analysis providing profile information.
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:858
This class represents a function call, abstracting a target machine&#39;s calling convention.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
Value * getCondition() const
const Value * getTrueValue() const
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
virtual bool isSelectSupported(SelectSupportKind) const
bool salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1591
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Delete the specified block, which must have no predecessors.
This instruction constructs a fixed permutation of two input vectors.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
bool isTerminator() const
Definition: Instruction.h:129
void setSectionPrefix(StringRef Prefix)
Set the section prefix for this function.
Definition: Function.cpp:1414
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
BasicBlock * getSuccessor(unsigned i) const
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1663
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1140
A debug info location.
Definition: DebugLoc.h:34
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, ImmutableCallSite CS) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
An instruction for reading from memory.
Definition: Instructions.h:168
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:692
Hexagon Common GEP
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
This defines the Use class.
void reserve(size_type N)
Definition: SmallVector.h:376
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
void setAlignment(unsigned Align)
Definition: Globals.cpp:116
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:33
unsigned getBitWidth() const
getBitWidth - Return the bitwidth of this constant.
Definition: Constants.h:143
static bool despeculateCountZeros(IntrinsicInst *CountZeros, const TargetLowering *TLI, const DataLayout *DL, bool &ModifiedDT)
If counting leading or trailing zeros is an expensive operation and a zero input is defined...
unsigned getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:646
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
static cl::opt< unsigned > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:265
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Split the edge connecting specified block.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4298
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:48
AnalysisUsage & addRequired()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:529
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI)
Returns true if F contains hot code.
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
#define DEBUG_TYPE
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:113
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1286
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:652
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1200
This file contains the simple types necessary to represent the attributes associated with functions a...
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
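A self-contained sketch of the IRBuilder API referenced above; the function name, module, and value names are illustrative.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Builds a function "i32 @add_i32(i32, i32)" returning the sum of its
// two arguments.
static Function *buildAddFunction(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Type *I32 = Type::getInt32Ty(Ctx);
  FunctionType *FT = FunctionType::get(I32, {I32, I32}, /*isVarArg=*/false);
  Function *F =
      Function::Create(FT, Function::ExternalLinkage, "add_i32", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(Entry);
  auto AI = F->arg_begin();
  Value *A = &*AI++;
  Value *B = &*AI;
  Builder.CreateRet(Builder.CreateAdd(A, B, "sum"));
  return F;
}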
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
Position
Position to insert a new instruction relative to an existing instruction.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2091
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:642
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:103
unsigned getSizeInBits() const
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
static bool shouldPromote(Value *V)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1727
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1533
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
Key
PAL metadata keys.
OtherOps getOpcode() const
Get the opcode casted to the right type.
Definition: InstrTypes.h:716
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:419
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:889
void findDbgValues(SmallVectorImpl< DbgValueInst *> &DbgValues, Value *V)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: Local.cpp:1495
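A sketch of the usual use of findDbgValues: collect the llvm.dbg.value intrinsics that describe one value and retarget them at another. The helper name is invented, and the validity checks a real transform would perform are omitted.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Point every llvm.dbg.value that currently describes Old at New instead.
static void retargetDbgValues(Value *Old, Value *New) {
  SmallVector<DbgValueInst *, 4> DbgValues;
  findDbgValues(DbgValues, Old);
  for (DbgValueInst *DVI : DbgValues)
    DVI->setOperand(0, MetadataAsValue::get(DVI->getContext(),
                                            ValueAsMetadata::get(New)));
}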
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:606
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:182
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:124
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value *> &OffsetV)
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:247
This contains information for each constraint that we are lowering.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:269
virtual bool isCheapToSpeculateCtlz() const
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:212
This class represents a no-op cast from one type to another.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:221
static CmpInst * Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name="", Instruction *InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:82
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:221
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
iterator_range< User::op_iterator > arg_operands()
Definition: InstrTypes.h:1127
An instruction for storing to memory.
Definition: Instructions.h:321
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:151
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
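A minimal sketch of the replace-and-erase idiom used once an instruction has been proven redundant; the checks that make the replacement legal are assumed to have been done already, and the helper name is illustrative.
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Forward all uses of Old to New, then drop the now-dead instruction.
static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New); // every use of Old now refers to New
  Old->eraseFromParent();       // Old has no uses left, so delete it
}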
virtual bool isCheapToSpeculateCttz() const
Return true if it is cheap to speculate a call to intrinsic cttz.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1659
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber.
Definition: InlineAsm.h:121
bool pointsToAliveValue() const
Definition: ValueHandle.h:202
Optimize for code generation
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
iterator begin()
Definition: Function.h:656
This class implements simplifications for calls to fortified library functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to, when possible, replace them with their non-checking counterparts.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
This class represents a truncation of integer types.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
Value * getOperand(unsigned i) const
Definition: User.h:170
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
Class to represent pointers.
Definition: DerivedTypes.h:467
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1252
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:106
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
Definition: Instructions.h:963
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes: ...
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI)
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst *> &Targets)
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
const Instruction * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Definition: Statepoint.h:350
const BasicBlock & getEntryBlock() const
Definition: Function.h:640
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:62
static bool runOnFunction(Function &F, bool PostInlining)
static unsigned getPointerOperandIndex()
Definition: Instructions.h:620
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
std::vector< AsmOperandInfo > AsmOperandInfoVector
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:396
bool canIncreaseAlignment() const
Definition: Globals.cpp:220
static bool isBroadcastShuffle(ShuffleVectorInst *SVI)
bool operator!=(const UnitT &, const UnitT &)
static bool CombineUAddWithOverflow(CmpInst *CI)
Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if possible.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:217
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:308
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:234
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Instruction *, unsigned >> &MemoryUses, SmallPtrSetImpl< Instruction *> &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, int SeenInsts=0)
Recursively walk all the uses of I until we find a memory use.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:74
Machine Value Type.
Value * getCalledValue() const
Definition: InstrTypes.h:1174
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
PointerIntPair - This class implements a pair of a pointer and small integer.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:755
static unsigned getPointerOperandIndex()
Definition: Instructions.h:415
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Conditional or Unconditional Branch instruction.
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:145
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:217
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
Value * getValue() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
const Instruction & front() const
Definition: BasicBlock.h:281
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
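A sketch of the visited-set idiom built on this insert(): the bool in the returned pair reports whether the pointer was newly added. The helper is illustrative.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Returns true the first time I is seen, false on every later visit.
static bool markVisited(SmallPtrSetImpl<Instruction *> &Visited,
                        Instruction *I) {
  return Visited.insert(I).second;
}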
bool isMask(unsigned numBits) const
Definition: APInt.h:495
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:113
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:139
brc_match< Cond_t > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Represent the analysis usage information of a pass.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:767
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:772
static bool SinkCast(CastInst *CI)
SinkCast - Sink the specified cast instruction into its user blocks.
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:116
op_range operands()
Definition: User.h:238
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:495
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
self_iterator getIterator()
Definition: ilist_node.h:82
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:74
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1214
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:450
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:60
const Value * getCondition() const
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Definition: Statepoint.h:387
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
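A sketch of the cleanup call as it is typically made after rewriting uses; a real caller would usually pass its TargetLibraryInfo as well. The helper name is illustrative.
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// If Old became dead after its uses were rewritten, delete it together with
// any operands that become trivially dead in turn.
static void deleteIfTriviallyDead(Instruction *Old) {
  if (Old->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(Old);
}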
Extended Value Type.
Definition: ValueTypes.h:34
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:53
static wasm::ValType getType(const TargetRegisterClass *RC)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1251
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:385
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1048
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1655
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:106
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1458
size_type size() const
Definition: SmallPtrSet.h:93
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:392
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1119
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) INITIALIZE_PASS_END(CodeGenPrepare
void initializeCodeGenPreparePass(PassRegistry &)
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
See the file comment.
Definition: ValueMap.h:86
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:520
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
BlockVerifier::State From
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
iterator end()
Definition: BasicBlock.h:271
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:1801
static ValueAsMetadata * get(Value *V)
Definition: Metadata.cpp:349
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
int getIntImmCost(const APInt &Imm, Type *Ty) const
Return the expected cost of materializing for the given integer immediate of the specified type...
Provides information about what library functions are available for the current target.
virtual bool shouldConsiderGEPOffsetSplit() const
iterator begin() const
Definition: LoopInfo.h:142
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI)
Returns true if F contains only cold code.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:180
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
bool isConditional() const
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:125
#define NC
Definition: regutils.h:42
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:238
void setOperand(unsigned i, Value *Val)
Definition: User.h:175
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(...(promotableInstN(load)))) into promotedInst1(...(promotedInstN(ext(load)))).
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:547
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:56
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
iterator_range< user_iterator > users()
Definition: Value.h:400
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:337
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:212
const Value * getFalseValue() const
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1778
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:513
FunctionPass * createCodeGenPreparePass()
createCodeGenPreparePass - Transform the code to expose more pattern matching during instruction sele...
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:64
TargetSubtargetInfo - Generic base class for all target subtargets.
#define Success
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:568
virtual bool useSoftFloat() const
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1969
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1255
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:721
This class wraps the llvm.memcpy/memmove intrinsics.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
static bool isTrivial(const DICompositeType *DCTy)
Analysis providing branch probability information.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:311
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:551
void emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:652
SelectSupportKind
Enum that describes what type of support for selects the target has.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
void setCondition(Value *V)
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
void clear()
Definition: ilist.h:309
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool SplitIndirectBrCriticalEdges(Function &F, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
This represents the llvm.dbg.value instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value...
Definition: APInt.h:482
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
Establish a view to a call site for examination.
Definition: CallSite.h:711
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1181
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction...
Definition: Instruction.cpp:80
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:115
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
user_iterator_impl< User > user_iterator
Definition: Value.h:369
Type * getResultElementType() const
Definition: Instructions.h:956
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
iterator end() const
Definition: LoopInfo.h:143
unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:268
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:276
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned getPointerOperandIndex()
Definition: Instructions.h:798
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:325
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:408
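A sketch of splitting a block immediately before a given instruction; the new block's name is illustrative.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Everything from SplitPt to the end of BB moves into the returned block,
// and BB is terminated with an unconditional branch to it.
static BasicBlock *splitBefore(BasicBlock *BB, Instruction *SplitPt) {
  return BB->splitBasicBlock(SplitPt->getIterator(), "split.cont");
}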
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isUnconditional() const
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2039
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:366
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:604
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1722
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:291
Multiway switch.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:91
unsigned getMinSignedBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1552
user_iterator user_begin()
Definition: Value.h:376
bool hasHugeWorkingSetSize()
Returns true if the working set size of the code is considered huge.
The cost of a typical 'add' instruction.
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Represents calls to the gc.relocate intrinsic.
Definition: Statepoint.h:374
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
LLVM Value Representation.
Definition: Value.h:73
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:355
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinkinig and/cmp into branches."))
static constexpr int MaxMemoryUsesToScan
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static const Function * getParent(const Value *V)
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
const DataLayout & DL
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
Invoke instruction.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Use profile info to add section prefix for hot/cold functions"))
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:573
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:970
bool hasNoNaNs() const
Determine whether the no-NaNs flag is set.
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
OperandValueKind
Additional information about an operand's possible values.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const Instruction * getFirstNonPHIOrDbg() const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic...
Definition: BasicBlock.cpp:197
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction *> &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:2749
This pass exposes codegen information to IR-level passes.
static bool makeBitReverse(Instruction &I, const DataLayout &DL, const TargetLowering &TLI)
Given an OR instruction, check to see if this is a bitreverse idiom.
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
specific_intval m_SpecificInt(uint64_t V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:579
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
virtual BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor, it is very likely to be predicted correctly.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr)
Replace all uses of &#39;I&#39; with &#39;SimpleV&#39; and simplify the uses recursively.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1315
op_range incoming_values()
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition: Analysis.cpp:513
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
VectorType * getType() const
Overload to return most specific vector type.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
The cost of a 'div' instruction on x86.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
bool use_empty() const
Definition: Value.h:323
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1079
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1442
Type * getElementType() const
Definition: DerivedTypes.h:486
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:479
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst *> &AllRelocateCalls, DenseMap< GCRelocateInst *, SmallVector< GCRelocateInst *, 2 >> &RelocateInstMap)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
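A sketch of this matcher in the shape CombineUAddWithOverflow looks for: an icmp that implements an unsigned-overflow check of an add, with the two addends and the add itself captured. The helper name is illustrative.
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// True if Cmp checks "A + B overflows (unsigned)", binding A, B and the add.
static bool matchUAddOverflowCheck(CmpInst *Cmp, Value *&A, Value *&B,
                                   Instruction *&AddI) {
  return match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B),
                                       m_Instruction(AddI)));
}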
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
an instruction to allocate memory on the stack
Definition: Instructions.h:60
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
gep_type_iterator gep_type_begin(const User *GEP)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1245
user_iterator user_end()
Definition: Value.h:384
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator *> &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.