LLVM  8.0.1
StraightLineStrengthReduce.cpp
Go to the documentation of this file.
1 //===- StraightLineStrengthReduce.cpp - -----------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements straight-line strength reduction (SLSR). Unlike loop
11 // strength reduction, this algorithm is designed to reduce arithmetic
12 // redundancy in straight-line code instead of loops. It has proven to be
13 // effective in simplifying arithmetic statements derived from an unrolled loop.
14 // It can also simplify the logic of SeparateConstOffsetFromGEP.
15 //
16 // There are many optimizations we can perform in the domain of SLSR. This file
17 // for now contains only an initial step. Specifically, we look for strength
18 // reduction candidates in the following forms:
19 //
20 // Form 1: B + i * S
21 // Form 2: (B + i) * S
22 // Form 3: &B[i * S]
23 //
24 // where S is an integer variable, and i is a constant integer. If we found two
25 // candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
26 // in a simpler way with respect to S1. For example,
27 //
28 // S1: X = B + i * S
29 // S2: Y = B + i' * S => X + (i' - i) * S
30 //
31 // S1: X = (B + i) * S
32 // S2: Y = (B + i') * S => X + (i' - i) * S
33 //
34 // S1: X = &B[i * S]
35 // S2: Y = &B[i' * S] => &X[(i' - i) * S]
36 //
37 // Note: (i' - i) * S is folded to the extent possible.
38 //
39 // This rewriting is in general a good idea. The code patterns we focus on
40 // usually come from loop unrolling, so (i' - i) * S is likely the same
41 // across iterations and can be reused. When that happens, the optimized form
42 // takes only one add starting from the second iteration.
43 //
44 // When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
45 // multiple bases, we choose to rewrite S2 with respect to its "immediate"
46 // basis, the basis that is the closest ancestor in the dominator tree.
47 //
48 // TODO:
49 //
50 // - Floating point arithmetics when fast math is enabled.
51 //
52 // - SLSR may decrease ILP at the architecture level. Targets that are very
53 // sensitive to ILP may want to disable it. Having SLSR to consider ILP is
54 // left as future work.
55 //
56 // - When (i' - i) is constant but i and i' are not, we could still perform
57 // SLSR.
58 
59 #include "llvm/ADT/APInt.h"
61 #include "llvm/ADT/SmallVector.h"
66 #include "llvm/IR/Constants.h"
67 #include "llvm/IR/DataLayout.h"
68 #include "llvm/IR/DerivedTypes.h"
69 #include "llvm/IR/Dominators.h"
71 #include "llvm/IR/IRBuilder.h"
72 #include "llvm/IR/InstrTypes.h"
73 #include "llvm/IR/Instruction.h"
74 #include "llvm/IR/Instructions.h"
75 #include "llvm/IR/Module.h"
76 #include "llvm/IR/Operator.h"
77 #include "llvm/IR/PatternMatch.h"
78 #include "llvm/IR/Type.h"
79 #include "llvm/IR/Value.h"
80 #include "llvm/Pass.h"
81 #include "llvm/Support/Casting.h"
83 #include "llvm/Transforms/Scalar.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <limits>
87 #include <list>
88 #include <vector>
89 
90 using namespace llvm;
91 using namespace PatternMatch;
92 
93 static const unsigned UnknownAddressSpace =
95 
96 namespace {
97 
98 class StraightLineStrengthReduce : public FunctionPass {
99 public:
100  // SLSR candidate. Such a candidate must be in one of the forms described in
101  // the header comments.
102  struct Candidate {
103  enum Kind {
104  Invalid, // reserved for the default constructor
105  Add, // B + i * S
106  Mul, // (B + i) * S
107  GEP, // &B[..][i * S][..]
108  };
109 
110  Candidate() = default;
111  Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
112  Instruction *I)
113  : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I) {}
114 
115  Kind CandidateKind = Invalid;
116 
117  const SCEV *Base = nullptr;
118 
119  // Note that Index and Stride of a GEP candidate do not necessarily have the
120  // same integer type. In that case, during rewriting, Stride will be
121  // sign-extended or truncated to Index's type.
122  ConstantInt *Index = nullptr;
123 
124  Value *Stride = nullptr;
125 
126  // The instruction this candidate corresponds to. It helps us to rewrite a
127  // candidate with respect to its immediate basis. Note that one instruction
128  // can correspond to multiple candidates depending on how you associate the
129  // expression. For instance,
130  //
131  // (a + 1) * (b + 2)
132  //
133  // can be treated as
134  //
135  // <Base: a, Index: 1, Stride: b + 2>
136  //
137  // or
138  //
139  // <Base: b, Index: 2, Stride: a + 1>
140  Instruction *Ins = nullptr;
141 
142  // Points to the immediate basis of this candidate, or nullptr if we cannot
143  // find any basis for this candidate.
144  Candidate *Basis = nullptr;
145  };
146 
147  static char ID;
148 
149  StraightLineStrengthReduce() : FunctionPass(ID) {
151  }
152 
153  void getAnalysisUsage(AnalysisUsage &AU) const override {
157  // We do not modify the shape of the CFG.
158  AU.setPreservesCFG();
159  }
160 
161  bool doInitialization(Module &M) override {
162  DL = &M.getDataLayout();
163  return false;
164  }
165 
166  bool runOnFunction(Function &F) override;
167 
168 private:
169  // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
170  // share the same base and stride.
171  bool isBasisFor(const Candidate &Basis, const Candidate &C);
172 
173  // Returns whether the candidate can be folded into an addressing mode.
174  bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
175  const DataLayout *DL);
176 
177  // Returns true if C is already in a simplest form and not worth being
178  // rewritten.
179  bool isSimplestForm(const Candidate &C);
180 
181  // Checks whether I is in a candidate form. If so, adds all the matching forms
182  // to Candidates, and tries to find the immediate basis for each of them.
183  void allocateCandidatesAndFindBasis(Instruction *I);
184 
185  // Allocate candidates and find bases for Add instructions.
186  void allocateCandidatesAndFindBasisForAdd(Instruction *I);
187 
188  // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
189  // candidate.
190  void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
191  Instruction *I);
192  // Allocate candidates and find bases for Mul instructions.
193  void allocateCandidatesAndFindBasisForMul(Instruction *I);
194 
195  // Splits LHS into Base + Index and, if succeeds, calls
196  // allocateCandidatesAndFindBasis.
197  void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
198  Instruction *I);
199 
200  // Allocate candidates and find bases for GetElementPtr instructions.
201  void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
202 
203  // A helper function that scales Idx with ElementSize before invoking
204  // allocateCandidatesAndFindBasis.
205  void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
206  Value *S, uint64_t ElementSize,
207  Instruction *I);
208 
209  // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
210  // basis.
211  void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
212  ConstantInt *Idx, Value *S,
213  Instruction *I);
214 
215  // Rewrites candidate C with respect to Basis.
216  void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
217 
218  // A helper function that factors ArrayIdx to a product of a stride and a
219  // constant index, and invokes allocateCandidatesAndFindBasis with the
220  // factorings.
221  void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
223 
224  // Emit code that computes the "bump" from Basis to C. If the candidate is a
225  // GEP and the bump is not divisible by the element size of the GEP, this
226  // function sets the BumpWithUglyGEP flag to notify its caller to bump the
227  // basis using an ugly GEP.
228  static Value *emitBump(const Candidate &Basis, const Candidate &C,
229  IRBuilder<> &Builder, const DataLayout *DL,
230  bool &BumpWithUglyGEP);
231 
232  const DataLayout *DL = nullptr;
233  DominatorTree *DT = nullptr;
234  ScalarEvolution *SE;
235  TargetTransformInfo *TTI = nullptr;
236  std::list<Candidate> Candidates;
237 
238  // Temporarily holds all instructions that are unlinked (but not deleted) by
239  // rewriteCandidateWithBasis. These instructions will be actually removed
240  // after all rewriting finishes.
241  std::vector<Instruction *> UnlinkedInstructions;
242 };
243 
244 } // end anonymous namespace
245 
247 
248 INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
249  "Straight line strength reduction", false, false)
253 INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
254  "Straight line strength reduction", false, false)
255 
257  return new StraightLineStrengthReduce();
258 }
259 
260 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
261  const Candidate &C) {
262  return (Basis.Ins != C.Ins && // skip the same instruction
263  // They must have the same type too. Basis.Base == C.Base doesn't
264  // guarantee their types are the same (PR23975).
265  Basis.Ins->getType() == C.Ins->getType() &&
266  // Basis must dominate C in order to rewrite C with respect to Basis.
267  DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
268  // They share the same base, stride, and candidate kind.
269  Basis.Base == C.Base && Basis.Stride == C.Stride &&
270  Basis.CandidateKind == C.CandidateKind);
271 }
272 
274  const TargetTransformInfo *TTI) {
276  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
277  Indices.push_back(*I);
278  return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
279  Indices) == TargetTransformInfo::TCC_Free;
280 }
281 
282 // Returns whether (Base + Index * Stride) can be folded to an addressing mode.
283 static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
284  TargetTransformInfo *TTI) {
285  // Index->getSExtValue() may crash if Index is wider than 64-bit.
286  return Index->getBitWidth() <= 64 &&
287  TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
289 }
290 
291 bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
292  TargetTransformInfo *TTI,
293  const DataLayout *DL) {
294  if (C.CandidateKind == Candidate::Add)
295  return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
296  if (C.CandidateKind == Candidate::GEP)
297  return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI);
298  return false;
299 }
300 
301 // Returns true if GEP has zero or one non-zero index.
303  unsigned NumNonZeroIndices = 0;
304  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
305  ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
306  if (ConstIdx == nullptr || !ConstIdx->isZero())
307  ++NumNonZeroIndices;
308  }
309  return NumNonZeroIndices <= 1;
310 }
311 
312 bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
313  if (C.CandidateKind == Candidate::Add) {
314  // B + 1 * S or B + (-1) * S
315  return C.Index->isOne() || C.Index->isMinusOne();
316  }
317  if (C.CandidateKind == Candidate::Mul) {
318  // (B + 0) * S
319  return C.Index->isZero();
320  }
321  if (C.CandidateKind == Candidate::GEP) {
322  // (char*)B + S or (char*)B - S
323  return ((C.Index->isOne() || C.Index->isMinusOne()) &&
324  hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
325  }
326  return false;
327 }
328 
329 // TODO: We currently implement an algorithm whose time complexity is linear in
330 // the number of existing candidates. However, we could do better by using
331 // ScopedHashTable. Specifically, while traversing the dominator tree, we could
332 // maintain all the candidates that dominate the basic block being traversed in
333 // a ScopedHashTable. This hash table is indexed by the base and the stride of
334 // a candidate. Therefore, finding the immediate basis of a candidate boils down
335 // to one hash-table look up.
336 void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
337  Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
338  Instruction *I) {
339  Candidate C(CT, B, Idx, S, I);
340  // SLSR can complicate an instruction in two cases:
341  //
342  // 1. If we can fold I into an addressing mode, computing I is likely free or
343  // takes only one instruction.
344  //
345  // 2. I is already in a simplest form. For example, when
346  // X = B + 8 * S
347  // Y = B + S,
348  // rewriting Y to X - 7 * S is probably a bad idea.
349  //
350  // In the above cases, we still add I to the candidate list so that I can be
351  // the basis of other candidates, but we leave I's basis blank so that I
352  // won't be rewritten.
353  if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
354  // Try to compute the immediate basis of C.
355  unsigned NumIterations = 0;
356  // Limit the scan radius to avoid running in quadratice time.
357  static const unsigned MaxNumIterations = 50;
358  for (auto Basis = Candidates.rbegin();
359  Basis != Candidates.rend() && NumIterations < MaxNumIterations;
360  ++Basis, ++NumIterations) {
361  if (isBasisFor(*Basis, C)) {
362  C.Basis = &(*Basis);
363  break;
364  }
365  }
366  }
367  // Regardless of whether we find a basis for C, we need to push C to the
368  // candidate list so that it can be the basis of other candidates.
369  Candidates.push_back(C);
370 }
371 
372 void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
373  Instruction *I) {
374  switch (I->getOpcode()) {
375  case Instruction::Add:
376  allocateCandidatesAndFindBasisForAdd(I);
377  break;
378  case Instruction::Mul:
379  allocateCandidatesAndFindBasisForMul(I);
380  break;
381  case Instruction::GetElementPtr:
382  allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
383  break;
384  }
385 }
386 
387 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
388  Instruction *I) {
389  // Try matching B + i * S.
390  if (!isa<IntegerType>(I->getType()))
391  return;
392 
393  assert(I->getNumOperands() == 2 && "isn't I an add?");
394  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
395  allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
396  if (LHS != RHS)
397  allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
398 }
399 
400 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
401  Value *LHS, Value *RHS, Instruction *I) {
402  Value *S = nullptr;
403  ConstantInt *Idx = nullptr;
404  if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
405  // I = LHS + RHS = LHS + Idx * S
406  allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
407  } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
408  // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
409  APInt One(Idx->getBitWidth(), 1);
410  Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
411  allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
412  } else {
413  // At least, I = LHS + 1 * RHS
414  ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
415  allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
416  I);
417  }
418 }
419 
420 // Returns true if A matches B + C where C is constant.
421 static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
422  return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
423  match(A, m_Add(m_ConstantInt(C), m_Value(B))));
424 }
425 
426 // Returns true if A matches B | C where C is constant.
427 static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
428  return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
429  match(A, m_Or(m_ConstantInt(C), m_Value(B))));
430 }
431 
432 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
433  Value *LHS, Value *RHS, Instruction *I) {
434  Value *B = nullptr;
435  ConstantInt *Idx = nullptr;
436  if (matchesAdd(LHS, B, Idx)) {
437  // If LHS is in the form of "Base + Index", then I is in the form of
438  // "(Base + Index) * RHS".
439  allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
440  } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
441  // If LHS is in the form of "Base | Index" and Base and Index have no common
442  // bits set, then
443  // Base | Index = Base + Index
444  // and I is thus in the form of "(Base + Index) * RHS".
445  allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
446  } else {
447  // Otherwise, at least try the form (LHS + 0) * RHS.
448  ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
449  allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
450  I);
451  }
452 }
453 
454 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
455  Instruction *I) {
456  // Try matching (B + i) * S.
457  // TODO: we could extend SLSR to float and vector types.
458  if (!isa<IntegerType>(I->getType()))
459  return;
460 
461  assert(I->getNumOperands() == 2 && "isn't I a mul?");
462  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
463  allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
464  if (LHS != RHS) {
465  // Symmetrically, try to split RHS to Base + Index.
466  allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
467  }
468 }
469 
470 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
471  const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
472  Instruction *I) {
473  // I = B + sext(Idx *nsw S) * ElementSize
474  // = B + (sext(Idx) * sext(S)) * ElementSize
475  // = B + (sext(Idx) * ElementSize) * sext(S)
476  // Casting to IntegerType is safe because we skipped vector GEPs.
477  IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
478  ConstantInt *ScaledIdx = ConstantInt::get(
479  IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
480  allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
481 }
482 
483 void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
484  const SCEV *Base,
485  uint64_t ElementSize,
487  // At least, ArrayIdx = ArrayIdx *nsw 1.
488  allocateCandidatesAndFindBasisForGEP(
489  Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
490  ArrayIdx, ElementSize, GEP);
491  Value *LHS = nullptr;
492  ConstantInt *RHS = nullptr;
493  // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
494  // itself. This would allow us to handle the shl case for free. However,
495  // matching SCEVs has two issues:
496  //
497  // 1. this would complicate rewriting because the rewriting procedure
498  // would have to translate SCEVs back to IR instructions. This translation
499  // is difficult when LHS is further evaluated to a composite SCEV.
500  //
501  // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
502  // to strip nsw/nuw flags which are critical for SLSR to trace into
503  // sext'ed multiplication.
504  if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
505  // SLSR is currently unsafe if i * S may overflow.
506  // GEP = Base + sext(LHS *nsw RHS) * ElementSize
507  allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
508  } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
509  // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
510  // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
511  APInt One(RHS->getBitWidth(), 1);
512  ConstantInt *PowerOf2 =
513  ConstantInt::get(RHS->getContext(), One << RHS->getValue());
514  allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
515  }
516 }
517 
518 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
519  GetElementPtrInst *GEP) {
520  // TODO: handle vector GEPs
521  if (GEP->getType()->isVectorTy())
522  return;
523 
524  SmallVector<const SCEV *, 4> IndexExprs;
525  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
526  IndexExprs.push_back(SE->getSCEV(*I));
527 
529  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
530  if (GTI.isStruct())
531  continue;
532 
533  const SCEV *OrigIndexExpr = IndexExprs[I - 1];
534  IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
535 
536  // The base of this candidate is GEP's base plus the offsets of all
537  // indices except this current one.
538  const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
539  Value *ArrayIdx = GEP->getOperand(I);
540  uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
541  if (ArrayIdx->getType()->getIntegerBitWidth() <=
543  // Skip factoring if ArrayIdx is wider than the pointer size, because
544  // ArrayIdx is implicitly truncated to the pointer size.
545  factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
546  }
547  // When ArrayIdx is the sext of a value, we try to factor that value as
548  // well. Handling this case is important because array indices are
549  // typically sign-extended to the pointer size.
550  Value *TruncatedArrayIdx = nullptr;
551  if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) &&
552  TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
554  // Skip factoring if TruncatedArrayIdx is wider than the pointer size,
555  // because TruncatedArrayIdx is implicitly truncated to the pointer size.
556  factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
557  }
558 
559  IndexExprs[I - 1] = OrigIndexExpr;
560  }
561 }
562 
563 // A helper function that unifies the bitwidth of A and B.
564 static void unifyBitWidth(APInt &A, APInt &B) {
565  if (A.getBitWidth() < B.getBitWidth())
566  A = A.sext(B.getBitWidth());
567  else if (A.getBitWidth() > B.getBitWidth())
568  B = B.sext(A.getBitWidth());
569 }
570 
571 Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
572  const Candidate &C,
573  IRBuilder<> &Builder,
574  const DataLayout *DL,
575  bool &BumpWithUglyGEP) {
576  APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
577  unifyBitWidth(Idx, BasisIdx);
578  APInt IndexOffset = Idx - BasisIdx;
579 
580  BumpWithUglyGEP = false;
581  if (Basis.CandidateKind == Candidate::GEP) {
582  APInt ElementSize(
583  IndexOffset.getBitWidth(),
584  DL->getTypeAllocSize(
585  cast<GetElementPtrInst>(Basis.Ins)->getResultElementType()));
586  APInt Q, R;
587  APInt::sdivrem(IndexOffset, ElementSize, Q, R);
588  if (R == 0)
589  IndexOffset = Q;
590  else
591  BumpWithUglyGEP = true;
592  }
593 
594  // Compute Bump = C - Basis = (i' - i) * S.
595  // Common case 1: if (i' - i) is 1, Bump = S.
596  if (IndexOffset == 1)
597  return C.Stride;
598  // Common case 2: if (i' - i) is -1, Bump = -S.
599  if (IndexOffset.isAllOnesValue())
600  return Builder.CreateNeg(C.Stride);
601 
602  // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
603  // have different bit widths.
604  IntegerType *DeltaType =
605  IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
606  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
607  if (IndexOffset.isPowerOf2()) {
608  // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
609  ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
610  return Builder.CreateShl(ExtendedStride, Exponent);
611  }
612  if ((-IndexOffset).isPowerOf2()) {
613  // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
615  ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
616  return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
617  }
618  Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
619  return Builder.CreateMul(ExtendedStride, Delta);
620 }
621 
622 void StraightLineStrengthReduce::rewriteCandidateWithBasis(
623  const Candidate &C, const Candidate &Basis) {
624  assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
625  C.Stride == Basis.Stride);
626  // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
627  // basis of a candidate cannot be unlinked before the candidate.
628  assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
629 
630  // An instruction can correspond to multiple candidates. Therefore, instead of
631  // simply deleting an instruction when we rewrite it, we mark its parent as
632  // nullptr (i.e. unlink it) so that we can skip the candidates whose
633  // instruction is already rewritten.
634  if (!C.Ins->getParent())
635  return;
636 
637  IRBuilder<> Builder(C.Ins);
638  bool BumpWithUglyGEP;
639  Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
640  Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
641  switch (C.CandidateKind) {
642  case Candidate::Add:
643  case Candidate::Mul: {
644  // C = Basis + Bump
645  Value *NegBump;
646  if (match(Bump, m_Neg(m_Value(NegBump)))) {
647  // If Bump is a neg instruction, emit C = Basis - (-Bump).
648  Reduced = Builder.CreateSub(Basis.Ins, NegBump);
649  // We only use the negative argument of Bump, and Bump itself may be
650  // trivially dead.
652  } else {
653  // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
654  // usually unsound, e.g.,
655  //
656  // X = (-2 +nsw 1) *nsw INT_MAX
657  // Y = (-2 +nsw 3) *nsw INT_MAX
658  // =>
659  // Y = X + 2 * INT_MAX
660  //
661  // Neither + and * in the resultant expression are nsw.
662  Reduced = Builder.CreateAdd(Basis.Ins, Bump);
663  }
664  break;
665  }
666  case Candidate::GEP:
667  {
668  Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
669  bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
670  if (BumpWithUglyGEP) {
671  // C = (char *)Basis + Bump
672  unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
673  Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
674  Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
675  if (InBounds)
676  Reduced =
677  Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
678  else
679  Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
680  Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
681  } else {
682  // C = gep Basis, Bump
683  // Canonicalize bump to pointer size.
684  Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
685  if (InBounds)
686  Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
687  else
688  Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
689  }
690  break;
691  }
692  default:
693  llvm_unreachable("C.CandidateKind is invalid");
694  };
695  Reduced->takeName(C.Ins);
696  C.Ins->replaceAllUsesWith(Reduced);
697  // Unlink C.Ins so that we can skip other candidates also corresponding to
698  // C.Ins. The actual deletion is postponed to the end of runOnFunction.
699  C.Ins->removeFromParent();
700  UnlinkedInstructions.push_back(C.Ins);
701 }
702 
704  if (skipFunction(F))
705  return false;
706 
707  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
708  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
709  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
710  // Traverse the dominator tree in the depth-first order. This order makes sure
711  // all bases of a candidate are in Candidates when we process it.
712  for (const auto Node : depth_first(DT))
713  for (auto &I : *(Node->getBlock()))
714  allocateCandidatesAndFindBasis(&I);
715 
716  // Rewrite candidates in the reverse depth-first order. This order makes sure
717  // a candidate being rewritten is not a basis for any other candidate.
718  while (!Candidates.empty()) {
719  const Candidate &C = Candidates.back();
720  if (C.Basis != nullptr) {
721  rewriteCandidateWithBasis(C, *C.Basis);
722  }
723  Candidates.pop_back();
724  }
725 
726  // Delete all unlink instructions.
727  for (auto *UnlinkedInst : UnlinkedInstructions) {
728  for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) {
729  Value *Op = UnlinkedInst->getOperand(I);
730  UnlinkedInst->setOperand(I, nullptr);
732  }
733  UnlinkedInst->deleteValue();
734  }
735  bool Ret = !UnlinkedInstructions.empty();
736  UnlinkedInstructions.clear();
737  return Ret;
738 }
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1477
FunctionPass * createStraightLineStrengthReducePass()
uint64_t CallInst * C
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:71
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:834
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:701
The main scalar evolution driver.
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands) const
Estimate the cost of a GEP operation when lowered.
static void unifyBitWidth(APInt &A, APInt &B)
static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition: APInt.cpp:1837
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:363
static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C)
F(f)
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
unsigned getBitWidth() const
getBitWidth - Return the bitwidth of this constant.
Definition: Constants.h:143
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:48
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
static bool matchesOr(Value *A, Value *&B, ConstantInt *&C)
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
Type * getSourceElementType() const
Definition: Instructions.h:951
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:642
void initializeStraightLineStrengthReducePass(PassRegistry &)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1684
op_iterator idx_begin()
Definition: Instructions.h:979
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:82
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_END(StraightLineStrengthReduce
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1031
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
Value * getOperand(unsigned i) const
Definition: User.h:170
unsigned getAddressSpace() const
Returns the address space of this instruction&#39;s pointer type.
Definition: Instructions.h:963
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:750
static bool runOnFunction(Function &F, bool PostInlining)
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:396
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Wrapper pass for TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:755
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
Straight line strength reduction
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static const unsigned UnknownAddressSpace
Expected to fold away in lowering.
Represent the analysis usage information of a pass.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:767
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1308
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
Class to represent integer types.
Definition: DerivedTypes.h:40
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1048
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1458
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
unsigned getNumOperands() const
Definition: User.h:192
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
unsigned logBase2() const
Definition: APInt.h:1748
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a &#39;Neg&#39; as &#39;sub 0, V&#39;.
Class for arbitrary precision integers.
Definition: APInt.h:70
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:464
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:337
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1103
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
#define I(x, y, z)
Definition: MD5.cpp:58
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:836
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:193
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
iterator_range< df_iterator< T > > depth_first(const T &G)
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoSignedWrap > m_NSWMul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:828
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
This pass exposes codegen information to IR-level passes.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:157
static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, TargetTransformInfo *TTI)
gep_type_iterator gep_type_begin(const User *GEP)