//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
// gep %a, 0, %x, %y; load
// gep %a, 0, %x, %y + 1; load
// gep %a, 0, %x + 1, %y; load
// gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// significant slowdown in targets with limited addressing modes. For instance,
// because the PTX target does not support the reg+reg addressing mode, the
// NVPTX backend emits PTX code that literally computes the pointer address of
// each GEP, wasting tons of registers. It emits the following PTX for the
// first load and similar PTX for other loads.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];
//
// To reduce the register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
// base = gep a, 0, x, y
// load base
// load base + 1  * sizeof(float)
// load base + 32 * sizeof(float)
// load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetic into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetic into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
// mov.u32         %r1, %x;
// mov.u32         %r2, %y;
// mul.wide.u32    %rl2, %r1, 128;
// mov.u64         %rl3, a;
// add.s64         %rl4, %rl3, %rl2;
// mul.wide.u32    %rl5, %r2, 4;
// add.s64         %rl6, %rl4, %rl5;
// ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
// ld.global.f32   %f2, [%rl6+4];   // much better
// ld.global.f32   %f3, [%rl6+128]; // much better
// ld.global.f32   %f4, [%rl6+132]; // much better
//
// Another improvement enabled by the LowerGEP flag is to lower a GEP with
// multiple indices to either multiple GEPs with a single index or arithmetic
// operations (depending on whether the target uses alias analysis in codegen).
// Such a transformation can have the following benefits:
// (1) It can always extract constants in the indices of structure types.
// (2) After such lowering, there are more optimization opportunities such as
//     CSE, LICM and CGP.
//
// E.g., the following GEPs have multiple indices:
//  BB1:
//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
//    load %p
//    ...
//  BB2:
//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
//    load %p2
//    ...
//
// Without lowering, we cannot CSE the common part related to index "i64 %i"
// across the two GEPs; lowering the GEPs achieves exactly that.
// If the target does not use alias analysis in codegen, this pass will
// lower a GEP with multiple indices into arithmetic operations:
//  BB1:
//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = add i64 %1, %2                          ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = add i64 %3, %4
//    %6 = add i64 %5, struct_field_3              ; Constant offset
//    %p = inttoptr i64 %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = add i64 %7, %8                          ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = add i64 %9, %10
//    %12 = add i64 %11, struct_field_2            ; Constant offset
//    %p2 = inttoptr i64 %12 to i32*
//    load %p2
//    ...
//
// If the target uses alias analysis in codegen, this pass will lower a GEP
// with multiple indices into multiple GEPs with a single index:
//  BB1:
//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
//    %4 = mul i64 %j1, length_of_struct
//    %5 = getelementptr i8* %3, i64 %4
//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
//    %p = bitcast i8* %6 to i32*
//    load %p
//    ...
//  BB2:
//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
//    %10 = mul i64 %j2, length_of_struct
//    %11 = getelementptr i8* %9, i64 %10
//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
//    %p2 = bitcast i8* %12 to i32*
//    load %p2
//    ...
//
// Lowering GEPs can also benefit other passes such as LICM and CGP.
// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
// indices if any of those indices is variant. If we lower such a GEP into
// invariant parts and variant parts, LICM can hoist/sink the invariant parts.
// CGP (CodeGen Prepare) tries to sink address calculations that match the
// target's addressing modes. A GEP with multiple indices may not match and will
// not be sunk. If we lower such a GEP into smaller parts, CGP may sink some of
// them, and we end up with a better addressing mode.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

// Setting this flag may emit false positives when the input module already
// contains dead instructions. Therefore, we set it only in unit tests that are
// free of dead code.
static cl::opt<bool>
    VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false),
                     cl::desc("Verify this pass produces no dead code"),
                     cl::Hidden);

namespace {

/// A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple addition
/// of something and a constant integer which can be trivially split. For
/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index into (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), although in this case
/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It returns the
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset), or nullptr if we cannot extract a constant offset.
  /// \p Idx The given GEP index
  /// \p GEP The given GEP
  /// \p UserChainTail Outputs the tail of UserChain so that we can
  ///                  garbage-collect unused instructions in UserChain.
  static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
                        User *&UserChainTail, const DominatorTree *DT);
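  //
  // A sketch of the expected behavior (values hypothetical): given the index
  //   %idx = add i64 %a, 5
  // of an inbounds GEP, Extract returns %a as the new index and sets
  // UserChainTail to the now-dead "add", so the caller can garbage-collect
  // the chain once the GEP switches to %a; Find on the same index returns 5.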

  /// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 if
  /// failed). The meanings of the arguments are the same as in Extract.
  static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
                      const DominatorTree *DT);

private:
  ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
      : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
  }

  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is
  /// successful, returns C and updates UserChain as a def-use chain from C to
  /// V; otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);

  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);

  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression tree
  ///    that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();

  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);

  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);

  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an inbounds
  ///                 array index.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)", after running function find, UserChain[0] will
  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;

  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;

  /// Insertion position of cloned instructions.
  Instruction *IP;

  const DataLayout &DL;
  const DominatorTree *DT;
};

/// A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
public:
  static char ID;

  SeparateConstOffsetFromGEP(bool LowerGEP = false)
      : FunctionPass(ID), LowerGEP(LowerGEP) {
    initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool doInitialization(Module &M) override {
    DL = &M.getDataLayout();
    return false;
  }

  bool runOnFunction(Function &F) override;

private:
  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);

  /// Lower a GEP with multiple indices into multiple GEPs with a single index.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of GEPs with a single index and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic              The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
                              int64_t AccumulativeByteOffset);

  /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
  /// Function splitGEP already split the original GEP into a variadic part and
  /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
  /// variadic part into a set of arithmetic operations and applies
  /// AccumulativeByteOffset to it.
  /// \p Variadic              The variadic part of the original GEP.
  /// \p AccumulativeByteOffset The constant offset.
  void lowerToArithmetics(GetElementPtrInst *Variadic,
                          int64_t AccumulativeByteOffset);

  /// Finds the constant offset within each index and accumulates them. If
  /// LowerGEP is true, it looks at indices of both sequential and structure
  /// types; otherwise it only looks at sequential indices. The output
  /// NeedsExtraction indicates whether we successfully found a non-zero
  /// constant offset.
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
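  //
  // A sketch of the accumulation (type and layout hypothetical): assume
  // %struct.S = type { i32, i64 } with sizeof(%struct.S) == 16 and field 1 at
  // byte offset 8. For
  //   getelementptr %struct.S* %s, i64 (%i + 1), i32 1
  // the sequential index contributes 1 * 16 bytes; with LowerGEP on, the
  // field index contributes another 8 bytes, so AccumulativeByteOffset = 24
  // and NeedsExtraction is true.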

  /// Canonicalize array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine. Therefore, the program will probably remain the same.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
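  //
  // A sketch on a 64-bit target: the i32 index in
  //   getelementptr float, float* %p, i32 %i
  // is canonicalized to
  //   %idxprom = sext i32 %i to i64
  //   getelementptr float, float* %p, i64 %idxprom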

  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
  /// the constant offset. After extraction, it becomes desirable to reunite the
  /// distributed sexts. For example,
  ///
  /// &a[sext(i +nsw (j +nsw 5))]
  ///   => distribute          &a[sext(i) +nsw (sext(j) +nsw 5)]
  ///   => constant extraction &a[sext(i) + sext(j)] + 5
  ///   => reunion             &a[sext(i +nsw j)] + 5
  bool reuniteExts(Function &F);

  /// A helper that reunites sexts in an instruction.
  bool reuniteExts(Instruction *I);

  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
  Instruction *findClosestMatchingDominator(const SCEV *Key,
                                            Instruction *Dominatee);

  /// Verify F is free of dead code.
  void verifyNoDeadCode(Function &F);

  bool hasMoreThanOneUseInLoop(Value *v, Loop *L);

  // Swaps the index operands of two GEPs.
  void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
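  //
  // A sketch of why swapping helps (values hypothetical): with %p loop
  // invariant,
  //   %a = getelementptr i8* %p, i64 %var
  //   %b = getelementptr i8* %a, i64 100
  // becomes, after swapping the index operands,
  //   %a = getelementptr i8* %p, i64 100   ; loop invariant, LICM can hoist it
  //   %b = getelementptr i8* %a, i64 %var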

  // Checks whether it is safe to swap the index operands of two GEPs.
  bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
                            Loop *CurLoop);

  const DataLayout *DL = nullptr;
  DominatorTree *DT = nullptr;
  ScalarEvolution *SE;

  LoopInfo *LI;
  TargetLibraryInfo *TLI;

  /// Whether to lower a GEP with multiple indices into arithmetic operations
  /// or multiple GEPs with a single index.
  bool LowerGEP;

  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
};

} // end anonymous namespace

char SeparateConstOffsetFromGEP::ID = 0;

INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *llvm::createSeparateConstOffsetFromGEPPass(bool LowerGEP) {
  return new SeparateConstOffsetFromGEP(LowerGEP);
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  // FIXME: this does not appear to be covered by any tests
  // (with x86/aarch64 backends at least)
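  // E.g. (a sketch): (a << 2) | 3 behaves like (a << 2) + 3, because the low
  // two bits of (a << 2) are known to be zero, so the operands share no bits.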
  if (BO->getOpcode() == Instruction::Or &&
      !haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //       0       |      0       | true because no s/zext exists
  //       0       |      1       | zext(BO) == zext(A) op zext(B)
  //       1       |      0       | sext(BO) == sext(A) op sext(B)
  //       1       |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;
  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;
  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
  } else if (isa<TruncInst>(V)) {
    ConstantOffset =
        find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
            .trunc(BitWidth);
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reversed order.
  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
    } else {
      Instruction *Ext = (*I)->clone();
      Ext->setOperand(0, Current);
      Ext->insertBefore(IP);
      Current = Ext;
    }
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (User *I : UserChain) {
    if (I != nullptr) {
      UserChain[NewSize] = I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert(
        (isa<SExtInst>(Cast) || isa<ZExtInst>(Cast) || isa<TruncInst>(Cast)) &&
        "Only following instructions can be traced: sext, zext & trunc");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  assert(BO->getNumUses() <= 1 &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than "
         "once");

  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  BinaryOperator::BinaryOps NewOp = BO->getOpcode();
  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    NewOp = Instruction::Add;
  }

  BinaryOperator *NewBO;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(NewOp, NextInChain, TheOther, "", IP);
  } else {
    NewBO = BinaryOperator::Create(NewOp, TheOther, NextInChain, "", IP);
  }
  NewBO->takeName(BO);
  return NewBO;
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
                                        User *&UserChainTail,
                                        const DominatorTree *DT) {
  ConstantOffsetExtractor Extractor(GEP, DT);
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset == 0) {
    UserChainTail = nullptr;
    return nullptr;
  }
  // Separate the constant offset from the GEP index.
  Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
  UserChainTail = Extractor.UserChain.back();
  return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
                                      const DominatorTree *DT) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be
  // non-negative.
  return ConstantOffsetExtractor(GEP, DT)
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices which must be i32.
    if (GTI.isSequential()) {
      if ((*I)->getType() != IntPtrTy) {
        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
      }
    } else if (LowerGEP) {
      StructType *StTy = GTI.getStructType();
      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
      // Skip field 0 as the offset is always 0.
      if (Field != 0) {
        NeedsExtraction = true;
        AccumulativeByteOffset +=
            DL->getStructLayout(StTy)->getElementOffset(Field);
      }
    }
  }
  return AccumulativeByteOffset;
}

void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
    GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Type *I8PtrTy =
      Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
  Value *ResultPtr = Variadic->getOperand(0);
  Loop *L = LI->getLoopFor(Variadic->getParent());
  // The base is a candidate for operand swapping only if it is loop invariant
  // and not used more than once in the loop.
  bool isSwapCandidate =
      L && L->isLoopInvariant(ResultPtr) &&
      !hasMoreThanOneUseInLoop(ResultPtr, L);
  Value *FirstResult = nullptr;

  if (ResultPtr->getType() != I8PtrTy)
    ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);

  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create an ugly GEP for each sequential index. We don't create GEPs for
  // structure indices, as they are accumulated in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ugly GEP with a single index for each index.
      ResultPtr =
          Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
      if (FirstResult == nullptr)
        FirstResult = ResultPtr;
    }
  }

  // Create a GEP with the constant offset index.
  if (AccumulativeByteOffset != 0) {
    Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
    ResultPtr =
        Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
  } else
    isSwapCandidate = false;

  // If we created a GEP with constant index, and the base is loop invariant,
  // then we swap the first one with it, so LICM can move the constant GEP out
  // later.
  GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
  GetElementPtrInst *SecondGEP = dyn_cast_or_null<GetElementPtrInst>(ResultPtr);
  if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
    swapGEPOperand(FirstGEP, SecondGEP);

  if (ResultPtr->getType() != Variadic->getType())
    ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());

  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
                                               int64_t AccumulativeByteOffset) {
  IRBuilder<> Builder(Variadic);
  Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());

  Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
  gep_type_iterator GTI = gep_type_begin(*Variadic);
  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
  // don't create arithmetic for structure indices, as they are accumulated
  // in the constant offset index.
  for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      Value *Idx = Variadic->getOperand(I);
      // Skip zero indices.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
        if (CI->isZero())
          continue;

      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
                                DL->getTypeAllocSize(GTI.getIndexedType()));
      // Scale the index by element size.
      if (ElementSize != 1) {
        if (ElementSize.isPowerOf2()) {
          Idx = Builder.CreateShl(
              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
        } else {
          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
        }
      }
      // Create an ADD for each index.
      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
    }
  }

  // Create an ADD for the constant offset index.
  if (AccumulativeByteOffset != 0) {
    ResultPtr = Builder.CreateAdd(
        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
  }

  ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType());
  Variadic->replaceAllUsesWith(ResultPtr);
  Variadic->eraseFromParent();
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  if (!NeedsExtraction)
    return Changed;

  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*GEP->getFunction());

  // If LowerGEP is disabled, before really splitting the GEP, check whether the
  // backend supports the addressing mode we are about to produce. If not, this
  // splitting probably won't be beneficial.
  // If LowerGEP is enabled, even if the extracted constant offset cannot match
  // the addressing mode, we can still do optimizations to the other lowered
  // parts of the variable indices. Therefore, we don't check for addressing
  // modes in that case.
  if (!LowerGEP) {
    unsigned AddrSpace = GEP->getPointerAddressSpace();
    if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/0,
                                   AddrSpace)) {
      return Changed;
    }
  }

  // Remove the constant offset in each sequential index. The resultant GEP
  // computes the variadic base.
  // Notice that we don't remove struct field indices here. If LowerGEP is
  // disabled, a structure index is not accumulated and we still use the old
  // one. If LowerGEP is enabled, a structure index is accumulated in the
  // constant offset. lowerToSingleIndexGEPs or lowerToArithmetics will later
  // handle the constant offset and won't need a new structure index.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (GTI.isSequential()) {
      // Splits this GEP index into a variadic part and a constant offset, and
      // uses the variadic part as the new index.
      Value *OldIdx = GEP->getOperand(I);
      User *UserChainTail;
      Value *NewIdx =
          ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
      if (NewIdx != nullptr) {
        // Switches to the index with the constant offset removed.
        GEP->setOperand(I, NewIdx);
        // After switching to the new index, we can garbage-collect UserChain
        // and the old index if they are not used.
        RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
        RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
      }
    }
  }

  // Clear the inbounds attribute because the new index may be off-bound.
  // e.g.,
  //
  //   b     = add i64 a, 5
  //   addr  = gep inbounds float, float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float, float* p, i64 a ; inbounds removed
  //   addr  = gep inbounds float, float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  bool GEPWasInBounds = GEP->isInBounds();
  GEP->setIsInBounds(false);

  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
  if (LowerGEP) {
    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
    // arithmetic operations if the target uses alias analysis in codegen.
    if (TTI.useAA())
      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
    else
      lowerToArithmetics(GEP, AccumulativeByteOffset);
    return true;
  }

  // No need to create another GEP if the accumulative byte offset is 0.
  if (AccumulativeByteOffset == 0)
    return true;

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep                        ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   %gep                        ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   ... %new.gep ...
  //
  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  // type of %gep.
  //
  //   %gep2                       ; clone of %gep
  //   %0       = bitcast %gep2 to i8*
  //   %uglygep = gep %0, <offset>
  //   %new.gep = bitcast %uglygep to <type of %gep>
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  // In C++, a mixed signed/unsigned division or modulo is performed as an
  // unsigned operation. Therefore, we cast ElementTypeSizeOfGEP to signed
  // (int64_t), so that the division and modulo below, whose other operand is
  // the signed AccumulativeByteOffset, are signed operations.
  int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
      DL->getTypeAllocSize(GEP->getResultElementType()));
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
    // extracted should be a multiple of sizeof(*%gep).
    int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
    NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                       ConstantInt::get(IntPtrTy, Index, true),
                                       GEP->getName(), GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
  } else {
    // Unlikely but possible. For example,
    //  #pragma pack(1)
    //  struct S {
    //    int a[3];
    //    int64 b[8];
    //  };
    //  #pragma pack()
    //
    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
    // sizeof(int64).
    //
    // Emit an uglygep in this case.
    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
                                       GEP->getPointerAddressSpace());
    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
    NewGEP = GetElementPtrInst::Create(
        Type::getInt8Ty(GEP->getContext()), NewGEP,
        ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
        GEP);
    NewGEP->copyMetadata(*GEP);
    // Inherit the inbounds attribute of the original GEP.
    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    if (GEP->getType() != I8PtrTy)
      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  }

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (DisableSeparateConstOffsetFromGEP)
    return false;

  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  bool Changed = false;
  for (BasicBlock &B : F) {
    for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
        Changed |= splitGEP(GEP);
    // No need to split GEP ConstantExprs because all their indices are
    // constant already.
  }

  Changed |= reuniteExts(F);

  if (VerifyNoDeadCode)
    verifyNoDeadCode(F);

  return Changed;
}

Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
    const SCEV *Key, Instruction *Dominatee) {
  auto Pos = DominatingExprs.find(Key);
  if (Pos == DominatingExprs.end())
    return nullptr;

  auto &Candidates = Pos->second;
  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
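  // E.g. (a sketch): if BB1 dominates its descendant BB2, and both define
  // "a + b", the stack holds [BB1's def, BB2's def]. A query from a block
  // dominated by BB1 but not by BB2 pops BB2's def (it cannot dominate this
  // query or any later one in pre-order) and returns BB1's def.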
  while (!Candidates.empty()) {
    Instruction *Candidate = Candidates.back();
    if (DT->dominates(Candidate, Dominatee))
      return Candidate;
    Candidates.pop_back();
  }
  return nullptr;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
  if (!SE->isSCEVable(I->getType()))
    return false;

  //   Dom: LHS+RHS
  //   I: sext(LHS)+sext(RHS)
  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
  // TODO: handle zext
  Value *LHS = nullptr, *RHS = nullptr;
  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
    if (LHS->getType() == RHS->getType()) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
        NewSExt->takeName(I);
        I->replaceAllUsesWith(NewSExt);
        RecursivelyDeleteTriviallyDeadInstructions(I);
        return true;
      }
    }
  }

  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
    if (programUndefinedIfFullPoison(I)) {
      const SCEV *Key =
          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
      DominatingExprs[Key].push_back(I);
    }
  }
  return false;
}

bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
  bool Changed = false;
  DominatingExprs.clear();
  for (const auto Node : depth_first(DT)) {
    BasicBlock *BB = Node->getBlock();
    for (auto I = BB->begin(); I != BB->end(); ) {
      Instruction *Cur = &*I++;
      Changed |= reuniteExts(Cur);
    }
  }
  return Changed;
}

void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
  for (BasicBlock &B : F) {
    for (Instruction &I : B) {
      if (isInstructionTriviallyDead(&I)) {
        std::string ErrMessage;
        raw_string_ostream RSO(ErrMessage);
        RSO << "Dead instruction detected!\n" << I << "\n";
        llvm_unreachable(RSO.str().c_str());
      }
    }
  }
}

bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
  if (!FirstGEP || !FirstGEP->hasOneUse())
    return false;

  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
    return false;

  if (FirstGEP == SecondGEP)
    return false;

  unsigned FirstNum = FirstGEP->getNumOperands();
  unsigned SecondNum = SecondGEP->getNumOperands();
  // Give up if the number of operands is not 2.
  if (FirstNum != SecondNum || FirstNum != 2)
    return false;

  Value *FirstBase = FirstGEP->getOperand(0);
  Value *SecondBase = SecondGEP->getOperand(0);
  Value *FirstOffset = FirstGEP->getOperand(1);
  // Give up if the index of the first GEP is loop invariant.
  if (CurLoop->isLoopInvariant(FirstOffset))
    return false;

  // Give up if the bases don't have the same type.
  if (FirstBase->getType() != SecondBase->getType())
    return false;

  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);

  // Check if the second operand of the first GEP has a constant coefficient.
  // For example, in the following code, we won't gain anything by hoisting
  // the second GEP out because the second GEP can be folded away.
  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
  //   %67 = shl i64 %scevgep.sum.ur159, 2
  //   %uglygep160 = getelementptr i8* %65, i64 %67
  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

  // Skip constant shift instructions which may be generated by splitting GEPs.
  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));

  // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
  // may not be profitable at all due to constant folding.
  if (FirstOffsetDef)
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
      unsigned opc = BO->getOpcode();
      if ((opc == Instruction::Add || opc == Instruction::Sub) &&
          (isa<ConstantInt>(BO->getOperand(0)) ||
           isa<ConstantInt>(BO->getOperand(1))))
        return false;
    }
  return true;
}

bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
  int UsesInLoop = 0;
  for (User *U : V->users()) {
    if (Instruction *User = dyn_cast<Instruction>(U))
      if (L->contains(User))
        if (++UsesInLoop > 1)
          return true;
  }
  return false;
}

void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
                                                GetElementPtrInst *Second) {
  Value *Offset1 = First->getOperand(1);
  Value *Offset2 = Second->getOperand(1);
  First->setOperand(1, Offset2);
  Second->setOperand(1, Offset1);

  // We changed p+o+c to p+c+o, and p+c may not be inbounds anymore.
  const DataLayout &DAL = First->getModule()->getDataLayout();
  APInt Offset(DAL.getIndexSizeInBits(
                   cast<PointerType>(First->getType())->getAddressSpace()),
               0);
  Value *NewBase =
      First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
  uint64_t ObjectSize;
  if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
      Offset.ugt(ObjectSize)) {
    First->setIsInBounds(false);
    Second->setIsInBounds(false);
  } else
    First->setIsInBounds(true);
}
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:820
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:71
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:373
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:654
This class represents lattice values for constants.
Definition: AllocatorList.h:24
BinaryOps getOpcode() const
Definition: InstrTypes.h:316
INITIALIZE_PASS_BEGIN(SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_END(SeparateConstOffsetFromGEP
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
The main scalar evolution driver.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value *> IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:880
static cl::opt< bool > VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(false), cl::desc("Verify this pass produces no dead code"), cl::Hidden)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
F(f)
This class represents a sign extension of integer types.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
op_iterator op_begin()
Definition: User.h:230
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:265
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:269
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:48
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
Class to represent struct types.
Definition: DerivedTypes.h:201
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
separate const offset from Split GEPs to a variadic base and a constant offset for better CSE
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
This file implements a class to represent arbitrary precision integral constant values and operations...
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:642
bool programUndefinedIfFullPoison(const Instruction *PoisonI)
Return true if this function can prove that if PoisonI is executed and yields a full-poison value (al...
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1727
Instruction * clone() const
Create a copy of &#39;this&#39; instruction that is identical in all ways except the following: ...
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
This class represents a no-op cast from one type to another.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:291
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
void initializeSeparateConstOffsetFromGEPPass(PassRegistry &)
Value * getOperand(unsigned i) const
Definition: User.h:170
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
Wrapper pass for TargetTransformInfo.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction...
Definition: Instruction.cpp:74
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Represent the analysis usage information of a pass.
op_iterator op_end()
Definition: User.h:232
static cl::opt< bool > DisableSeparateConstOffsetFromGEP("disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:60
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
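Sketch of the usual cleanup once a rewritten pointer replaces an old GEP; OldGEP and NewPtr are hypothetical:
  OldGEP->replaceAllUsesWith(NewPtr);
  // Deletes OldGEP and any of its operands that became dead with it.
  RecursivelyDeleteTriviallyDeadInstructions(OldGEP);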
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:220
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:385
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:499
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1048
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Value * CreateGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1458
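Under LowerGEP, a multi-index GEP can be rebuilt as byte arithmetic on an i8* with a single index, roughly as below (a sketch; Ptr, ByteOffset, OrigTy, and AS are hypothetical, and "uglygep" is the name this pass gives such pointers):
  Value *I8Ptr  = Builder.CreateBitCast(Ptr, Builder.getInt8PtrTy(AS));
  Value *Raw    = Builder.CreateGEP(I8Ptr, ByteOffset, "uglygep");
  Value *Result = Builder.CreateBitCast(Raw, OrigTy);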
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition: LoopInfo.h:110
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
iterator end()
Definition: BasicBlock.h:271
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
Module.h This file contains the declarations for the Module class.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
void setPreservesCFG()
This function should be called by a pass iff it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.
Definition: Pass.cpp:286
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:812
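Relevant here because sext(a + b) may only be split into sext(a) + sext(b) when the inner add is nsw; combining the two matchers detects exactly that shape (a sketch, with V hypothetical):
  using namespace llvm::PatternMatch;
  Value *A = nullptr, *B = nullptr;
  // Matches sext(add nsw A, B), which distributes over the extension.
  if (match(V, m_SExt(m_NSWAdd(m_Value(A), m_Value(B))))) {
    // ... a constant in A or B can still be separated after the sext ...
  }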
void setOperand(unsigned i, Value *Val)
Definition: User.h:175
unsigned logBase2() const
Definition: APInt.h:1748
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:547
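A sketch of accumulating the constant byte offset hidden in a chain of inbounds GEPs; DL and Ptr are hypothetical, and the APInt must be sized to the pointer's index width:
  APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  const Value *Base =
      Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
  // Base is the underlying object; Offset now holds the folded byte offset.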
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
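How a backend would typically schedule the pass, shown with the legacy pass manager (a sketch; whether to pass LowerGEP=true is a per-target choice):
  legacy::PassManager PM;
  PM.add(createSeparateConstOffsetFromGEPPass(/*LowerGEP=*/true));
  PM.run(M); // M is the Module to optimize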
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:70
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:464
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:400
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:337
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1530
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1103
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1255
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:160
This class represents an analyzed expression in the program.
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if LHS and RHS have no common bits set.
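This is what lets the pass treat an 'or' like an 'add' when tracing offsets: if the operands share no set bits, no carry can occur. A sketch, with LHS, RHS, and DL hypothetical:
  // If true, (LHS | RHS) == (LHS + RHS), so an 'or' with a constant
  // operand can donate that constant to the separated offset as well.
  bool OrActsLikeAdd = haveNoCommonBitsSet(LHS, RHS, DL);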
Type * getResultElementType() const
Definition: Instructions.h:956
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1722
iterator_range< df_iterator< T > > depth_first(const T &G)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:349
LLVM Value Representation.
Definition: Value.h:73
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:970
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:413
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
This pass exposes codegen information to IR-level passes.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
bool hasAllConstantIndices() const
Return true if all of the indices of this GEP are constant integers.
const BasicBlock * getParent() const
Definition: Instruction.h:67
gep_type_iterator gep_type_begin(const User *GEP)