LLVM  8.0.1
SpeculateAroundPHIs.cpp
Go to the documentation of this file.
1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
12 #include "llvm/ADT/Sequence.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/ADT/Statistic.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/Support/Debug.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "spec-phis"
27 
28 STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
29 STATISTIC(NumEdgesSplit,
30  "Number of critical edges which were split for speculation");
31 STATISTIC(NumSpeculatedInstructions,
32  "Number of instructions we speculated around the PHI nodes");
33 STATISTIC(NumNewRedundantInstructions,
34  "Number of new, redundant instructions inserted");
35 
36 /// Check whether speculating the users of a PHI node around the PHI
37 /// will be safe.
38 ///
39 /// This checks both that all of the users are safe and also that all of their
40 /// operands are either recursively safe or already available along an incoming
41 /// edge to the PHI.
42 ///
43 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
44 /// and the chain of nodes that definitively reach any unsafe node in
45 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
46 /// PHIs in the same basic block, the exploration here can be reused. However,
47 /// these caches must no be reused for PHIs in a different basic block as they
48 /// reflect what is available along incoming edges.
49 static bool
51  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
52  SmallPtrSetImpl<Instruction *> &UnsafeSet) {
53  auto *PhiBB = PN.getParent();
56 
57  // Walk each user of the PHI node.
58  for (Use &U : PN.uses()) {
59  auto *UI = cast<Instruction>(U.getUser());
60 
61  // Ensure the use post-dominates the PHI node. This ensures that, in the
62  // absence of unwinding, the use will actually be reached.
63  // FIXME: We use a blunt hammer of requiring them to be in the same basic
64  // block. We should consider using actual post-dominance here in the
65  // future.
66  if (UI->getParent() != PhiBB) {
67  LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n");
68  return false;
69  }
70 
71  // FIXME: This check is much too conservative. We're not going to move these
72  // instructions onto new dynamic paths through the program unless there is
73  // a call instruction between the use and the PHI node. And memory isn't
74  // changing unless there is a store in that same sequence. We should
75  // probably change this to do at least a limited scan of the intervening
76  // instructions and allow handling stores in easily proven safe cases.
77  if (mayBeMemoryDependent(*UI)) {
78  LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n");
79  return false;
80  }
81 
82  // Now do a depth-first search of everything these users depend on to make
83  // sure they are transitively safe. This is a depth-first search, but we
84  // check nodes in preorder to minimize the amount of checking.
85  Visited.insert(UI);
86  DFSStack.push_back({UI, UI->value_op_begin()});
87  do {
89  std::tie(UI, OpIt) = DFSStack.pop_back_val();
90 
91  while (OpIt != UI->value_op_end()) {
92  auto *OpI = dyn_cast<Instruction>(*OpIt);
93  // Increment to the next operand for whenever we continue.
94  ++OpIt;
95  // No need to visit non-instructions, which can't form dependencies.
96  if (!OpI)
97  continue;
98 
99  // Now do the main pre-order checks that this operand is a viable
100  // dependency of something we want to speculate.
101 
102  // First do a few checks for instructions that won't require
103  // speculation at all because they are trivially available on the
104  // incoming edge (either through dominance or through an incoming value
105  // to a PHI).
106  //
107  // The cases in the current block will be trivially dominated by the
108  // edge.
109  auto *ParentBB = OpI->getParent();
110  if (ParentBB == PhiBB) {
111  if (isa<PHINode>(OpI)) {
112  // We can trivially map through phi nodes in the same block.
113  continue;
114  }
115  } else if (DT.dominates(ParentBB, PhiBB)) {
116  // Instructions from dominating blocks are already available.
117  continue;
118  }
119 
120  // Once we know that we're considering speculating the operand, check
121  // if we've already explored this subgraph and found it to be safe.
122  if (PotentialSpecSet.count(OpI))
123  continue;
124 
125  // If we've already explored this subgraph and found it unsafe, bail.
126  // If when we directly test whether this is safe it fails, bail.
127  if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
128  mayBeMemoryDependent(*OpI)) {
129  LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
130  << *OpI << "\n");
131  // Record the stack of instructions which reach this node as unsafe
132  // so we prune subsequent searches.
133  UnsafeSet.insert(OpI);
134  for (auto &StackPair : DFSStack) {
135  Instruction *I = StackPair.first;
136  UnsafeSet.insert(I);
137  }
138  return false;
139  }
140 
141  // Skip any operands we're already recursively checking.
142  if (!Visited.insert(OpI).second)
143  continue;
144 
145  // Push onto the stack and descend. We can directly continue this
146  // loop when ascending.
147  DFSStack.push_back({UI, OpIt});
148  UI = OpI;
149  OpIt = OpI->value_op_begin();
150  }
151 
152  // This node and all its operands are safe. Go ahead and cache that for
153  // reuse later.
154  PotentialSpecSet.insert(UI);
155 
156  // Continue with the next node on the stack.
157  } while (!DFSStack.empty());
158  }
159 
160 #ifndef NDEBUG
161  // Every visited operand should have been marked as safe for speculation at
162  // this point. Verify this and return success.
163  for (auto *I : Visited)
164  assert(PotentialSpecSet.count(I) &&
165  "Failed to mark a visited instruction as safe!");
166 #endif
167  return true;
168 }
169 
170 /// Check whether, in isolation, a given PHI node is both safe and profitable
171 /// to speculate users around.
172 ///
173 /// This handles checking whether there are any constant operands to a PHI
174 /// which could represent a useful speculation candidate, whether the users of
175 /// the PHI are safe to speculate including all their transitive dependencies,
176 /// and whether after speculation there will be some cost savings (profit) to
177 /// folding the operands into the users of the PHI node. Returns true if both
178 /// safe and profitable with relevant cost savings updated in the map and with
179 /// an update to the `PotentialSpecSet`. Returns false if either safety or
180 /// profitability are absent. Some new entries may be made to the
181 /// `PotentialSpecSet` even when this routine returns false, but they remain
182 /// conservatively correct.
183 ///
184 /// The profitability check here is a local one, but it checks this in an
185 /// interesting way. Beyond checking that the total cost of materializing the
186 /// constants will be less than the cost of folding them into their users, it
187 /// also checks that no one incoming constant will have a higher cost when
188 /// folded into its users rather than materialized. This higher cost could
189 /// result in a dynamic *path* that is more expensive even when the total cost
190 /// is lower. Currently, all of the interesting cases where this optimization
191 /// should fire are ones where it is a no-loss operation in this sense. If we
192 /// ever want to be more aggressive here, we would need to balance the
193 /// different incoming edges' cost by looking at their respective
194 /// probabilities.
196  PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
197  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
199  TargetTransformInfo &TTI) {
200  // First see whether there is any cost savings to speculating around this
201  // PHI, and build up a map of the constant inputs to how many times they
202  // occur.
203  bool NonFreeMat = false;
204  struct CostsAndCount {
205  int MatCost = TargetTransformInfo::TCC_Free;
206  int FoldedCost = TargetTransformInfo::TCC_Free;
207  int Count = 0;
208  };
210  SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
211  for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
212  auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
213  if (!IncomingC)
214  continue;
215 
216  // Only visit each incoming edge with a constant input once.
217  if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
218  continue;
219 
220  auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
221  // Count how many edges share a given incoming costant.
222  ++InsertResult.first->second.Count;
223  // Only compute the cost the first time we see a particular constant.
224  if (!InsertResult.second)
225  continue;
226 
227  int &MatCost = InsertResult.first->second.MatCost;
228  MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
229  NonFreeMat |= MatCost != TTI.TCC_Free;
230  }
231  if (!NonFreeMat) {
232  LLVM_DEBUG(dbgs() << " Free: " << PN << "\n");
233  // No profit in free materialization.
234  return false;
235  }
236 
237  // Now check that the uses of this PHI can actually be speculated,
238  // otherwise we'll still have to materialize the PHI value.
239  if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
240  LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n");
241  return false;
242  }
243 
244  // Compute how much (if any) savings are available by speculating around this
245  // PHI.
246  for (Use &U : PN.uses()) {
247  auto *UserI = cast<Instruction>(U.getUser());
248  // Now check whether there is any savings to folding the incoming constants
249  // into this use.
250  unsigned Idx = U.getOperandNo();
251 
252  // If we have a binary operator that is commutative, an actual constant
253  // operand would end up on the RHS, so pretend the use of the PHI is on the
254  // RHS.
255  //
256  // Technically, this is a bit weird if *both* operands are PHIs we're
257  // speculating. But if that is the case, giving an "optimistic" cost isn't
258  // a bad thing because after speculation it will constant fold. And
259  // moreover, such cases should likely have been constant folded already by
260  // some other pass, so we shouldn't worry about "modeling" them terribly
261  // accurately here. Similarly, if the other operand is a constant, it still
262  // seems fine to be "optimistic" in our cost modeling, because when the
263  // incoming operand from the PHI node is also a constant, we will end up
264  // constant folding.
265  if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
266  // Assume we will commute the constant to the RHS to be canonical.
267  Idx = 1;
268 
269  // Get the intrinsic ID if this user is an intrinsic.
271  if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
272  IID = UserII->getIntrinsicID();
273 
274  for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
275  ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
276  int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
277  int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
278  if (IID)
279  FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
280  IncomingC->getType());
281  else
282  FoldedCost +=
283  TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
284  IncomingC->getType());
285 
286  // If we accumulate more folded cost for this incoming constant than
287  // materialized cost, then we'll regress any edge with this constant so
288  // just bail. We're only interested in cases where folding the incoming
289  // constants is at least break-even on all paths.
290  if (FoldedCost > MatCost) {
291  LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
292  << "\n"
293  " Materializing cost: "
294  << MatCost
295  << "\n"
296  " Accumulated folded cost: "
297  << FoldedCost << "\n");
298  return false;
299  }
300  }
301  }
302 
303  // Compute the total cost savings afforded by this PHI node.
304  int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
305  for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
306  int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
307  int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
308  int Count = IncomingConstantAndCostsAndCount.second.Count;
309 
310  TotalMatCost += MatCost * Count;
311  TotalFoldedCost += FoldedCost * Count;
312  }
313  assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
314  "less that its materialized cost, "
315  "the sum must be as well.");
316 
317  LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost)
318  << ": " << PN << "\n");
319  CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
320  return true;
321 }
322 
323 /// Simple helper to walk all the users of a list of phis depth first, and call
324 /// a visit function on each one in post-order.
325 ///
326 /// All of the PHIs should be in the same basic block, and this is primarily
327 /// used to make a single depth-first walk across their collective users
328 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
329 /// callable to test whether a node has been visited and the more important
330 /// callable to actually visit a particular node.
331 ///
332 /// Depth-first and postorder here refer to the *operand* graph -- we start
333 /// from a collection of users of PHI nodes and walk "up" the operands
334 /// depth-first.
335 template <typename IsVisitedT, typename VisitT>
337  IsVisitedT IsVisited,
338  VisitT Visit) {
340  for (auto *PN : PNs)
341  for (Use &U : PN->uses()) {
342  auto *UI = cast<Instruction>(U.getUser());
343  if (IsVisited(UI))
344  // Already visited this user, continue across the roots.
345  continue;
346 
347  // Otherwise, walk the operand graph depth-first and visit each
348  // dependency in postorder.
349  DFSStack.push_back({UI, UI->value_op_begin()});
350  do {
352  std::tie(UI, OpIt) = DFSStack.pop_back_val();
353  while (OpIt != UI->value_op_end()) {
354  auto *OpI = dyn_cast<Instruction>(*OpIt);
355  // Increment to the next operand for whenever we continue.
356  ++OpIt;
357  // No need to visit non-instructions, which can't form dependencies,
358  // or instructions outside of our potential dependency set that we
359  // were given. Finally, if we've already visited the node, continue
360  // to the next.
361  if (!OpI || IsVisited(OpI))
362  continue;
363 
364  // Push onto the stack and descend. We can directly continue this
365  // loop when ascending.
366  DFSStack.push_back({UI, OpIt});
367  UI = OpI;
368  OpIt = OpI->value_op_begin();
369  }
370 
371  // Finished visiting children, visit this node.
372  assert(!IsVisited(UI) && "Should not have already visited a node!");
373  Visit(UI);
374  } while (!DFSStack.empty());
375  }
376 }
377 
378 /// Find profitable PHIs to speculate.
379 ///
380 /// For a PHI node to be profitable, we need the cost of speculating its users
381 /// (and their dependencies) to not exceed the savings of folding the PHI's
382 /// constant operands into the speculated users.
383 ///
384 /// Computing this is surprisingly challenging. Because users of two different
385 /// PHI nodes can depend on each other or on common other instructions, it may
386 /// be profitable to speculate two PHI nodes together even though neither one
387 /// in isolation is profitable. The straightforward way to find all the
388 /// profitable PHIs would be to check each combination of PHIs' cost, but this
389 /// is exponential in complexity.
390 ///
391 /// Even if we assume that we only care about cases where we can consider each
392 /// PHI node in isolation (rather than considering cases where none are
393 /// profitable in isolation but some subset are profitable as a set), we still
394 /// have a challenge. The obvious way to find all individually profitable PHIs
395 /// is to iterate until reaching a fixed point, but this will be quadratic in
396 /// complexity. =/
397 ///
398 /// This code currently uses a linear-to-compute order for a greedy approach.
399 /// It won't find cases where a set of PHIs must be considered together, but it
400 /// handles most cases of order dependence without quadratic iteration. The
401 /// specific order used is the post-order across the operand DAG. When the last
402 /// user of a PHI is visited in this postorder walk, we check it for
403 /// profitability.
404 ///
405 /// There is an orthogonal extra complexity to all of this: computing the cost
406 /// itself can easily become a linear computation making everything again (at
407 /// best) quadratic. Using a postorder over the operand graph makes it
408 /// particularly easy to avoid this through dynamic programming. As we do the
409 /// postorder walk, we build the transitive cost of that subgraph. It is also
410 /// straightforward to then update these costs when we mark a PHI for
411 /// speculation so that subsequent PHIs don't re-pay the cost of already
412 /// speculated instructions.
415  const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
416  const SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
417  int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) {
419 
420  // First, establish a reverse mapping from immediate users of the PHI nodes
421  // to the nodes themselves, and count how many users each PHI node has in
422  // a way we can update while processing them.
424  SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
426  for (auto *PN : PNs) {
427  assert(UserSet.empty() && "Must start with an empty user set!");
428  for (Use &U : PN->uses())
429  UserSet.insert(cast<Instruction>(U.getUser()));
430  PNUserCountMap[PN] = UserSet.size();
431  for (auto *UI : UserSet)
432  UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
433  UserSet.clear();
434  }
435 
436  // Now do a DFS across the operand graph of the users, computing cost as we
437  // go and when all costs for a given PHI are known, checking that PHI for
438  // profitability.
441  PNs,
442  /*IsVisited*/
443  [&](Instruction *I) {
444  // We consider anything that isn't potentially speculated to be
445  // "visited" as it is already handled. Similarly, anything that *is*
446  // potentially speculated but for which we have an entry in our cost
447  // map, we're done.
448  return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
449  },
450  /*Visit*/
451  [&](Instruction *I) {
452  // We've fully visited the operands, so sum their cost with this node
453  // and update the cost map.
454  int Cost = TTI.TCC_Free;
455  for (Value *OpV : I->operand_values())
456  if (auto *OpI = dyn_cast<Instruction>(OpV)) {
457  auto CostMapIt = SpecCostMap.find(OpI);
458  if (CostMapIt != SpecCostMap.end())
459  Cost += CostMapIt->second;
460  }
461  Cost += TTI.getUserCost(I);
462  bool Inserted = SpecCostMap.insert({I, Cost}).second;
463  (void)Inserted;
464  assert(Inserted && "Must not re-insert a cost during the DFS!");
465 
466  // Now check if this node had a corresponding PHI node using it. If so,
467  // we need to decrement the outstanding user count for it.
468  auto UserPNsIt = UserToPNMap.find(I);
469  if (UserPNsIt == UserToPNMap.end())
470  return;
471  auto &UserPNs = UserPNsIt->second;
472  auto UserPNsSplitIt = std::stable_partition(
473  UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
474  int &PNUserCount = PNUserCountMap.find(UserPN)->second;
475  assert(
476  PNUserCount > 0 &&
477  "Should never re-visit a PN after its user count hits zero!");
478  --PNUserCount;
479  return PNUserCount != 0;
480  });
481 
482  // FIXME: Rather than one at a time, we should sum the savings as the
483  // cost will be completely shared.
484  SmallVector<Instruction *, 16> SpecWorklist;
485  for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
486  int SpecCost = TTI.TCC_Free;
487  for (Use &U : PN->uses())
488  SpecCost +=
489  SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
490  SpecCost *= (NumPreds - 1);
491  // When the user count of a PHI node hits zero, we should check its
492  // profitability. If profitable, we should mark it for speculation
493  // and zero out the cost of everything it depends on.
494  int CostSavings = CostSavingsMap.find(PN)->second;
495  if (SpecCost > CostSavings) {
496  LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
497  << "\n"
498  " Cost savings: "
499  << CostSavings
500  << "\n"
501  " Speculation cost: "
502  << SpecCost << "\n");
503  continue;
504  }
505 
506  // We're going to speculate this user-associated PHI. Copy it out and
507  // add its users to the worklist to update their cost.
508  SpecPNs.push_back(PN);
509  for (Use &U : PN->uses()) {
510  auto *UI = cast<Instruction>(U.getUser());
511  auto CostMapIt = SpecCostMap.find(UI);
512  if (CostMapIt->second == 0)
513  continue;
514  // Zero out this cost entry to avoid duplicates.
515  CostMapIt->second = 0;
516  SpecWorklist.push_back(UI);
517  }
518  }
519 
520  // Now walk all the operands of the users in the worklist transitively
521  // to zero out all the memoized costs.
522  while (!SpecWorklist.empty()) {
523  Instruction *SpecI = SpecWorklist.pop_back_val();
524  assert(SpecCostMap.find(SpecI)->second == 0 &&
525  "Didn't zero out a cost!");
526 
527  // Walk the operands recursively to zero out their cost as well.
528  for (auto *OpV : SpecI->operand_values()) {
529  auto *OpI = dyn_cast<Instruction>(OpV);
530  if (!OpI)
531  continue;
532  auto CostMapIt = SpecCostMap.find(OpI);
533  if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
534  continue;
535  CostMapIt->second = 0;
536  SpecWorklist.push_back(OpI);
537  }
538  }
539  });
540 
541  return SpecPNs;
542 }
543 
544 /// Speculate users around a set of PHI nodes.
545 ///
546 /// This routine does the actual speculation around a set of PHI nodes where we
547 /// have determined this to be both safe and profitable.
548 ///
549 /// This routine handles any spliting of critical edges necessary to create
550 /// a safe block to speculate into as well as cloning the instructions and
551 /// rewriting all uses.
552 static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
553  SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
555  DominatorTree &DT) {
556  LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n");
557  NumPHIsSpeculated += SpecPNs.size();
558 
559  // Split any critical edges so that we have a block to hoist into.
560  auto *ParentBB = SpecPNs[0]->getParent();
562  SpecPreds.reserve(PredSet.size());
563  for (auto *PredBB : PredSet) {
564  auto *NewPredBB = SplitCriticalEdge(
565  PredBB, ParentBB,
566  CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
567  if (NewPredBB) {
568  ++NumEdgesSplit;
569  LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName()
570  << "\n");
571  SpecPreds.push_back(NewPredBB);
572  } else {
573  assert(PredBB->getSingleSuccessor() == ParentBB &&
574  "We need a non-critical predecessor to speculate into.");
575  assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
576  "Cannot have a non-critical invoke!");
577 
578  // Already non-critical, use existing pred.
579  SpecPreds.push_back(PredBB);
580  }
581  }
582 
586  /*IsVisited*/
587  [&](Instruction *I) {
588  // This is visited if we don't need to
589  // speculate it or we already have
590  // speculated it.
591  return !PotentialSpecSet.count(I) ||
592  SpecSet.count(I);
593  },
594  /*Visit*/
595  [&](Instruction *I) {
596  // All operands scheduled, schedule this
597  // node.
598  SpecSet.insert(I);
599  SpecList.push_back(I);
600  });
601 
602  int NumSpecInsts = SpecList.size() * SpecPreds.size();
603  int NumRedundantInsts = NumSpecInsts - SpecList.size();
604  LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
605  << " speculated instructions, " << NumRedundantInsts
606  << " redundancies\n");
607  NumSpeculatedInstructions += NumSpecInsts;
608  NumNewRedundantInstructions += NumRedundantInsts;
609 
610  // Each predecessor is numbered by its index in `SpecPreds`, so for each
611  // instruction we speculate, the speculated instruction is stored in that
612  // index of the vector associated with the original instruction. We also
613  // store the incoming values for each predecessor from any PHIs used.
615 
616  // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
617  // value. This handles both the PHIs we are speculating around and any other
618  // PHIs that happen to be used.
619  for (auto *OrigI : SpecList)
620  for (auto *OpV : OrigI->operand_values()) {
621  auto *OpPN = dyn_cast<PHINode>(OpV);
622  if (!OpPN || OpPN->getParent() != ParentBB)
623  continue;
624 
625  auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
626  if (!InsertResult.second)
627  continue;
628 
629  auto &SpeculatedVals = InsertResult.first->second;
630 
631  // Populating our structure for mapping is particularly annoying because
632  // finding an incoming value for a particular predecessor block in a PHI
633  // node is a linear time operation! To avoid quadratic behavior, we build
634  // a map for this PHI node's incoming values and then translate it into
635  // the more compact representation used below.
637  for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
638  IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
639 
640  for (auto *PredBB : SpecPreds)
641  SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
642  }
643 
644  // Speculate into each predecessor.
645  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
646  auto *PredBB = SpecPreds[PredIdx];
647  assert(PredBB->getSingleSuccessor() == ParentBB &&
648  "We need a non-critical predecessor to speculate into.");
649 
650  for (auto *OrigI : SpecList) {
651  auto *NewI = OrigI->clone();
652  NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
653  NewI->insertBefore(PredBB->getTerminator());
654 
655  // Rewrite all the operands to the previously speculated instructions.
656  // Because we're walking in-order, the defs must precede the uses and we
657  // should already have these mappings.
658  for (Use &U : NewI->operands()) {
659  auto *OpI = dyn_cast<Instruction>(U.get());
660  if (!OpI)
661  continue;
662  auto MapIt = SpeculatedValueMap.find(OpI);
663  if (MapIt == SpeculatedValueMap.end())
664  continue;
665  const auto &SpeculatedVals = MapIt->second;
666  assert(SpeculatedVals[PredIdx] &&
667  "Must have a speculated value for this predecessor!");
668  assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
669  "Speculated value has the wrong type!");
670 
671  // Rewrite the use to this predecessor's speculated instruction.
672  U.set(SpeculatedVals[PredIdx]);
673  }
674 
675  // Commute instructions which now have a constant in the LHS but not the
676  // RHS.
677  if (NewI->isBinaryOp() && NewI->isCommutative() &&
678  isa<Constant>(NewI->getOperand(0)) &&
679  !isa<Constant>(NewI->getOperand(1)))
680  NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
681 
682  SpeculatedValueMap[OrigI].push_back(NewI);
683  assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
684  "Mismatched speculated instruction index!");
685  }
686  }
687 
688  // Walk the speculated instruction list and if they have uses, insert a PHI
689  // for them from the speculated versions, and replace the uses with the PHI.
690  // Then erase the instructions as they have been fully speculated. The walk
691  // needs to be in reverse so that we don't think there are users when we'll
692  // actually eventually remove them later.
693  IRBuilder<> IRB(SpecPNs[0]);
694  for (auto *OrigI : llvm::reverse(SpecList)) {
695  // Check if we need a PHI for any remaining users and if so, insert it.
696  if (!OrigI->use_empty()) {
697  auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
698  Twine(OrigI->getName()) + ".phi");
699  // Add the incoming values we speculated.
700  auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
701  for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
702  SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
703 
704  // And replace the uses with the PHI node.
705  OrigI->replaceAllUsesWith(SpecIPN);
706  }
707 
708  // It is important to immediately erase this so that it stops using other
709  // instructions. This avoids inserting needless PHIs of them.
710  OrigI->eraseFromParent();
711  }
712 
713  // All of the uses of the speculated phi nodes should be removed at this
714  // point, so erase them.
715  for (auto *SpecPN : SpecPNs) {
716  assert(SpecPN->use_empty() && "All users should have been speculated!");
717  SpecPN->eraseFromParent();
718  }
719 }
720 
721 /// Try to speculate around a series of PHIs from a single basic block.
722 ///
723 /// This routine checks whether any of these PHIs are profitable to speculate
724 /// users around. If safe and profitable, it does the speculation. It returns
725 /// true when at least some speculation occurs.
728  LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
729 
730  // Savings in cost from speculating around a PHI node.
731  SmallDenseMap<PHINode *, int, 16> CostSavingsMap;
732 
733  // Remember the set of instructions that are candidates for speculation so
734  // that we can quickly walk things within that space. This prunes out
735  // instructions already available along edges, etc.
736  SmallPtrSet<Instruction *, 16> PotentialSpecSet;
737 
738  // Remember the set of instructions that are (transitively) unsafe to
739  // speculate into the incoming edges of this basic block. This avoids
740  // recomputing them for each PHI node we check. This set is specific to this
741  // block though as things are pruned out of it based on what is available
742  // along incoming edges.
744 
745  // For each PHI node in this block, check whether there are immediate folding
746  // opportunities from speculation, and whether that speculation will be
747  // valid. This determise the set of safe PHIs to speculate.
748  PNs.erase(llvm::remove_if(PNs,
749  [&](PHINode *PN) {
751  *PN, CostSavingsMap, PotentialSpecSet,
752  UnsafeSet, DT, TTI);
753  }),
754  PNs.end());
755  // If no PHIs were profitable, skip.
756  if (PNs.empty()) {
757  LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
758  return false;
759  }
760 
761  // We need to know how much speculation will cost which is determined by how
762  // many incoming edges will need a copy of each speculated instruction.
764  for (auto *PredBB : PNs[0]->blocks()) {
765  if (!PredSet.insert(PredBB))
766  continue;
767 
768  // We cannot speculate when a predecessor is an indirect branch.
769  // FIXME: We also can't reliably create a non-critical edge block for
770  // speculation if the predecessor is an invoke. This doesn't seem
771  // fundamental and we should probably be splitting critical edges
772  // differently.
773  if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
774  isa<InvokeInst>(PredBB->getTerminator())) {
775  LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
776  << PredBB->getName() << "\n");
777  return false;
778  }
779  }
780  if (PredSet.size() < 2) {
781  LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
782  return false;
783  }
784 
786  PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
787  if (SpecPNs.empty())
788  // Nothing to do.
789  return false;
790 
791  speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
792  return true;
793 }
794 
797  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
798  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
799 
800  bool Changed = false;
801  for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
803  auto BBI = BB->begin();
804  while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
805  PNs.push_back(PN);
806  ++BBI;
807  }
808 
809  if (PNs.empty())
810  continue;
811 
812  Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
813  }
814 
815  if (!Changed)
816  return PreservedAnalyses::all();
817 
819  return PA;
820 }
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
Definition: Constants.h:172
This routine provides some synthesis utilities to produce sequences of values.
iterator_range< use_iterator > uses()
Definition: Value.h:355
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:770
This class represents lattice values for constants.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
Analysis pass providing the TargetTransformInfo.
unsigned second
STATISTIC(NumFunctions, "Total number of functions")
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:231
F(f)
void reserve(size_type N)
Definition: SmallVector.h:376
Option class for critical edge splitting.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
static bool isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, SmallPtrSetImpl< Instruction *> &PotentialSpecSet, SmallPtrSetImpl< Instruction *> &UnsafeSet)
Check whether speculating the users of a PHI node around the PHI will be safe.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
DenseMap< BasicBlock *, Value * > IncomingValueMap
Definition: Local.cpp:823
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions())
If this edge is a critical edge, insert a new node to split the critical edge.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static SmallVector< PHINode *, 16 > findProfitablePHIs(ArrayRef< PHINode *> PNs, const SmallDenseMap< PHINode *, int, 16 > &CostSavingsMap, const SmallPtrSetImpl< Instruction *> &PotentialSpecSet, int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI)
Find profitable PHIs to speculate.
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool tryToSpeculatePHIs(SmallVectorImpl< PHINode *> &PNs, DominatorTree &DT, TargetTransformInfo &TTI)
Try to speculate around a series of PHIs from a single basic block.
Expected to fold away in lowering.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1226
static void visitPHIUsersAndDepsInPostOrder(ArrayRef< PHINode *> PNs, IsVisitedT IsVisited, VisitT Visit)
Simple helper to walk all the users of a list of phis depth first, and call a visit function on each ...
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
size_t size() const
Definition: SmallVector.h:53
static wasm::ValType getType(const TargetRegisterClass *RC)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned first
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:1969
Iterator for directly iterating over the operand Values.
Definition: User.h:246
size_type size() const
Definition: SmallPtrSet.h:93
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:249
int getIntImmCost(const APInt &Imm, Type *Ty) const
Return the expected cost of materializing for the given integer immediate of the specified type...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instructions I depend on or influence global memor...
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
#define I(x, y, z)
Definition: MD5.cpp:58
iterator_range< value_op_iterator > operand_values()
Definition: User.h:262
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
static bool isSafeAndProfitableToSpeculateAroundPHI(PHINode &PN, SmallDenseMap< PHINode *, int, 16 > &CostSavingsMap, SmallPtrSetImpl< Instruction *> &PotentialSpecSet, SmallPtrSetImpl< Instruction *> &UnsafeSet, DominatorTree &DT, TargetTransformInfo &TTI)
Check whether, in isolation, a given PHI node is both safe and profitable to speculate users around...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
static void speculatePHIs(ArrayRef< PHINode *> SpecPNs, SmallPtrSetImpl< Instruction *> &PotentialSpecSet, SmallSetVector< BasicBlock *, 16 > &PredSet, DominatorTree &DT)
Speculate users around a set of PHI nodes.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
const BasicBlock * getParent() const
Definition: Instruction.h:67