LLVM  8.0.1
BlockFrequencyInfoImpl.cpp
Go to the documentation of this file.
1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Loops should be simplified before this analysis.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/GraphTraits.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/SCCIterator.h"
20 #include "llvm/Config/llvm-config.h"
21 #include "llvm/IR/Function.h"
24 #include "llvm/Support/Compiler.h"
25 #include "llvm/Support/Debug.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <iterator>
34 #include <list>
35 #include <numeric>
36 #include <utility>
37 #include <vector>
38 
39 using namespace llvm;
40 using namespace llvm::bfi_detail;
41 
42 #define DEBUG_TYPE "block-freq"
43 
45  if (isFull())
46  return ScaledNumber<uint64_t>(1, 0);
47  return ScaledNumber<uint64_t>(getMass() + 1, -64);
48 }
49 
50 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
52 #endif
53 
54 static char getHexDigit(int N) {
55  assert(N < 16);
56  if (N < 10)
57  return '0' + N;
58  return 'a' + N - 10;
59 }
60 
62  for (int Digits = 0; Digits < 16; ++Digits)
63  OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
64  return OS;
65 }
66 
67 namespace {
68 
70 using Distribution = BlockFrequencyInfoImplBase::Distribution;
75 using FrequencyData = BlockFrequencyInfoImplBase::FrequencyData;
76 
77 /// Dithering mass distributer.
78 ///
79 /// This class splits up a single mass into portions by weight, dithering to
80 /// spread out error. No mass is lost. The dithering precision depends on the
81 /// precision of the product of \a BlockMass and \a BranchProbability.
82 ///
83 /// The distribution algorithm follows.
84 ///
85 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the
86 /// mass to distribute in \a RemMass.
87 ///
88 /// 2. For each portion:
89 ///
90 /// 1. Construct a branch probability, P, as the portion's weight divided
91 /// by the current value of \a RemWeight.
92 /// 2. Calculate the portion's mass as \a RemMass times P.
93 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
94 /// the current portion's weight and mass.
95 struct DitheringDistributer {
96  uint32_t RemWeight;
97  BlockMass RemMass;
98 
99  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
100 
101  BlockMass takeMass(uint32_t Weight);
102 };
103 
104 } // end anonymous namespace
105 
106 DitheringDistributer::DitheringDistributer(Distribution &Dist,
107  const BlockMass &Mass) {
108  Dist.normalize();
109  RemWeight = Dist.Total;
110  RemMass = Mass;
111 }
112 
113 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
114  assert(Weight && "invalid weight");
115  assert(Weight <= RemWeight);
116  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
117 
118  // Decrement totals (dither).
119  RemWeight -= Weight;
120  RemMass -= Mass;
121  return Mass;
122 }
123 
124 void Distribution::add(const BlockNode &Node, uint64_t Amount,
125  Weight::DistType Type) {
126  assert(Amount && "invalid weight of 0");
127  uint64_t NewTotal = Total + Amount;
128 
129  // Check for overflow. It should be impossible to overflow twice.
130  bool IsOverflow = NewTotal < Total;
131  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
132  DidOverflow |= IsOverflow;
133 
134  // Update the total.
135  Total = NewTotal;
136 
137  // Save the weight.
138  Weights.push_back(Weight(Type, Node, Amount));
139 }
140 
141 static void combineWeight(Weight &W, const Weight &OtherW) {
142  assert(OtherW.TargetNode.isValid());
143  if (!W.Amount) {
144  W = OtherW;
145  return;
146  }
147  assert(W.Type == OtherW.Type);
148  assert(W.TargetNode == OtherW.TargetNode);
149  assert(OtherW.Amount && "Expected non-zero weight");
150  if (W.Amount > W.Amount + OtherW.Amount)
151  // Saturate on overflow.
152  W.Amount = UINT64_MAX;
153  else
154  W.Amount += OtherW.Amount;
155 }
156 
157 static void combineWeightsBySorting(WeightList &Weights) {
158  // Sort so edges to the same node are adjacent.
159  llvm::sort(Weights, [](const Weight &L, const Weight &R) {
160  return L.TargetNode < R.TargetNode;
161  });
162 
163  // Combine adjacent edges.
164  WeightList::iterator O = Weights.begin();
165  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
166  ++O, (I = L)) {
167  *O = *I;
168 
169  // Find the adjacent weights to the same node.
170  for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
171  combineWeight(*O, *L);
172  }
173 
174  // Erase extra entries.
175  Weights.erase(O, Weights.end());
176 }
177 
178 static void combineWeightsByHashing(WeightList &Weights) {
179  // Collect weights into a DenseMap.
180  using HashTable = DenseMap<BlockNode::IndexType, Weight>;
181 
182  HashTable Combined(NextPowerOf2(2 * Weights.size()));
183  for (const Weight &W : Weights)
184  combineWeight(Combined[W.TargetNode.Index], W);
185 
186  // Check whether anything changed.
187  if (Weights.size() == Combined.size())
188  return;
189 
190  // Fill in the new weights.
191  Weights.clear();
192  Weights.reserve(Combined.size());
193  for (const auto &I : Combined)
194  Weights.push_back(I.second);
195 }
196 
197 static void combineWeights(WeightList &Weights) {
198  // Use a hash table for many successors to keep this linear.
199  if (Weights.size() > 128) {
200  combineWeightsByHashing(Weights);
201  return;
202  }
203 
204  combineWeightsBySorting(Weights);
205 }
206 
207 static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
208  assert(Shift >= 0);
209  assert(Shift < 64);
210  if (!Shift)
211  return N;
212  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
213 }
214 
215 void Distribution::normalize() {
216  // Early exit for termination nodes.
217  if (Weights.empty())
218  return;
219 
220  // Only bother if there are multiple successors.
221  if (Weights.size() > 1)
222  combineWeights(Weights);
223 
224  // Early exit when combined into a single successor.
225  if (Weights.size() == 1) {
226  Total = 1;
227  Weights.front().Amount = 1;
228  return;
229  }
230 
231  // Determine how much to shift right so that the total fits into 32-bits.
232  //
233  // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
234  // for each weight can cause a 32-bit overflow.
235  int Shift = 0;
236  if (DidOverflow)
237  Shift = 33;
238  else if (Total > UINT32_MAX)
239  Shift = 33 - countLeadingZeros(Total);
240 
241  // Early exit if nothing needs to be scaled.
242  if (!Shift) {
243  // If we didn't overflow then combineWeights() shouldn't have changed the
244  // sum of the weights, but let's double-check.
245  assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
246  [](uint64_t Sum, const Weight &W) {
247  return Sum + W.Amount;
248  }) &&
249  "Expected total to be correct");
250  return;
251  }
252 
253  // Recompute the total through accumulation (rather than shifting it) so that
254  // it's accurate after shifting and any changes combineWeights() made above.
255  Total = 0;
256 
257  // Sum the weights to each node and shift right if necessary.
258  for (Weight &W : Weights) {
259  // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
260  // can round here without concern about overflow.
261  assert(W.TargetNode.isValid());
262  W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
263  assert(W.Amount <= UINT32_MAX);
264 
265  // Update the total.
266  Total += W.Amount;
267  }
268  assert(Total <= UINT32_MAX);
269 }
270 
272  // Swap with a default-constructed std::vector, since std::vector<>::clear()
273  // does not actually clear heap storage.
274  std::vector<FrequencyData>().swap(Freqs);
275  IsIrrLoopHeader.clear();
276  std::vector<WorkingData>().swap(Working);
277  Loops.clear();
278 }
279 
280 /// Clear all memory not needed downstream.
281 ///
282 /// Releases all memory not used downstream. In particular, saves Freqs.
284  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
285  SparseBitVector<> SavedIsIrrLoopHeader(std::move(BFI.IsIrrLoopHeader));
286  BFI.clear();
287  BFI.Freqs = std::move(SavedFreqs);
288  BFI.IsIrrLoopHeader = std::move(SavedIsIrrLoopHeader);
289 }
290 
292  const LoopData *OuterLoop,
293  const BlockNode &Pred,
294  const BlockNode &Succ,
295  uint64_t Weight) {
296  if (!Weight)
297  Weight = 1;
298 
299  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
300  return OuterLoop && OuterLoop->isHeader(Node);
301  };
302 
303  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
304 
305 #ifndef NDEBUG
306  auto debugSuccessor = [&](const char *Type) {
307  dbgs() << " =>"
308  << " [" << Type << "] weight = " << Weight;
309  if (!isLoopHeader(Resolved))
310  dbgs() << ", succ = " << getBlockName(Succ);
311  if (Resolved != Succ)
312  dbgs() << ", resolved = " << getBlockName(Resolved);
313  dbgs() << "\n";
314  };
315  (void)debugSuccessor;
316 #endif
317 
318  if (isLoopHeader(Resolved)) {
319  LLVM_DEBUG(debugSuccessor("backedge"));
320  Dist.addBackedge(Resolved, Weight);
321  return true;
322  }
323 
324  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
325  LLVM_DEBUG(debugSuccessor(" exit "));
326  Dist.addExit(Resolved, Weight);
327  return true;
328  }
329 
330  if (Resolved < Pred) {
331  if (!isLoopHeader(Pred)) {
332  // If OuterLoop is an irreducible loop, we can't actually handle this.
333  assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
334  "unhandled irreducible control flow");
335 
336  // Irreducible backedge. Abort.
337  LLVM_DEBUG(debugSuccessor("abort!!!"));
338  return false;
339  }
340 
341  // If "Pred" is a loop header, then this isn't really a backedge; rather,
342  // OuterLoop must be irreducible. These false backedges can come only from
343  // secondary loop headers.
344  assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
345  "unhandled irreducible control flow");
346  }
347 
348  LLVM_DEBUG(debugSuccessor(" local "));
349  Dist.addLocal(Resolved, Weight);
350  return true;
351 }
352 
354  const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
355  // Copy the exit map into Dist.
356  for (const auto &I : Loop.Exits)
357  if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
358  I.second.getMass()))
359  // Irreducible backedge.
360  return false;
361 
362  return true;
363 }
364 
365 /// Compute the loop scale for a loop.
367  // Compute loop scale.
368  LLVM_DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
369 
370  // Infinite loops need special handling. If we give the back edge an infinite
371  // mass, they may saturate all the other scales in the function down to 1,
372  // making all the other region temperatures look exactly the same. Choose an
373  // arbitrary scale to avoid these issues.
374  //
375  // FIXME: An alternate way would be to select a symbolic scale which is later
376  // replaced to be the maximum of all computed scales plus 1. This would
377  // appropriately describe the loop as having a large scale, without skewing
378  // the final frequency computation.
379  const Scaled64 InfiniteLoopScale(1, 12);
380 
381  // LoopScale == 1 / ExitMass
382  // ExitMass == HeadMass - BackedgeMass
383  BlockMass TotalBackedgeMass;
384  for (auto &Mass : Loop.BackedgeMass)
385  TotalBackedgeMass += Mass;
386  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;
387 
388  // Block scale stores the inverse of the scale. If this is an infinite loop,
389  // its exit mass will be zero. In this case, use an arbitrary scale for the
390  // loop scale.
391  Loop.Scale =
392  ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();
393 
394  LLVM_DEBUG(dbgs() << " - exit-mass = " << ExitMass << " ("
395  << BlockMass::getFull() << " - " << TotalBackedgeMass
396  << ")\n"
397  << " - scale = " << Loop.Scale << "\n");
398 }
399 
400 /// Package up a loop.
402  LLVM_DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
403 
404  // Clear the subloop exits to prevent quadratic memory usage.
405  for (const BlockNode &M : Loop.Nodes) {
406  if (auto *Loop = Working[M.Index].getPackagedLoop())
407  Loop->Exits.clear();
408  LLVM_DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
409  }
410  Loop.IsPackaged = true;
411 }
412 
413 #ifndef NDEBUG
415  const DitheringDistributer &D, const BlockNode &T,
416  const BlockMass &M, const char *Desc) {
417  dbgs() << " => assign " << M << " (" << D.RemMass << ")";
418  if (Desc)
419  dbgs() << " [" << Desc << "]";
420  if (T.isValid())
421  dbgs() << " to " << BFI.getBlockName(T);
422  dbgs() << "\n";
423 }
424 #endif
425 
427  LoopData *OuterLoop,
428  Distribution &Dist) {
429  BlockMass Mass = Working[Source.Index].getMass();
430  LLVM_DEBUG(dbgs() << " => mass: " << Mass << "\n");
431 
432  // Distribute mass to successors as laid out in Dist.
433  DitheringDistributer D(Dist, Mass);
434 
435  for (const Weight &W : Dist.Weights) {
436  // Check for a local edge (non-backedge and non-exit).
437  BlockMass Taken = D.takeMass(W.Amount);
438  if (W.Type == Weight::Local) {
439  Working[W.TargetNode.Index].getMass() += Taken;
440  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
441  continue;
442  }
443 
444  // Backedges and exits only make sense if we're processing a loop.
445  assert(OuterLoop && "backedge or exit outside of loop");
446 
447  // Check for a backedge.
448  if (W.Type == Weight::Backedge) {
449  OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
450  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
451  continue;
452  }
453 
454  // This must be an exit.
455  assert(W.Type == Weight::Exit);
456  OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
457  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
458  }
459 }
460 
462  const Scaled64 &Min, const Scaled64 &Max) {
463  // Scale the Factor to a size that creates integers. Ideally, integers would
464  // be scaled so that Max == UINT64_MAX so that they can be best
465  // differentiated. However, in the presence of large frequency values, small
466  // frequencies are scaled down to 1, making it impossible to differentiate
467  // small, unequal numbers. When the spread between Min and Max frequencies
468  // fits well within MaxBits, we make the scale be at least 8.
469  const unsigned MaxBits = 64;
470  const unsigned SpreadBits = (Max / Min).lg();
471  Scaled64 ScalingFactor;
472  if (SpreadBits <= MaxBits - 3) {
473  // If the values are small enough, make the scaling factor at least 8 to
474  // allow distinguishing small values.
475  ScalingFactor = Min.inverse();
476  ScalingFactor <<= 3;
477  } else {
478  // If the values need more than MaxBits to be represented, saturate small
479  // frequency values down to 1 by using a scaling factor that benefits large
480  // frequency values.
481  ScalingFactor = Scaled64(1, MaxBits) / Max;
482  }
483 
484  // Translate the floats to integers.
485  LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
486  << ", factor = " << ScalingFactor << "\n");
487  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
488  Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
489  BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
490  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
491  << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
492  << ", int = " << BFI.Freqs[Index].Integer << "\n");
493  }
494 }
495 
496 /// Unwrap a loop package.
497 ///
498 /// Visits all the members of a loop, adjusting their BlockData according to
499 /// the loop's pseudo-node.
500 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
501  LLVM_DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
502  << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
503  << "\n");
504  Loop.Scale *= Loop.Mass.toScaled();
505  Loop.IsPackaged = false;
506  LLVM_DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
507 
508  // Propagate the head scale through the loop. Since members are visited in
509  // RPO, the head scale will be updated by the loop scale first, and then the
510  // final head scale will be used for updated the rest of the members.
511  for (const BlockNode &N : Loop.Nodes) {
512  const auto &Working = BFI.Working[N.Index];
513  Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
514  : BFI.Freqs[N.Index].Scaled;
515  Scaled64 New = Loop.Scale * F;
516  LLVM_DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => "
517  << New << "\n");
518  F = New;
519  }
520 }
521 
523  // Set initial frequencies from loop-local masses.
524  for (size_t Index = 0; Index < Working.size(); ++Index)
525  Freqs[Index].Scaled = Working[Index].Mass.toScaled();
526 
527  for (LoopData &Loop : Loops)
528  unwrapLoop(*this, Loop);
529 }
530 
532  // Unwrap loop packages in reverse post-order, tracking min and max
533  // frequencies.
534  auto Min = Scaled64::getLargest();
535  auto Max = Scaled64::getZero();
536  for (size_t Index = 0; Index < Working.size(); ++Index) {
537  // Update min/max scale.
538  Min = std::min(Min, Freqs[Index].Scaled);
539  Max = std::max(Max, Freqs[Index].Scaled);
540  }
541 
542  // Convert to integers.
543  convertFloatingToInteger(*this, Min, Max);
544 
545  // Clean up data structures.
546  cleanup(*this);
547 
548  // Print out the final stats.
549  LLVM_DEBUG(dump());
550 }
551 
554  if (!Node.isValid())
555  return 0;
556  return Freqs[Node.Index].Integer;
557 }
558 
561  const BlockNode &Node) const {
562  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
563 }
564 
567  uint64_t Freq) const {
568  auto EntryCount = F.getEntryCount();
569  if (!EntryCount)
570  return None;
571  // Use 128 bit APInt to do the arithmetic to avoid overflow.
572  APInt BlockCount(128, EntryCount.getCount());
573  APInt BlockFreq(128, Freq);
574  APInt EntryFreq(128, getEntryFreq());
575  BlockCount *= BlockFreq;
576  // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned
577  // lshr by 1 gives EntryFreq/2.
578  BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
579  return BlockCount.getLimitedValue();
580 }
581 
582 bool
584  if (!Node.isValid())
585  return false;
586  return IsIrrLoopHeader.test(Node.Index);
587 }
588 
589 Scaled64
591  if (!Node.isValid())
592  return Scaled64::getZero();
593  return Freqs[Node.Index].Scaled;
594 }
595 
597  uint64_t Freq) {
598  assert(Node.isValid() && "Expected valid node");
599  assert(Node.Index < Freqs.size() && "Expected legal index");
600  Freqs[Node.Index].Integer = Freq;
601 }
602 
603 std::string
605  return {};
606 }
607 
608 std::string
610  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
611 }
612 
613 raw_ostream &
615  const BlockNode &Node) const {
616  return OS << getFloatingBlockFreq(Node);
617 }
618 
619 raw_ostream &
621  const BlockFrequency &Freq) const {
622  Scaled64 Block(Freq.getFrequency(), 0);
623  Scaled64 Entry(getEntryFreq(), 0);
624 
625  return OS << Block / Entry;
626 }
627 
629  Start = OuterLoop.getHeader();
630  Nodes.reserve(OuterLoop.Nodes.size());
631  for (auto N : OuterLoop.Nodes)
632  addNode(N);
633  indexNodes();
634 }
635 
637  Start = 0;
638  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
639  if (!BFI.Working[Index].isPackaged())
640  addNode(Index);
641  indexNodes();
642 }
643 
645  for (auto &I : Nodes)
646  Lookup[I.Node.Index] = &I;
647 }
648 
650  const BFIBase::LoopData *OuterLoop) {
651  if (OuterLoop && OuterLoop->isHeader(Succ))
652  return;
653  auto L = Lookup.find(Succ.Index);
654  if (L == Lookup.end())
655  return;
656  IrrNode &SuccIrr = *L->second;
657  Irr.Edges.push_back(&SuccIrr);
658  SuccIrr.Edges.push_front(&Irr);
659  ++SuccIrr.NumIn;
660 }
661 
662 namespace llvm {
663 
664 template <> struct GraphTraits<IrreducibleGraph> {
666  using NodeRef = const GraphT::IrrNode *;
667  using ChildIteratorType = GraphT::IrrNode::iterator;
668 
669  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
671  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
672 };
673 
674 } // end namespace llvm
675 
676 /// Find extra irreducible headers.
677 ///
678 /// Find entry blocks and other blocks with backedges, which exist when \c G
679 /// contains irreducible sub-SCCs.
682  const IrreducibleGraph &G,
683  const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
684  LoopData::NodeList &Headers, LoopData::NodeList &Others) {
685  // Map from nodes in the SCC to whether it's an entry block.
687 
688  // InSCC also acts the set of nodes in the graph. Seed it.
689  for (const auto *I : SCC)
690  InSCC[I] = false;
691 
692  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
693  auto &Irr = *I->first;
694  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
695  if (InSCC.count(P))
696  continue;
697 
698  // This is an entry block.
699  I->second = true;
700  Headers.push_back(Irr.Node);
701  LLVM_DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node)
702  << "\n");
703  break;
704  }
705  }
706  assert(Headers.size() >= 2 &&
707  "Expected irreducible CFG; -loop-info is likely invalid");
708  if (Headers.size() == InSCC.size()) {
709  // Every block is a header.
710  llvm::sort(Headers);
711  return;
712  }
713 
714  // Look for extra headers from irreducible sub-SCCs.
715  for (const auto &I : InSCC) {
716  // Entry blocks are already headers.
717  if (I.second)
718  continue;
719 
720  auto &Irr = *I.first;
721  for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
722  // Skip forward edges.
723  if (P->Node < Irr.Node)
724  continue;
725 
726  // Skip predecessors from entry blocks. These can have inverted
727  // ordering.
728  if (InSCC.lookup(P))
729  continue;
730 
731  // Store the extra header.
732  Headers.push_back(Irr.Node);
733  LLVM_DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node)
734  << "\n");
735  break;
736  }
737  if (Headers.back() == Irr.Node)
738  // Added this as a header.
739  continue;
740 
741  // This is not a header.
742  Others.push_back(Irr.Node);
743  LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
744  }
745  llvm::sort(Headers);
746  llvm::sort(Others);
747 }
748 
751  LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
752  const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
753  // Translate the SCC into RPO.
754  LLVM_DEBUG(dbgs() << " - found-scc\n");
755 
756  LoopData::NodeList Headers;
757  LoopData::NodeList Others;
758  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
759 
760  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
761  Headers.end(), Others.begin(), Others.end());
762 
763  // Update loop hierarchy.
764  for (const auto &N : Loop->Nodes)
765  if (BFI.Working[N.Index].isLoopHeader())
766  BFI.Working[N.Index].Loop->Parent = &*Loop;
767  else
768  BFI.Working[N.Index].Loop = &*Loop;
769 }
770 
773  const IrreducibleGraph &G, LoopData *OuterLoop,
774  std::list<LoopData>::iterator Insert) {
775  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
776  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
777 
778  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
779  if (I->size() < 2)
780  continue;
781 
782  // Translate the SCC into RPO.
783  createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
784  }
785 
786  if (OuterLoop)
787  return make_range(std::next(Prev), Insert);
788  return make_range(Loops.begin(), Insert);
789 }
790 
791 void
793  OuterLoop.Exits.clear();
794  for (auto &Mass : OuterLoop.BackedgeMass)
795  Mass = BlockMass::getEmpty();
796  auto O = OuterLoop.Nodes.begin() + 1;
797  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
798  if (!Working[I->Index].isPackaged())
799  *O++ = *I;
800  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
801 }
802 
804  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");
805 
806  // Since the loop has more than one header block, the mass flowing back into
807  // each header will be different. Adjust the mass in each header loop to
808  // reflect the masses flowing through back edges.
809  //
810  // To do this, we distribute the initial mass using the backedge masses
811  // as weights for the distribution.
812  BlockMass LoopMass = BlockMass::getFull();
813  Distribution Dist;
814 
815  LLVM_DEBUG(dbgs() << "adjust-loop-header-mass:\n");
816  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
817  auto &HeaderNode = Loop.Nodes[H];
818  auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
819  LLVM_DEBUG(dbgs() << " - Add back edge mass for node "
820  << getBlockName(HeaderNode) << ": " << BackedgeMass
821  << "\n");
822  if (BackedgeMass.getMass() > 0)
823  Dist.addLocal(HeaderNode, BackedgeMass.getMass());
824  else
825  LLVM_DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n");
826  }
827 
828  DitheringDistributer D(Dist, LoopMass);
829 
830  LLVM_DEBUG(dbgs() << " Distribute loop mass " << LoopMass
831  << " to headers using above weights\n");
832  for (const Weight &W : Dist.Weights) {
833  BlockMass Taken = D.takeMass(W.Amount);
834  assert(W.Type == Weight::Local && "all weights should be local");
835  Working[W.TargetNode.Index].getMass() = Taken;
836  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
837  }
838 }
839 
841  BlockMass LoopMass = BlockMass::getFull();
842  DitheringDistributer D(Dist, LoopMass);
843  for (const Weight &W : Dist.Weights) {
844  BlockMass Taken = D.takeMass(W.Amount);
845  assert(W.Type == Weight::Local && "all weights should be local");
846  Working[W.TargetNode.Index].getMass() = Taken;
847  LLVM_DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
848  }
849 }
const NoneType None
Definition: None.h:24
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool IsPackaged
Whether this has been packaged.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static char getHexDigit(int N)
void push_back(const T &Elt)
Definition: SmallVector.h:218
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
raw_ostream & print(raw_ostream &OS) const
static NodeRef getEntryNode(const GraphT &G)
void addLocal(const BlockNode &Node, uint64_t Amount)
static ChildIteratorType child_end(NodeRef N)
F(f)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
Hexagon Hardware Loops
ProfileCount getEntryCount() const
Get the entry count for this function.
Definition: Function.cpp:1381
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
virtual std::string getBlockName(const BlockNode &Node) const
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
This file implements a class to represent arbitrary precision integral constant values and operations...
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:226
#define UINT64_MAX
Definition: DataTypes.h:83
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode *> &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
Graph of irreducible control flow.
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
std::string getLoopName(const LoopData &Loop) const
bool isHeader(const BlockNode &Node) const
static ChildIteratorType child_begin(NodeRef N)
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
bool isIrrLoopHeader(const BlockNode &Node)
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode *> &SCC)
#define P(N)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
ScaledNumber inverse() const
Definition: ScaledNumber.h:678
void distributeIrrLoopHeaderMass(Distribution &Dist)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
#define H(x, y, z)
Definition: MD5.cpp:57
Distribution of unscaled probability weight.
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
IntT toInt() const
Convert to the given integer type.
Definition: ScaledNumber.h:781
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:640
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
size_t size() const
Definition: SmallVector.h:53
void addExit(const BlockNode &Node, uint64_t Amount)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:971
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
const DataFlowGraph & G
Definition: RDFGraph.cpp:211
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Class for arbitrary precision integers.
Definition: APInt.h:70
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Definition: RDFGraph.h:513
static void combineWeightsByHashing(WeightList &Weights)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
void packageLoop(LoopData &Loop)
Package up a loop.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ScaledNumber< uint64_t > Scaled64
std::vector< WorkingData > Working
Loop data: see initializeLoops().
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
BlockFrequency getBlockFreq(const BlockNode &Node) const
Base class for BlockFrequencyInfoImpl.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.