20 #include "llvm/Config/llvm-config.h" 42 #define DEBUG_TYPE "block-freq" 50 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 62 for (
int Digits = 0; Digits < 16; ++Digits)
95 struct DitheringDistributer {
99 DitheringDistributer(Distribution &Dist,
const BlockMass &Mass);
106 DitheringDistributer::DitheringDistributer(Distribution &Dist,
109 RemWeight = Dist.Total;
114 assert(Weight &&
"invalid weight");
115 assert(Weight <= RemWeight);
125 Weight::DistType
Type) {
126 assert(Amount &&
"invalid weight of 0");
127 uint64_t NewTotal = Total + Amount;
130 bool IsOverflow = NewTotal < Total;
131 assert(!(DidOverflow && IsOverflow) &&
"unexpected repeated overflow");
132 DidOverflow |= IsOverflow;
138 Weights.push_back(Weight(Type, Node, Amount));
142 assert(OtherW.TargetNode.isValid());
147 assert(W.Type == OtherW.Type);
148 assert(W.TargetNode == OtherW.TargetNode);
149 assert(OtherW.Amount &&
"Expected non-zero weight");
150 if (W.Amount > W.Amount + OtherW.Amount)
154 W.Amount += OtherW.Amount;
159 llvm::sort(Weights, [](
const Weight &L,
const Weight &R) {
160 return L.TargetNode < R.TargetNode;
164 WeightList::iterator
O = Weights.begin();
165 for (WeightList::const_iterator
I = O, L = O,
E = Weights.end();
I !=
E;
170 for (++L; L !=
E && I->TargetNode == L->TargetNode; ++L)
175 Weights.erase(O, Weights.end());
183 for (
const Weight &
W : Weights)
187 if (Weights.size() == Combined.size())
192 Weights.reserve(Combined.size());
193 for (
const auto &
I : Combined)
194 Weights.push_back(
I.second);
199 if (Weights.size() > 128) {
212 return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
215 void Distribution::normalize() {
221 if (Weights.size() > 1)
225 if (Weights.size() == 1) {
227 Weights.front().Amount = 1;
238 else if (Total > UINT32_MAX)
245 assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
246 [](uint64_t Sum,
const Weight &
W) {
247 return Sum +
W.Amount;
249 "Expected total to be correct");
258 for (Weight &
W : Weights) {
261 assert(
W.TargetNode.isValid());
263 assert(
W.Amount <= UINT32_MAX);
268 assert(Total <= UINT32_MAX);
274 std::vector<FrequencyData>().
swap(Freqs);
275 IsIrrLoopHeader.clear();
276 std::vector<WorkingData>().
swap(Working);
284 std::vector<FrequencyData> SavedFreqs(std::move(BFI.
Freqs));
287 BFI.
Freqs = std::move(SavedFreqs);
299 auto isLoopHeader = [&OuterLoop](
const BlockNode &Node) {
300 return OuterLoop && OuterLoop->
isHeader(Node);
306 auto debugSuccessor = [&](
const char *
Type) {
308 <<
" [" <<
Type <<
"] weight = " << Weight;
309 if (!isLoopHeader(Resolved))
311 if (Resolved != Succ)
315 (void)debugSuccessor;
318 if (isLoopHeader(Resolved)) {
324 if (Working[Resolved.
Index].getContainingLoop() != OuterLoop) {
326 Dist.
addExit(Resolved, Weight);
330 if (Resolved < Pred) {
331 if (!isLoopHeader(Pred)) {
334 "unhandled irreducible control flow");
345 "unhandled irreducible control flow");
356 for (
const auto &
I : Loop.
Exits)
357 if (!addToDist(Dist, OuterLoop, Loop.
getHeader(),
I.first,
368 LLVM_DEBUG(
dbgs() <<
"compute-loop-scale: " << getLoopName(Loop) <<
"\n");
379 const Scaled64 InfiniteLoopScale(1, 12);
385 TotalBackedgeMass += Mass;
397 <<
" - scale = " << Loop.
Scale <<
"\n");
402 LLVM_DEBUG(
dbgs() <<
"packaging-loop: " << getLoopName(Loop) <<
"\n");
406 if (
auto *Loop = Working[M.
Index].getPackagedLoop())
415 const DitheringDistributer &
D,
const BlockNode &
T,
417 dbgs() <<
" => assign " << M <<
" (" << D.RemMass <<
")";
419 dbgs() <<
" [" << Desc <<
"]";
433 DitheringDistributer
D(Dist, Mass);
435 for (
const Weight &
W : Dist.
Weights) {
438 if (W.
Type == Weight::Local) {
445 assert(OuterLoop &&
"backedge or exit outside of loop");
448 if (W.
Type == Weight::Backedge) {
469 const unsigned MaxBits = 64;
470 const unsigned SpreadBits = (Max / Min).lg();
472 if (SpreadBits <= MaxBits - 3) {
481 ScalingFactor =
Scaled64(1, MaxBits) / Max;
485 LLVM_DEBUG(
dbgs() <<
"float-to-int: min = " << Min <<
", max = " << Max
486 <<
", factor = " << ScalingFactor <<
"\n");
491 << BFI.
Freqs[
Index].Scaled <<
", scaled = " << Scaled
492 <<
", int = " << BFI.
Freqs[
Index].Integer <<
"\n");
502 <<
": mass = " << Loop.Mass <<
", scale = " << Loop.Scale
504 Loop.Scale *= Loop.Mass.toScaled();
505 Loop.IsPackaged =
false;
506 LLVM_DEBUG(
dbgs() <<
" => combined-scale = " << Loop.Scale <<
"\n");
511 for (
const BlockNode &
N : Loop.Nodes) {
512 const auto &Working = BFI.
Working[N.Index];
513 Scaled64 &
F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
514 : BFI.
Freqs[N.Index].Scaled;
534 auto Min = Scaled64::getLargest();
535 auto Max = Scaled64::getZero();
538 Min = std::min(Min, Freqs[
Index].Scaled);
556 return Freqs[Node.
Index].Integer;
562 return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
567 uint64_t Freq)
const {
572 APInt BlockCount(128, EntryCount.getCount());
573 APInt BlockFreq(128, Freq);
574 APInt EntryFreq(128, getEntryFreq());
575 BlockCount *= BlockFreq;
578 BlockCount = (BlockCount + EntryFreq.
lshr(1)).udiv(EntryFreq);
579 return BlockCount.getLimitedValue();
586 return IsIrrLoopHeader.test(Node.
Index);
592 return Scaled64::getZero();
593 return Freqs[Node.
Index].Scaled;
599 assert(Node.
Index < Freqs.size() &&
"Expected legal index");
600 Freqs[Node.
Index].Integer = Freq;
616 return OS << getFloatingBlockFreq(Node);
625 return OS << Block / Entry;
631 for (
auto N : OuterLoop.
Nodes)
639 if (!
BFI.Working[
Index].isPackaged())
645 for (
auto &
I : Nodes)
651 if (OuterLoop && OuterLoop->
isHeader(Succ))
657 Irr.
Edges.push_back(&SuccIrr);
658 SuccIrr.
Edges.push_front(&Irr);
683 const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
689 for (
const auto *
I : SCC)
693 auto &Irr = *
I->first;
694 for (
const auto *
P :
make_range(Irr.pred_begin(), Irr.pred_end())) {
700 Headers.push_back(Irr.Node);
706 assert(Headers.size() >= 2 &&
707 "Expected irreducible CFG; -loop-info is likely invalid");
708 if (Headers.size() == InSCC.
size()) {
715 for (
const auto &
I : InSCC) {
720 auto &Irr = *
I.first;
721 for (
const auto *
P :
make_range(Irr.pred_begin(), Irr.pred_end())) {
723 if (
P->Node < Irr.Node)
732 Headers.push_back(Irr.Node);
737 if (Headers.back() == Irr.Node)
742 Others.push_back(Irr.Node);
751 LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
752 const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
760 auto Loop = BFI.
Loops.emplace(Insert, OuterLoop, Headers.begin(),
761 Headers.end(), Others.begin(), Others.end());
764 for (
const auto &
N :
Loop->Nodes)
765 if (BFI.
Working[
N.Index].isLoopHeader())
774 std::list<LoopData>::iterator Insert) {
775 assert((OuterLoop ==
nullptr) == (Insert ==
Loops.begin()));
776 auto Prev = OuterLoop ? std::prev(Insert) :
Loops.end();
798 if (!Working[
I->Index].isPackaged())
817 auto &HeaderNode = Loop.
Nodes[
H];
822 if (BackedgeMass.getMass() > 0)
823 Dist.
addLocal(HeaderNode, BackedgeMass.getMass());
828 DitheringDistributer
D(Dist, LoopMass);
831 <<
" to headers using above weights\n");
832 for (
const Weight &
W : Dist.
Weights) {
834 assert(W.
Type == Weight::Local &&
"all weights should be local");
842 DitheringDistributer
D(Dist, LoopMass);
843 for (
const Weight &
W : Dist.
Weights) {
845 assert(W.
Type == Weight::Local &&
"all weights should be local");
static void combineWeights(WeightList &Weights)
void addNodesInLoop(const BFIBase::LoopData &OuterLoop)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
static uint64_t shiftRightAndRound(uint64_t N, int Shift)
static void combineWeightsBySorting(WeightList &Weights)
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool IsPackaged
Whether this has been packaged.
This class represents lattice values for constants.
static char getHexDigit(int N)
void push_back(const T &Elt)
GraphT::IrrNode::iterator ChildIteratorType
iterator succ_begin() const
Stats about a block itself.
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockNode &Node) const
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
raw_ostream & print(raw_ostream &OS) const
static NodeRef getEntryNode(const GraphT &G)
void addLocal(const BlockNode &Node, uint64_t Amount)
static ChildIteratorType child_end(NodeRef N)
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
static void combineWeight(Weight &W, const Weight &OtherW)
BlockNode getHeader() const
uint32_t NumHeaders
Number of headers.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
void setBlockFreq(const BlockNode &Node, uint64_t Freq)
SparseBitVector IsIrrLoopHeader
Whether each block is an irreducible loop header.
iterator succ_end() const
ProfileCount getEntryCount() const
Get the entry count for this function.
Scaled64 getFloatingBlockFreq(const BlockNode &Node) const
virtual std::string getBlockName(const BlockNode &Node) const
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
void addEdge(IrrNode &Irr, const BlockNode &Succ, const BFIBase::LoopData *OuterLoop)
void adjustLoopHeaderMass(LoopData &Loop)
Adjust the mass of all headers in an irreducible loop.
static void debugAssign(const BlockFrequencyInfoImplBase &BFI, const DitheringDistributer &D, const BlockNode &T, const BlockMass &M, const char *Desc)
This file implements a class to represent arbitrary precision integral constant values and operations...
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static void findIrreducibleHeaders(const BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, const std::vector< const IrreducibleGraph::IrrNode *> &SCC, LoopData::NodeList &Headers, LoopData::NodeList &Others)
Find extra irreducible headers.
Graph of irreducible control flow.
ScaledNumber< uint64_t > Scaled64
void computeLoopScale(LoopData &Loop)
Compute the loop scale for a loop.
void addBackedge(const BlockNode &Node, uint64_t Amount)
std::string getLoopName(const LoopData &Loop) const
bool isHeader(const BlockNode &Node) const
static ChildIteratorType child_begin(NodeRef N)
bool addToDist(Distribution &Dist, const LoopData *OuterLoop, const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight)
Add an edge to the distribution.
bool isIrrLoopHeader(const BlockNode &Node)
static void createIrreducibleLoop(BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert, const std::vector< const IrreducibleGraph::IrrNode *> &SCC)
std::vector< FrequencyData > Freqs
Data about each block. This is used downstream.
bool isIrreducible() const
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max)
iterator_range< std::list< LoopData >::iterator > analyzeIrreducible(const bfi_detail::IrreducibleGraph &G, LoopData *OuterLoop, std::list< LoopData >::iterator Insert)
Analyze irreducible SCCs.
The instances of the Type class are immutable: once they are created, they are never changed...
ScaledNumber inverse() const
void distributeIrrLoopHeaderMass(Distribution &Dist)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Distribution of unscaled probability weight.
Optional< uint64_t > getProfileCountFromFreq(const Function &F, uint64_t Freq) const
IntT toInt() const
Convert to the given integer type.
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop)
Unwrap a loop package.
void clear()
Clear all memory.
void addNodesInFunction()
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
iterator erase(const_iterator CI)
void addExit(const BlockNode &Node, uint64_t Amount)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
void sort(IteratorTy Start, IteratorTy End)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
ExitMap Exits
Successor edges (and weights).
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static BlockMass getEmpty()
SmallVector< Weight, 4 > WeightList
ScaledNumber< uint64_t > toScaled() const
Convert to scaled number.
std::list< LoopData > Loops
Indexed information about loops.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A range adaptor for a pair of iterators.
std::string getBlockName(const BlockT *BB)
Get the name of a MachineBasicBlock.
Class for arbitrary precision integers.
void updateLoopWithIrreducible(LoopData &OuterLoop)
Update a loop after packaging irreducible SCCs inside of it.
SmallVector< NodeAddr< NodeBase * >, 4 > NodeList
Unscaled probability weight.
static void combineWeightsByHashing(WeightList &Weights)
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Represents a single loop in the control flow graph.
void packageLoop(LoopData &Loop)
Package up a loop.
static BlockMass getFull()
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void distributeMass(const BlockNode &Source, LoopData *OuterLoop, Distribution &Dist)
Distribute mass according to a distribution.
NodeList Nodes
Header and the members of the loop.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ScaledNumber< uint64_t > Scaled64
std::vector< WorkingData > Working
Loop data: see initializeLoops().
HeaderMassList BackedgeMass
Mass returned to each loop header.
This class implements an extremely fast bulk output stream that can only output to a stream...
Optional< uint64_t > getBlockProfileCount(const Function &F, const BlockNode &Node) const
BlockFrequency getBlockFreq(const BlockNode &Node) const
std::deque< const IrrNode * > Edges
Base class for BlockFrequencyInfoImpl.
bool addLoopSuccessorsToDist(const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist)
Add all edges out of a packaged loop to the distribution.
void unwrapLoops()
Unwrap loops.
HeaderMassList::difference_type getHeaderIndex(const BlockNode &B)
Representative of a block.
void finalizeMetrics()
Finalize frequency metrics.
WeightList Weights
Individual successor weights.