LLVM  8.0.1
MachineOutliner.cpp
Go to the documentation of this file.
1 //===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// Replaces repeated sequences of instructions with function calls.
12 ///
13 /// This works by placing every instruction from every basic block in a
14 /// suffix tree, and repeatedly querying that tree for repeated sequences of
15 /// instructions. If a sequence of instructions appears often, then it ought
16 /// to be beneficial to pull out into a function.
17 ///
18 /// The MachineOutliner communicates with a given target using hooks defined in
19 /// TargetInstrInfo.h. The target supplies the outliner with information on how
20 /// a specific sequence of instructions should be outlined. This information
21 /// is used to deduce the number of instructions necessary to
22 ///
23 /// * Create an outlined function
24 /// * Call that outlined function
25 ///
26 /// Targets must implement
27 /// * getOutliningCandidateInfo
28 /// * buildOutlinedFrame
29 /// * insertOutlinedCall
30 /// * isFunctionSafeToOutlineFrom
31 ///
32 /// in order to make use of the MachineOutliner.
33 ///
34 /// This was originally presented at the 2016 LLVM Developers' Meeting in the
35 /// talk "Reducing Code Size Using Outlining". For a high-level overview of
36 /// how this pass works, the talk is available on YouTube at
37 ///
38 /// https://www.youtube.com/watch?v=yorld-WSOeU
39 ///
40 /// The slides for the talk are available at
41 ///
42 /// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
43 ///
44 /// The talk provides an overview of how the outliner finds candidates and
45 /// ultimately outlines them. It describes how the main data structure for this
46 /// pass, the suffix tree, is queried and purged for candidates. It also gives
47 /// a simplified suffix tree construction algorithm for suffix trees based off
48 /// of the algorithm actually used here, Ukkonen's algorithm.
49 ///
50 /// For the original RFC for this pass, please see
51 ///
52 /// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
53 ///
54 /// For more information on the suffix tree data structure, please see
55 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
56 ///
57 //===----------------------------------------------------------------------===//
59 #include "llvm/ADT/DenseMap.h"
60 #include "llvm/ADT/Statistic.h"
61 #include "llvm/ADT/Twine.h"
66 #include "llvm/CodeGen/Passes.h"
69 #include "llvm/IR/DIBuilder.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/Mangler.h"
72 #include "llvm/Support/Allocator.h"
74 #include "llvm/Support/Debug.h"
76 #include <functional>
77 #include <map>
78 #include <sstream>
79 #include <tuple>
80 #include <vector>
81 
82 #define DEBUG_TYPE "machine-outliner"
83 
84 using namespace llvm;
85 using namespace ore;
86 using namespace outliner;
87 
88 STATISTIC(NumOutlined, "Number of candidates outlined");
89 STATISTIC(FunctionsCreated, "Number of functions created");
90 
91 // Set to true if the user wants the outliner to run on linkonceodr linkage
92 // functions. This is false by default because the linker can dedupe linkonceodr
93 // functions. Since the outliner is confined to a single module (modulo LTO),
94 // this is off by default. It should, however, be the default behaviour in
95 // LTO.
97  "enable-linkonceodr-outlining",
98  cl::Hidden,
99  cl::desc("Enable the machine outliner on linkonceodr functions"),
100  cl::init(false));
101 
102 namespace {
103 
104 /// Represents an undefined index in the suffix tree.
105 const unsigned EmptyIdx = -1;
106 
107 /// A node in a suffix tree which represents a substring or suffix.
108 ///
109 /// Each node has either no children or at least two children, with the root
110 /// being a exception in the empty tree.
111 ///
112 /// Children are represented as a map between unsigned integers and nodes. If
113 /// a node N has a child M on unsigned integer k, then the mapping represented
114 /// by N is a proper prefix of the mapping represented by M. Note that this,
115 /// although similar to a trie is somewhat different: each node stores a full
116 /// substring of the full mapping rather than a single character state.
117 ///
118 /// Each internal node contains a pointer to the internal node representing
119 /// the same string, but with the first character chopped off. This is stored
120 /// in \p Link. Each leaf node stores the start index of its respective
121 /// suffix in \p SuffixIdx.
122 struct SuffixTreeNode {
123 
124  /// The children of this node.
125  ///
126  /// A child existing on an unsigned integer implies that from the mapping
127  /// represented by the current node, there is a way to reach another
128  /// mapping by tacking that character on the end of the current string.
130 
131  /// The start index of this node's substring in the main string.
132  unsigned StartIdx = EmptyIdx;
133 
134  /// The end index of this node's substring in the main string.
135  ///
136  /// Every leaf node must have its \p EndIdx incremented at the end of every
137  /// step in the construction algorithm. To avoid having to update O(N)
138  /// nodes individually at the end of every step, the end index is stored
139  /// as a pointer.
140  unsigned *EndIdx = nullptr;
141 
142  /// For leaves, the start index of the suffix represented by this node.
143  ///
144  /// For all other nodes, this is ignored.
145  unsigned SuffixIdx = EmptyIdx;
146 
147  /// For internal nodes, a pointer to the internal node representing
148  /// the same sequence with the first character chopped off.
149  ///
150  /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
151  /// Ukkonen's algorithm does to achieve linear-time construction is
152  /// keep track of which node the next insert should be at. This makes each
153  /// insert O(1), and there are a total of O(N) inserts. The suffix link
154  /// helps with inserting children of internal nodes.
155  ///
156  /// Say we add a child to an internal node with associated mapping S. The
157  /// next insertion must be at the node representing S - its first character.
158  /// This is given by the way that we iteratively build the tree in Ukkonen's
159  /// algorithm. The main idea is to look at the suffixes of each prefix in the
160  /// string, starting with the longest suffix of the prefix, and ending with
161  /// the shortest. Therefore, if we keep pointers between such nodes, we can
162  /// move to the next insertion point in O(1) time. If we don't, then we'd
163  /// have to query from the root, which takes O(N) time. This would make the
164  /// construction algorithm O(N^2) rather than O(N).
165  SuffixTreeNode *Link = nullptr;
166 
167  /// The length of the string formed by concatenating the edge labels from the
168  /// root to this node.
169  unsigned ConcatLen = 0;
170 
171  /// Returns true if this node is a leaf.
172  bool isLeaf() const { return SuffixIdx != EmptyIdx; }
173 
174  /// Returns true if this node is the root of its owning \p SuffixTree.
175  bool isRoot() const { return StartIdx == EmptyIdx; }
176 
177  /// Return the number of elements in the substring associated with this node.
178  size_t size() const {
179 
180  // Is it the root? If so, it's the empty string so return 0.
181  if (isRoot())
182  return 0;
183 
184  assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
185 
186  // Size = the number of elements in the string.
187  // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
188  return *EndIdx - StartIdx + 1;
189  }
190 
191  SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
192  : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
193 
194  SuffixTreeNode() {}
195 };
196 
197 /// A data structure for fast substring queries.
198 ///
199 /// Suffix trees represent the suffixes of their input strings in their leaves.
200 /// A suffix tree is a type of compressed trie structure where each node
201 /// represents an entire substring rather than a single character. Each leaf
202 /// of the tree is a suffix.
203 ///
204 /// A suffix tree can be seen as a type of state machine where each state is a
205 /// substring of the full string. The tree is structured so that, for a string
206 /// of length N, there are exactly N leaves in the tree. This structure allows
207 /// us to quickly find repeated substrings of the input string.
208 ///
209 /// In this implementation, a "string" is a vector of unsigned integers.
210 /// These integers may result from hashing some data type. A suffix tree can
211 /// contain 1 or many strings, which can then be queried as one large string.
212 ///
213 /// The suffix tree is implemented using Ukkonen's algorithm for linear-time
214 /// suffix tree construction. Ukkonen's algorithm is explained in more detail
215 /// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
216 /// paper is available at
217 ///
218 /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
219 class SuffixTree {
220 public:
221  /// Each element is an integer representing an instruction in the module.
222  ArrayRef<unsigned> Str;
223 
224  /// A repeated substring in the tree.
225  struct RepeatedSubstring {
226  /// The length of the string.
227  unsigned Length;
228 
229  /// The start indices of each occurrence.
230  std::vector<unsigned> StartIndices;
231  };
232 
233 private:
234  /// Maintains each node in the tree.
236 
237  /// The root of the suffix tree.
238  ///
239  /// The root represents the empty string. It is maintained by the
240  /// \p NodeAllocator like every other node in the tree.
241  SuffixTreeNode *Root = nullptr;
242 
243  /// Maintains the end indices of the internal nodes in the tree.
244  ///
245  /// Each internal node is guaranteed to never have its end index change
246  /// during the construction algorithm; however, leaves must be updated at
247  /// every step. Therefore, we need to store leaf end indices by reference
248  /// to avoid updating O(N) leaves at every step of construction. Thus,
249  /// every internal node must be allocated its own end index.
250  BumpPtrAllocator InternalEndIdxAllocator;
251 
252  /// The end index of each leaf in the tree.
253  unsigned LeafEndIdx = -1;
254 
255  /// Helper struct which keeps track of the next insertion point in
256  /// Ukkonen's algorithm.
257  struct ActiveState {
258  /// The next node to insert at.
259  SuffixTreeNode *Node;
260 
261  /// The index of the first character in the substring currently being added.
262  unsigned Idx = EmptyIdx;
263 
264  /// The length of the substring we have to add at the current step.
265  unsigned Len = 0;
266  };
267 
268  /// The point the next insertion will take place at in the
269  /// construction algorithm.
270  ActiveState Active;
271 
272  /// Allocate a leaf node and add it to the tree.
273  ///
274  /// \param Parent The parent of this node.
275  /// \param StartIdx The start index of this node's associated string.
276  /// \param Edge The label on the edge leaving \p Parent to this node.
277  ///
278  /// \returns A pointer to the allocated leaf node.
279  SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
280  unsigned Edge) {
281 
282  assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
283 
284  SuffixTreeNode *N = new (NodeAllocator.Allocate())
285  SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
286  Parent.Children[Edge] = N;
287 
288  return N;
289  }
290 
291  /// Allocate an internal node and add it to the tree.
292  ///
293  /// \param Parent The parent of this node. Only null when allocating the root.
294  /// \param StartIdx The start index of this node's associated string.
295  /// \param EndIdx The end index of this node's associated string.
296  /// \param Edge The label on the edge leaving \p Parent to this node.
297  ///
298  /// \returns A pointer to the allocated internal node.
299  SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
300  unsigned EndIdx, unsigned Edge) {
301 
302  assert(StartIdx <= EndIdx && "String can't start after it ends!");
303  assert(!(!Parent && StartIdx != EmptyIdx) &&
304  "Non-root internal nodes must have parents!");
305 
306  unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
307  SuffixTreeNode *N = new (NodeAllocator.Allocate())
308  SuffixTreeNode(StartIdx, E, Root);
309  if (Parent)
310  Parent->Children[Edge] = N;
311 
312  return N;
313  }
314 
315  /// Set the suffix indices of the leaves to the start indices of their
316  /// respective suffixes.
317  ///
318  /// \param[in] CurrNode The node currently being visited.
319  /// \param CurrNodeLen The concatenation of all node sizes from the root to
320  /// this node. Used to produce suffix indices.
321  void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
322 
323  bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
324 
325  // Store the concatenation of lengths down from the root.
326  CurrNode.ConcatLen = CurrNodeLen;
327  // Traverse the tree depth-first.
328  for (auto &ChildPair : CurrNode.Children) {
329  assert(ChildPair.second && "Node had a null child!");
330  setSuffixIndices(*ChildPair.second,
331  CurrNodeLen + ChildPair.second->size());
332  }
333 
334  // Is this node a leaf? If it is, give it a suffix index.
335  if (IsLeaf)
336  CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
337  }
338 
339  /// Construct the suffix tree for the prefix of the input ending at
340  /// \p EndIdx.
341  ///
342  /// Used to construct the full suffix tree iteratively. At the end of each
343  /// step, the constructed suffix tree is either a valid suffix tree, or a
344  /// suffix tree with implicit suffixes. At the end of the final step, the
345  /// suffix tree is a valid tree.
346  ///
347  /// \param EndIdx The end index of the current prefix in the main string.
348  /// \param SuffixesToAdd The number of suffixes that must be added
349  /// to complete the suffix tree at the current phase.
350  ///
351  /// \returns The number of suffixes that have not been added at the end of
352  /// this step.
353  unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) {
354  SuffixTreeNode *NeedsLink = nullptr;
355 
356  while (SuffixesToAdd > 0) {
357 
358  // Are we waiting to add anything other than just the last character?
359  if (Active.Len == 0) {
360  // If not, then say the active index is the end index.
361  Active.Idx = EndIdx;
362  }
363 
364  assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
365 
366  // The first character in the current substring we're looking at.
367  unsigned FirstChar = Str[Active.Idx];
368 
369  // Have we inserted anything starting with FirstChar at the current node?
370  if (Active.Node->Children.count(FirstChar) == 0) {
371  // If not, then we can just insert a leaf and move too the next step.
372  insertLeaf(*Active.Node, EndIdx, FirstChar);
373 
374  // The active node is an internal node, and we visited it, so it must
375  // need a link if it doesn't have one.
376  if (NeedsLink) {
377  NeedsLink->Link = Active.Node;
378  NeedsLink = nullptr;
379  }
380  } else {
381  // There's a match with FirstChar, so look for the point in the tree to
382  // insert a new node.
383  SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
384 
385  unsigned SubstringLen = NextNode->size();
386 
387  // Is the current suffix we're trying to insert longer than the size of
388  // the child we want to move to?
389  if (Active.Len >= SubstringLen) {
390  // If yes, then consume the characters we've seen and move to the next
391  // node.
392  Active.Idx += SubstringLen;
393  Active.Len -= SubstringLen;
394  Active.Node = NextNode;
395  continue;
396  }
397 
398  // Otherwise, the suffix we're trying to insert must be contained in the
399  // next node we want to move to.
400  unsigned LastChar = Str[EndIdx];
401 
402  // Is the string we're trying to insert a substring of the next node?
403  if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
404  // If yes, then we're done for this step. Remember our insertion point
405  // and move to the next end index. At this point, we have an implicit
406  // suffix tree.
407  if (NeedsLink && !Active.Node->isRoot()) {
408  NeedsLink->Link = Active.Node;
409  NeedsLink = nullptr;
410  }
411 
412  Active.Len++;
413  break;
414  }
415 
416  // The string we're trying to insert isn't a substring of the next node,
417  // but matches up to a point. Split the node.
418  //
419  // For example, say we ended our search at a node n and we're trying to
420  // insert ABD. Then we'll create a new node s for AB, reduce n to just
421  // representing C, and insert a new leaf node l to represent d. This
422  // allows us to ensure that if n was a leaf, it remains a leaf.
423  //
424  // | ABC ---split---> | AB
425  // n s
426  // C / \ D
427  // n l
428 
429  // The node s from the diagram
430  SuffixTreeNode *SplitNode =
431  insertInternalNode(Active.Node, NextNode->StartIdx,
432  NextNode->StartIdx + Active.Len - 1, FirstChar);
433 
434  // Insert the new node representing the new substring into the tree as
435  // a child of the split node. This is the node l from the diagram.
436  insertLeaf(*SplitNode, EndIdx, LastChar);
437 
438  // Make the old node a child of the split node and update its start
439  // index. This is the node n from the diagram.
440  NextNode->StartIdx += Active.Len;
441  SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
442 
443  // SplitNode is an internal node, update the suffix link.
444  if (NeedsLink)
445  NeedsLink->Link = SplitNode;
446 
447  NeedsLink = SplitNode;
448  }
449 
450  // We've added something new to the tree, so there's one less suffix to
451  // add.
452  SuffixesToAdd--;
453 
454  if (Active.Node->isRoot()) {
455  if (Active.Len > 0) {
456  Active.Len--;
457  Active.Idx = EndIdx - SuffixesToAdd + 1;
458  }
459  } else {
460  // Start the next phase at the next smallest suffix.
461  Active.Node = Active.Node->Link;
462  }
463  }
464 
465  return SuffixesToAdd;
466  }
467 
468 public:
469  /// Construct a suffix tree from a sequence of unsigned integers.
470  ///
471  /// \param Str The string to construct the suffix tree for.
472  SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
473  Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
474  Active.Node = Root;
475 
476  // Keep track of the number of suffixes we have to add of the current
477  // prefix.
478  unsigned SuffixesToAdd = 0;
479  Active.Node = Root;
480 
481  // Construct the suffix tree iteratively on each prefix of the string.
482  // PfxEndIdx is the end index of the current prefix.
483  // End is one past the last element in the string.
484  for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End;
485  PfxEndIdx++) {
486  SuffixesToAdd++;
487  LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
488  SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
489  }
490 
491  // Set the suffix indices of each leaf.
492  assert(Root && "Root node can't be nullptr!");
493  setSuffixIndices(*Root, 0);
494  }
495 
496 
497  /// Iterator for finding all repeated substrings in the suffix tree.
498  struct RepeatedSubstringIterator {
499  private:
500  /// The current node we're visiting.
501  SuffixTreeNode *N = nullptr;
502 
503  /// The repeated substring associated with this node.
504  RepeatedSubstring RS;
505 
506  /// The nodes left to visit.
507  std::vector<SuffixTreeNode *> ToVisit;
508 
509  /// The minimum length of a repeated substring to find.
510  /// Since we're outlining, we want at least two instructions in the range.
511  /// FIXME: This may not be true for targets like X86 which support many
512  /// instruction lengths.
513  const unsigned MinLength = 2;
514 
515  /// Move the iterator to the next repeated substring.
516  void advance() {
517  // Clear the current state. If we're at the end of the range, then this
518  // is the state we want to be in.
519  RS = RepeatedSubstring();
520  N = nullptr;
521 
522  // Each leaf node represents a repeat of a string.
523  std::vector<SuffixTreeNode *> LeafChildren;
524 
525  // Continue visiting nodes until we find one which repeats more than once.
526  while (!ToVisit.empty()) {
527  SuffixTreeNode *Curr = ToVisit.back();
528  ToVisit.pop_back();
529  LeafChildren.clear();
530 
531  // Keep track of the length of the string associated with the node. If
532  // it's too short, we'll quit.
533  unsigned Length = Curr->ConcatLen;
534 
535  // Iterate over each child, saving internal nodes for visiting, and
536  // leaf nodes in LeafChildren. Internal nodes represent individual
537  // strings, which may repeat.
538  for (auto &ChildPair : Curr->Children) {
539  // Save all of this node's children for processing.
540  if (!ChildPair.second->isLeaf())
541  ToVisit.push_back(ChildPair.second);
542 
543  // It's not an internal node, so it must be a leaf. If we have a
544  // long enough string, then save the leaf children.
545  else if (Length >= MinLength)
546  LeafChildren.push_back(ChildPair.second);
547  }
548 
549  // The root never represents a repeated substring. If we're looking at
550  // that, then skip it.
551  if (Curr->isRoot())
552  continue;
553 
554  // Do we have any repeated substrings?
555  if (LeafChildren.size() >= 2) {
556  // Yes. Update the state to reflect this, and then bail out.
557  N = Curr;
558  RS.Length = Length;
559  for (SuffixTreeNode *Leaf : LeafChildren)
560  RS.StartIndices.push_back(Leaf->SuffixIdx);
561  break;
562  }
563  }
564 
565  // At this point, either NewRS is an empty RepeatedSubstring, or it was
566  // set in the above loop. Similarly, N is either nullptr, or the node
567  // associated with NewRS.
568  }
569 
570  public:
571  /// Return the current repeated substring.
572  RepeatedSubstring &operator*() { return RS; }
573 
574  RepeatedSubstringIterator &operator++() {
575  advance();
576  return *this;
577  }
578 
579  RepeatedSubstringIterator operator++(int I) {
580  RepeatedSubstringIterator It(*this);
581  advance();
582  return It;
583  }
584 
585  bool operator==(const RepeatedSubstringIterator &Other) {
586  return N == Other.N;
587  }
588  bool operator!=(const RepeatedSubstringIterator &Other) {
589  return !(*this == Other);
590  }
591 
592  RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
593  // Do we have a non-null node?
594  if (N) {
595  // Yes. At the first step, we need to visit all of N's children.
596  // Note: This means that we visit N last.
597  ToVisit.push_back(N);
598  advance();
599  }
600  }
601 };
602 
603  typedef RepeatedSubstringIterator iterator;
604  iterator begin() { return iterator(Root); }
605  iterator end() { return iterator(nullptr); }
606 };
607 
608 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
609 struct InstructionMapper {
610 
611  /// The next available integer to assign to a \p MachineInstr that
612  /// cannot be outlined.
613  ///
614  /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
615  unsigned IllegalInstrNumber = -3;
616 
617  /// The next available integer to assign to a \p MachineInstr that can
618  /// be outlined.
619  unsigned LegalInstrNumber = 0;
620 
621  /// Correspondence from \p MachineInstrs to unsigned integers.
623  InstructionIntegerMap;
624 
625  /// Correspondence between \p MachineBasicBlocks and target-defined flags.
627 
628  /// The vector of unsigned integers that the module is mapped to.
629  std::vector<unsigned> UnsignedVec;
630 
631  /// Stores the location of the instruction associated with the integer
632  /// at index i in \p UnsignedVec for each index i.
633  std::vector<MachineBasicBlock::iterator> InstrList;
634 
635  // Set if we added an illegal number in the previous step.
636  // Since each illegal number is unique, we only need one of them between
637  // each range of legal numbers. This lets us make sure we don't add more
638  // than one illegal number per range.
639  bool AddedIllegalLastTime = false;
640 
641  /// Maps \p *It to a legal integer.
642  ///
643  /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
644  /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
645  ///
646  /// \returns The integer that \p *It was mapped to.
647  unsigned mapToLegalUnsigned(
648  MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
649  bool &HaveLegalRange, unsigned &NumLegalInBlock,
650  std::vector<unsigned> &UnsignedVecForMBB,
651  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
652  // We added something legal, so we should unset the AddedLegalLastTime
653  // flag.
654  AddedIllegalLastTime = false;
655 
656  // If we have at least two adjacent legal instructions (which may have
657  // invisible instructions in between), remember that.
658  if (CanOutlineWithPrevInstr)
659  HaveLegalRange = true;
660  CanOutlineWithPrevInstr = true;
661 
662  // Keep track of the number of legal instructions we insert.
663  NumLegalInBlock++;
664 
665  // Get the integer for this instruction or give it the current
666  // LegalInstrNumber.
667  InstrListForMBB.push_back(It);
668  MachineInstr &MI = *It;
669  bool WasInserted;
671  ResultIt;
672  std::tie(ResultIt, WasInserted) =
673  InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
674  unsigned MINumber = ResultIt->second;
675 
676  // There was an insertion.
677  if (WasInserted)
678  LegalInstrNumber++;
679 
680  UnsignedVecForMBB.push_back(MINumber);
681 
682  // Make sure we don't overflow or use any integers reserved by the DenseMap.
683  if (LegalInstrNumber >= IllegalInstrNumber)
684  report_fatal_error("Instruction mapping overflow!");
685 
686  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
687  "Tried to assign DenseMap tombstone or empty key to instruction.");
688  assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
689  "Tried to assign DenseMap tombstone or empty key to instruction.");
690 
691  return MINumber;
692  }
693 
694  /// Maps \p *It to an illegal integer.
695  ///
696  /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
697  /// IllegalInstrNumber.
698  ///
699  /// \returns The integer that \p *It was mapped to.
700  unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
701  bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
702  std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
703  // Can't outline an illegal instruction. Set the flag.
704  CanOutlineWithPrevInstr = false;
705 
706  // Only add one illegal number per range of legal numbers.
707  if (AddedIllegalLastTime)
708  return IllegalInstrNumber;
709 
710  // Remember that we added an illegal number last time.
711  AddedIllegalLastTime = true;
712  unsigned MINumber = IllegalInstrNumber;
713 
714  InstrListForMBB.push_back(It);
715  UnsignedVecForMBB.push_back(IllegalInstrNumber);
716  IllegalInstrNumber--;
717 
718  assert(LegalInstrNumber < IllegalInstrNumber &&
719  "Instruction mapping overflow!");
720 
721  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
722  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
723 
724  assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
725  "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
726 
727  return MINumber;
728  }
729 
730  /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
731  /// and appends it to \p UnsignedVec and \p InstrList.
732  ///
733  /// Two instructions are assigned the same integer if they are identical.
734  /// If an instruction is deemed unsafe to outline, then it will be assigned an
735  /// unique integer. The resulting mapping is placed into a suffix tree and
736  /// queried for candidates.
737  ///
738  /// \param MBB The \p MachineBasicBlock to be translated into integers.
739  /// \param TII \p TargetInstrInfo for the function.
740  void convertToUnsignedVec(MachineBasicBlock &MBB,
741  const TargetInstrInfo &TII) {
742  unsigned Flags = 0;
743 
744  // Don't even map in this case.
745  if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
746  return;
747 
748  // Store info for the MBB for later outlining.
749  MBBFlagsMap[&MBB] = Flags;
750 
752 
753  // The number of instructions in this block that will be considered for
754  // outlining.
755  unsigned NumLegalInBlock = 0;
756 
757  // True if we have at least two legal instructions which aren't separated
758  // by an illegal instruction.
759  bool HaveLegalRange = false;
760 
761  // True if we can perform outlining given the last mapped (non-invisible)
762  // instruction. This lets us know if we have a legal range.
763  bool CanOutlineWithPrevInstr = false;
764 
765  // FIXME: Should this all just be handled in the target, rather than using
766  // repeated calls to getOutliningType?
767  std::vector<unsigned> UnsignedVecForMBB;
768  std::vector<MachineBasicBlock::iterator> InstrListForMBB;
769 
770  for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
771  // Keep track of where this instruction is in the module.
772  switch (TII.getOutliningType(It, Flags)) {
773  case InstrType::Illegal:
774  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
775  UnsignedVecForMBB, InstrListForMBB);
776  break;
777 
778  case InstrType::Legal:
779  mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
780  NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
781  break;
782 
784  mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
785  NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
786  // The instruction also acts as a terminator, so we have to record that
787  // in the string.
788  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
789  InstrListForMBB);
790  break;
791 
793  // Normally this is set by mapTo(Blah)Unsigned, but we just want to
794  // skip this instruction. So, unset the flag here.
795  AddedIllegalLastTime = false;
796  break;
797  }
798  }
799 
800  // Are there enough legal instructions in the block for outlining to be
801  // possible?
802  if (HaveLegalRange) {
803  // After we're done every insertion, uniquely terminate this part of the
804  // "string". This makes sure we won't match across basic block or function
805  // boundaries since the "end" is encoded uniquely and thus appears in no
806  // repeated substring.
807  mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
808  InstrListForMBB);
809  InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
810  InstrListForMBB.end());
811  UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
812  UnsignedVecForMBB.end());
813  }
814  }
815 
816  InstructionMapper() {
817  // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
818  // changed.
819  assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
820  "DenseMapInfo<unsigned>'s empty key isn't -1!");
822  "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
823  }
824 };
825 
826 /// An interprocedural pass which finds repeated sequences of
827 /// instructions and replaces them with calls to functions.
828 ///
829 /// Each instruction is mapped to an unsigned integer and placed in a string.
830 /// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
831 /// is then repeatedly queried for repeated sequences of instructions. Each
832 /// non-overlapping repeated sequence is then placed in its own
833 /// \p MachineFunction and each instance is then replaced with a call to that
834 /// function.
835 struct MachineOutliner : public ModulePass {
836 
837  static char ID;
838 
839  /// Set to true if the outliner should consider functions with
840  /// linkonceodr linkage.
841  bool OutlineFromLinkOnceODRs = false;
842 
843  /// Set to true if the outliner should run on all functions in the module
844  /// considered safe for outlining.
845  /// Set to true by default for compatibility with llc's -run-pass option.
846  /// Set when the pass is constructed in TargetPassConfig.
847  bool RunOnAllFunctions = true;
848 
849  StringRef getPassName() const override { return "Machine Outliner"; }
850 
851  void getAnalysisUsage(AnalysisUsage &AU) const override {
854  AU.setPreservesAll();
856  }
857 
858  MachineOutliner() : ModulePass(ID) {
860  }
861 
862  /// Remark output explaining that not outlining a set of candidates would be
863  /// better than outlining that set.
864  void emitNotOutliningCheaperRemark(
865  unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
866  OutlinedFunction &OF);
867 
868  /// Remark output explaining that a function was outlined.
869  void emitOutlinedFunctionRemark(OutlinedFunction &OF);
870 
871  /// Find all repeated substrings that satisfy the outlining cost model by
872  /// constructing a suffix tree.
873  ///
874  /// If a substring appears at least twice, then it must be represented by
875  /// an internal node which appears in at least two suffixes. Each suffix
876  /// is represented by a leaf node. To do this, we visit each internal node
877  /// in the tree, using the leaf children of each internal node. If an
878  /// internal node represents a beneficial substring, then we use each of
879  /// its leaf children to find the locations of its substring.
880  ///
881  /// \param Mapper Contains outlining mapping information.
882  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
883  /// each type of candidate.
884  void findCandidates(InstructionMapper &Mapper,
885  std::vector<OutlinedFunction> &FunctionList);
886 
887  /// Replace the sequences of instructions represented by \p OutlinedFunctions
888  /// with calls to functions.
889  ///
890  /// \param M The module we are outlining from.
891  /// \param FunctionList A list of functions to be inserted into the module.
892  /// \param Mapper Contains the instruction mappings for the module.
893  bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
894  InstructionMapper &Mapper);
895 
896  /// Creates a function for \p OF and inserts it into the module.
897  MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
898  InstructionMapper &Mapper,
899  unsigned Name);
900 
901  /// Construct a suffix tree on the instructions in \p M and outline repeated
902  /// strings from that tree.
903  bool runOnModule(Module &M) override;
904 
905  /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
906  /// function for remark emission.
907  DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
908  DISubprogram *SP;
909  for (const Candidate &C : OF.Candidates)
910  if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
911  return SP;
912  return nullptr;
913  }
914 
915  /// Populate and \p InstructionMapper with instruction-to-integer mappings.
916  /// These are used to construct a suffix tree.
917  void populateMapper(InstructionMapper &Mapper, Module &M,
918  MachineModuleInfo &MMI);
919 
920  /// Initialize information necessary to output a size remark.
921  /// FIXME: This should be handled by the pass manager, not the outliner.
922  /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
923  /// pass manager.
924  void initSizeRemarkInfo(
925  const Module &M, const MachineModuleInfo &MMI,
926  StringMap<unsigned> &FunctionToInstrCount);
927 
928  /// Emit the remark.
929  // FIXME: This should be handled by the pass manager, not the outliner.
930  void emitInstrCountChangedRemark(
931  const Module &M, const MachineModuleInfo &MMI,
932  const StringMap<unsigned> &FunctionToInstrCount);
933 };
934 } // Anonymous namespace.
935 
936 char MachineOutliner::ID = 0;
937 
938 namespace llvm {
939 ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
940  MachineOutliner *OL = new MachineOutliner();
941  OL->RunOnAllFunctions = RunOnAllFunctions;
942  return OL;
943 }
944 
945 } // namespace llvm
946 
947 INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
948  false)
949 
950 void MachineOutliner::emitNotOutliningCheaperRemark(
951  unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
952  OutlinedFunction &OF) {
953  // FIXME: Right now, we arbitrarily choose some Candidate from the
954  // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
955  // We should probably sort these by function name or something to make sure
956  // the remarks are stable.
957  Candidate &C = CandidatesForRepeatedSeq.front();
958  MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
959  MORE.emit([&]() {
960  MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
961  C.front()->getDebugLoc(), C.getMBB());
962  R << "Did not outline " << NV("Length", StringLen) << " instructions"
963  << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
964  << " locations."
965  << " Bytes from outlining all occurrences ("
966  << NV("OutliningCost", OF.getOutliningCost()) << ")"
967  << " >= Unoutlined instruction bytes ("
968  << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
969  << " (Also found at: ";
970 
971  // Tell the user the other places the candidate was found.
972  for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
973  R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
974  CandidatesForRepeatedSeq[i].front()->getDebugLoc());
975  if (i != e - 1)
976  R << ", ";
977  }
978 
979  R << ")";
980  return R;
981  });
982 }
983 
984 void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
985  MachineBasicBlock *MBB = &*OF.MF->begin();
986  MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
987  MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
988  MBB->findDebugLoc(MBB->begin()), MBB);
989  R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
990  << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
991  << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
992  << " locations. "
993  << "(Found at: ";
994 
995  // Tell the user the other places the candidate was found.
996  for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
997 
998  R << NV((Twine("StartLoc") + Twine(i)).str(),
999  OF.Candidates[i].front()->getDebugLoc());
1000  if (i != e - 1)
1001  R << ", ";
1002  }
1003 
1004  R << ")";
1005 
1006  MORE.emit(R);
1007 }
1008 
1009 void
1010 MachineOutliner::findCandidates(InstructionMapper &Mapper,
1011  std::vector<OutlinedFunction> &FunctionList) {
1012  FunctionList.clear();
1013  SuffixTree ST(Mapper.UnsignedVec);
1014 
1015  // First, find dall of the repeated substrings in the tree of minimum length
1016  // 2.
1017  std::vector<Candidate> CandidatesForRepeatedSeq;
1018  for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
1019  CandidatesForRepeatedSeq.clear();
1020  SuffixTree::RepeatedSubstring RS = *It;
1021  unsigned StringLen = RS.Length;
1022  for (const unsigned &StartIdx : RS.StartIndices) {
1023  unsigned EndIdx = StartIdx + StringLen - 1;
1024  // Trick: Discard some candidates that would be incompatible with the
1025  // ones we've already found for this sequence. This will save us some
1026  // work in candidate selection.
1027  //
1028  // If two candidates overlap, then we can't outline them both. This
1029  // happens when we have candidates that look like, say
1030  //
1031  // AA (where each "A" is an instruction).
1032  //
1033  // We might have some portion of the module that looks like this:
1034  // AAAAAA (6 A's)
1035  //
1036  // In this case, there are 5 different copies of "AA" in this range, but
1037  // at most 3 can be outlined. If only outlining 3 of these is going to
1038  // be unbeneficial, then we ought to not bother.
1039  //
1040  // Note that two things DON'T overlap when they look like this:
1041  // start1...end1 .... start2...end2
1042  // That is, one must either
1043  // * End before the other starts
1044  // * Start after the other ends
1045  if (std::all_of(
1046  CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
1047  [&StartIdx, &EndIdx](const Candidate &C) {
1048  return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
1049  })) {
1050  // It doesn't overlap with anything, so we can outline it.
1051  // Each sequence is over [StartIt, EndIt].
1052  // Save the candidate and its location.
1053 
1054  MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
1055  MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
1056  MachineBasicBlock *MBB = StartIt->getParent();
1057 
1058  CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
1059  EndIt, MBB, FunctionList.size(),
1060  Mapper.MBBFlagsMap[MBB]);
1061  }
1062  }
1063 
1064  // We've found something we might want to outline.
1065  // Create an OutlinedFunction to store it and check if it'd be beneficial
1066  // to outline.
1067  if (CandidatesForRepeatedSeq.size() < 2)
1068  continue;
1069 
1070  // Arbitrarily choose a TII from the first candidate.
1071  // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
1072  const TargetInstrInfo *TII =
1073  CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
1074 
1075  OutlinedFunction OF =
1076  TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
1077 
1078  // If we deleted too many candidates, then there's nothing worth outlining.
1079  // FIXME: This should take target-specified instruction sizes into account.
1080  if (OF.Candidates.size() < 2)
1081  continue;
1082 
1083  // Is it better to outline this candidate than not?
1084  if (OF.getBenefit() < 1) {
1085  emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
1086  continue;
1087  }
1088 
1089  FunctionList.push_back(OF);
1090  }
1091 }
1092 
1094 MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
1095  InstructionMapper &Mapper,
1096  unsigned Name) {
1097 
1098  // Create the function name. This should be unique. For now, just hash the
1099  // module name and include it in the function name plus the number of this
1100  // function.
1101  std::ostringstream NameStream;
1102  // FIXME: We should have a better naming scheme. This should be stable,
1103  // regardless of changes to the outliner's cost model/traversal order.
1104  NameStream << "OUTLINED_FUNCTION_" << Name;
1105 
1106  // Create the function using an IR-level function.
1107  LLVMContext &C = M.getContext();
1109  M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
1110  assert(F && "Function was null!");
1111 
1112  // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
1113  // which gives us better results when we outline from linkonceodr functions.
1116 
1117  // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's
1118  // necessary.
1119 
1120  // Set optsize/minsize, so we don't insert padding between outlined
1121  // functions.
1124 
1125  // Include target features from an arbitrary candidate for the outlined
1126  // function. This makes sure the outlined function knows what kinds of
1127  // instructions are going into it. This is fine, since all parent functions
1128  // must necessarily support the instructions that are in the outlined region.
1129  Candidate &FirstCand = OF.Candidates.front();
1130  const Function &ParentFn = FirstCand.getMF()->getFunction();
1131  if (ParentFn.hasFnAttribute("target-features"))
1132  F->addFnAttr(ParentFn.getFnAttribute("target-features"));
1133 
1134  BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
1135  IRBuilder<> Builder(EntryBB);
1136  Builder.CreateRetVoid();
1137 
1138  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1140  MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
1141  const TargetSubtargetInfo &STI = MF.getSubtarget();
1142  const TargetInstrInfo &TII = *STI.getInstrInfo();
1143 
1144  // Insert the new function into the module.
1145  MF.insert(MF.begin(), &MBB);
1146 
1147  for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
1148  ++I) {
1149  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
1150  NewMI->dropMemRefs(MF);
1151 
1152  // Don't keep debug information for outlined instructions.
1153  NewMI->setDebugLoc(DebugLoc());
1154  MBB.insert(MBB.end(), NewMI);
1155  }
1156 
1157  TII.buildOutlinedFrame(MBB, MF, OF);
1158 
1159  // Outlined functions shouldn't preserve liveness.
1160  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
1161  MF.getRegInfo().freezeReservedRegs(MF);
1162 
1163  // If there's a DISubprogram associated with this outlined function, then
1164  // emit debug info for the outlined function.
1165  if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1166  // We have a DISubprogram. Get its DICompileUnit.
1167  DICompileUnit *CU = SP->getUnit();
1168  DIBuilder DB(M, true, CU);
1169  DIFile *Unit = SP->getFile();
1170  Mangler Mg;
1171  // Get the mangled name of the function for the linkage name.
1172  std::string Dummy;
1173  llvm::raw_string_ostream MangledNameStream(Dummy);
1174  Mg.getNameWithPrefix(MangledNameStream, F, false);
1175 
1176  DISubprogram *OutlinedSP = DB.createFunction(
1177  Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()),
1178  Unit /* File */,
1179  0 /* Line 0 is reserved for compiler-generated code. */,
1180  DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
1181  0, /* Line 0 is reserved for compiler-generated code. */
1182  DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1183  /* Outlined code is optimized code by definition. */
1184  DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1185 
1186  // Don't add any new variables to the subprogram.
1187  DB.finalizeSubprogram(OutlinedSP);
1188 
1189  // Attach subprogram to the function.
1190  F->setSubprogram(OutlinedSP);
1191  // We're done with the DIBuilder.
1192  DB.finalize();
1193  }
1194 
1195  return &MF;
1196 }
1197 
1198 bool MachineOutliner::outline(Module &M,
1199  std::vector<OutlinedFunction> &FunctionList,
1200  InstructionMapper &Mapper) {
1201 
1202  bool OutlinedSomething = false;
1203 
1204  // Number to append to the current outlined function.
1205  unsigned OutlinedFunctionNum = 0;
1206 
1207  // Sort by benefit. The most beneficial functions should be outlined first.
1208  std::stable_sort(
1209  FunctionList.begin(), FunctionList.end(),
1210  [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
1211  return LHS.getBenefit() > RHS.getBenefit();
1212  });
1213 
1214  // Walk over each function, outlining them as we go along. Functions are
1215  // outlined greedily, based off the sort above.
1216  for (OutlinedFunction &OF : FunctionList) {
1217  // If we outlined something that overlapped with a candidate in a previous
1218  // step, then we can't outline from it.
1219  erase_if(OF.Candidates, [&Mapper](Candidate &C) {
1220  return std::any_of(
1221  Mapper.UnsignedVec.begin() + C.getStartIdx(),
1222  Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1223  [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
1224  });
1225 
1226  // If we made it unbeneficial to outline this function, skip it.
1227  if (OF.getBenefit() < 1)
1228  continue;
1229 
1230  // It's beneficial. Create the function and outline its sequence's
1231  // occurrences.
1232  OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
1233  emitOutlinedFunctionRemark(OF);
1234  FunctionsCreated++;
1235  OutlinedFunctionNum++; // Created a function, move to the next name.
1236  MachineFunction *MF = OF.MF;
1237  const TargetSubtargetInfo &STI = MF->getSubtarget();
1238  const TargetInstrInfo &TII = *STI.getInstrInfo();
1239 
1240  // Replace occurrences of the sequence with calls to the new function.
1241  for (Candidate &C : OF.Candidates) {
1242  MachineBasicBlock &MBB = *C.getMBB();
1243  MachineBasicBlock::iterator StartIt = C.front();
1244  MachineBasicBlock::iterator EndIt = C.back();
1245 
1246  // Insert the call.
1247  auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
1248 
1249  // If the caller tracks liveness, then we need to make sure that
1250  // anything we outline doesn't break liveness assumptions. The outlined
1251  // functions themselves currently don't track liveness, but we should
1252  // make sure that the ranges we yank things out of aren't wrong.
1253  if (MBB.getParent()->getProperties().hasProperty(
1255  // Helper lambda for adding implicit def operands to the call
1256  // instruction.
1257  auto CopyDefs = [&CallInst](MachineInstr &MI) {
1258  for (MachineOperand &MOP : MI.operands()) {
1259  // Skip over anything that isn't a register.
1260  if (!MOP.isReg())
1261  continue;
1262 
1263  // If it's a def, add it to the call instruction.
1264  if (MOP.isDef())
1265  CallInst->addOperand(MachineOperand::CreateReg(
1266  MOP.getReg(), true, /* isDef = true */
1267  true /* isImp = true */));
1268  }
1269  };
1270  // Copy over the defs in the outlined range.
1271  // First inst in outlined range <-- Anything that's defined in this
1272  // ... .. range has to be added as an
1273  // implicit Last inst in outlined range <-- def to the call
1274  // instruction.
1275  std::for_each(CallInst, std::next(EndIt), CopyDefs);
1276  }
1277 
1278  // Erase from the point after where the call was inserted up to, and
1279  // including, the final instruction in the sequence.
1280  // Erase needs one past the end, so we need std::next there too.
1281  MBB.erase(std::next(StartIt), std::next(EndIt));
1282 
1283  // Keep track of what we removed by marking them all as -1.
1284  std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
1285  Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
1286  [](unsigned &I) { I = static_cast<unsigned>(-1); });
1287  OutlinedSomething = true;
1288 
1289  // Statistics.
1290  NumOutlined++;
1291  }
1292  }
1293 
1294  LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);
1295 
1296  return OutlinedSomething;
1297 }
1298 
1299 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
1300  MachineModuleInfo &MMI) {
1301  // Build instruction mappings for each function in the module. Start by
1302  // iterating over each Function in M.
1303  for (Function &F : M) {
1304 
1305  // If there's nothing in F, then there's no reason to try and outline from
1306  // it.
1307  if (F.empty())
1308  continue;
1309 
1310  // There's something in F. Check if it has a MachineFunction associated with
1311  // it.
1313 
1314  // If it doesn't, then there's nothing to outline from. Move to the next
1315  // Function.
1316  if (!MF)
1317  continue;
1318 
1319  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1320 
1321  if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
1322  continue;
1323 
1324  // We have a MachineFunction. Ask the target if it's suitable for outlining.
1325  // If it isn't, then move on to the next Function in the module.
1326  if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
1327  continue;
1328 
1329  // We have a function suitable for outlining. Iterate over every
1330  // MachineBasicBlock in MF and try to map its instructions to a list of
1331  // unsigned integers.
1332  for (MachineBasicBlock &MBB : *MF) {
1333  // If there isn't anything in MBB, then there's no point in outlining from
1334  // it.
1335  // If there are fewer than 2 instructions in the MBB, then it can't ever
1336  // contain something worth outlining.
1337  // FIXME: This should be based off of the maximum size in B of an outlined
1338  // call versus the size in B of the MBB.
1339  if (MBB.empty() || MBB.size() < 2)
1340  continue;
1341 
1342  // Check if MBB could be the target of an indirect branch. If it is, then
1343  // we don't want to outline from it.
1344  if (MBB.hasAddressTaken())
1345  continue;
1346 
1347  // MBB is suitable for outlining. Map it to a list of unsigneds.
1348  Mapper.convertToUnsignedVec(MBB, *TII);
1349  }
1350  }
1351 }
1352 
1353 void MachineOutliner::initSizeRemarkInfo(
1354  const Module &M, const MachineModuleInfo &MMI,
1355  StringMap<unsigned> &FunctionToInstrCount) {
1356  // Collect instruction counts for every function. We'll use this to emit
1357  // per-function size remarks later.
1358  for (const Function &F : M) {
1360 
1361  // We only care about MI counts here. If there's no MachineFunction at this
1362  // point, then there won't be after the outliner runs, so let's move on.
1363  if (!MF)
1364  continue;
1365  FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1366  }
1367 }
1368 
1369 void MachineOutliner::emitInstrCountChangedRemark(
1370  const Module &M, const MachineModuleInfo &MMI,
1371  const StringMap<unsigned> &FunctionToInstrCount) {
1372  // Iterate over each function in the module and emit remarks.
1373  // Note that we won't miss anything by doing this, because the outliner never
1374  // deletes functions.
1375  for (const Function &F : M) {
1377 
1378  // The outliner never deletes functions. If we don't have a MF here, then we
1379  // didn't have one prior to outlining either.
1380  if (!MF)
1381  continue;
1382 
1383  std::string Fname = F.getName();
1384  unsigned FnCountAfter = MF->getInstructionCount();
1385  unsigned FnCountBefore = 0;
1386 
1387  // Check if the function was recorded before.
1388  auto It = FunctionToInstrCount.find(Fname);
1389 
1390  // Did we have a previously-recorded size? If yes, then set FnCountBefore
1391  // to that.
1392  if (It != FunctionToInstrCount.end())
1393  FnCountBefore = It->second;
1394 
1395  // Compute the delta and emit a remark if there was a change.
1396  int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1397  static_cast<int64_t>(FnCountBefore);
1398  if (FnDelta == 0)
1399  continue;
1400 
1401  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1402  MORE.emit([&]() {
1403  MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1405  &MF->front());
1406  R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1407  << ": Function: "
1408  << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1409  << ": MI instruction count changed from "
1410  << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1411  FnCountBefore)
1412  << " to "
1413  << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1414  FnCountAfter)
1415  << "; Delta: "
1416  << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1417  return R;
1418  });
1419  }
1420 }
1421 
1422 bool MachineOutliner::runOnModule(Module &M) {
1423  // Check if there's anything in the module. If it's empty, then there's
1424  // nothing to outline.
1425  if (M.empty())
1426  return false;
1427 
1428  MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
1429 
1430  // If the user passed -enable-machine-outliner=always or
1431  // -enable-machine-outliner, the pass will run on all functions in the module.
1432  // Otherwise, if the target supports default outlining, it will run on all
1433  // functions deemed by the target to be worth outlining from by default. Tell
1434  // the user how the outliner is running.
1435  LLVM_DEBUG(
1436  dbgs() << "Machine Outliner: Running on ";
1437  if (RunOnAllFunctions)
1438  dbgs() << "all functions";
1439  else
1440  dbgs() << "target-default functions";
1441  dbgs() << "\n"
1442  );
1443 
1444  // If the user specifies that they want to outline from linkonceodrs, set
1445  // it here.
1446  OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1447  InstructionMapper Mapper;
1448 
1449  // Prepare instruction mappings for the suffix tree.
1450  populateMapper(Mapper, M, MMI);
1451  std::vector<OutlinedFunction> FunctionList;
1452 
1453  // Find all of the outlining candidates.
1454  findCandidates(Mapper, FunctionList);
1455 
1456  // If we've requested size remarks, then collect the MI counts of every
1457  // function before outlining, and the MI counts after outlining.
1458  // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1459  // the pass manager's responsibility.
1460  // This could pretty easily be placed in outline instead, but because we
1461  // really ultimately *don't* want this here, it's done like this for now
1462  // instead.
1463 
1464  // Check if we want size remarks.
1465  bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1466  StringMap<unsigned> FunctionToInstrCount;
1467  if (ShouldEmitSizeRemarks)
1468  initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
1469 
1470  // Outline each of the candidates and return true if something was outlined.
1471  bool OutlinedSomething = outline(M, FunctionList, Mapper);
1472 
1473  // If we outlined something, we definitely changed the MI count of the
1474  // module. If we've asked for size remarks, then output them.
1475  // FIXME: This should be in the pass manager.
1476  if (ShouldEmitSizeRemarks && OutlinedSomething)
1477  emitInstrCountChangedRemark(M, MMI, FunctionToInstrCount);
1478 
1479  return OutlinedSomething;
1480 }
void finalize()
Construct any deferred debug info descriptors.
Definition: DIBuilder.cpp:69
const NoneType None
Definition: None.h:24
uint64_t CallInst * C
unsigned getInstructionCount() const
Return the number of MachineInstrs in this MachineFunction.
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Constant * getOrInsertFunction(StringRef Name, FunctionType *T, AttributeList AttributeList)
Look up the specified function in the module symbol table.
Definition: Module.cpp:144
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
const MachineFunctionProperties & getProperties() const
Get the function properties.
DIFile * getFile() const
Diagnostic information for applied optimization remarks.
This class represents a function call, abstracting a target machine&#39;s calling convention.
iterator find(StringRef Key)
Definition: StringMap.h:333
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
STATISTIC(NumFunctions, "Total number of functions")
A debug info location.
Definition: DebugLoc.h:34
F(f)
DISubprogram * createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine, DINode::DIFlags Flags=DINode::FlagZero, DISubprogram::DISPFlags SPFlags=DISubprogram::SPFlagZero, DITemplateParameterArray TParams=nullptr, DISubprogram *Decl=nullptr, DITypeArray ThrownTypes=nullptr)
Create a new descriptor for the specified subprogram.
Definition: DIBuilder.cpp:752
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
This file defines the MallocAllocator and BumpPtrAllocator interfaces.
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
AnalysisUsage & addRequired()
amdgpu Simplify well known AMD library false Value Value const Twine & Name
Definition: BitVector.h:938
virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const
Insert a call to an outlined function into the program.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
const HexagonInstrInfo * TII
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
DISubroutineType * createSubroutineType(DITypeRefArray ParameterTypes, DINode::DIFlags Flags=DINode::FlagZero, unsigned CC=0)
Create subroutine type.
Definition: DIBuilder.cpp:498
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2091
Diagnostic information for optimization analysis remarks.
Subprogram description.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:92
bool empty() const
Definition: Module.h:604
virtual const TargetInstrInfo * getInstrInfo() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr&#39;s memory reference descriptor list.
virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const
Return true if the function can safely be outlined from.
TargetInstrInfo - Interface to description of machine instruction set.
DITypeRefArray getOrCreateTypeArray(ArrayRef< Metadata *> Elements)
Get a DITypeRefArray, create one if required.
Definition: DIBuilder.cpp:612
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
virtual bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const
Return true if the function should be outlined from by default.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
void setSubprogram(DISubprogram *SP)
Set the attached subprogram.
Definition: Metadata.cpp:1504
bool isLeaf(ID id)
Returns true if the intrinsic is a leaf, i.e.
Definition: Function.cpp:1003
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:141
bool shouldEmitInstrCountChangedRemark()
Return true if size-info optimization remark is enabled, false otherwise.
Definition: Module.h:263
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
struct UnitT Unit
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const
Returns how or if MI should be outlined.
Contains all data structures shared between the outliner implemented in MachineOutliner.cpp and target implementations of the outliner.
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const
Optional target hook that returns true if MBB is safe to outline from, and returns any target-specifi...
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
T * Allocate(size_t num=1)
Allocate space for an array of objects without constructing them.
Definition: Allocator.h:491
Used in the streaming interface as the general argument type.
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
const MachineBasicBlock & front() const
MachineFunction & getOrCreateMachineFunction(const Function &F)
Returns the MachineFunction constructed for the IR function F.
ModulePass * createMachineOutlinerPass(bool RunOnAllFunctions=true)
This pass performs outlining on machine instructions directly before printing assembly.
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:499
void initializeMachineOutlinerPass(PassRegistry &)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const
Insert a custom frame for outlined functions.
The optimization diagnostic interface.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr...
MachineOperand class - Representation of each machine instruction operand.
#define MORE()
Definition: regcomp.c:251
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:442
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:445
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2&#39;s erase_if which is equivalent t...
Definition: STLExtras.h:1330
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
DISubprogram * getSubprogram() const
Get the subprogram for this scope.
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
void setPreservesAll()
Set by analyses that do not transform their input at all.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1969
Representation of each machine instruction.
Definition: MachineInstr.h:64
ReturnInst * CreateRetVoid()
Create a &#39;ret void&#39; instruction.
Definition: IRBuilder.h:824
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:216
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Rename collisions when linking (static functions).
Definition: GlobalValue.h:56
void finalizeSubprogram(DISubprogram *SP)
Finalize a specific subprogram - no new variables may be added to this subprogram afterwards...
Definition: DIBuilder.cpp:49
Diagnostic information for missed-optimization remarks.
INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) void MachineOutliner
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool hasProperty(Property P) const
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:483
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:331
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable&#39;s name.
Definition: Mangler.cpp:112
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.h:230
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1179
static cl::opt< bool > EnableLinkOnceODROutlining("enable-linkonceodr-outlining", cl::Hidden, cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false))
#define LLVM_DEBUG(X)
Definition: Debug.h:123
virtual outliner::OutlinedFunction getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const
Returns a outliner::OutlinedFunction struct containing target-specific information for a set of outli...
#define DEBUG_TYPE
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:48
iterator end()
Definition: StringMap.h:318
This class contains meta information specific to a module.