LLVM  8.0.1
SplitModule.cpp
Go to the documentation of this file.
1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the function llvm::SplitModule, which splits a module
11 // into multiple linkable partitions. It can be used to implement parallel code
12 // generation for link-time optimization.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/Comdat.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/GlobalAlias.h"
27 #include "llvm/IR/GlobalObject.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/GlobalVariable.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/User.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/MD5.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <iterator>
45 #include <memory>
46 #include <queue>
47 #include <utility>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "split-module"
53 
54 namespace {
55 
56 using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
57 using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
58 using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
59 
60 } // end anonymous namespace
61 
62 static void addNonConstUser(ClusterMapType &GVtoClusterMap,
63  const GlobalValue *GV, const User *U) {
64  assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
65 
66  if (const Instruction *I = dyn_cast<Instruction>(U)) {
67  const GlobalValue *F = I->getParent()->getParent();
68  GVtoClusterMap.unionSets(GV, F);
69  } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
70  isa<GlobalVariable>(U)) {
71  GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
72  } else {
73  llvm_unreachable("Underimplemented use case");
74  }
75 }
76 
77 // Adds all GlobalValue users of V to the same cluster as GV.
78 static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
79  const GlobalValue *GV, const Value *V) {
80  for (auto *U : V->users()) {
82  Worklist.push_back(U);
83  while (!Worklist.empty()) {
84  const User *UU = Worklist.pop_back_val();
85  // For each constant that is not a GV (a pure const) recurse.
86  if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
87  Worklist.append(UU->user_begin(), UU->user_end());
88  continue;
89  }
90  addNonConstUser(GVtoClusterMap, GV, UU);
91  }
92  }
93 }
94 
95 // Find partitions for module in the way that no locals need to be
96 // globalized.
97 // Try to balance pack those partitions into N files since this roughly equals
98 // thread balancing for the backend codegen step.
99 static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
100  unsigned N) {
101  // At this point module should have the proper mix of globals and locals.
102  // As we attempt to partition this module, we must not change any
103  // locals to globals.
104  LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
105  << ")functions\n");
106  ClusterMapType GVtoClusterMap;
107  ComdatMembersType ComdatMembers;
108 
109  auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
110  if (GV.isDeclaration())
111  return;
112 
113  if (!GV.hasName())
114  GV.setName("__llvmsplit_unnamed");
115 
116  // Comdat groups must not be partitioned. For comdat groups that contain
117  // locals, record all their members here so we can keep them together.
118  // Comdat groups that only contain external globals are already handled by
119  // the MD5-based partitioning.
120  if (const Comdat *C = GV.getComdat()) {
121  auto &Member = ComdatMembers[C];
122  if (Member)
123  GVtoClusterMap.unionSets(Member, &GV);
124  else
125  Member = &GV;
126  }
127 
128  // For aliases we should not separate them from their aliasees regardless
129  // of linkage.
130  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
131  if (const GlobalObject *Base = GIS->getBaseObject())
132  GVtoClusterMap.unionSets(&GV, Base);
133  }
134 
135  if (const Function *F = dyn_cast<Function>(&GV)) {
136  for (const BasicBlock &BB : *F) {
138  if (!BA || !BA->isConstantUsed())
139  continue;
140  addAllGlobalValueUsers(GVtoClusterMap, F, BA);
141  }
142  }
143 
144  if (GV.hasLocalLinkage())
145  addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
146  };
147 
148  llvm::for_each(M->functions(), recordGVSet);
149  llvm::for_each(M->globals(), recordGVSet);
150  llvm::for_each(M->aliases(), recordGVSet);
151 
152  // Assigned all GVs to merged clusters while balancing number of objects in
153  // each.
154  auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
155  const std::pair<unsigned, unsigned> &b) {
156  if (a.second || b.second)
157  return a.second > b.second;
158  else
159  return a.first > b.first;
160  };
161 
162  std::priority_queue<std::pair<unsigned, unsigned>,
163  std::vector<std::pair<unsigned, unsigned>>,
164  decltype(CompareClusters)>
165  BalancinQueue(CompareClusters);
166  // Pre-populate priority queue with N slot blanks.
167  for (unsigned i = 0; i < N; ++i)
168  BalancinQueue.push(std::make_pair(i, 0));
169 
170  using SortType = std::pair<unsigned, ClusterMapType::iterator>;
171 
174 
175  // To guarantee determinism, we have to sort SCC according to size.
176  // When size is the same, use leader's name.
177  for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
178  E = GVtoClusterMap.end(); I != E; ++I)
179  if (I->isLeader())
180  Sets.push_back(
181  std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
182  GVtoClusterMap.member_end()), I));
183 
184  llvm::sort(Sets, [](const SortType &a, const SortType &b) {
185  if (a.first == b.first)
186  return a.second->getData()->getName() > b.second->getData()->getName();
187  else
188  return a.first > b.first;
189  });
190 
191  for (auto &I : Sets) {
192  unsigned CurrentClusterID = BalancinQueue.top().first;
193  unsigned CurrentClusterSize = BalancinQueue.top().second;
194  BalancinQueue.pop();
195 
196  LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
197  << I.first << ") ----> " << I.second->getData()->getName()
198  << "\n");
199 
200  for (ClusterMapType::member_iterator MI =
201  GVtoClusterMap.findLeader(I.second);
202  MI != GVtoClusterMap.member_end(); ++MI) {
203  if (!Visited.insert(*MI).second)
204  continue;
205  LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
206  << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
207  Visited.insert(*MI);
208  ClusterIDMap[*MI] = CurrentClusterID;
209  CurrentClusterSize++;
210  }
211  // Add this set size to the number of entries in this cluster.
212  BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
213  }
214 }
215 
216 static void externalize(GlobalValue *GV) {
217  if (GV->hasLocalLinkage()) {
220  }
221 
222  // Unnamed entities must be named consistently between modules. setName will
223  // give a distinct name to each such entity.
224  if (!GV->hasName())
225  GV->setName("__llvmsplit_unnamed");
226 }
227 
228 // Returns whether GV should be in partition (0-based) I of N.
229 static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
230  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
231  if (const GlobalObject *Base = GIS->getBaseObject())
232  GV = Base;
233 
234  StringRef Name;
235  if (const Comdat *C = GV->getComdat())
236  Name = C->getName();
237  else
238  Name = GV->getName();
239 
240  // Partition by MD5 hash. We only need a few bits for evenness as the number
241  // of partitions will generally be in the 1-2 figure range; the low 16 bits
242  // are enough.
243  MD5 H;
244  MD5::MD5Result R;
245  H.update(Name);
246  H.final(R);
247  return (R[0] | (R[1] << 8)) % N == I;
248 }
249 
251  std::unique_ptr<Module> M, unsigned N,
252  function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
253  bool PreserveLocals) {
254  if (!PreserveLocals) {
255  for (Function &F : *M)
256  externalize(&F);
257  for (GlobalVariable &GV : M->globals())
258  externalize(&GV);
259  for (GlobalAlias &GA : M->aliases())
260  externalize(&GA);
261  for (GlobalIFunc &GIF : M->ifuncs())
262  externalize(&GIF);
263  }
264 
265  // This performs splitting without a need for externalization, which might not
266  // always be possible.
267  ClusterIDMapType ClusterIDMap;
268  findPartitions(M.get(), ClusterIDMap, N);
269 
270  // FIXME: We should be able to reuse M as the last partition instead of
271  // cloning it.
272  for (unsigned I = 0; I < N; ++I) {
273  ValueToValueMapTy VMap;
274  std::unique_ptr<Module> MPart(
275  CloneModule(*M, VMap, [&](const GlobalValue *GV) {
276  if (ClusterIDMap.count(GV))
277  return (ClusterIDMap[GV] == I);
278  else
279  return isInPartition(GV, I, N);
280  }));
281  if (I != 0)
282  MPart->setModuleInlineAsm("");
283  ModuleCallback(std::move(MPart));
284  }
285 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:239
uint64_t CallInst * C
bool hasLocalLinkage() const
Definition: GlobalValue.h:436
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
bool isConstantUsed() const
Return true if the constant has users other than constant expressions and other dangling things...
Definition: Constants.cpp:477
void SplitModule(std::unique_ptr< Module > M, unsigned N, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback, bool PreserveLocals=false)
Splits the module M into N linkable partitions.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM...
Externally visible function.
Definition: GlobalValue.h:49
static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const Value *V)
Definition: SplitModule.cpp:78
F(f)
static void externalize(GlobalValue *GV)
The address of a basic block.
Definition: Constants.h:840
amdgpu Simplify well known AMD library false Value Value const Twine & Name
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:189
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, unsigned N)
Definition: SplitModule.cpp:99
std::unique_ptr< Module > CloneModule(const Module &M)
Return an exact copy of the specified module.
Definition: CloneModule.cpp:35
iterator_range< iterator > functions()
Definition: Module.h:606
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U)
Definition: SplitModule.cpp:62
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:445
static BlockAddress * lookup(const BasicBlock *BB)
Lookup an existing BlockAddress constant for the given BasicBlock.
Definition: Constants.cpp:1452
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
const Comdat * getComdat() const
Definition: Globals.cpp:171
iterator_range< user_iterator > users()
Definition: Value.h:400
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
size_t size() const
Definition: Module.h:603
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Definition: MD5.h:41
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
LLVM Value Representation.
Definition: Value.h:73
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:234
iterator_range< global_iterator > globals()
Definition: Module.h:584
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1179
#define LLVM_DEBUG(X)
Definition: Debug.h:123
iterator_range< alias_iterator > aliases()
Definition: Module.h:624
user_iterator user_end()
Definition: Value.h:384