LLVM  8.0.1
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass loops over all of the functions and variables in the input module.
11 // If the function or variable does not need to be preserved according to the
12 // client supplied callback, it is marked as internal.
13 //
14 // This transformation would not be legal in a regular compilation, but it gets
15 // extra information from the linker about what is safe.
16 //
17 // For example: a function with external linkage may be internalized only if
18 // we are told that it is used solely from within this module.
19 //
20 //===----------------------------------------------------------------------===//
21 
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringSet.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/Pass.h"
30 #include "llvm/Support/Debug.h"
32 #include "llvm/Transforms/IPO.h"
34 #include <fstream>
35 #include <set>
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "internalize"
39 
40 STATISTIC(NumAliases, "Number of aliases internalized");
41 STATISTIC(NumFunctions, "Number of functions internalized");
42 STATISTIC(NumGlobals, "Number of global vars internalized");
43 
44 // APIFile - A file which contains a list of symbols that should not be marked
45 // external.
47  APIFile("internalize-public-api-file", cl::value_desc("filename"),
48  cl::desc("A file containing list of symbol names to preserve"));
49 
50 // APIList - A list of symbols that should not be marked internal.
52  APIList("internalize-public-api-list", cl::value_desc("list"),
53  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
54 
55 namespace {
56 // Helper to load an API list to preserve from file and expose it as a functor
57 // for internalization.
58 class PreserveAPIList {
59 public:
60  PreserveAPIList() {
61  if (!APIFile.empty())
62  LoadFile(APIFile);
63  ExternalNames.insert(APIList.begin(), APIList.end());
64  }
65 
66  bool operator()(const GlobalValue &GV) {
67  return ExternalNames.count(GV.getName());
68  }
69 
70 private:
71  // Contains the set of symbols loaded from file
72  StringSet<> ExternalNames;
73 
74  void LoadFile(StringRef Filename) {
75  // Load the APIFile...
76  std::ifstream In(Filename.data());
77  if (!In.good()) {
78  errs() << "WARNING: Internalize couldn't load file '" << Filename
79  << "'! Continuing as if it's empty.\n";
80  return; // Just continue as if the file were empty
81  }
82  while (In) {
83  std::string Symbol;
84  In >> Symbol;
85  if (!Symbol.empty())
86  ExternalNames.insert(Symbol);
87  }
88  }
89 };
90 } // end anonymous namespace
91 
92 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
93  // Function must be defined here
94  if (GV.isDeclaration())
95  return true;
96 
97  // Available externally is really just a "declaration with a body".
99  return true;
100 
101  // Assume that dllexported symbols are referenced elsewhere
102  if (GV.hasDLLExportStorageClass())
103  return true;
104 
105  // Already local, has nothing to do.
106  if (GV.hasLocalLinkage())
107  return false;
108 
109  // Check some special cases
110  if (AlwaysPreserved.count(GV.getName()))
111  return true;
112 
113  return MustPreserveGV(GV);
114 }
115 
116 bool InternalizePass::maybeInternalize(
117  GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
118  if (Comdat *C = GV.getComdat()) {
119  if (ExternalComdats.count(C))
120  return false;
121 
122  // If a comdat is not externally visible we can drop it.
123  if (auto GO = dyn_cast<GlobalObject>(&GV))
124  GO->setComdat(nullptr);
125 
126  if (GV.hasLocalLinkage())
127  return false;
128  } else {
129  if (GV.hasLocalLinkage())
130  return false;
131 
132  if (shouldPreserveGV(GV))
133  return false;
134  }
135 
138  return true;
139 }
140 
141 // If GV is part of a comdat and is externally visible, keep track of its
142 // comdat so that we don't internalize any of its members.
143 void InternalizePass::checkComdatVisibility(
144  GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
145  Comdat *C = GV.getComdat();
146  if (!C)
147  return;
148 
149  if (shouldPreserveGV(GV))
150  ExternalComdats.insert(C);
151 }
152 
154  bool Changed = false;
155  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
156 
158  collectUsedGlobalVariables(M, Used, false);
159 
160  // Collect comdat visiblity information for the module.
161  std::set<const Comdat *> ExternalComdats;
162  if (!M.getComdatSymbolTable().empty()) {
163  for (Function &F : M)
164  checkComdatVisibility(F, ExternalComdats);
165  for (GlobalVariable &GV : M.globals())
166  checkComdatVisibility(GV, ExternalComdats);
167  for (GlobalAlias &GA : M.aliases())
168  checkComdatVisibility(GA, ExternalComdats);
169  }
170 
171  // We must assume that globals in llvm.used have a reference that not even
172  // the linker can see, so we don't internalize them.
173  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
174  // linker can drop those symbols. If this pass is running as part of LTO,
175  // one might think that it could just drop llvm.compiler.used. The problem
176  // is that even in LTO llvm doesn't see every reference. For example,
177  // we don't see references from function local inline assembly. To be
178  // conservative, we internalize symbols in llvm.compiler.used, but we
179  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
180  for (GlobalValue *V : Used) {
181  AlwaysPreserved.insert(V->getName());
182  }
183 
184  // Mark all functions not in the api as internal.
185  for (Function &I : M) {
186  if (!maybeInternalize(I, ExternalComdats))
187  continue;
188  Changed = true;
189 
190  if (ExternalNode)
191  // Remove a callgraph edge from the external node to this function.
192  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
193 
194  ++NumFunctions;
195  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
196  }
197 
198  // Never internalize the llvm.used symbol. It is used to implement
199  // attribute((used)).
200  // FIXME: Shouldn't this just filter on llvm.metadata section??
201  AlwaysPreserved.insert("llvm.used");
202  AlwaysPreserved.insert("llvm.compiler.used");
203 
204  // Never internalize anchors used by the machine module info, else the info
205  // won't find them. (see MachineModuleInfo.)
206  AlwaysPreserved.insert("llvm.global_ctors");
207  AlwaysPreserved.insert("llvm.global_dtors");
208  AlwaysPreserved.insert("llvm.global.annotations");
209 
210  // Never internalize symbols code-gen inserts.
211  // FIXME: We should probably add this (and the __stack_chk_guard) via some
212  // type of call-back in CodeGen.
213  AlwaysPreserved.insert("__stack_chk_fail");
214  AlwaysPreserved.insert("__stack_chk_guard");
215 
216  // Mark all global variables with initializers that are not in the api as
217  // internal as well.
218  for (auto &GV : M.globals()) {
219  if (!maybeInternalize(GV, ExternalComdats))
220  continue;
221  Changed = true;
222 
223  ++NumGlobals;
224  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
225  }
226 
227  // Mark all aliases that are not in the api as internal as well.
228  for (auto &GA : M.aliases()) {
229  if (!maybeInternalize(GA, ExternalComdats))
230  continue;
231  Changed = true;
232 
233  ++NumAliases;
234  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
235  }
236 
237  return Changed;
238 }
239 
240 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
241 
244  return PreservedAnalyses::all();
245 
248  return PA;
249 }
250 
251 namespace {
252 class InternalizeLegacyPass : public ModulePass {
253  // Client supplied callback to control wheter a symbol must be preserved.
254  std::function<bool(const GlobalValue &)> MustPreserveGV;
255 
256 public:
257  static char ID; // Pass identification, replacement for typeid
258 
259  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
260 
261  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
262  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
264  }
265 
266  bool runOnModule(Module &M) override {
267  if (skipModule(M))
268  return false;
269 
270  CallGraphWrapperPass *CGPass =
271  getAnalysisIfAvailable<CallGraphWrapperPass>();
272  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
273  return internalizeModule(M, MustPreserveGV, CG);
274  }
275 
276  void getAnalysisUsage(AnalysisUsage &AU) const override {
277  AU.setPreservesCFG();
279  }
280 };
281 }
282 
284 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
285  "Internalize Global Symbols", false, false)
286 
288  return new InternalizeLegacyPass();
289 }
290 
292  std::function<bool(const GlobalValue &)> MustPreserveGV) {
293  return new InternalizeLegacyPass(std::move(MustPreserveGV));
294 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:239
uint64_t CallInst * C
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:265
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool hasLocalLinkage() const
Definition: GlobalValue.h:436
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initializeInternalizeLegacyPassPass(PassRegistry &)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:423
STATISTIC(NumFunctions, "Total number of functions")
F(f)
A node in the call graph for a module.
Definition: CallGraph.h:165
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function...
Definition: CallGraph.cpp:215
const ComdatSymTabType & getComdatSymbolTable() const
Get the Module's symbol table for COMDATs (constant).
Definition: Module.h:570
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:154
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:324
Represent the analysis usage information of a pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:160
std::pair< typename base::iterator, bool > insert(StringRef Key)
Definition: StringSet.h:38
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:335
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallPtrSetImpl< GlobalValue *> &Set, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:596
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:445
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
const Comdat * getComdat() const
Definition: Globals.cpp:171
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:292
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:74
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes were made.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:789
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:175
Rename collisions when linking (static functions).
Definition: GlobalValue.h:56
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:137
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:206
bool empty() const
Definition: StringMap.h:111
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:28
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A container for analyses that lazily runs them and caches their results.
#define LLVM_DEBUG(X)
Definition: Debug.h:123