LLVM  8.0.1
StripSymbols.cpp
Go to the documentation of this file.
1 //===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The StripSymbols transformation implements code stripping. Specifically, it
11 // can delete:
12 //
13 // * names for virtual registers
14 // * symbols for internal globals and functions
15 // * debug information
16 //
17 // Note that this transformation makes code much less readable, so it should
18 // only be used in situations where the 'strip' utility would be used, such as
19 // reducing code size or making it harder to reverse engineer code.
20 //
21 //===----------------------------------------------------------------------===//
22 
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
34 using namespace llvm;
35 
36 namespace {
37  class StripSymbols : public ModulePass {
38  bool OnlyDebugInfo;
39  public:
40  static char ID; // Pass identification, replacement for typeid
41  explicit StripSymbols(bool ODI = false)
42  : ModulePass(ID), OnlyDebugInfo(ODI) {
44  }
45 
46  bool runOnModule(Module &M) override;
47 
48  void getAnalysisUsage(AnalysisUsage &AU) const override {
49  AU.setPreservesAll();
50  }
51  };
52 
53  class StripNonDebugSymbols : public ModulePass {
54  public:
55  static char ID; // Pass identification, replacement for typeid
56  explicit StripNonDebugSymbols()
57  : ModulePass(ID) {
59  }
60 
61  bool runOnModule(Module &M) override;
62 
63  void getAnalysisUsage(AnalysisUsage &AU) const override {
64  AU.setPreservesAll();
65  }
66  };
67 
68  class StripDebugDeclare : public ModulePass {
69  public:
70  static char ID; // Pass identification, replacement for typeid
71  explicit StripDebugDeclare()
72  : ModulePass(ID) {
74  }
75 
76  bool runOnModule(Module &M) override;
77 
78  void getAnalysisUsage(AnalysisUsage &AU) const override {
79  AU.setPreservesAll();
80  }
81  };
82 
83  class StripDeadDebugInfo : public ModulePass {
84  public:
85  static char ID; // Pass identification, replacement for typeid
86  explicit StripDeadDebugInfo()
87  : ModulePass(ID) {
89  }
90 
91  bool runOnModule(Module &M) override;
92 
93  void getAnalysisUsage(AnalysisUsage &AU) const override {
94  AU.setPreservesAll();
95  }
96  };
97 }
98 
99 char StripSymbols::ID = 0;
100 INITIALIZE_PASS(StripSymbols, "strip",
101  "Strip all symbols from a module", false, false)
102 
103 ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
104  return new StripSymbols(OnlyDebugInfo);
105 }
106 
107 char StripNonDebugSymbols::ID = 0;
108 INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
109  "Strip all symbols, except dbg symbols, from a module",
110  false, false)
111 
113  return new StripNonDebugSymbols();
114 }
115 
116 char StripDebugDeclare::ID = 0;
117 INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
118  "Strip all llvm.dbg.declare intrinsics", false, false)
119 
121  return new StripDebugDeclare();
122 }
123 
124 char StripDeadDebugInfo::ID = 0;
125 INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
126  "Strip debug info for unused symbols", false, false)
127 
129  return new StripDeadDebugInfo();
130 }
131 
132 /// OnlyUsedBy - Return true if V is only used by Usr.
133 static bool OnlyUsedBy(Value *V, Value *Usr) {
134  for (User *U : V->users())
135  if (U != Usr)
136  return false;
137 
138  return true;
139 }
140 
142  assert(C->use_empty() && "Constant is not dead!");
143  SmallPtrSet<Constant*, 4> Operands;
144  for (Value *Op : C->operands())
145  if (OnlyUsedBy(Op, C))
146  Operands.insert(cast<Constant>(Op));
147  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
148  if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals.
149  GV->eraseFromParent();
150  }
151  else if (!isa<Function>(C))
152  if (isa<CompositeType>(C->getType()))
153  C->destroyConstant();
154 
155  // If the constant referenced anything, see if we can delete it as well.
156  for (Constant *O : Operands)
158 }
159 
160 // Strip the symbol table of its names.
161 //
162 static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
163  for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
164  Value *V = VI->getValue();
165  ++VI;
166  if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
167  if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
168  // Set name to "", removing from symbol table!
169  V->setName("");
170  }
171  }
172 }
173 
174 // Strip any named types of their names.
175 static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
176  TypeFinder StructTypes;
177  StructTypes.run(M, false);
178 
179  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
180  StructType *STy = StructTypes[i];
181  if (STy->isLiteral() || STy->getName().empty()) continue;
182 
183  if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
184  continue;
185 
186  STy->setName("");
187  }
188 }
189 
190 /// Find values that are marked as llvm.used.
191 static void findUsedValues(GlobalVariable *LLVMUsed,
193  if (!LLVMUsed) return;
194  UsedValues.insert(LLVMUsed);
195 
196  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
197 
198  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
199  if (GlobalValue *GV =
200  dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
201  UsedValues.insert(GV);
202 }
203 
204 /// StripSymbolNames - Strip symbol names.
205 static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
206 
207  SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
208  findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
209  findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
210 
212  I != E; ++I) {
213  if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
214  if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
215  I->setName(""); // Internal symbols can't participate in linkage
216  }
217 
218  for (Function &I : M) {
219  if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
220  if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
221  I.setName(""); // Internal symbols can't participate in linkage
222  if (auto *Symtab = I.getValueSymbolTable())
223  StripSymtab(*Symtab, PreserveDbgInfo);
224  }
225 
226  // Remove all names from types.
227  StripTypeNames(M, PreserveDbgInfo);
228 
229  return true;
230 }
231 
232 bool StripSymbols::runOnModule(Module &M) {
233  if (skipModule(M))
234  return false;
235 
236  bool Changed = false;
237  Changed |= StripDebugInfo(M);
238  if (!OnlyDebugInfo)
239  Changed |= StripSymbolNames(M, false);
240  return Changed;
241 }
242 
243 bool StripNonDebugSymbols::runOnModule(Module &M) {
244  if (skipModule(M))
245  return false;
246 
247  return StripSymbolNames(M, true);
248 }
249 
250 bool StripDebugDeclare::runOnModule(Module &M) {
251  if (skipModule(M))
252  return false;
253 
254  Function *Declare = M.getFunction("llvm.dbg.declare");
255  std::vector<Constant*> DeadConstants;
256 
257  if (Declare) {
258  while (!Declare->use_empty()) {
259  CallInst *CI = cast<CallInst>(Declare->user_back());
260  Value *Arg1 = CI->getArgOperand(0);
261  Value *Arg2 = CI->getArgOperand(1);
262  assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
263  CI->eraseFromParent();
264  if (Arg1->use_empty()) {
265  if (Constant *C = dyn_cast<Constant>(Arg1))
266  DeadConstants.push_back(C);
267  else
269  }
270  if (Arg2->use_empty())
271  if (Constant *C = dyn_cast<Constant>(Arg2))
272  DeadConstants.push_back(C);
273  }
274  Declare->eraseFromParent();
275  }
276 
277  while (!DeadConstants.empty()) {
278  Constant *C = DeadConstants.back();
279  DeadConstants.pop_back();
280  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
281  if (GV->hasLocalLinkage())
282  RemoveDeadConstant(GV);
283  } else
285  }
286 
287  return true;
288 }
289 
290 /// Remove any debug info for global variables/functions in the given module for
291 /// which said global variable/function no longer exists (i.e. is null).
292 ///
293 /// Debugging information is encoded in llvm IR using metadata. This is designed
294 /// such a way that debug info for symbols preserved even if symbols are
295 /// optimized away by the optimizer. This special pass removes debug info for
296 /// such symbols.
297 bool StripDeadDebugInfo::runOnModule(Module &M) {
298  if (skipModule(M))
299  return false;
300 
301  bool Changed = false;
302 
303  LLVMContext &C = M.getContext();
304 
305  // Find all debug info in F. This is actually overkill in terms of what we
306  // want to do, but we want to try and be as resilient as possible in the face
307  // of potential debug info changes by using the formal interfaces given to us
308  // as much as possible.
310  F.processModule(M);
311 
312  // For each compile unit, find the live set of global variables/functions and
313  // replace the current list of potentially dead global variables/functions
314  // with the live list.
315  SmallVector<Metadata *, 64> LiveGlobalVariables;
317 
318  std::set<DIGlobalVariableExpression *> LiveGVs;
319  for (GlobalVariable &GV : M.globals()) {
321  GV.getDebugInfo(GVEs);
322  for (auto *GVE : GVEs)
323  LiveGVs.insert(GVE);
324  }
325 
326  std::set<DICompileUnit *> LiveCUs;
327  // Any CU referenced from a subprogram is live.
328  for (DISubprogram *SP : F.subprograms()) {
329  if (SP->getUnit())
330  LiveCUs.insert(SP->getUnit());
331  }
332 
333  bool HasDeadCUs = false;
334  for (DICompileUnit *DIC : F.compile_units()) {
335  // Create our live global variable list.
336  bool GlobalVariableChange = false;
337  for (auto *DIG : DIC->getGlobalVariables()) {
338  if (DIG->getExpression() && DIG->getExpression()->isConstant())
339  LiveGVs.insert(DIG);
340 
341  // Make sure we only visit each global variable only once.
342  if (!VisitedSet.insert(DIG).second)
343  continue;
344 
345  // If a global variable references DIG, the global variable is live.
346  if (LiveGVs.count(DIG))
347  LiveGlobalVariables.push_back(DIG);
348  else
349  GlobalVariableChange = true;
350  }
351 
352  if (!LiveGlobalVariables.empty())
353  LiveCUs.insert(DIC);
354  else if (!LiveCUs.count(DIC))
355  HasDeadCUs = true;
356 
357  // If we found dead global variables, replace the current global
358  // variable list with our new live global variable list.
359  if (GlobalVariableChange) {
360  DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
361  Changed = true;
362  }
363 
364  // Reset lists for the next iteration.
365  LiveGlobalVariables.clear();
366  }
367 
368  if (HasDeadCUs) {
369  // Delete the old node and replace it with a new one
370  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
371  NMD->clearOperands();
372  if (!LiveCUs.empty()) {
373  for (DICompileUnit *CU : LiveCUs)
374  NMD->addOperand(CU);
375  }
376  Changed = true;
377  }
378 
379  return Changed;
380 }
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:68
This class provides a symbol table of name/value pairs.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1133
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator begin()
Get an iterator that from the beginning of the symbol table.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
void initializeStripDeadDebugInfoPass(PassRegistry &)
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo)
void clearOperands()
Drop all references to this node's operands.
Definition: Metadata.cpp:1096
static bool StripSymbolNames(Module &M, bool PreserveDbgInfo)
StripSymbolNames - Strip symbol names.
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
void addOperand(MDNode *M)
Definition: Metadata.cpp:1087
This class represents a function call, abstracting a target machine's calling convention.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
Definition: Module.cpp:261
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:387
F(f)
void processModule(const Module &M)
Process entire module and collect debug info anchors.
Definition: DebugInfo.cpp:63
ModulePass * createStripNonDebugSymbolsPass()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135
void initializeStripSymbolsPass(PassRegistry &)
ModulePass * createStripDeadDebugInfoPass()
A tuple of MDNodes.
Definition: Metadata.h:1326
Class to represent struct types.
Definition: DerivedTypes.h:201
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
Utility to find all debug info in a module.
Definition: DebugInfo.h:65
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
global_iterator global_begin()
Definition: Module.h:578
Subprogram description.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
void initializeStripNonDebugSymbolsPass(PassRegistry &)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
static bool OnlyUsedBy(Value *V, Value *Usr)
OnlyUsedBy - Return true if V is only used by Usr.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:351
static void RemoveDeadConstant(Constant *C)
iterator end()
Get an iterator to the end of the symbol table.
Value * getOperand(unsigned i) const
Definition: User.h:170
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSetImpl< const GlobalValue *> &UsedValues)
Find values that are marked as llvm.used.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
StringRef getName() const
Return the name for this struct type if it has an identity.
Definition: Type.cpp:500
op_range operands()
Definition: User.h:238
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
iterator_range< compile_unit_iterator > compile_units() const
Definition: DebugInfo.h:104
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:430
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
ModulePass * createStripDebugDeclarePass()
global_iterator global_end()
Definition: Module.h:580
INITIALIZE_PASS(StripSymbols, "strip", "Strip all symbols from a module", false, false) ModulePass *llvm
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
void run(const Module &M, bool onlyNamed)
Definition: TypeFinder.cpp:32
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:176
bool isLiteral() const
Return true if this type is uniqued by structural equivalence, false if it is a struct definition...
Definition: DerivedTypes.h:265
ConstantArray - Constant Array Declarations.
Definition: Constants.h:414
void push_back(pointer val)
Definition: ilist.h:313
void setPreservesAll()
Set by analyses that do not transform their input at all.
iterator_range< user_iterator > users()
Definition: Value.h:400
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
size_t size() const
Definition: TypeFinder.h:57
void setName(StringRef Name)
Change the name of this type to the specified name, or to a name with a suffix if there is a collisio...
Definition: Type.cpp:386
static void StripTypeNames(Module &M, bool PreserveDbgInfo)
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:225
void destroyConstant()
Called if some element of this constant is no longer valid.
Definition: Constants.cpp:362
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:214
void initializeStripDebugDeclarePass(PassRegistry &)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
ModulePass * createStripSymbolsPass(bool OnlyDebugInfo=false)
iterator_range< subprogram_iterator > subprograms() const
Definition: DebugInfo.h:108
iterator_range< global_iterator > globals()
Definition: Module.h:584
TypeFinder - Walk over a module, identifying all of the types that are used by the module...
Definition: TypeFinder.h:31
bool use_empty() const
Definition: Value.h:323
User * user_back()
Definition: Value.h:386