LLVM  8.0.1
COFFModuleDefinition.cpp
Go to the documentation of this file.
1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Windows-specific.
11 // A parser for the module-definition file (.def file).
12 //
13 // The format of module-definition files is described in this document:
14 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
15 //
16 //===----------------------------------------------------------------------===//
17 
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/Object/COFF.h"
23 #include "llvm/Object/Error.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/Path.h"
27 
28 using namespace llvm::COFF;
29 using namespace llvm;
30 
31 namespace llvm {
32 namespace object {
33 
// Kinds of tokens the lexer can hand to the parser. Unknown is the kind of a
// default-constructed Token; Eof is returned once the input is exhausted.
34 enum Kind {
35  Unknown,
36  Eof,
52 };
53 
// A single token: its kind (K) together with its associated text (Value).
54 struct Token {
    // Defaults to an Unknown token with empty text.
55  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
58 };
59 
60 static bool isDecorated(StringRef Sym, bool MingwDef) {
61  // In def files, the symbols can either be listed decorated or undecorated.
62  //
63  // - For cdecl symbols, only the undecorated form is allowed.
64  // - For fastcall and vectorcall symbols, both fully decorated or
65  // undecorated forms can be present.
66  // - For stdcall symbols in non-MinGW environments, the decorated form is
67  // fully decorated with leading underscore and trailing stack argument
68  // size - like "_Func@0".
69  // - In MinGW def files, a decorated stdcall symbol does not include the
70  // leading underscore though, like "Func@0".
71 
72  // This function controls whether a leading underscore should be added to
73  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
74  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
75  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
76  // as decorated, i.e. don't add any more leading underscores.
77  // We can't check for a leading underscore here, since function names
78  // themselves can start with an underscore, while a second one still needs
79  // to be added.
80  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
81  (!MingwDef && Sym.contains('@'));
82 }
83 
84 static Error createError(const Twine &Err) {
85  return make_error<StringError>(StringRef(Err.str()),
86  object_error::parse_failed);
87 }
88 
// Splits the text of a module-definition file into tokens. Each call to
// lex() removes one token from the front of the remaining input.
89 class Lexer {
90 public:
    // S is the text still to be lexed; it is referenced, not copied.
91  Lexer(StringRef S) : Buf(S) {}
92 
    // Returns the next token, or an Eof token when no input remains.
93  Token lex() {
     // Drop surrounding whitespace before looking at the next character.
94  Buf = Buf.trim();
95  if (Buf.empty())
96  return Token(Eof);
97 
98  switch (Buf[0]) {
     // An embedded NUL character is treated like the end of the input.
99  case '\0':
100  return Token(Eof);
     // ';' starts a comment that runs to the end of the line. The newline
     // itself stays in Buf and is removed by trim() in the recursive call.
101  case ';': {
102  size_t End = Buf.find('\n');
103  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
104  return lex();
105  }
     // "==" is lexed as one EqualEqual token; a single '=' as Equal.
106  case '=':
107  Buf = Buf.drop_front();
108  if (Buf.startswith("=")) {
109  Buf = Buf.drop_front();
110  return Token(EqualEqual, "==");
111  }
112  return Token(Equal, "=");
113  case ',':
114  Buf = Buf.drop_front();
115  return Token(Comma, ",");
     // Quoted text: everything between the opening quote and the next '"'
     // becomes an Identifier; lexing resumes after that closing quote.
     // NOTE(review): an unterminated quote appears to turn the rest of the
     // input into a single identifier - confirm this is intended.
116  case '"': {
117  StringRef S;
118  std::tie(S, Buf) = Buf.substr(1).split('"');
119  return Token(Identifier, S);
120  }
     // Bare word: it extends up to the next delimiter or whitespace
     // character (or to the end of the input if none is found).
121  default: {
122  size_t End = Buf.find_first_of("=,;\r\n \t\v");
123  StringRef Word = Buf.substr(0, End);
     // Keyword spellings are mapped to their dedicated token kinds.
125  .Case("BASE", KwBase)
126  .Case("CONSTANT", KwConstant)
127  .Case("DATA", KwData)
128  .Case("EXPORTS", KwExports)
129  .Case("HEAPSIZE", KwHeapsize)
130  .Case("LIBRARY", KwLibrary)
131  .Case("NAME", KwName)
132  .Case("NONAME", KwNoname)
133  .Case("PRIVATE", KwPrivate)
134  .Case("STACKSIZE", KwStacksize)
135  .Case("VERSION", KwVersion)
     // Consume the word; the delimiter that ended it stays in Buf.
137  Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
138  return Token(K, Word);
139  }
140  }
141  }
142 
143 private:
    // The not-yet-lexed remainder of the input.
144  StringRef Buf;
145 };
146 
147 class Parser {
148 public:
149  explicit Parser(StringRef S, MachineTypes M, bool B)
150  : Lex(S), Machine(M), MingwDef(B) {}
151 
     // Parse one directive at a time until the current token is Eof. The
     // first directive that fails aborts parsing and its error is returned.
153  do {
154  if (Error Err = parseOne())
155  return std::move(Err);
156  } while (Tok.K != Eof);
     // Every directive was accepted; return the accumulated result.
157  return Info;
158  }
159 
160 private:
161  void read() {
162  if (Stack.empty()) {
163  Tok = Lex.lex();
164  return;
165  }
166  Tok = Stack.back();
167  Stack.pop_back();
168  }
169 
170  Error readAsInt(uint64_t *I) {
171  read();
172  if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
173  return createError("integer expected");
174  return Error::success();
175  }
176 
     // Read the next token and fail with Msg unless its kind is Expected.
178  read();
179  if (Tok.K != Expected)
180  return createError(Msg);
181  return Error::success();
182  }
183 
184  void unget() { Stack.push_back(Tok); }
185 
186  Error parseOne() {
187  read();
188  switch (Tok.K) {
189  case Eof:
190  return Error::success();
191  case KwExports:
192  for (;;) {
193  read();
194  if (Tok.K != Identifier) {
195  unget();
196  return Error::success();
197  }
198  if (Error Err = parseExport())
199  return Err;
200  }
201  case KwHeapsize:
202  return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
203  case KwStacksize:
204  return parseNumbers(&Info.StackReserve, &Info.StackCommit);
205  case KwLibrary:
206  case KwName: {
207  bool IsDll = Tok.K == KwLibrary; // Check before parseName.
208  std::string Name;
209  if (Error Err = parseName(&Name, &Info.ImageBase))
210  return Err;
211 
212  Info.ImportName = Name;
213 
214  // Set the output file, but don't override /out if it was already passed.
215  if (Info.OutputFile.empty()) {
216  Info.OutputFile = Name;
217  // Append the appropriate file extension if not already present.
218  if (!sys::path::has_extension(Name))
219  Info.OutputFile += IsDll ? ".dll" : ".exe";
220  }
221 
222  return Error::success();
223  }
224  case KwVersion:
225  return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
226  default:
227  return createError("unknown directive: " + Tok.Value);
228  }
229  }
230 
    // Parses one entry of an EXPORTS section and appends it to Info.Exports.
    // On entry Tok is the identifier that names the export (the caller has
    // already read it). Returns an error only when "name =" is not followed
    // by an identifier.
231  Error parseExport() {
233  E.Name = Tok.Value;
234  read();
     // "ext = internal": the first identifier becomes the external name and
     // the identifier after '=' becomes the export's Name.
235  if (Tok.K == Equal) {
236  read();
237  if (Tok.K != Identifier)
238  return createError("identifier expected, but got " + Tok.Value);
239  E.ExtName = E.Name;
240  E.Name = Tok.Value;
241  } else {
242  unget();
243  }
244 
     // Names that are not already decorated get a leading underscore.
246  if (!isDecorated(E.Name, MingwDef))
247  E.Name = (std::string("_").append(E.Name));
248  if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
249  E.ExtName = (std::string("_").append(E.ExtName));
250  }
251 
     // Consume any modifiers that follow the name: an ordinal (optionally
     // followed by NONAME), DATA, CONSTANT, PRIVATE, or "== target". The
     // first token that is none of these ends the entry.
252  for (;;) {
253  read();
     // NOTE(review): Tok.Value[0] presumes a non-empty identifier, but the
     // lexer produces an Identifier with empty text for a quoted empty
     // string ("") - confirm whether this needs a guard.
254  if (Tok.K == Identifier && Tok.Value[0] == '@') {
255  if (Tok.Value == "@") {
256  // "foo @ 10"
257  read();
     // NOTE(review): the parse result is ignored here, so a non-numeric
     // token after a lone "@" is consumed without reporting an error.
258  Tok.Value.getAsInteger(10, E.Ordinal);
259  } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
260  // "foo \n @bar" - Not an ordinal modifier at all, but the next
261  // export (fastcall decorated) - complete the current one.
262  unget();
263  Info.Exports.push_back(E);
264  return Error::success();
265  }
266  // "foo @10"
     // An ordinal may be followed by NONAME; any other token is pushed back.
267  read();
268  if (Tok.K == KwNoname) {
269  E.Noname = true;
270  } else {
271  unget();
272  }
273  continue;
274  }
275  if (Tok.K == KwData) {
276  E.Data = true;
277  continue;
278  }
279  if (Tok.K == KwConstant) {
280  E.Constant = true;
281  continue;
282  }
283  if (Tok.K == KwPrivate) {
284  E.Private = true;
285  continue;
286  }
     // "== target": record the alias target, adding a leading underscore on
     // I386 when the target name is not already decorated.
287  if (Tok.K == EqualEqual) {
288  read();
289  E.AliasTarget = Tok.Value;
290  if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
291  E.AliasTarget = std::string("_").append(E.AliasTarget);
292  continue;
293  }
     // Not a modifier: hand the token back and finish this export.
294  unget();
295  Info.Exports.push_back(E);
296  return Error::success();
297  }
298  }
299 
300  // HEAPSIZE/STACKSIZE reserve[,commit]
301  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
302  if (Error Err = readAsInt(Reserve))
303  return Err;
304  read();
305  if (Tok.K != Comma) {
306  unget();
307  Commit = nullptr;
308  return Error::success();
309  }
310  if (Error Err = readAsInt(Commit))
311  return Err;
312  return Error::success();
313  }
314 
315  // NAME outputPath [BASE=address]
316  Error parseName(std::string *Out, uint64_t *Baseaddr) {
317  read();
318  if (Tok.K == Identifier) {
319  *Out = Tok.Value;
320  } else {
321  *Out = "";
322  unget();
323  return Error::success();
324  }
325  read();
326  if (Tok.K == KwBase) {
327  if (Error Err = expect(Equal, "'=' expected"))
328  return Err;
329  if (Error Err = readAsInt(Baseaddr))
330  return Err;
331  } else {
332  unget();
333  *Baseaddr = 0;
334  }
335  return Error::success();
336  }
337 
338  // VERSION major[.minor]
339  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
340  read();
341  if (Tok.K != Identifier)
342  return createError("identifier expected, but got " + Tok.Value);
343  StringRef V1, V2;
344  std::tie(V1, V2) = Tok.Value.split('.');
345  if (V1.getAsInteger(10, *Major))
346  return createError("integer expected, but got " + Tok.Value);
347  if (V2.empty())
348  *Minor = 0;
349  else if (V2.getAsInteger(10, *Minor))
350  return createError("integer expected, but got " + Tok.Value);
351  return Error::success();
352  }
353 
354  Lexer Lex;
355  Token Tok;
356  std::vector<Token> Stack;
359  bool MingwDef;
360 };
361 
364  bool MingwDef) {
     // Run a one-shot Parser over the buffer's text and return its result.
365  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
366 }
367 
368 } // namespace object
369 } // namespace llvm
This class represents lattice values for constants.
Definition: AllocatorList.h:24
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:448
bool has_extension(const Twine &path, Style style=Style::native)
Has extension?
Definition: Path.cpp:681
static bool isDecorated(StringRef Sym, bool MingwDef)
Parser(StringRef S, MachineTypes M, bool B)
Expected< COFFModuleDefinition > parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine, bool MingwDef=false)
MachineTypes
Definition: COFF.h:94
amdgpu Simplify well known AMD library false Value Value const Twine & Name
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
StringRef getBuffer() const
Definition: MemoryBuffer.h:273
support::ulittle32_t Word
Definition: IRSymtab.h:51
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
static Error createError(const Twine &Err)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Expected< Value > parse(llvm::StringRef JSON)
Parses the provided JSON source, or returns a ParseError.
Definition: JSON.cpp:511
Token(Kind T=Unknown, StringRef S="")
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
static ErrorSuccess success()
Create a success value.
Definition: Error.h:327
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:727
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:70
Expected< COFFModuleDefinition > parse()
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:66
#define I(x, y, z)
Definition: MD5.cpp:58
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:18
static Version parseVersion(StringRef Name)
LLVM Value Representation.
Definition: Value.h:73
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49