LLVM  8.0.1
Magic.cpp
Go to the documentation of this file.
1 //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 
12 #include "llvm/BinaryFormat/COFF.h"
13 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/Support/Endian.h"
18 
19 #if !defined(_MSC_VER) && !defined(__MINGW32__)
20 #include <unistd.h>
21 #else
22 #include <io.h>
23 #endif
24 
25 using namespace llvm;
26 using namespace llvm::support::endian;
27 using namespace llvm::sys::fs;
28 
29 template <size_t N>
30 static bool startswith(StringRef Magic, const char (&S)[N]) {
31  return Magic.startswith(StringRef(S, N - 1));
32 }
33 
34 /// Identify the file type from the leading bytes held in Magic.
 ///
 /// Buffers shorter than 4 bytes are reported as file_magic::unknown. Otherwise
 /// the first byte selects which signature checks are attempted, and control
 /// reaches the final `return file_magic::unknown` when a case ends in `break`.
 ///
 /// NOTE(review): the embedded listing numbers skip lines inside this function
 /// (e.g. 34 -> 36, 42 -> 44, 87 -> 89), so the signature and a number of
 /// statements appear to be missing from this copy -- restore them from the
 /// upstream file before relying on this text.
36  if (Magic.size() < 4)
37  return file_magic::unknown;
 // The cast yields a value in 0..255, so labels such as 0xDE or 0xCA can match
 // regardless of whether plain char is signed.
38  switch ((unsigned char)Magic[0]) {
39  case 0x00: {
40  // COFF bigobj, CL.exe's LTO object file, or short import library file
41  if (startswith(Magic, "\0\0\xFF\xFF")) {
42  size_t MinSize =
44  if (Magic.size() < MinSize)
46 
 // Start points at the UUID field of a COFF::BigObjHeader laid over the buffer.
47  const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
48  if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
 // NOTE(review): ClGlObjMagic is compared using sizeof(COFF::BigObjMagic);
 // presumably both arrays have the same length -- confirm, or use
 // sizeof(COFF::ClGlObjMagic).
50  if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
53  }
54  // Windows resource file
55  if (Magic.size() >= sizeof(COFF::WinResMagic) &&
56  memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
58  // 0x0000 = COFF unknown machine type
59  if (Magic[1] == 0)
61  if (startswith(Magic, "\0asm"))
63  break;
64  }
65  case 0xDE: // 0x0B17C0DE = BC wrapper
66  if (startswith(Magic, "\xDE\xC0\x17\x0B"))
67  return file_magic::bitcode;
68  break;
69  case 'B':
70  if (startswith(Magic, "BC\xC0\xDE"))
71  return file_magic::bitcode;
72  break;
73  case '!':
74  if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
75  return file_magic::archive;
76  break;
77 
78  case '\177':
 // At least 18 bytes are required because bytes 16 and 17 are read below.
79  if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
 // Byte 5 == 2 means the two-byte field at offsets 16-17 is stored
 // most-significant byte first; otherwise the byte order is swapped.
80  bool Data2MSB = Magic[5] == 2;
81  unsigned high = Data2MSB ? 16 : 17;
82  unsigned low = Data2MSB ? 17 : 16;
 // Only values whose high byte is zero are classified by the low byte.
83  if (Magic[high] == 0) {
84  switch (Magic[low]) {
85  default:
86  return file_magic::elf;
87  case 1:
89  case 2:
91  case 3:
93  case 4:
94  return file_magic::elf_core;
95  }
96  }
97  // It's still some type of ELF file.
98  return file_magic::elf;
99  }
100  break;
101 
102  case 0xCA:
103  if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
104  startswith(Magic, "\xCA\xFE\xBA\xBF")) {
105  // This is complicated by an overlap with Java class files.
106  // See the Mach-O section in /usr/share/file/magic for details.
107  if (Magic.size() >= 8 && Magic[7] < 43)
109  }
110  break;
111 
112  // The two magic numbers for mach-o are:
113  // 0xfeedface - 32-bit mach-o
114  // 0xfeedfacf - 64-bit mach-o
115  case 0xFE:
116  case 0xCE:
117  case 0xCF: {
 // type stays 0 (and falls to the default case below) unless a full header
 // is available for one of the four recognised byte sequences.
118  uint16_t type = 0;
119  if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
120  startswith(Magic, "\xFE\xED\xFA\xCF")) {
121  /* Native endian */
122  size_t MinSize;
123  if (Magic[3] == char(0xCE))
124  MinSize = sizeof(MachO::mach_header);
125  else
126  MinSize = sizeof(MachO::mach_header_64);
127  if (Magic.size() >= MinSize)
 // NOTE(review): the shift of 12 breaks the 24/16/8/0 pattern and was
 // probably meant to be 16. The result is stored in a uint16_t, so only the
 // low 16 bits survive, but a non-zero Magic[13] would leak into bits 12-15.
 // The operands also look like plain char and may sign-extend -- confirm.
128  type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
129  } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
130  startswith(Magic, "\xCF\xFA\xED\xFE")) {
131  /* Reverse endian */
132  size_t MinSize;
133  if (Magic[0] == char(0xCE))
134  MinSize = sizeof(MachO::mach_header);
135  else
136  MinSize = sizeof(MachO::mach_header_64);
137  if (Magic.size() >= MinSize)
 // NOTE(review): same suspicious shift of 12 (here applied to Magic[14]) as
 // in the native-endian branch -- confirm whether 16 was intended.
138  type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
139  }
140  switch (type) {
141  default:
142  break;
143  case 1:
145  case 2:
147  case 3:
149  case 4:
150  return file_magic::macho_core;
151  case 5:
153  case 6:
155  case 7:
157  case 8:
159  case 9:
161  case 10:
163  case 11:
165  }
166  break;
167  }
168  case 0xF0: // PowerPC Windows
169  case 0x83: // Alpha 32-bit
170  case 0x84: // Alpha 64-bit
171  case 0x66: // MIPS R4000 Windows
172  case 0x50: // mc68K
173  case 0x4c: // 80386 Windows
174  case 0xc4: // ARMNT Windows
 // NOTE(review): the listing jumps from 175 to 178 here, so whatever followed
 // this `if` is missing; it is unclear from this copy whether control is meant
 // to fall through into the next group of case labels -- confirm upstream.
175  if (Magic[1] == 0x01)
178 
179  case 0x90: // PA-RISC Windows
180  case 0x68: // mc68K Windows
181  if (Magic[1] == 0x02)
183  break;
184 
185  case 'M': // Possible MS-DOS stub on Windows PE file or MSF/PDB file.
 // The size check guarantees the four bytes read at offset 0x3c are in range.
186  if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
 // The little-endian 32-bit value stored at 0x3c is used as the offset at
 // which the next signature is looked for (presumably the PE magic).
187  uint32_t off = read32le(Magic.data() + 0x3c);
188  // PE/COFF file, either EXE or DLL.
189  if (Magic.substr(off).startswith(
192  }
193  if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
194  return file_magic::pdb;
195  break;
196 
197  case 0x64: // x86-64 or ARM64 Windows.
198  if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
200  break;
201 
202  default:
203  break;
204  }
205  return file_magic::unknown;
206 }
207 
208 std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
209  auto FileOrError = MemoryBuffer::getFile(Path, -1LL, false);
210  if (!FileOrError)
211  return FileOrError.getError();
212 
213  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
214  Result = identify_magic(FileBuffer->getBuffer());
215 
216  return std::error_code();
217 }
Mach-O Object file.
Definition: Magic.h:31
ELF core image.
Definition: Magic.h:30
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Microsoft cl.exe's intermediate code file.
Definition: Magic.h:43
ELF dynamically linked shared lib.
Definition: Magic.h:29
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
Windows compiled resource file (.res)
Definition: Magic.h:47
Mach-O Bundle file.
Definition: Magic.h:38
Mach-O Preloaded Executable.
Definition: Magic.h:35
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
static const char BigObjMagic[]
Definition: COFF.h:39
Bitcode file.
Definition: Magic.h:24
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:35
The Mach-O dynamic linker.
Definition: Magic.h:37
ELF Relocatable object file.
Definition: Magic.h:27
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:30
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
COFF import library.
Definition: Magic.h:45
Mach-O universal binary.
Definition: Magic.h:42
static const char ClGlObjMagic[]
Definition: COFF.h:44
Mach-O Core File.
Definition: Magic.h:34
Mach-O dSYM companion file.
Definition: Magic.h:40
#define offsetof(TYPE, MEMBER)
PECOFF executable file.
Definition: Magic.h:46
COFF object file.
Definition: Magic.h:44
static const char *const Magic
Definition: Archive.cpp:42
Mach-O kext bundle file.
Definition: Magic.h:41
ar style archive file
Definition: Magic.h:25
ELF Unknown type.
Definition: Magic.h:26
ELF Executable image.
Definition: Magic.h:28
WebAssembly Object file.
Definition: Magic.h:48
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:867
#define N
uint32_t read32le(const void *P)
Definition: Endian.h:369
static const char PEMagic[]
Definition: COFF.h:37
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
static const char WinResMagic[]
Definition: COFF.h:50
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Mach-O Executable.
Definition: Magic.h:32
Unrecognized file.
Definition: Magic.h:23
Windows PDB debug info file.
Definition: Magic.h:49
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:21