LLVM  8.0.1
PDBFile.cpp
Go to the documentation of this file.
1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstdint>
32 
33 using namespace llvm;
34 using namespace llvm::codeview;
35 using namespace llvm::msf;
36 using namespace llvm::pdb;
37 
38 namespace {
39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 } // end anonymous namespace
41 
42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
44  : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
45 
// Out-of-line defaulted destructor: the compiler-generated member teardown is
// emitted from this translation unit.
46 PDBFile::~PDBFile() = default;
47 
48 StringRef PDBFile::getFilePath() const { return FilePath; }
49 
51  return sys::path::parent_path(FilePath);
52 }
53 
54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55 
57  return ContainerLayout.SB->FreeBlockMapBlock;
58 }
59 
61  return ContainerLayout.SB->NumBlocks;
62 }
63 
65  return ContainerLayout.SB->NumDirectoryBytes;
66 }
67 
69  return ContainerLayout.SB->BlockMapAddr;
70 }
71 
72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73 
75  return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76  ContainerLayout.SB->BlockSize);
77 }
78 
79 uint64_t PDBFile::getBlockMapOffset() const {
80  return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81  ContainerLayout.SB->BlockSize;
82 }
83 
85  return ContainerLayout.StreamSizes.size();
86 }
87 
89  return *std::max_element(ContainerLayout.StreamSizes.begin(),
90  ContainerLayout.StreamSizes.end());
91 }
92 
94  return ContainerLayout.StreamSizes[StreamIndex];
95 }
96 
99  return ContainerLayout.StreamMap[StreamIndex];
100 }
101 
102 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
103 
105  uint32_t NumBytes) const {
106  uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
107 
108  ArrayRef<uint8_t> Result;
109  if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
110  return std::move(EC);
111  return Result;
112 }
113 
115  ArrayRef<uint8_t> Data) const {
116  return make_error<RawError>(raw_error_code::not_writable,
117  "PDBFile is immutable");
118 }
119 
121  BinaryStreamReader Reader(*Buffer);
122 
123  // Initialize SB.
124  const msf::SuperBlock *SB = nullptr;
125  if (auto EC = Reader.readObject(SB)) {
126  consumeError(std::move(EC));
127  return make_error<RawError>(raw_error_code::corrupt_file,
128  "MSF superblock is missing");
129  }
130 
131  if (auto EC = msf::validateSuperBlock(*SB))
132  return EC;
133 
134  if (Buffer->getLength() % SB->BlockSize != 0)
135  return make_error<RawError>(raw_error_code::corrupt_file,
136  "File size is not a multiple of block size");
137  ContainerLayout.SB = SB;
138 
139  // Initialize Free Page Map.
140  ContainerLayout.FreePageMap.resize(SB->NumBlocks);
141  // The Fpm exists either at block 1 or block 2 of the MSF. However, this
142  // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143  // thusly an equal number of total blocks in the file. For a block size
144  // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145  // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
146  // the Fpm is split across the file at `getBlockSize()` intervals. As a
147  // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148  // for any non-negative integer k is an Fpm block. In theory, we only really
149  // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150  // current versions of the MSF format already expect the Fpm to be arranged
151  // at getBlockSize() intervals, so we have to be compatible.
152  // See the function fpmPn() for more information:
153  // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
154  auto FpmStream =
155  MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
156  BinaryStreamReader FpmReader(*FpmStream);
157  ArrayRef<uint8_t> FpmBytes;
158  if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
159  return EC;
160  uint32_t BlocksRemaining = getBlockCount();
161  uint32_t BI = 0;
162  for (auto Byte : FpmBytes) {
163  uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
164  for (uint32_t I = 0; I < BlocksThisByte; ++I) {
165  if (Byte & (1 << I))
166  ContainerLayout.FreePageMap[BI] = true;
167  --BlocksRemaining;
168  ++BI;
169  }
170  }
171 
172  Reader.setOffset(getBlockMapOffset());
173  if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
175  return EC;
176 
177  return Error::success();
178 }
179 
181  assert(ContainerLayout.SB);
182  if (DirectoryStream)
183  return Error::success();
184 
185  uint32_t NumStreams = 0;
186 
187  // Normally you can't use a MappedBlockStream without having fully parsed the
188  // PDB file, because it accesses the directory and various other things, which
189  // is exactly what we are attempting to parse. By specifying a custom
190  // subclass of IPDBStreamData which only accesses the fields that have already
191  // been parsed, we can avoid this and reuse MappedBlockStream.
192  auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
193  Allocator);
194  BinaryStreamReader Reader(*DS);
195  if (auto EC = Reader.readInteger(NumStreams))
196  return EC;
197 
198  if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
199  return EC;
200  for (uint32_t I = 0; I < NumStreams; ++I) {
201  uint32_t StreamSize = getStreamByteSize(I);
202  // FIXME: What does StreamSize ~0U mean?
203  uint64_t NumExpectedStreamBlocks =
204  StreamSize == UINT32_MAX
205  ? 0
206  : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
207 
208  // For convenience, we store the block array contiguously. This is because
209  // if someone calls setStreamMap(), it is more convenient to be able to call
210  // it with an ArrayRef instead of setting up a StreamRef. Since the
211  // DirectoryStream is cached in the class and thus lives for the life of the
212  // class, we can be guaranteed that readArray() will return a stable
213  // reference, even if it has to allocate from its internal pool.
215  if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
216  return EC;
217  for (uint32_t Block : Blocks) {
218  uint64_t BlockEndOffset =
219  (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
220  if (BlockEndOffset > getFileSize())
221  return make_error<RawError>(raw_error_code::corrupt_file,
222  "Stream block map is corrupt.");
223  }
224  ContainerLayout.StreamMap.push_back(Blocks);
225  }
226 
227  // We should have read exactly SB->NumDirectoryBytes bytes.
228  assert(Reader.bytesRemaining() == 0);
229  DirectoryStream = std::move(DS);
230  return Error::success();
231 }
232 
234  return ContainerLayout.DirectoryBlocks;
235 }
236 
237 std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
238  if (SN == kInvalidStreamIndex)
239  return nullptr;
240  return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
241  Allocator);
242 }
243 
245  MSFStreamLayout Result;
246  auto Blocks = getStreamBlockList(StreamIdx);
247  Result.Blocks.assign(Blocks.begin(), Blocks.end());
248  Result.Length = getStreamByteSize(StreamIdx);
249  return Result;
250 }
251 
253  return msf::getFpmStreamLayout(ContainerLayout);
254 }
255 
257  if (!Globals) {
258  auto DbiS = getPDBDbiStream();
259  if (!DbiS)
260  return DbiS.takeError();
261 
262  auto GlobalS = safelyCreateIndexedStream(
263  ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
264  if (!GlobalS)
265  return GlobalS.takeError();
266  auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
267  if (auto EC = TempGlobals->reload())
268  return std::move(EC);
269  Globals = std::move(TempGlobals);
270  }
271  return *Globals;
272 }
273 
275  if (!Info) {
276  auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
277  if (!InfoS)
278  return InfoS.takeError();
279  auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
280  if (auto EC = TempInfo->reload())
281  return std::move(EC);
282  Info = std::move(TempInfo);
283  }
284  return *Info;
285 }
286 
288  if (!Dbi) {
289  auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
290  if (!DbiS)
291  return DbiS.takeError();
292  auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
293  if (auto EC = TempDbi->reload(this))
294  return std::move(EC);
295  Dbi = std::move(TempDbi);
296  }
297  return *Dbi;
298 }
299 
301  if (!Tpi) {
302  auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
303  if (!TpiS)
304  return TpiS.takeError();
305  auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
306  if (auto EC = TempTpi->reload())
307  return std::move(EC);
308  Tpi = std::move(TempTpi);
309  }
310  return *Tpi;
311 }
312 
314  if (!Ipi) {
315  if (!hasPDBIpiStream())
316  return make_error<RawError>(raw_error_code::no_stream);
317 
318  auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
319  if (!IpiS)
320  return IpiS.takeError();
321  auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
322  if (auto EC = TempIpi->reload())
323  return std::move(EC);
324  Ipi = std::move(TempIpi);
325  }
326  return *Ipi;
327 }
328 
330  if (!Publics) {
331  auto DbiS = getPDBDbiStream();
332  if (!DbiS)
333  return DbiS.takeError();
334 
335  auto PublicS = safelyCreateIndexedStream(
336  ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
337  if (!PublicS)
338  return PublicS.takeError();
339  auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
340  if (auto EC = TempPublics->reload())
341  return std::move(EC);
342  Publics = std::move(TempPublics);
343  }
344  return *Publics;
345 }
346 
348  if (!Symbols) {
349  auto DbiS = getPDBDbiStream();
350  if (!DbiS)
351  return DbiS.takeError();
352 
353  uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
354  auto SymbolS =
355  safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
356  if (!SymbolS)
357  return SymbolS.takeError();
358 
359  auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
360  if (auto EC = TempSymbols->reload())
361  return std::move(EC);
362  Symbols = std::move(TempSymbols);
363  }
364  return *Symbols;
365 }
366 
368  if (!Strings) {
369  auto IS = getPDBInfoStream();
370  if (!IS)
371  return IS.takeError();
372 
373  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
374  if (!ExpectedNSI)
375  return ExpectedNSI.takeError();
376  uint32_t NameStreamIndex = *ExpectedNSI;
377 
378  auto NS =
379  safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
380  if (!NS)
381  return NS.takeError();
382 
383  auto N = llvm::make_unique<PDBStringTable>();
384  BinaryStreamReader Reader(**NS);
385  if (auto EC = N->reload(Reader))
386  return std::move(EC);
387  assert(Reader.bytesRemaining() == 0);
388  StringTableStream = std::move(*NS);
389  Strings = std::move(N);
390  }
391  return *Strings;
392 }
393 
395  auto DbiS = getPDBDbiStream();
396  if (!DbiS)
397  return 0;
398  PDB_Machine Machine = DbiS->getMachineType();
399  if (Machine == PDB_Machine::Amd64)
400  return 8;
401  return 4;
402 }
403 
405  return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
406 }
407 
409  auto DbiS = getPDBDbiStream();
410  if (!DbiS) {
411  consumeError(DbiS.takeError());
412  return false;
413  }
414 
415  return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
416 }
417 
419 
421  if (!hasPDBInfoStream())
422  return false;
423 
424  if (StreamIPI >= getNumStreams())
425  return false;
426 
427  auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
428  return InfoStream.containsIdStream();
429 }
430 
432  auto DbiS = getPDBDbiStream();
433  if (!DbiS) {
434  consumeError(DbiS.takeError());
435  return false;
436  }
437  return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
438 }
439 
441  auto DbiS = getPDBDbiStream();
442  if (!DbiS)
443  return false;
444  return DbiS->getSymRecordStreamIndex() < getNumStreams();
445 }
446 
448 
450  auto IS = getPDBInfoStream();
451  if (!IS)
452  return false;
453  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
454  if (!ExpectedNSI) {
455  consumeError(ExpectedNSI.takeError());
456  return false;
457  }
458  assert(*ExpectedNSI < getNumStreams());
459  return true;
460 }
461 
462 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
463 /// stream with that index actually exists. If it does not, the return value
464 /// will have a RawError with code raw_error_code::no_stream. Else, the return
465 /// value will contain the stream returned by createIndexedStream().
467 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
468  BinaryStreamRef MsfData,
469  uint32_t StreamIndex) const {
470  if (StreamIndex >= getNumStreams())
471  return make_error<RawError>(raw_error_code::no_stream);
472  return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
473  Allocator);
474 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:372
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:704
Expected< PDBStringTable & > getStringTable()
Definition: PDBFile.cpp:367
bool hasPDBSymbolStream()
Definition: PDBFile.cpp:440
bool hasPDBStringTable()
Definition: PDBFile.cpp:449
ArrayRef< support::ulittle32_t > getDirectoryBlockArray() const
Definition: PDBFile.cpp:233
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Error validateSuperBlock(const SuperBlock &SB)
Definition: MSFCommon.cpp:20
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
uint32_t getNumDirectoryBlocks() const
Definition: PDBFile.cpp:74
Error parseStreamData()
Definition: PDBFile.cpp:180
uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize)
Definition: MSFCommon.h:113
Error readObject(const T *&Dest)
Get a pointer to an object of type T from the underlying stream, as if by memcpy, and store the resul...
Expected< GlobalsStream & > getPDBGlobalsStream()
Definition: PDBFile.cpp:256
Error takeError()
Take ownership of the stored error.
Definition: Error.h:553
bool hasPDBPublicsStream()
Definition: PDBFile.cpp:431
Expected< TpiStream & > getPDBTpiStream()
Definition: PDBFile.cpp:300
Error parseFileHeaders()
Definition: PDBFile.cpp:120
uint32_t getNumStreams() const override
Definition: PDBFile.cpp:84
Error setBlockData(uint32_t BlockIndex, uint32_t Offset, ArrayRef< uint8_t > Data) const override
Definition: PDBFile.cpp:114
Definition: BitVector.h:938
uint64_t getBlockMapOffset() const
Definition: PDBFile.cpp:79
Describes the layout of a stream in an MSF layout.
Definition: MSFCommon.h:78
MSFStreamLayout getFpmStreamLayout(const MSFLayout &Msf, bool IncludeUnusedFpmData=false, bool AltFpm=false)
Determine the layout of the FPM stream, given the MSF layout.
Definition: MSFCommon.cpp:63
bool hasPDBTpiStream() const
Definition: PDBFile.cpp:447
uint32_t getStreamByteSize(uint32_t StreamIndex) const override
Definition: PDBFile.cpp:93
msf::MSFStreamLayout getFpmStreamLayout() const
Definition: PDBFile.cpp:252
uint32_t getBlockSize() const override
Definition: PDBFile.cpp:54
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
uint32_t getUnknown1() const
Definition: PDBFile.cpp:72
uint32_t getPointerSize()
Definition: PDBFile.cpp:394
std::vector< support::ulittle32_t > Blocks
Definition: MSFCommon.h:81
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
support::ulittle32_t BlockSize
Definition: MSFCommon.h:37
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:363
bool hasPDBDbiStream() const
Definition: PDBFile.cpp:404
uint32_t getMaxStreamSize() const
Definition: PDBFile.cpp:88
const uint16_t kInvalidStreamIndex
Definition: RawConstants.h:20
support::ulittle32_t BlockMapAddr
Definition: MSFCommon.h:49
~PDBFile() override
uint32_t getNumDirectoryBytes() const
Definition: PDBFile.cpp:64
std::unique_ptr< msf::MappedBlockStream > createIndexedStream(uint16_t SN)
Definition: PDBFile.cpp:237
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:141
Expected< SymbolStream & > getPDBSymbolStream()
Definition: PDBFile.cpp:347
Expected< TpiStream & > getPDBIpiStream()
Definition: PDBFile.cpp:313
StringRef getFileDirectory() const
Definition: PDBFile.cpp:50
uint32_t getBlockMapIndex() const
Definition: PDBFile.cpp:68
StringRef parent_path(StringRef path, Style style=Style::native)
Get parent path.
Definition: Path.cpp:491
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:982
Expected< DbiStream & > getPDBDbiStream()
Definition: PDBFile.cpp:287
support::ulittle32_t Unknown1
Definition: MSFCommon.h:47
Basic Register Allocator
uint32_t getFreeBlockMapBlock() const
Definition: PDBFile.cpp:56
void setOffset(uint32_t Off)
static ErrorSuccess success()
Create a success value.
Definition: Error.h:327
ArrayRef< support::ulittle32_t > DirectoryBlocks
Definition: MSFCommon.h:67
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
bool hasPDBIpiStream() const
Definition: PDBFile.cpp:420
uint32_t getFileSize() const
Definition: PDBFile.cpp:102
StringRef getFilePath() const
Definition: PDBFile.cpp:48
Expected< PublicsStream & > getPDBPublicsStream()
Definition: PDBFile.cpp:329
BitVector FreePageMap
Definition: MSFCommon.h:66
uint64_t bytesToBlocks(uint64_t NumBytes, uint64_t BlockSize)
Definition: MSFCommon.h:109
Error readBytes(ArrayRef< uint8_t > &Buffer, uint32_t Size)
Read Size bytes from the underlying stream at the current offset and set Buffer to the resulting ...
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
std::vector< ArrayRef< support::ulittle32_t > > StreamMap
Definition: MSFCommon.h:69
bool hasPDBGlobalsStream()
Definition: PDBFile.cpp:408
uint32_t bytesRemaining() const
Expected< ArrayRef< uint8_t > > getBlockData(uint32_t BlockIndex, uint32_t NumBytes) const override
Definition: PDBFile.cpp:104
ArrayRef< support::ulittle32_t > StreamSizes
Definition: MSFCommon.h:68
Expected< InfoStream & > getPDBInfoStream()
Definition: PDBFile.cpp:274
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
support::ulittle32_t FreeBlockMapBlock
Definition: MSFCommon.h:39
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
bool containsIdStream() const
Definition: InfoStream.cpp:100
Provides read only access to a subclass of BinaryStream.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
ArrayRef< support::ulittle32_t > getStreamBlockList(uint32_t StreamIndex) const override
Definition: PDBFile.cpp:98
const SuperBlock * SB
Definition: MSFCommon.h:65
uint32_t getBlockCount() const override
Definition: PDBFile.cpp:60
support::ulittle32_t NumDirectoryBytes
Definition: MSFCommon.h:45
support::ulittle32_t NumBlocks
Definition: MSFCommon.h:43
bool hasPDBInfoStream() const
Definition: PDBFile.cpp:418
Error readArray(ArrayRef< T > &Array, uint32_t NumElements)
Get a reference to a NumElements element array of objects of type T from the underlying stream as if ...
msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const
Definition: PDBFile.cpp:244