LLVM  8.0.1
GSIStreamBuilder.cpp
Go to the documentation of this file.
1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 
12 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/Support/xxhash.h"
25 #include <algorithm>
26 #include <vector>
27 
28 using namespace llvm;
29 using namespace llvm::msf;
30 using namespace llvm::pdb;
31 using namespace llvm::codeview;
32 
34  struct UdtDenseMapInfo {
35  static inline CVSymbol getEmptyKey() {
36  static CVSymbol Empty;
37  return Empty;
38  }
39  static inline CVSymbol getTombstoneKey() {
40  static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
42  return Tombstone;
43  }
44  static unsigned getHashValue(const CVSymbol &Val) {
45  return xxHash64(Val.RecordData);
46  }
47  static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
48  return LHS.RecordData == RHS.RecordData;
49  }
50  };
51 
52  std::vector<CVSymbol> Records;
55  std::vector<PSHashRecord> HashRecords;
56  std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
57  std::vector<support::ulittle32_t> HashBuckets;
58 
59  uint32_t calculateSerializedLength() const;
60  uint32_t calculateRecordByteSize() const;
61  Error commit(BinaryStreamWriter &Writer);
62  void finalizeBuckets(uint32_t RecordZeroOffset);
63 
64  template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
65  T Copy(Symbol);
66  addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
67  CodeViewContainer::Pdb));
68  }
69  void addSymbol(const CVSymbol &Symbol) {
70  if (Symbol.kind() == S_UDT) {
71  auto Iter = UdtHashes.insert(Symbol);
72  if (!Iter.second)
73  return;
74  }
75 
76  Records.push_back(Symbol);
77  }
78 };
79 
80 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
81  uint32_t Size = sizeof(GSIHashHeader);
82  Size += HashRecords.size() * sizeof(PSHashRecord);
83  Size += HashBitmap.size() * sizeof(uint32_t);
84  Size += HashBuckets.size() * sizeof(uint32_t);
85  return Size;
86 }
87 
88 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
89  uint32_t Size = 0;
90  for (const auto &Sym : Records)
91  Size += Sym.length();
92  return Size;
93 }
94 
95 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
96  GSIHashHeader Header;
97  Header.VerSignature = GSIHashHeader::HdrSignature;
98  Header.VerHdr = GSIHashHeader::HdrVersion;
99  Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
100  Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
101 
102  if (auto EC = Writer.writeObject(Header))
103  return EC;
104 
105  if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
106  return EC;
107  if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
108  return EC;
109  if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
110  return EC;
111  return Error::success();
112 }
113 
114 static bool isAsciiString(StringRef S) {
115  return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
116 }
117 
118 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
119 static bool gsiRecordLess(StringRef S1, StringRef S2) {
120  size_t LS = S1.size();
121  size_t RS = S2.size();
122  // Shorter strings always compare less than longer strings.
123  if (LS != RS)
124  return LS < RS;
125 
126  // If either string contains non ascii characters, memcmp them.
127  if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
128  return memcmp(S1.data(), S2.data(), LS) < 0;
129 
130  // Both strings are ascii, perform a case-insenstive comparison.
131  return S1.compare_lower(S2.data()) < 0;
132 }
133 
134 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
135  std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
136  TmpBuckets;
137  uint32_t SymOffset = RecordZeroOffset;
138  for (const CVSymbol &Sym : Records) {
139  PSHashRecord HR;
140  // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
141  HR.Off = SymOffset + 1;
142  HR.CRef = 1; // Always use a refcount of 1.
143 
144  // Hash the name to figure out which bucket this goes into.
146  size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
147  TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
148  SymOffset += Sym.length();
149  }
150 
151  // Compute the three tables: the hash records in bucket and chain order, the
152  // bucket presence bitmap, and the bucket chain start offsets.
153  HashRecords.reserve(Records.size());
154  for (ulittle32_t &Word : HashBitmap)
155  Word = 0;
156  for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
157  auto &Bucket = TmpBuckets[BucketIdx];
158  if (Bucket.empty())
159  continue;
160  HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
161 
162  // Calculate what the offset of the first hash record in the chain would
163  // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
164  // each record would be 12 bytes. See HROffsetCalc in gsi.h.
165  const int SizeOfHROffsetCalc = 12;
166  ulittle32_t ChainStartOff =
167  ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
168  HashBuckets.push_back(ChainStartOff);
169 
170  // Sort each bucket by memcmp of the symbol's name. It's important that
171  // we use the same sorting algorithm as is used by the reference
172  // implementation to ensure that the search for a record within a bucket
173  // can properly early-out when it detects the record won't be found. The
174  // algorithm used here corredsponds to the function
175  // caseInsensitiveComparePchPchCchCch in the reference implementation.
176  llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
177  const std::pair<StringRef, PSHashRecord> &Right) {
178  return gsiRecordLess(Left.first, Right.first);
179  });
180 
181  for (const auto &Entry : Bucket)
182  HashRecords.push_back(Entry.second);
183  }
184 }
185 
186 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
187  : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
189 
191 
192 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
193  uint32_t Size = 0;
194  Size += sizeof(PublicsStreamHeader);
195  Size += PSH->calculateSerializedLength();
196  Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
197  // FIXME: Add thunk map and section offsets for incremental linking.
198 
199  return Size;
200 }
201 
202 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
203  return GSH->calculateSerializedLength();
204 }
205 
207  // First we write public symbol records, then we write global symbol records.
208  uint32_t PSHZero = 0;
209  uint32_t GSHZero = PSH->calculateRecordByteSize();
210 
211  PSH->finalizeBuckets(PSHZero);
212  GSH->finalizeBuckets(GSHZero);
213 
214  Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
215  if (!Idx)
216  return Idx.takeError();
217  GSH->StreamIndex = *Idx;
218  Idx = Msf.addStream(calculatePublicsHashStreamSize());
219  if (!Idx)
220  return Idx.takeError();
221  PSH->StreamIndex = *Idx;
222 
223  uint32_t RecordBytes =
224  GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
225 
226  Idx = Msf.addStream(RecordBytes);
227  if (!Idx)
228  return Idx.takeError();
229  RecordStreamIdx = *Idx;
230  return Error::success();
231 }
232 
234  const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
235  const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
236  if (LS.second->Segment != RS.second->Segment)
237  return LS.second->Segment < RS.second->Segment;
238  if (LS.second->Offset != RS.second->Offset)
239  return LS.second->Offset < RS.second->Offset;
240 
241  return LS.second->Name < RS.second->Name;
242 }
243 
244 /// Compute the address map. The address map is an array of symbol offsets
245 /// sorted so that it can be binary searched by address.
246 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
247  // Make a vector of pointers to the symbols so we can sort it by address.
248  // Also gather the symbol offsets while we're at it.
249 
250  std::vector<PublicSym32> DeserializedPublics;
251  std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
252  std::vector<uint32_t> SymOffsets;
253  DeserializedPublics.reserve(Records.size());
254  PublicsByAddr.reserve(Records.size());
255  SymOffsets.reserve(Records.size());
256 
257  uint32_t SymOffset = 0;
258  for (const CVSymbol &Sym : Records) {
259  assert(Sym.kind() == SymbolKind::S_PUB32);
260  DeserializedPublics.push_back(
261  cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
262  PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
263  SymOffsets.push_back(SymOffset);
264  SymOffset += Sym.length();
265  }
266  std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
268 
269  // Fill in the symbol offsets in the appropriate order.
270  std::vector<ulittle32_t> AddrMap;
271  AddrMap.reserve(Records.size());
272  for (auto &Sym : PublicsByAddr) {
273  ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
274  assert(Idx >= 0 && size_t(Idx) < Records.size());
275  AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
276  }
277  return AddrMap;
278 }
279 
281  return PSH->StreamIndex;
282 }
283 
285  return GSH->StreamIndex;
286 }
287 
289  PSH->addSymbol(Pub, Msf);
290 }
291 
293  GSH->addSymbol(Sym, Msf);
294 }
295 
297  GSH->addSymbol(Sym, Msf);
298 }
299 
301  GSH->addSymbol(Sym, Msf);
302 }
303 
305  GSH->addSymbol(Sym);
306 }
307 
309  ArrayRef<CVSymbol> Records) {
311  ItemStream.setItems(Records);
312  BinaryStreamRef RecordsRef(ItemStream);
313  return Writer.writeStreamRef(RecordsRef);
314 }
315 
316 Error GSIStreamBuilder::commitSymbolRecordStream(
317  WritableBinaryStreamRef Stream) {
318  BinaryStreamWriter Writer(Stream);
319 
320  // Write public symbol records first, followed by global symbol records. This
321  // must match the order that we assume in finalizeMsfLayout when computing
322  // PSHZero and GSHZero.
323  if (auto EC = writeRecords(Writer, PSH->Records))
324  return EC;
325  if (auto EC = writeRecords(Writer, GSH->Records))
326  return EC;
327 
328  return Error::success();
329 }
330 
331 Error GSIStreamBuilder::commitPublicsHashStream(
332  WritableBinaryStreamRef Stream) {
333  BinaryStreamWriter Writer(Stream);
334  PublicsStreamHeader Header;
335 
336  // FIXME: Fill these in. They are for incremental linking.
337  Header.SymHash = PSH->calculateSerializedLength();
338  Header.AddrMap = PSH->Records.size() * 4;
339  Header.NumThunks = 0;
340  Header.SizeOfThunk = 0;
341  Header.ISectThunkTable = 0;
342  memset(Header.Padding, 0, sizeof(Header.Padding));
343  Header.OffThunkTable = 0;
344  Header.NumSections = 0;
345  if (auto EC = Writer.writeObject(Header))
346  return EC;
347 
348  if (auto EC = PSH->commit(Writer))
349  return EC;
350 
351  std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
352  if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
353  return EC;
354 
355  return Error::success();
356 }
357 
358 Error GSIStreamBuilder::commitGlobalsHashStream(
359  WritableBinaryStreamRef Stream) {
360  BinaryStreamWriter Writer(Stream);
361  return GSH->commit(Writer);
362 }
363 
365  WritableBinaryStreamRef Buffer) {
366  auto GS = WritableMappedBlockStream::createIndexedStream(
367  Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
368  auto PS = WritableMappedBlockStream::createIndexedStream(
369  Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
370  auto PRS = WritableMappedBlockStream::createIndexedStream(
371  Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
372 
373  if (auto EC = commitSymbolRecordStream(*PRS))
374  return EC;
375  if (auto EC = commitGlobalsHashStream(*GS))
376  return EC;
377  if (auto EC = commitPublicsHashStream(*PS))
378  return EC;
379  return Error::success();
380 }
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:704
uint64_t CallInst * C
std::vector< CVSymbol > Records
support::ulittle32_t VerSignature
Definition: RawTypes.h:34
Kind kind() const
Definition: CVRecord.h:37
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static bool isAsciiString(StringRef S)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:250
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
uint64_t xxHash64(llvm::StringRef Data)
Definition: xxhash.cpp:71
static std::vector< ulittle32_t > computeAddrMap(ArrayRef< CVSymbol > Records)
Compute the address map.
support::ulittle32_t VerHdr
Definition: RawTypes.h:35
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:1349
Error takeError()
Take ownership of the stored error.
Definition: Error.h:553
support::ulittle32_t NumThunks
Definition: RawTypes.h:268
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
std::vector< PSHashRecord > HashRecords
uint32_t getPublicsStreamIndex() const
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:192
amdgpu Simplify well known AMD library false Value Value const Twine & Name
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
Header of the hash tables found in the globals and publics sections.
Definition: RawTypes.h:29
static StringRef getSymbolName(SymbolKind SymKind)
support::ulittle16_t ISectThunkTable
Definition: RawTypes.h:270
support::ulittle32_t CRef
Definition: RawTypes.h:43
support::ulittle32_t Word
Definition: IRSymtab.h:51
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
uint32_t getRecordStreamIdx() const
support::ulittle32_t Off
Definition: RawTypes.h:42
static Error writeRecords(BinaryStreamWriter &Writer, ArrayRef< CVSymbol > Records)
void addSymbol(const CVSymbol &Symbol)
void addSymbol(const T &Symbol, MSFBuilder &Msf)
static bool comparePubSymByAddrAndName(const std::pair< const CVSymbol *, const PublicSym32 *> &LS, const std::pair< const CVSymbol *, const PublicSym32 *> &RS)
support::ulittle32_t SymHash
Definition: RawTypes.h:266
uint32_t getGlobalsStreamIndex() const
Error writeArray(ArrayRef< T > Array)
Writes an array of objects of type T to the underlying stream, as if by using memcpy.
support::ulittle32_t NumBuckets
Definition: RawTypes.h:37
static bool gsiRecordLess(StringRef S1, StringRef S2)
llvm::DenseSet< CVSymbol, UdtDenseMapInfo > UdtHashes
uint32_t hashStringV1(StringRef Str)
Definition: Hash.cpp:21
detail::packed_endian_specific_integral< uint32_t, little, unaligned > ulittle32_t
Definition: Endian.h:271
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static unsigned getHashValue(const CVSymbol &Val)
Error writeStreamRef(BinaryStreamRef Ref)
Efficiently reads all data from Ref, and writes it to this stream.
Provides write only access to a subclass of WritableBinaryStream.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
LLVM_NODISCARD int compare_lower(StringRef RHS) const
compare_lower - Compare two strings, ignoring case.
Definition: StringRef.cpp:38
support::ulittle32_t AddrMap
Definition: RawTypes.h:267
static ErrorSuccess success()
Create a success value.
Definition: Error.h:327
std::vector< support::ulittle32_t > HashBuckets
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
support::ulittle32_t SizeOfThunk
Definition: RawTypes.h:269
Expected< uint32_t > addStream(uint32_t Size, ArrayRef< uint32_t > Blocks)
Add a stream to the MSF file with the given size, occupying the given list of blocks.
Definition: MSFBuilder.cpp:155
void addGlobalSymbol(const codeview::ProcRefSym &Sym)
support::ulittle32_t OffThunkTable
Definition: RawTypes.h:272
void addPublicSymbol(const codeview::PublicSym32 &Pub)
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:49
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:867
Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer)
uint32_t Size
Definition: Profile.cpp:47
static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BinaryItemStream represents a sequence of objects stored in some kind of external container but for w...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
void setItems(ArrayRef< T > ItemArray)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
support::ulittle32_t NumSections
Definition: RawTypes.h:273
support::ulittle32_t HrSize
Definition: RawTypes.h:36
BumpPtrAllocator & getAllocator()
Definition: MSFBuilder.h:119