LLVM  8.0.1
TypeStreamMerger.cpp
Go to the documentation of this file.
1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "llvm/ADT/SmallString.h"
12 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Support/Error.h"
20 
21 using namespace llvm;
22 using namespace llvm::codeview;
23 
24 static inline size_t slotForIndex(TypeIndex Idx) {
25  assert(!Idx.isSimple() && "simple type indices have no slots");
27 }
28 
29 namespace {
30 
31 /// Implementation of CodeView type stream merging.
32 ///
33 /// A CodeView type stream is a series of records that reference each other
34 /// through type indices. A type index is either "simple", meaning it is less
35 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36 /// refers to a prior type record in the current stream. The type index of a
37 /// record is equal to the number of records before it in the stream plus
38 /// 0x1000.
39 ///
40 /// Type records are only allowed to use type indices smaller than their own, so
41 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in
42 /// the type graph of the source program are resolved with forward declarations
43 /// of composite types. This class implements the following type stream merging
44 /// algorithm, which relies on this DAG structure:
45 ///
46 /// - Begin with a new empty stream, and a new empty hash table that maps from
47 /// type record contents to new type index.
48 /// - For each new type stream, maintain a map from source type index to
49 /// destination type index.
50 /// - For each record, copy it and rewrite its type indices to be valid in the
51 /// destination type stream.
52 /// - If the new type record is not already present in the destination stream
53 /// hash table, append it to the destination type stream, assign it the next
54 /// type index, and update the two hash tables.
55 /// - If the type record already exists in the destination stream, discard it
56 /// and update the type index map to forward the source type index to the
57 /// existing destination type index.
58 ///
59 /// As an additional complication, type stream merging actually produces two
60 /// streams: an item (or IPI) stream and a type stream, as this is what is
61 /// actually stored in the final PDB. We choose which records go where by
62 /// looking at the record kind.
63 class TypeStreamMerger {
64 public:
65  explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66  : IndexMap(SourceToDest) {
67  // When dealing with precompiled headers objects, all data in SourceToDest
68  // belongs to the precompiled headers object, and is assumed to be already
69  // remapped to the target PDB. Any forthcoming type that will be merged in
70  // might potentially back-reference this data. We also don't want to resolve
71  // twice the types in the precompiled object.
72  CurIndex += SourceToDest.size();
73  }
74 
75  static const TypeIndex Untranslated;
76 
77  // Local hashing entry points
78  Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79  MergingTypeTableBuilder &DestTypes,
80  const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S);
82  ArrayRef<TypeIndex> TypeSourceToDest,
83  const CVTypeArray &Ids);
85  const CVTypeArray &Types);
86 
87  // Global hashing entry points
88  Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
89  GlobalTypeTableBuilder &DestTypes,
90  const CVTypeArray &IdsAndTypes,
94  ArrayRef<TypeIndex> TypeSourceToDest,
95  const CVTypeArray &Ids,
100 
101 private:
102  Error doit(const CVTypeArray &Types);
103 
104  Error remapAllTypes(const CVTypeArray &Types);
105 
106  Error remapType(const CVType &Type);
107 
108  void addMapping(TypeIndex Idx);
109 
110  inline bool remapTypeIndex(TypeIndex &Idx) {
111  // If we're mapping a pure index stream, then IndexMap only contains
112  // mappings from OldIdStream -> NewIdStream, in which case we will need to
113  // use the special mapping from OldTypeStream -> NewTypeStream which was
114  // computed externally. Regardless, we use this special map if and only if
115  // we are doing an id-only mapping.
116  if (!hasTypeStream())
117  return remapIndex(Idx, TypeLookup);
118 
119  assert(TypeLookup.empty());
120  return remapIndex(Idx, IndexMap);
121  }
122  inline bool remapItemIndex(TypeIndex &Idx) {
123  assert(hasIdStream());
124  return remapIndex(Idx, IndexMap);
125  }
126 
127  bool hasTypeStream() const {
128  return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
129  }
130 
131  bool hasIdStream() const {
132  return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
133  }
134 
135  ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
136  MutableArrayRef<uint8_t> Storage);
137 
138  inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
139  if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
140  return true;
141 
142  return remapIndexFallback(Idx, Map);
143  }
144 
145  inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
146  // Simple types are unchanged.
147  if (Idx.isSimple())
148  return true;
149 
150  // Check if this type index refers to a record we've already translated
151  // successfully. If it refers to a type later in the stream or a record we
152  // had to defer, defer it until later pass.
153  unsigned MapPos = slotForIndex(Idx);
154  if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
155  return false;
156 
157  Idx = Map[MapPos];
158  return true;
159  }
160 
161  bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
162 
163  Error errorCorruptRecord() const {
164  return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
165  }
166 
167  Expected<bool> shouldRemapType(const CVType &Type);
168 
169  Optional<Error> LastError;
170 
171  bool UseGlobalHashes = false;
172 
173  bool IsSecondPass = false;
174 
175  unsigned NumBadIndices = 0;
176 
178 
179  MergingTypeTableBuilder *DestIdStream = nullptr;
180  MergingTypeTableBuilder *DestTypeStream = nullptr;
181 
182  GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
183  GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
184 
185  ArrayRef<GloballyHashedType> GlobalHashes;
186 
187  // If we're only mapping id records, this array contains the mapping for
188  // type records.
189  ArrayRef<TypeIndex> TypeLookup;
190 
191  /// Map from source type index to destination type index. Indexed by source
192  /// type index minus 0x1000.
193  SmallVectorImpl<TypeIndex> &IndexMap;
194 
195  /// Temporary storage that we use to copy a record's data while re-writing
196  /// its type indices.
197  SmallVector<uint8_t, 256> RemapStorage;
198 
199  Optional<uint32_t> PCHSignature;
200 };
201 
202 } // end anonymous namespace
203 
204 const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
205 
206 static bool isIdRecord(TypeLeafKind K) {
207  switch (K) {
208  case TypeLeafKind::LF_FUNC_ID:
209  case TypeLeafKind::LF_MFUNC_ID:
210  case TypeLeafKind::LF_STRING_ID:
211  case TypeLeafKind::LF_SUBSTR_LIST:
212  case TypeLeafKind::LF_BUILDINFO:
213  case TypeLeafKind::LF_UDT_SRC_LINE:
214  case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
215  return true;
216  default:
217  return false;
218  }
219 }
220 
221 void TypeStreamMerger::addMapping(TypeIndex Idx) {
222  if (!IsSecondPass) {
223  assert(IndexMap.size() == slotForIndex(CurIndex) &&
224  "visitKnownRecord should add one index map entry");
225  IndexMap.push_back(Idx);
226  } else {
227  assert(slotForIndex(CurIndex) < IndexMap.size());
228  IndexMap[slotForIndex(CurIndex)] = Idx;
229  }
230 }
231 
232 bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
233  ArrayRef<TypeIndex> Map) {
234  size_t MapPos = slotForIndex(Idx);
235 
236  // If this is the second pass and this index isn't in the map, then it points
237  // outside the current type stream, and this is a corrupt record.
238  if (IsSecondPass && MapPos >= Map.size()) {
239  // FIXME: Print a more useful error. We can give the current record and the
240  // index that we think its pointing to.
241  if (LastError)
242  LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
243  else
244  LastError = errorCorruptRecord();
245  }
246 
247  ++NumBadIndices;
248 
249  // This type index is invalid. Remap this to "not translated by cvpack",
250  // and return failure.
251  Idx = Untranslated;
252  return false;
253 }
254 
255 // Local hashing entry points
257  const CVTypeArray &Types) {
258  DestTypeStream = &Dest;
259  UseGlobalHashes = false;
260 
261  return doit(Types);
262 }
263 
265  ArrayRef<TypeIndex> TypeSourceToDest,
266  const CVTypeArray &Ids) {
267  DestIdStream = &Dest;
268  TypeLookup = TypeSourceToDest;
269  UseGlobalHashes = false;
270 
271  return doit(Ids);
272 }
273 
274 Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
275  MergingTypeTableBuilder &DestTypes,
276  const CVTypeArray &IdsAndTypes,
277  Optional<uint32_t> &S) {
278  DestIdStream = &DestIds;
279  DestTypeStream = &DestTypes;
280  UseGlobalHashes = false;
281  auto Err = doit(IdsAndTypes);
282  S = PCHSignature;
283  return Err;
284 }
285 
286 // Global hashing entry points
288  const CVTypeArray &Types,
290  Optional<uint32_t> &S) {
291  DestGlobalTypeStream = &Dest;
292  UseGlobalHashes = true;
293  GlobalHashes = Hashes;
294  auto Err = doit(Types);
295  S = PCHSignature;
296  return Err;
297 }
298 
300  ArrayRef<TypeIndex> TypeSourceToDest,
301  const CVTypeArray &Ids,
303  DestGlobalIdStream = &Dest;
304  TypeLookup = TypeSourceToDest;
305  UseGlobalHashes = true;
306  GlobalHashes = Hashes;
307 
308  return doit(Ids);
309 }
310 
311 Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
312  GlobalTypeTableBuilder &DestTypes,
313  const CVTypeArray &IdsAndTypes,
315  Optional<uint32_t> &S) {
316  DestGlobalIdStream = &DestIds;
317  DestGlobalTypeStream = &DestTypes;
318  UseGlobalHashes = true;
319  GlobalHashes = Hashes;
320  auto Err = doit(IdsAndTypes);
321  S = PCHSignature;
322  return Err;
323 }
324 
325 Error TypeStreamMerger::doit(const CVTypeArray &Types) {
326  if (auto EC = remapAllTypes(Types))
327  return EC;
328 
329  // If we found bad indices but no other errors, try doing another pass and see
330  // if we can resolve the indices that weren't in the map on the first pass.
331  // This may require multiple passes, but we should always make progress. MASM
332  // is the only known CodeView producer that makes type streams that aren't
333  // topologically sorted. The standard library contains MASM-produced objects,
334  // so this is important to handle correctly, but we don't have to be too
335  // efficient. MASM type streams are usually very small.
336  while (!LastError && NumBadIndices > 0) {
337  unsigned BadIndicesRemaining = NumBadIndices;
338  IsSecondPass = true;
339  NumBadIndices = 0;
341 
342  if (auto EC = remapAllTypes(Types))
343  return EC;
344 
345  assert(NumBadIndices <= BadIndicesRemaining &&
346  "second pass found more bad indices");
347  if (!LastError && NumBadIndices == BadIndicesRemaining) {
348  return llvm::make_error<CodeViewError>(
349  cv_error_code::corrupt_record, "Input type graph contains cycles");
350  }
351  }
352 
353  if (LastError)
354  return std::move(*LastError);
355  return Error::success();
356 }
357 
358 Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
359  BinaryStreamRef Stream = Types.getUnderlyingStream();
360  ArrayRef<uint8_t> Buffer;
361  cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
362 
363  return forEachCodeViewRecord<CVType>(
364  Buffer, [this](const CVType &T) { return remapType(T); });
365 }
366 
367 Error TypeStreamMerger::remapType(const CVType &Type) {
368  auto R = shouldRemapType(Type);
369  if (!R)
370  return R.takeError();
371 
372  TypeIndex DestIdx = Untranslated;
373  if (*R) {
374  auto DoSerialize =
375  [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
376  return remapIndices(Type, Storage);
377  };
378  if (LLVM_LIKELY(UseGlobalHashes)) {
379  GlobalTypeTableBuilder &Dest =
380  isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
381  GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
382  DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
383  } else {
385  isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
386 
387  RemapStorage.resize(Type.RecordData.size());
388  ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
389  if (!Result.empty())
390  DestIdx = Dest.insertRecordBytes(Result);
391  }
392  }
393  addMapping(DestIdx);
394 
395  ++CurIndex;
396  assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
397  "visitKnownRecord should add one index map entry");
398  return Error::success();
399 }
400 
402 TypeStreamMerger::remapIndices(const CVType &OriginalType,
403  MutableArrayRef<uint8_t> Storage) {
405  discoverTypeIndices(OriginalType.RecordData, Refs);
406  if (Refs.empty())
407  return OriginalType.RecordData;
408 
409  ::memcpy(Storage.data(), OriginalType.RecordData.data(),
410  OriginalType.RecordData.size());
411 
412  uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
413 
414  for (auto &Ref : Refs) {
415  TypeIndex *DestTIs =
416  reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
417 
418  for (size_t I = 0; I < Ref.Count; ++I) {
419  TypeIndex &TI = DestTIs[I];
420  bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
421  : remapTypeIndex(TI);
422  if (LLVM_UNLIKELY(!Success))
423  return {};
424  }
425  }
426  return Storage;
427 }
428 
430  SmallVectorImpl<TypeIndex> &SourceToDest,
431  const CVTypeArray &Types) {
432  TypeStreamMerger M(SourceToDest);
433  return M.mergeTypeRecords(Dest, Types);
434 }
435 
437  ArrayRef<TypeIndex> TypeSourceToDest,
438  SmallVectorImpl<TypeIndex> &SourceToDest,
439  const CVTypeArray &Ids) {
440  TypeStreamMerger M(SourceToDest);
441  return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
442 }
443 
446  SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
447  Optional<uint32_t> &PCHSignature) {
448  TypeStreamMerger M(SourceToDest);
449  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature);
450 }
451 
453  GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
454  SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
455  ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) {
456  TypeStreamMerger M(SourceToDest);
457  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes,
458  PCHSignature);
459 }
460 
462  SmallVectorImpl<TypeIndex> &SourceToDest,
463  const CVTypeArray &Types,
465  Optional<uint32_t> &PCHSignature) {
466  TypeStreamMerger M(SourceToDest);
467  return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature);
468 }
469 
471  ArrayRef<TypeIndex> Types,
472  SmallVectorImpl<TypeIndex> &SourceToDest,
473  const CVTypeArray &Ids,
475  TypeStreamMerger M(SourceToDest);
476  return M.mergeIdRecords(Dest, Types, Ids, Hashes);
477 }
478 
479 Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
480  // For object files containing precompiled types, we need to extract the
481  // signature, through EndPrecompRecord. This is done here for performance
482  // reasons, to avoid re-parsing the Types stream.
483  if (Type.kind() == LF_ENDPRECOMP) {
484  EndPrecompRecord EP;
485  if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
486  EP))
487  return joinErrors(std::move(EC), errorCorruptRecord());
488  if (PCHSignature.hasValue())
489  return errorCorruptRecord();
490  PCHSignature.emplace(EP.getSignature());
491  return false;
492  }
493  return true;
494 }
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:704
void discoverTypeIndices(ArrayRef< uint8_t > RecordData, SmallVectorImpl< TiReference > &Refs)
Kind kind() const
Definition: CVRecord.h:37
This class represents lattice values for constants.
Definition: AllocatorList.h:24
TypeLeafKind
Duplicate copy of the above enum, but using the official CV names.
Definition: CodeView.h:34
BinaryStreamRef getUnderlyingStream() const
Error mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Types)
Merge one set of type records into another.
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:192
Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &IdsAndTypes, Optional< uint32_t > &PCHSignature)
Merge a unified set of type and id records, splitting them into separate output streams.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
The access may reference the value stored in memory.
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
#define T
static const uint32_t FirstNonSimpleIndex
Definition: TypeIndex.h:98
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
A 32-bit type reference.
Definition: TypeIndex.h:96
A globally hashed type represents a hash value that is sufficient to uniquely identify a record acros...
Definition: TypeHashing.h:78
Error readBytes(uint32_t Offset, uint32_t Size, ArrayRef< uint8_t > &Buffer) const
Given an Offset into this StreamRef and a Size, return a reference to a buffer owned by the stream...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
#define H(x, y, z)
Definition: MD5.cpp:57
static Error deserializeAs(CVType &CVT, T &Record)
size_t size() const
Definition: SmallVector.h:53
const T * data() const
Definition: ArrayRef.h:146
uint32_t getIndex() const
Definition: TypeIndex.h:111
uint32_t getLength() const
static ErrorSuccess success()
Create a success value.
Definition: Error.h:327
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
static bool isIdRecord(TypeLeafKind K)
static size_t slotForIndex(TypeIndex Idx)
#define Success
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:49
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Error joinErrors(Error E1, Error E2)
Concatenate errors.
Definition: Error.h:424
TypeIndex insertRecordBytes(ArrayRef< uint8_t > &Record)
#define I(x, y, z)
Definition: MD5.cpp:58
Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef< TypeIndex > Types, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Ids)
Merge one set of id records into another.
T * data() const
Definition: ArrayRef.h:329
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create)
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
#define LLVM_LIKELY(EXPR)
Definition: Compiler.h:191
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144