LLVM  8.0.1
HashTable.h
Go to the documentation of this file.
1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
12 
14 #include "llvm/ADT/iterator.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/Support/Error.h"
20 #include <cstdint>
21 #include <iterator>
22 #include <utility>
23 #include <vector>
24 
25 namespace llvm {
26 
27 class BinaryStreamReader;
28 class BinaryStreamWriter;
29 
30 namespace pdb {
31 
32 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
33 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
34 
35 template <typename ValueT, typename TraitsT> class HashTable;
36 
37 template <typename ValueT, typename TraitsT>
39  : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
40  std::forward_iterator_tag,
41  std::pair<uint32_t, ValueT>> {
43 
45  bool IsEnd)
46  : Map(&Map), Index(Index), IsEnd(IsEnd) {}
47 
48 public:
50  int I = Map.Present.find_first();
51  if (I == -1) {
52  Index = 0;
53  IsEnd = true;
54  } else {
55  Index = static_cast<uint32_t>(I);
56  IsEnd = false;
57  }
58  }
59 
61  Map = R.Map;
62  return *this;
63  }
64  bool operator==(const HashTableIterator &R) const {
65  if (IsEnd && R.IsEnd)
66  return true;
67  if (IsEnd != R.IsEnd)
68  return false;
69 
70  return (Map == R.Map) && (Index == R.Index);
71  }
72  const std::pair<uint32_t, ValueT> &operator*() const {
73  assert(Map->Present.test(Index));
74  return Map->Buckets[Index];
75  }
77  while (Index < Map->Buckets.size()) {
78  ++Index;
79  if (Map->Present.test(Index))
80  return *this;
81  }
82 
83  IsEnd = true;
84  return *this;
85  }
86 
87 private:
88  bool isEnd() const { return IsEnd; }
89  uint32_t index() const { return Index; }
90 
91  const HashTable<ValueT, TraitsT> *Map;
92  uint32_t Index;
93  bool IsEnd;
94 };
95 
96 template <typename T> struct PdbHashTraits {};
97 
98 template <> struct PdbHashTraits<uint32_t> {
99  uint32_t hashLookupKey(uint32_t N) const { return N; }
102 };
103 
104 template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
105 class HashTable {
106  using iterator = HashTableIterator<ValueT, TraitsT>;
107  friend iterator;
108 
109  struct Header {
111  support::ulittle32_t Capacity;
112  };
113 
114  using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
115 
116 public:
117  HashTable() { Buckets.resize(8); }
118 
119  explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
120  HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
121  Buckets.resize(Capacity);
122  }
123 
125  const Header *H;
126  if (auto EC = Stream.readObject(H))
127  return EC;
128  if (H->Capacity == 0)
129  return make_error<RawError>(raw_error_code::corrupt_file,
130  "Invalid Hash Table Capacity");
131  if (H->Size > maxLoad(H->Capacity))
132  return make_error<RawError>(raw_error_code::corrupt_file,
133  "Invalid Hash Table Size");
134 
135  Buckets.resize(H->Capacity);
136 
137  if (auto EC = readSparseBitVector(Stream, Present))
138  return EC;
139  if (Present.count() != H->Size)
140  return make_error<RawError>(raw_error_code::corrupt_file,
141  "Present bit vector does not match size!");
142 
143  if (auto EC = readSparseBitVector(Stream, Deleted))
144  return EC;
145  if (Present.intersects(Deleted))
146  return make_error<RawError>(raw_error_code::corrupt_file,
147  "Present bit vector interesects deleted!");
148 
149  for (uint32_t P : Present) {
150  if (auto EC = Stream.readInteger(Buckets[P].first))
151  return EC;
152  const ValueT *Value;
153  if (auto EC = Stream.readObject(Value))
154  return EC;
155  Buckets[P].second = *Value;
156  }
157 
158  return Error::success();
159  }
160 
162  uint32_t Size = sizeof(Header);
163 
164  constexpr int BitsPerWord = 8 * sizeof(uint32_t);
165 
166  int NumBitsP = Present.find_last() + 1;
167  int NumBitsD = Deleted.find_last() + 1;
168 
169  uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
170  uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
171 
172  // Present bit set number of words (4 bytes), followed by that many actual
173  // words (4 bytes each).
174  Size += sizeof(uint32_t);
175  Size += NumWordsP * sizeof(uint32_t);
176 
177  // Deleted bit set number of words (4 bytes), followed by that many actual
178  // words (4 bytes each).
179  Size += sizeof(uint32_t);
180  Size += NumWordsD * sizeof(uint32_t);
181 
182  // One (Key, ValueT) pair for each entry Present.
183  Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
184 
185  return Size;
186  }
187 
188  Error commit(BinaryStreamWriter &Writer) const {
189  Header H;
190  H.Size = size();
191  H.Capacity = capacity();
192  if (auto EC = Writer.writeObject(H))
193  return EC;
194 
195  if (auto EC = writeSparseBitVector(Writer, Present))
196  return EC;
197 
198  if (auto EC = writeSparseBitVector(Writer, Deleted))
199  return EC;
200 
201  for (const auto &Entry : *this) {
202  if (auto EC = Writer.writeInteger(Entry.first))
203  return EC;
204  if (auto EC = Writer.writeObject(Entry.second))
205  return EC;
206  }
207  return Error::success();
208  }
209 
210  void clear() {
211  Buckets.resize(8);
212  Present.clear();
213  Deleted.clear();
214  }
215 
216  bool empty() const { return size() == 0; }
217  uint32_t capacity() const { return Buckets.size(); }
218  uint32_t size() const { return Present.count(); }
219 
220  iterator begin() const { return iterator(*this); }
221  iterator end() const { return iterator(*this, 0, true); }
222 
223  /// Find the entry whose key has the specified hash value, using the specified
224  /// traits defining hash function and equality.
225  template <typename Key> iterator find_as(const Key &K) const {
226  uint32_t H = Traits.hashLookupKey(K) % capacity();
227  uint32_t I = H;
228  Optional<uint32_t> FirstUnused;
229  do {
230  if (isPresent(I)) {
231  if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
232  return iterator(*this, I, false);
233  } else {
234  if (!FirstUnused)
235  FirstUnused = I;
236  // Insertion occurs via linear probing from the slot hint, and will be
237  // inserted at the first empty / deleted location. Therefore, if we are
238  // probing and find a location that is neither present nor deleted, then
239  // nothing must have EVER been inserted at this location, and thus it is
240  // not possible for a matching value to occur later.
241  if (!isDeleted(I))
242  break;
243  }
244  I = (I + 1) % capacity();
245  } while (I != H);
246 
247  // The only way FirstUnused would not be set is if every single entry in the
248  // table were Present. But this would violate the load factor constraints
249  // that we impose, so it should never happen.
250  assert(FirstUnused);
251  return iterator(*this, *FirstUnused, true);
252  }
253 
254  /// Set the entry using a key type that the specified Traits can convert
255  /// from a real key to an internal key.
256  template <typename Key> bool set_as(const Key &K, ValueT V) {
257  return set_as_internal(K, std::move(V), None);
258  }
259 
260  template <typename Key> ValueT get(const Key &K) const {
261  auto Iter = find_as(K);
262  assert(Iter != end());
263  return (*Iter).second;
264  }
265 
266 protected:
267  bool isPresent(uint32_t K) const { return Present.test(K); }
268  bool isDeleted(uint32_t K) const { return Deleted.test(K); }
269 
271  BucketList Buckets;
274 
275 private:
276  /// Set the entry using a key type that the specified Traits can convert
277  /// from a real key to an internal key.
278  template <typename Key>
279  bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
280  auto Entry = find_as(K);
281  if (Entry != end()) {
282  assert(isPresent(Entry.index()));
283  assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
284  // We're updating, no need to do anything special.
285  Buckets[Entry.index()].second = V;
286  return false;
287  }
288 
289  auto &B = Buckets[Entry.index()];
290  assert(!isPresent(Entry.index()));
291  assert(Entry.isEnd());
292  B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
293  B.second = V;
294  Present.set(Entry.index());
295  Deleted.reset(Entry.index());
296 
297  grow();
298 
299  assert((find_as(K)) != end());
300  return true;
301  }
302 
303  static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
304 
305  void grow() {
306  uint32_t S = size();
307  uint32_t MaxLoad = maxLoad(capacity());
308  if (S < maxLoad(capacity()))
309  return;
310  assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
311 
312  uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
313 
314  // Growing requires rebuilding the table and re-hashing every item. Make a
315  // copy with a larger capacity, insert everything into the copy, then swap
316  // it in.
317  HashTable NewMap(NewCapacity, Traits);
318  for (auto I : Present) {
319  auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320  NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
321  }
322 
323  Buckets.swap(NewMap.Buckets);
324  std::swap(Present, NewMap.Present);
325  std::swap(Deleted, NewMap.Deleted);
326  assert(capacity() == NewCapacity);
327  assert(size() == S);
328  }
329 };
330 
331 } // end namespace pdb
332 
333 } // end namespace llvm
334 
335 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
iterator end() const
Definition: HashTable.h:221
Error load(BinaryStreamReader &Stream)
Definition: HashTable.h:124
SparseBitVector Deleted
Definition: HashTable.h:273
uint32_t lookupKeyToStorageKey(uint32_t N)
Definition: HashTable.h:101
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
uint32_t size() const
Definition: HashTable.h:218
HashTableIterator & operator++()
Definition: HashTable.h:76
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec)
Definition: HashTable.cpp:47
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream&#39;s offset.
void set(unsigned Idx)
Error readObject(const T *&Dest)
Get a pointer to an object of type T from the underlying stream, as if by memcpy, and store the resul...
unsigned second
iterator find_as(const Key &K) const
Find the entry whose key has the specified hash value, using the specified traits defining hash funct...
Definition: HashTable.h:225
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
iterator begin() const
Definition: HashTable.h:220
uint32_t capacity() const
Definition: HashTable.h:217
const std::pair< uint32_t, ValueT > & operator*() const
Definition: HashTable.h:72
HashTableIterator & operator=(const HashTableIterator &R)
Definition: HashTable.h:60
Definition: BitVector.h:938
Key
PAL metadata keys.
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:68
Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V)
Definition: HashTable.cpp:25
#define P(N)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define H(x, y, z)
Definition: MD5.cpp:57
Provides write only access to a subclass of WritableBinaryStream.
Error writeInteger(T Value)
Write the integer Value to the underlying stream in the specified endianness.
void reset(unsigned Idx)
bool set_as(const Key &K, ValueT V)
Set the entry using a key type that the specified Traits can convert from a real key to an internal k...
Definition: HashTable.h:256
SparseBitVector Present
Definition: HashTable.h:272
unsigned first
static ErrorSuccess success()
Create a success value.
Definition: Error.h:327
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1167
HashTable(TraitsT Traits)
Definition: HashTable.h:119
bool operator==(const HashTableIterator &R) const
Definition: HashTable.h:64
uint32_t hashLookupKey(uint32_t N) const
Definition: HashTable.h:99
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
bool empty() const
Definition: HashTable.h:216
HashTable(uint32_t Capacity, TraitsT Traits)
Definition: HashTable.h:120
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
uint32_t Size
Definition: Profile.cpp:47
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BucketList Buckets
Definition: HashTable.h:271
HashTableIterator(const HashTable< ValueT, TraitsT > &Map)
Definition: HashTable.h:49
LLVM Value Representation.
Definition: Value.h:73
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
Error commit(BinaryStreamWriter &Writer) const
Definition: HashTable.h:188
uint32_t calculateSerializedLength() const
Definition: HashTable.h:161
uint32_t storageKeyToLookupKey(uint32_t N) const
Definition: HashTable.h:100
bool isPresent(uint32_t K) const
Definition: HashTable.h:267
Provides read only access to a subclass of BinaryStream.
bool isDeleted(uint32_t K) const
Definition: HashTable.h:268