LLVM  8.0.1
SampleProfReader.cpp
Go to the documentation of this file.
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the class that reads LLVM sample profiles. It
11 // supports three file formats: text, binary and gcov.
12 //
13 // The textual representation is useful for debugging and testing purposes. The
14 // binary representation is more compact, resulting in smaller file sizes.
15 //
16 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
17 // tool (https://github.com/google/autofdo)
18 //
19 // All three encodings can be used interchangeably as an input sample profile.
20 //
21 //===----------------------------------------------------------------------===//
22 
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/IR/ProfileSummary.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
33 #include "llvm/Support/MD5.h"
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <limits>
40 #include <memory>
41 #include <system_error>
42 #include <vector>
43 
44 using namespace llvm;
45 using namespace sampleprof;
46 
47 /// Dump the function profile for \p FName.
48 ///
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
52  raw_ostream &OS) {
53  OS << "Function: " << FName << ": " << Profiles[FName];
54 }
55 
56 /// Dump all the function profiles found on stream \p OS.
58  for (const auto &I : Profiles)
59  dumpFunctionProfile(I.getKey(), OS);
60 }
61 
62 /// Parse \p Input as function head.
63 ///
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
67 ///
68 /// \returns true if parsing is successful.
69 static bool ParseHead(const StringRef &Input, StringRef &FName,
70  uint64_t &NumSamples, uint64_t &NumHeadSamples) {
71  if (Input[0] == ' ')
72  return false;
73  size_t n2 = Input.rfind(':');
74  size_t n1 = Input.rfind(':', n2 - 1);
75  FName = Input.substr(0, n1);
76  if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
77  return false;
78  if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
79  return false;
80  return true;
81 }
82 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  // Equivalent to checking that no bit above the low 16 is set.
  return L <= 0xffffu;
}
85 
86 /// Parse \p Input as line sample.
87 ///
88 /// \param Input input line.
89 /// \param IsCallsite true if the line represents an inlined callsite.
90 /// \param Depth the depth of the inline stack.
91 /// \param NumSamples total samples of the line/inlined callsite.
92 /// \param LineOffset line offset to the start of the function.
93 /// \param Discriminator discriminator of the line.
94 /// \param TargetCountMap map from indirect call target to count.
95 ///
96 /// returns true if parsing is successful.
97 static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
98  uint64_t &NumSamples, uint32_t &LineOffset,
99  uint32_t &Discriminator, StringRef &CalleeName,
100  DenseMap<StringRef, uint64_t> &TargetCountMap) {
101  for (Depth = 0; Input[Depth] == ' '; Depth++)
102  ;
103  if (Depth == 0)
104  return false;
105 
106  size_t n1 = Input.find(':');
107  StringRef Loc = Input.substr(Depth, n1 - Depth);
108  size_t n2 = Loc.find('.');
109  if (n2 == StringRef::npos) {
110  if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
111  return false;
112  Discriminator = 0;
113  } else {
114  if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
115  return false;
116  if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
117  return false;
118  }
119 
120  StringRef Rest = Input.substr(n1 + 2);
121  if (Rest[0] >= '0' && Rest[0] <= '9') {
122  IsCallsite = false;
123  size_t n3 = Rest.find(' ');
124  if (n3 == StringRef::npos) {
125  if (Rest.getAsInteger(10, NumSamples))
126  return false;
127  } else {
128  if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
129  return false;
130  }
131  // Find call targets and their sample counts.
132  // Note: In some cases, there are symbols in the profile which are not
133  // mangled. To accommodate such cases, use colon + integer pairs as the
134  // anchor points.
135  // An example:
136  // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
137  // ":1000" and ":437" are used as anchor points so the string above will
138  // be interpreted as
139  // target: _M_construct<char *>
140  // count: 1000
141  // target: string_view<std::allocator<char> >
142  // count: 437
143  while (n3 != StringRef::npos) {
144  n3 += Rest.substr(n3).find_first_not_of(' ');
145  Rest = Rest.substr(n3);
146  n3 = Rest.find_first_of(':');
147  if (n3 == StringRef::npos || n3 == 0)
148  return false;
149 
151  uint64_t count, n4;
152  while (true) {
153  // Get the segment after the current colon.
154  StringRef AfterColon = Rest.substr(n3 + 1);
155  // Get the target symbol before the current colon.
156  Target = Rest.substr(0, n3);
157  // Check if the word after the current colon is an integer.
158  n4 = AfterColon.find_first_of(' ');
159  n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
160  StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
161  if (!WordAfterColon.getAsInteger(10, count))
162  break;
163 
164  // Try to find the next colon.
165  uint64_t n5 = AfterColon.find_first_of(':');
166  if (n5 == StringRef::npos)
167  return false;
168  n3 += n5 + 1;
169  }
170 
171  // An anchor point is found. Save the {target, count} pair
172  TargetCountMap[Target] = count;
173  if (n4 == Rest.size())
174  break;
175  // Change n3 to the next blank space after colon + integer pair.
176  n3 = n4;
177  }
178  } else {
179  IsCallsite = true;
180  size_t n3 = Rest.find_last_of(':');
181  CalleeName = Rest.substr(0, n3);
182  if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
183  return false;
184  }
185  return true;
186 }
187 
188 /// Load samples from a text file.
189 ///
190 /// See the documentation at the top of the file for an explanation of
191 /// the expected format.
192 ///
193 /// \returns true if the file was loaded successfully, false otherwise.
194 std::error_code SampleProfileReaderText::read() {
195  line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
197 
198  InlineCallStack InlineStack;
199 
200  for (; !LineIt.is_at_eof(); ++LineIt) {
201  if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
202  continue;
203  // Read the header of each function.
204  //
205  // Note that for function identifiers we are actually expecting
206  // mangled names, but we may not always get them. This happens when
207  // the compiler decides not to emit the function (e.g., it was inlined
208  // and removed). In this case, the binary will not have the linkage
209  // name for the function, so the profiler will emit the function's
210  // unmangled name, which may contain characters like ':' and '>' in its
211  // name (member functions, templates, etc).
212  //
213  // The only requirement we place on the identifier, then, is that it
214  // should not begin with a number.
215  if ((*LineIt)[0] != ' ') {
216  uint64_t NumSamples, NumHeadSamples;
217  StringRef FName;
218  if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
219  reportError(LineIt.line_number(),
220  "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
222  }
223  Profiles[FName] = FunctionSamples();
224  FunctionSamples &FProfile = Profiles[FName];
225  FProfile.setName(FName);
226  MergeResult(Result, FProfile.addTotalSamples(NumSamples));
227  MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
228  InlineStack.clear();
229  InlineStack.push_back(&FProfile);
230  } else {
231  uint64_t NumSamples;
232  StringRef FName;
233  DenseMap<StringRef, uint64_t> TargetCountMap;
234  bool IsCallsite;
235  uint32_t Depth, LineOffset, Discriminator;
236  if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
237  Discriminator, FName, TargetCountMap)) {
238  reportError(LineIt.line_number(),
239  "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
240  *LineIt);
242  }
243  if (IsCallsite) {
244  while (InlineStack.size() > Depth) {
245  InlineStack.pop_back();
246  }
247  FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
248  LineLocation(LineOffset, Discriminator))[FName];
249  FSamples.setName(FName);
250  MergeResult(Result, FSamples.addTotalSamples(NumSamples));
251  InlineStack.push_back(&FSamples);
252  } else {
253  while (InlineStack.size() > Depth) {
254  InlineStack.pop_back();
255  }
256  FunctionSamples &FProfile = *InlineStack.back();
257  for (const auto &name_count : TargetCountMap) {
258  MergeResult(Result, FProfile.addCalledTargetSamples(
259  LineOffset, Discriminator, name_count.first,
260  name_count.second));
261  }
262  MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
263  NumSamples));
264  }
265  }
266  }
267  if (Result == sampleprof_error::success)
268  computeSummary();
269 
270  return Result;
271 }
272 
274  bool result = false;
275 
276  // Check that the first non-comment line is a valid function header.
277  line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
278  if (!LineIt.is_at_eof()) {
279  if ((*LineIt)[0] != ' ') {
280  uint64_t NumSamples, NumHeadSamples;
281  StringRef FName;
282  result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
283  }
284  }
285 
286  return result;
287 }
288 
290  unsigned NumBytesRead = 0;
291  std::error_code EC;
292  uint64_t Val = decodeULEB128(Data, &NumBytesRead);
293 
294  if (Val > std::numeric_limits<T>::max())
296  else if (Data + NumBytesRead > End)
298  else
300 
301  if (EC) {
302  reportError(0, EC.message());
303  return EC;
304  }
305 
306  Data += NumBytesRead;
307  return static_cast<T>(Val);
308 }
309 
311  std::error_code EC;
312  StringRef Str(reinterpret_cast<const char *>(Data));
313  if (Data + Str.size() + 1 > End) {
315  reportError(0, EC.message());
316  return EC;
317  }
318 
319  Data += Str.size() + 1;
320  return Str;
321 }
322 
323 template <typename T>
325  std::error_code EC;
326 
327  if (Data + sizeof(T) > End) {
329  reportError(0, EC.message());
330  return EC;
331  }
332 
333  using namespace support;
334  T Val = endian::readNext<T, little, unaligned>(Data);
335  return Val;
336 }
337 
338 template <typename T>
340  std::error_code EC;
341  auto Idx = readNumber<uint32_t>();
342  if (std::error_code EC = Idx.getError())
343  return EC;
344  if (*Idx >= Table.size())
346  return *Idx;
347 }
348 
349 ErrorOr<StringRef> SampleProfileReaderRawBinary::readStringFromTable() {
350  auto Idx = readStringIndex(NameTable);
351  if (std::error_code EC = Idx.getError())
352  return EC;
353 
354  return NameTable[*Idx];
355 }
356 
357 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
358  auto Idx = readStringIndex(NameTable);
359  if (std::error_code EC = Idx.getError())
360  return EC;
361 
362  return StringRef(NameTable[*Idx]);
363 }
364 
365 std::error_code
367  auto NumSamples = readNumber<uint64_t>();
368  if (std::error_code EC = NumSamples.getError())
369  return EC;
370  FProfile.addTotalSamples(*NumSamples);
371 
372  // Read the samples in the body.
373  auto NumRecords = readNumber<uint32_t>();
374  if (std::error_code EC = NumRecords.getError())
375  return EC;
376 
377  for (uint32_t I = 0; I < *NumRecords; ++I) {
378  auto LineOffset = readNumber<uint64_t>();
379  if (std::error_code EC = LineOffset.getError())
380  return EC;
381 
382  if (!isOffsetLegal(*LineOffset)) {
383  return std::error_code();
384  }
385 
386  auto Discriminator = readNumber<uint64_t>();
387  if (std::error_code EC = Discriminator.getError())
388  return EC;
389 
390  auto NumSamples = readNumber<uint64_t>();
391  if (std::error_code EC = NumSamples.getError())
392  return EC;
393 
394  auto NumCalls = readNumber<uint32_t>();
395  if (std::error_code EC = NumCalls.getError())
396  return EC;
397 
398  for (uint32_t J = 0; J < *NumCalls; ++J) {
399  auto CalledFunction(readStringFromTable());
400  if (std::error_code EC = CalledFunction.getError())
401  return EC;
402 
403  auto CalledFunctionSamples = readNumber<uint64_t>();
404  if (std::error_code EC = CalledFunctionSamples.getError())
405  return EC;
406 
407  FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
408  *CalledFunction, *CalledFunctionSamples);
409  }
410 
411  FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
412  }
413 
414  // Read all the samples for inlined function calls.
415  auto NumCallsites = readNumber<uint32_t>();
416  if (std::error_code EC = NumCallsites.getError())
417  return EC;
418 
419  for (uint32_t J = 0; J < *NumCallsites; ++J) {
420  auto LineOffset = readNumber<uint64_t>();
421  if (std::error_code EC = LineOffset.getError())
422  return EC;
423 
424  auto Discriminator = readNumber<uint64_t>();
425  if (std::error_code EC = Discriminator.getError())
426  return EC;
427 
428  auto FName(readStringFromTable());
429  if (std::error_code EC = FName.getError())
430  return EC;
431 
432  FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
433  LineLocation(*LineOffset, *Discriminator))[*FName];
434  CalleeProfile.setName(*FName);
435  if (std::error_code EC = readProfile(CalleeProfile))
436  return EC;
437  }
438 
440 }
441 
443  auto NumHeadSamples = readNumber<uint64_t>();
444  if (std::error_code EC = NumHeadSamples.getError())
445  return EC;
446 
447  auto FName(readStringFromTable());
448  if (std::error_code EC = FName.getError())
449  return EC;
450 
451  Profiles[*FName] = FunctionSamples();
452  FunctionSamples &FProfile = Profiles[*FName];
453  FProfile.setName(*FName);
454 
455  FProfile.addHeadSamples(*NumHeadSamples);
456 
457  if (std::error_code EC = readProfile(FProfile))
458  return EC;
460 }
461 
463  while (!at_eof()) {
464  if (std::error_code EC = readFuncProfile())
465  return EC;
466  }
467 
469 }
470 
472  for (auto Name : FuncsToUse) {
473  auto GUID = std::to_string(MD5Hash(Name));
474  auto iter = FuncOffsetTable.find(StringRef(GUID));
475  if (iter == FuncOffsetTable.end())
476  continue;
477  const uint8_t *SavedData = Data;
478  Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
479  iter->second;
480  if (std::error_code EC = readFuncProfile())
481  return EC;
482  Data = SavedData;
483  }
485 }
486 
487 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
488  if (Magic == SPMagic())
491 }
492 
493 std::error_code
494 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
495  if (Magic == SPMagic(SPF_Compact_Binary))
498 }
499 
500 std::error_code SampleProfileReaderRawBinary::readNameTable() {
501  auto Size = readNumber<uint32_t>();
502  if (std::error_code EC = Size.getError())
503  return EC;
504  NameTable.reserve(*Size);
505  for (uint32_t I = 0; I < *Size; ++I) {
506  auto Name(readString());
507  if (std::error_code EC = Name.getError())
508  return EC;
509  NameTable.push_back(*Name);
510  }
511 
513 }
514 
515 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
516  auto Size = readNumber<uint64_t>();
517  if (std::error_code EC = Size.getError())
518  return EC;
519  NameTable.reserve(*Size);
520  for (uint32_t I = 0; I < *Size; ++I) {
521  auto FID = readNumber<uint64_t>();
522  if (std::error_code EC = FID.getError())
523  return EC;
524  NameTable.push_back(std::to_string(*FID));
525  }
527 }
528 
530  Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
531  End = Data + Buffer->getBufferSize();
532 
533  // Read and check the magic identifier.
534  auto Magic = readNumber<uint64_t>();
535  if (std::error_code EC = Magic.getError())
536  return EC;
537  else if (std::error_code EC = verifySPMagic(*Magic))
538  return EC;
539 
540  // Read the version number.
541  auto Version = readNumber<uint64_t>();
542  if (std::error_code EC = Version.getError())
543  return EC;
544  else if (*Version != SPVersion())
546 
547  if (std::error_code EC = readSummary())
548  return EC;
549 
550  if (std::error_code EC = readNameTable())
551  return EC;
553 }
554 
555 std::error_code SampleProfileReaderCompactBinary::readHeader() {
557  if (std::error_code EC = readFuncOffsetTable())
558  return EC;
560 }
561 
562 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
563  auto TableOffset = readUnencodedNumber<uint64_t>();
564  if (std::error_code EC = TableOffset.getError())
565  return EC;
566 
567  const uint8_t *SavedData = Data;
568  const uint8_t *TableStart =
569  reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
570  *TableOffset;
571  Data = TableStart;
572 
573  auto Size = readNumber<uint64_t>();
574  if (std::error_code EC = Size.getError())
575  return EC;
576 
577  FuncOffsetTable.reserve(*Size);
578  for (uint32_t I = 0; I < *Size; ++I) {
579  auto FName(readStringFromTable());
580  if (std::error_code EC = FName.getError())
581  return EC;
582 
583  auto Offset = readNumber<uint64_t>();
584  if (std::error_code EC = Offset.getError())
585  return EC;
586 
587  FuncOffsetTable[*FName] = *Offset;
588  }
589  End = TableStart;
590  Data = SavedData;
592 }
593 
595  FuncsToUse.clear();
596  for (auto &F : M) {
597  StringRef Fname = F.getName().split('.').first;
598  FuncsToUse.insert(Fname);
599  }
600 }
601 
602 std::error_code SampleProfileReaderBinary::readSummaryEntry(
603  std::vector<ProfileSummaryEntry> &Entries) {
604  auto Cutoff = readNumber<uint64_t>();
605  if (std::error_code EC = Cutoff.getError())
606  return EC;
607 
608  auto MinBlockCount = readNumber<uint64_t>();
609  if (std::error_code EC = MinBlockCount.getError())
610  return EC;
611 
612  auto NumBlocks = readNumber<uint64_t>();
613  if (std::error_code EC = NumBlocks.getError())
614  return EC;
615 
616  Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
618 }
619 
620 std::error_code SampleProfileReaderBinary::readSummary() {
621  auto TotalCount = readNumber<uint64_t>();
622  if (std::error_code EC = TotalCount.getError())
623  return EC;
624 
625  auto MaxBlockCount = readNumber<uint64_t>();
626  if (std::error_code EC = MaxBlockCount.getError())
627  return EC;
628 
629  auto MaxFunctionCount = readNumber<uint64_t>();
630  if (std::error_code EC = MaxFunctionCount.getError())
631  return EC;
632 
633  auto NumBlocks = readNumber<uint64_t>();
634  if (std::error_code EC = NumBlocks.getError())
635  return EC;
636 
637  auto NumFunctions = readNumber<uint64_t>();
638  if (std::error_code EC = NumFunctions.getError())
639  return EC;
640 
641  auto NumSummaryEntries = readNumber<uint64_t>();
642  if (std::error_code EC = NumSummaryEntries.getError())
643  return EC;
644 
645  std::vector<ProfileSummaryEntry> Entries;
646  for (unsigned i = 0; i < *NumSummaryEntries; i++) {
647  std::error_code EC = readSummaryEntry(Entries);
648  if (EC != sampleprof_error::success)
649  return EC;
650  }
651  Summary = llvm::make_unique<ProfileSummary>(
652  ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
653  *MaxFunctionCount, *NumBlocks, *NumFunctions);
654 
656 }
657 
659  const uint8_t *Data =
660  reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
661  uint64_t Magic = decodeULEB128(Data);
662  return Magic == SPMagic();
663 }
664 
666  const uint8_t *Data =
667  reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
668  uint64_t Magic = decodeULEB128(Data);
669  return Magic == SPMagic(SPF_Compact_Binary);
670 }
671 
673  uint32_t dummy;
674  if (!GcovBuffer.readInt(dummy))
677 }
678 
680  if (sizeof(T) <= sizeof(uint32_t)) {
681  uint32_t Val;
682  if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
683  return static_cast<T>(Val);
684  } else if (sizeof(T) <= sizeof(uint64_t)) {
685  uint64_t Val;
686  if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
687  return static_cast<T>(Val);
688  }
689 
690  std::error_code EC = sampleprof_error::malformed;
691  reportError(0, EC.message());
692  return EC;
693 }
694 
696  StringRef Str;
697  if (!GcovBuffer.readString(Str))
699  return Str;
700 }
701 
703  // Read the magic identifier.
704  if (!GcovBuffer.readGCDAFormat())
706 
707  // Read the version number. Note - the GCC reader does not validate this
708  // version, but the profile creator generates v704.
709  GCOV::GCOVVersion version;
710  if (!GcovBuffer.readGCOVVersion(version))
712 
713  if (version != GCOV::V704)
715 
716  // Skip the empty integer.
717  if (std::error_code EC = skipNextWord())
718  return EC;
719 
721 }
722 
724  uint32_t Tag;
725  if (!GcovBuffer.readInt(Tag))
727 
728  if (Tag != Expected)
730 
731  if (std::error_code EC = skipNextWord())
732  return EC;
733 
735 }
736 
738  if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
739  return EC;
740 
741  uint32_t Size;
742  if (!GcovBuffer.readInt(Size))
744 
745  for (uint32_t I = 0; I < Size; ++I) {
746  StringRef Str;
747  if (!GcovBuffer.readString(Str))
749  Names.push_back(Str);
750  }
751 
753 }
754 
756  if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
757  return EC;
758 
759  uint32_t NumFunctions;
760  if (!GcovBuffer.readInt(NumFunctions))
762 
763  InlineCallStack Stack;
764  for (uint32_t I = 0; I < NumFunctions; ++I)
765  if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
766  return EC;
767 
768  computeSummary();
770 }
771 
773  const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
774  uint64_t HeadCount = 0;
775  if (InlineStack.size() == 0)
776  if (!GcovBuffer.readInt64(HeadCount))
778 
779  uint32_t NameIdx;
780  if (!GcovBuffer.readInt(NameIdx))
782 
783  StringRef Name(Names[NameIdx]);
784 
785  uint32_t NumPosCounts;
786  if (!GcovBuffer.readInt(NumPosCounts))
788 
789  uint32_t NumCallsites;
790  if (!GcovBuffer.readInt(NumCallsites))
792 
793  FunctionSamples *FProfile = nullptr;
794  if (InlineStack.size() == 0) {
795  // If this is a top function that we have already processed, do not
796  // update its profile again. This happens in the presence of
797  // function aliases. Since these aliases share the same function
798  // body, there will be identical replicated profiles for the
799  // original function. In this case, we simply not bother updating
800  // the profile of the original function.
801  FProfile = &Profiles[Name];
802  FProfile->addHeadSamples(HeadCount);
803  if (FProfile->getTotalSamples() > 0)
804  Update = false;
805  } else {
806  // Otherwise, we are reading an inlined instance. The top of the
807  // inline stack contains the profile of the caller. Insert this
808  // callee in the caller's CallsiteMap.
809  FunctionSamples *CallerProfile = InlineStack.front();
810  uint32_t LineOffset = Offset >> 16;
811  uint32_t Discriminator = Offset & 0xffff;
812  FProfile = &CallerProfile->functionSamplesAt(
813  LineLocation(LineOffset, Discriminator))[Name];
814  }
815  FProfile->setName(Name);
816 
817  for (uint32_t I = 0; I < NumPosCounts; ++I) {
819  if (!GcovBuffer.readInt(Offset))
821 
822  uint32_t NumTargets;
823  if (!GcovBuffer.readInt(NumTargets))
825 
826  uint64_t Count;
827  if (!GcovBuffer.readInt64(Count))
829 
830  // The line location is encoded in the offset as:
831  // high 16 bits: line offset to the start of the function.
832  // low 16 bits: discriminator.
833  uint32_t LineOffset = Offset >> 16;
834  uint32_t Discriminator = Offset & 0xffff;
835 
836  InlineCallStack NewStack;
837  NewStack.push_back(FProfile);
838  NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
839  if (Update) {
840  // Walk up the inline stack, adding the samples on this line to
841  // the total sample count of the callers in the chain.
842  for (auto CallerProfile : NewStack)
843  CallerProfile->addTotalSamples(Count);
844 
845  // Update the body samples for the current profile.
846  FProfile->addBodySamples(LineOffset, Discriminator, Count);
847  }
848 
849  // Process the list of functions called at an indirect call site.
850  // These are all the targets that a function pointer (or virtual
851  // function) resolved at runtime.
852  for (uint32_t J = 0; J < NumTargets; J++) {
853  uint32_t HistVal;
854  if (!GcovBuffer.readInt(HistVal))
856 
857  if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
859 
860  uint64_t TargetIdx;
861  if (!GcovBuffer.readInt64(TargetIdx))
863  StringRef TargetName(Names[TargetIdx]);
864 
865  uint64_t TargetCount;
866  if (!GcovBuffer.readInt64(TargetCount))
868 
869  if (Update)
870  FProfile->addCalledTargetSamples(LineOffset, Discriminator,
871  TargetName, TargetCount);
872  }
873  }
874 
875  // Process all the inlined callers into the current function. These
876  // are all the callsites that were inlined into this function.
877  for (uint32_t I = 0; I < NumCallsites; I++) {
878  // The offset is encoded as:
879  // high 16 bits: line offset to the start of the function.
880  // low 16 bits: discriminator.
882  if (!GcovBuffer.readInt(Offset))
884  InlineCallStack NewStack;
885  NewStack.push_back(FProfile);
886  NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
887  if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
888  return EC;
889  }
890 
892 }
893 
894 /// Read a GCC AutoFDO profile.
895 ///
896 /// This format is generated by the Linux Perf conversion tool at
897 /// https://github.com/google/autofdo.
898 std::error_code SampleProfileReaderGCC::read() {
899  // Read the string table.
900  if (std::error_code EC = readNameTable())
901  return EC;
902 
903  // Read the source profile.
904  if (std::error_code EC = readFunctionProfiles())
905  return EC;
906 
908 }
909 
911  StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
912  return Magic == "adcg*704";
913 }
914 
916  // If the underlying data is in compact format, we can't remap it because
917  // we don't know what the original function names were.
918  if (getFormat() == SPF_Compact_Binary) {
920  Buffer->getBufferIdentifier(),
921  "Profile data remapping cannot be applied to profile data "
922  "in compact format (original mangled names are not available).",
923  DS_Warning));
925  }
926 
927  if (Error E = Remappings.read(*Buffer)) {
929  std::move(E), [&](const SymbolRemappingParseError &ParseError) {
930  reportError(ParseError.getLineNum(), ParseError.getMessage());
931  });
933  }
934 
935  for (auto &Sample : getProfiles())
936  if (auto Key = Remappings.insert(Sample.first()))
937  SampleMap.insert({Key, &Sample.second});
938 
940 }
941 
944  if (auto Key = Remappings.lookup(Fname))
945  return SampleMap.lookup(Key);
947 }
948 
949 /// Prepare a memory buffer for the contents of \p Filename.
950 ///
951 /// \returns an error code indicating the status of the buffer.
953 setupMemoryBuffer(const Twine &Filename) {
954  auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
955  if (std::error_code EC = BufferOrErr.getError())
956  return EC;
957  auto Buffer = std::move(BufferOrErr.get());
958 
959  // Sanity check the file.
960  if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
962 
963  return std::move(Buffer);
964 }
965 
966 /// Create a sample profile reader based on the format of the input file.
967 ///
968 /// \param Filename The file to open.
969 ///
970 /// \param C The LLVM context to use to emit diagnostics.
971 ///
972 /// \returns an error code indicating the status of the created reader.
975  auto BufferOrError = setupMemoryBuffer(Filename);
976  if (std::error_code EC = BufferOrError.getError())
977  return EC;
978  return create(BufferOrError.get(), C);
979 }
980 
981 /// Create a sample profile remapper from the given input, to remap the
982 /// function names in the given profile data.
983 ///
984 /// \param Filename The file to open.
985 ///
986 /// \param C The LLVM context to use to emit diagnostics.
987 ///
988 /// \param Underlying The underlying profile data reader to remap.
989 ///
990 /// \returns an error code indicating the status of the created reader.
993  const Twine &Filename, LLVMContext &C,
994  std::unique_ptr<SampleProfileReader> Underlying) {
995  auto BufferOrError = setupMemoryBuffer(Filename);
996  if (std::error_code EC = BufferOrError.getError())
997  return EC;
998  return llvm::make_unique<SampleProfileReaderItaniumRemapper>(
999  std::move(BufferOrError.get()), C, std::move(Underlying));
1000 }
1001 
1002 /// Create a sample profile reader based on the format of the input data.
1003 ///
1004 /// \param B The memory buffer to create the reader from (assumes ownership).
1005 ///
1006 /// \param C The LLVM context to use to emit diagnostics.
1007 ///
1008 /// \returns an error code indicating the status of the created reader.
1010 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
1011  std::unique_ptr<SampleProfileReader> Reader;
1013  Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1015  Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1016  else if (SampleProfileReaderGCC::hasFormat(*B))
1017  Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1019  Reader.reset(new SampleProfileReaderText(std::move(B), C));
1020  else
1022 
1023  FunctionSamples::Format = Reader->getFormat();
1024  if (std::error_code EC = Reader->readHeader())
1025  return EC;
1026 
1027  return std::move(Reader);
1028 }
1029 
1030 // For text and GCC file formats, we compute the summary after reading the
1031 // profile. Binary format has the profile summary in its header.
1034  for (const auto &I : Profiles) {
1035  const FunctionSamples &Profile = I.second;
1036  Builder.addRecord(Profile);
1037  }
1038  Summary = Builder.getSummary();
1039 }
uint64_t CallInst * C
std::unique_ptr< MemoryBuffer > Buffer
Memory buffer holding the profile file.
Represents either an error or a value T.
Definition: ErrorOr.h:57
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
FunctionSamples * getSamplesFor(StringRef FunctionName) override
Return the samples collected for function F.
std::error_code read() override
Read remapping file and apply it to the sample profile.
void collectFuncsToUse(const Module &M) override
Collect functions to be used when compiling Module M.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LLVM_NODISCARD size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition: StringRef.h:360
std::error_code read() override
Read sample profiles from the associated file.
void push_back(const T &Elt)
Definition: SmallVector.h:218
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
virtual std::error_code readHeader() override
Read and validate the file header.
uint64_t MD5Hash(StringRef Str)
Helper to compute and return lower 64 bits of the given string's MD5 hash.
Definition: MD5.h:109
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:421
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C)
Create a sample profile reader appropriate to the file format.
std::error_code read() override
Read sample profiles from the associated file.
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:32
static const ArrayRef< uint32_t > DefaultCutoffs
A vector of useful cutoff values for detailed summary.
Definition: ProfileCommon.h:65
std::error_code readProfile(FunctionSamples &FProfile)
Read the contents of the given profile instance.
F(f)
static SampleProfileFormat Format
Definition: SampleProf.h:447
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if Buffer is in the format supported by this class.
Representation of the samples collected for a function.
Definition: SampleProf.h:217
amdgpu Simplify well known AMD library false Value Value const Twine & Name
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
StringMap< FunctionSamples > Profiles
Map every function to its associated profile.
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
bool is_at_eof() const
Return true if we've reached EOF or are an "end" iterator.
Definition: LineIterator.h:50
Key
PAL metadata keys.
SampleProfileFormat getFormat()
Return the profile format.
static bool ParseHead(const StringRef &Input, StringRef &FName, uint64_t &NumSamples, uint64_t &NumHeadSamples)
Parse Input as function head.
void setName(StringRef FunctionName)
Set the name of the function.
Definition: SampleProf.h:404
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
static uint64_t SPVersion()
Definition: SampleProf.h:107
static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, uint64_t &NumSamples, uint32_t &LineOffset, uint32_t &Discriminator, StringRef &CalleeName, DenseMap< StringRef, uint64_t > &TargetCountMap)
Parse Input as line sample.
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
Definition: STLExtras.h:1252
int64_t line_number() const
Return the current line number. May return any number at EOF.
Definition: LineIterator.h:56
std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack, bool Update, uint32_t Offset)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
sampleprof_error
Definition: SampleProf.h:41
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if Buffer is in the format supported by this class.
FunctionSamples * getSamplesFor(const Function &F)
Return the samples collected for function F.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, uint64_t Num, uint64_t Weight=1)
Definition: SampleProf.h:240
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
ErrorOr< T > readNumber()
Read a numeric value of type T from the profile.
LLVM_NODISCARD size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Definition: StringRef.cpp:250
LLVMContext & Ctx
LLVM context used to emit diagnostics.
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
Definition: BitmaskEnum.h:91
uint64_t decodeULEB128(const uint8_t *p, unsigned *n=nullptr, const uint8_t *end=nullptr, const char **error=nullptr)
Utility function to decode a ULEB128 value.
Definition: LEB128.h:129
ErrorOr< StringRef > readString()
Read a string from the profile.
std::error_code readFuncProfile()
Read the next function profile instance.
size_t size() const
Definition: SmallVector.h:53
GCOVVersion
Definition: GCOV.h:44
void dumpFunctionProfile(StringRef FName, raw_ostream &OS=dbgs())
Print the profile for FName on stream OS.
static const char *const Magic
Definition: Archive.cpp:42
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given handlers.
Definition: Error.h:905
sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight=1)
Definition: SampleProf.h:224
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:847
void addRecord(const sampleprof::FunctionSamples &FS)
FunctionSamplesMap & functionSamplesAt(const LineLocation &Loc)
Return the function samples at the given callsite location.
Definition: SampleProf.h:278
This interface provides simple read-only access to a block of memory, and provides simple methods for reading files and standard input into a memory buffer.
Definition: MemoryBuffer.h:42
static StringRef readString(WasmObjectFile::ReadContext &Ctx)
Target - Wrapper for Target specific information.
StringMap< FunctionSamples > & getProfiles()
Return all the profiles.
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if Buffer is in the format supported by this class.
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
std::unique_ptr< ProfileSummary > getSummary()
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileOrSTDIN(const Twine &Filename, int64_t FileSize=-1, bool RequiresNullTerminator=true)
Open the specified file as a MemoryBuffer, or open stdin if the Filename is "-".
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
ErrorOr< T > readUnencodedNumber()
Read a numeric value of type T from the profile.
static const size_t npos
Definition: StringRef.h:51
Represents the relative location of an instruction.
Definition: SampleProf.h:118
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:395
#define I(x, y, z)
Definition: MD5.cpp:58
uint32_t Size
Definition: Profile.cpp:47
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
const char * getBufferStart() const
Definition: MemoryBuffer.h:60
static uint64_t SPMagic(SampleProfileFormat Format=SPF_Binary)
Definition: SampleProf.h:90
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight=1)
Definition: SampleProf.h:232
const std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
Provides ErrorOr<T> smart pointer.
std::error_code readSectionTag(uint32_t Expected)
Read the section tag and check that it's the same as Expected.
static ErrorOr< std::unique_ptr< MemoryBuffer > > setupMemoryBuffer(const Twine &Filename)
Prepare a memory buffer for the contents of Filename.
uint64_t getTotalSamples() const
Return the total number of samples collected inside the function.
Definition: SampleProf.h:321
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
ErrorOr< uint32_t > readStringIndex(T &Table)
Read the string index and check whether it overflows the table.
void reportError(int64_t LineNumber, Twine Msg) const
Report a parse error message.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:49
Diagnostic information for the sample profiler.
std::error_code readHeader() override
Read and validate the file header.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:298
void computeSummary()
Compute summary for this profile.
void dump(raw_ostream &OS=dbgs())
Print all the profiles on stream OS.
std::error_code read() override
Read sample profiles from the associated file.
const uint64_t Version
Definition: InstrProf.h:895
static bool isOffsetLegal(unsigned L)
Returns true if line offset L is legal (only has 16 bits).
sampleprof_error addCalledTargetSamples(uint32_t LineOffset, uint32_t Discriminator, StringRef FName, uint64_t Num, uint64_t Weight=1)
Definition: SampleProf.h:246
sampleprof_error MergeResult(sampleprof_error &Accumulator, sampleprof_error Result)
Definition: SampleProf.h:60
std::unique_ptr< ProfileSummary > Summary
Profile summary information.
static ErrorOr< std::unique_ptr< SampleProfileReader > > create(const Twine &Filename, LLVMContext &C, std::unique_ptr< SampleProfileReader > Underlying)
Create a remapped sample profile from the given remapping file and underlying samples.
std::error_code read() override
Read samples only for functions to use.