LLVM  8.0.1
MILexer.cpp
Go to the documentation of this file.
1 //===- MILexer.cpp - Machine instructions lexer implementation ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the lexing of machine instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MILexer.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Twine.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <cctype>
25 #include <string>
26 
27 using namespace llvm;
28 
29 namespace {
30 
31 using ErrorCallbackType =
33 
34 /// This class provides a way to iterate and get characters from the source
35 /// string.
36 class Cursor {
37  const char *Ptr = nullptr;
38  const char *End = nullptr;
39 
40 public:
41  Cursor(NoneType) {}
42 
43  explicit Cursor(StringRef Str) {
44  Ptr = Str.data();
45  End = Ptr + Str.size();
46  }
47 
48  bool isEOF() const { return Ptr == End; }
49 
50  char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
51 
52  void advance(unsigned I = 1) { Ptr += I; }
53 
54  StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
55 
56  StringRef upto(Cursor C) const {
57  assert(C.Ptr >= Ptr && C.Ptr <= End);
58  return StringRef(Ptr, C.Ptr - Ptr);
59  }
60 
61  StringRef::iterator location() const { return Ptr; }
62 
63  operator bool() const { return Ptr != nullptr; }
64 };
65 
66 } // end anonymous namespace
67 
69  this->Kind = Kind;
70  this->Range = Range;
71  return *this;
72 }
73 
75  StringValue = StrVal;
76  return *this;
77 }
78 
80  StringValueStorage = std::move(StrVal);
81  StringValue = StringValueStorage;
82  return *this;
83 }
84 
86  this->IntVal = std::move(IntVal);
87  return *this;
88 }
89 
90 /// Skip the leading whitespace characters and return the updated cursor.
91 static Cursor skipWhitespace(Cursor C) {
92  while (isblank(C.peek()))
93  C.advance();
94  return C;
95 }
96 
/// Return true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
98 
99 /// Skip a line comment and return the updated cursor.
100 static Cursor skipComment(Cursor C) {
101  if (C.peek() != ';')
102  return C;
103  while (!isNewlineChar(C.peek()) && !C.isEOF())
104  C.advance();
105  return C;
106 }
107 
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers are undefined for negative arguments other than
  // EOF, and plain char may be signed. Widen through unsigned char so that
  // bytes >= 0x80 in the input are classified safely.
  const unsigned char UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.' ||
         C == '$';
}
114 
115 /// Unescapes the given string value.
116 ///
117 /// Expects the string value to be quoted.
118 static std::string unescapeQuotedString(StringRef Value) {
119  assert(Value.front() == '"' && Value.back() == '"');
120  Cursor C = Cursor(Value.substr(1, Value.size() - 2));
121 
122  std::string Str;
123  Str.reserve(C.remaining().size());
124  while (!C.isEOF()) {
125  char Char = C.peek();
126  if (Char == '\\') {
127  if (C.peek(1) == '\\') {
128  // Two '\' become one
129  Str += '\\';
130  C.advance(2);
131  continue;
132  }
133  if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
134  Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
135  C.advance(3);
136  continue;
137  }
138  }
139  Str += Char;
140  C.advance();
141  }
142  return Str;
143 }
144 
145 /// Lex a string constant using the following regular expression: \"[^\"]*\"
146 static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
147  assert(C.peek() == '"');
148  for (C.advance(); C.peek() != '"'; C.advance()) {
149  if (C.isEOF() || isNewlineChar(C.peek())) {
150  ErrorCallback(
151  C.location(),
152  "end of machine instruction reached before the closing '\"'");
153  return None;
154  }
155  }
156  C.advance();
157  return C;
158 }
159 
160 static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
161  unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
162  auto Range = C;
163  C.advance(PrefixLength);
164  if (C.peek() == '"') {
165  if (Cursor R = lexStringConstant(C, ErrorCallback)) {
166  StringRef String = Range.upto(R);
167  Token.reset(Type, String)
169  unescapeQuotedString(String.drop_front(PrefixLength)));
170  return R;
171  }
172  Token.reset(MIToken::Error, Range.remaining());
173  return Range;
174  }
175  while (isIdentifierChar(C.peek()))
176  C.advance();
177  Token.reset(Type, Range.upto(C))
178  .setStringValue(Range.upto(C).drop_front(PrefixLength));
179  return C;
180 }
181 
183  return StringSwitch<MIToken::TokenKind>(Identifier)
184  .Case("_", MIToken::underscore)
185  .Case("implicit", MIToken::kw_implicit)
186  .Case("implicit-def", MIToken::kw_implicit_define)
187  .Case("def", MIToken::kw_def)
188  .Case("dead", MIToken::kw_dead)
189  .Case("killed", MIToken::kw_killed)
190  .Case("undef", MIToken::kw_undef)
191  .Case("internal", MIToken::kw_internal)
192  .Case("early-clobber", MIToken::kw_early_clobber)
193  .Case("debug-use", MIToken::kw_debug_use)
194  .Case("renamable", MIToken::kw_renamable)
195  .Case("tied-def", MIToken::kw_tied_def)
196  .Case("frame-setup", MIToken::kw_frame_setup)
197  .Case("frame-destroy", MIToken::kw_frame_destroy)
198  .Case("nnan", MIToken::kw_nnan)
199  .Case("ninf", MIToken::kw_ninf)
200  .Case("nsz", MIToken::kw_nsz)
201  .Case("arcp", MIToken::kw_arcp)
202  .Case("contract", MIToken::kw_contract)
203  .Case("afn", MIToken::kw_afn)
204  .Case("reassoc", MIToken::kw_reassoc)
205  .Case("nuw" , MIToken::kw_nuw)
206  .Case("nsw" , MIToken::kw_nsw)
207  .Case("exact" , MIToken::kw_exact)
208  .Case("debug-location", MIToken::kw_debug_location)
209  .Case("same_value", MIToken::kw_cfi_same_value)
210  .Case("offset", MIToken::kw_cfi_offset)
211  .Case("rel_offset", MIToken::kw_cfi_rel_offset)
212  .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
213  .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
214  .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
215  .Case("escape", MIToken::kw_cfi_escape)
216  .Case("def_cfa", MIToken::kw_cfi_def_cfa)
217  .Case("remember_state", MIToken::kw_cfi_remember_state)
218  .Case("restore", MIToken::kw_cfi_restore)
219  .Case("restore_state", MIToken::kw_cfi_restore_state)
220  .Case("undefined", MIToken::kw_cfi_undefined)
221  .Case("register", MIToken::kw_cfi_register)
222  .Case("window_save", MIToken::kw_cfi_window_save)
223  .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
224  .Case("blockaddress", MIToken::kw_blockaddress)
225  .Case("intrinsic", MIToken::kw_intrinsic)
226  .Case("target-index", MIToken::kw_target_index)
227  .Case("half", MIToken::kw_half)
228  .Case("float", MIToken::kw_float)
229  .Case("double", MIToken::kw_double)
230  .Case("x86_fp80", MIToken::kw_x86_fp80)
231  .Case("fp128", MIToken::kw_fp128)
232  .Case("ppc_fp128", MIToken::kw_ppc_fp128)
233  .Case("target-flags", MIToken::kw_target_flags)
234  .Case("volatile", MIToken::kw_volatile)
235  .Case("non-temporal", MIToken::kw_non_temporal)
236  .Case("dereferenceable", MIToken::kw_dereferenceable)
237  .Case("invariant", MIToken::kw_invariant)
238  .Case("align", MIToken::kw_align)
239  .Case("addrspace", MIToken::kw_addrspace)
240  .Case("stack", MIToken::kw_stack)
241  .Case("got", MIToken::kw_got)
242  .Case("jump-table", MIToken::kw_jump_table)
243  .Case("constant-pool", MIToken::kw_constant_pool)
244  .Case("call-entry", MIToken::kw_call_entry)
245  .Case("liveout", MIToken::kw_liveout)
246  .Case("address-taken", MIToken::kw_address_taken)
247  .Case("landing-pad", MIToken::kw_landing_pad)
248  .Case("liveins", MIToken::kw_liveins)
249  .Case("successors", MIToken::kw_successors)
250  .Case("floatpred", MIToken::kw_floatpred)
251  .Case("intpred", MIToken::kw_intpred)
252  .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
253  .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
254  .Case("unknown-size", MIToken::kw_unknown_size)
256 }
257 
258 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
259  if (!isalpha(C.peek()) && C.peek() != '_')
260  return None;
261  auto Range = C;
262  while (isIdentifierChar(C.peek()))
263  C.advance();
264  auto Identifier = Range.upto(C);
265  Token.reset(getIdentifierKind(Identifier), Identifier)
266  .setStringValue(Identifier);
267  return C;
268 }
269 
270 static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
271  ErrorCallbackType ErrorCallback) {
272  bool IsReference = C.remaining().startswith("%bb.");
273  if (!IsReference && !C.remaining().startswith("bb."))
274  return None;
275  auto Range = C;
276  unsigned PrefixLength = IsReference ? 4 : 3;
277  C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
278  if (!isdigit(C.peek())) {
279  Token.reset(MIToken::Error, C.remaining());
280  ErrorCallback(C.location(), "expected a number after '%bb.'");
281  return C;
282  }
283  auto NumberRange = C;
284  while (isdigit(C.peek()))
285  C.advance();
286  StringRef Number = NumberRange.upto(C);
287  unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
288  // TODO: The format bb.<id>.<irname> is supported only when it's not a
289  // reference. Once we deprecate the format where the irname shows up, we
290  // should only lex forward if it is a reference.
291  if (C.peek() == '.') {
292  C.advance(); // Skip '.'
293  ++StringOffset;
294  while (isIdentifierChar(C.peek()))
295  C.advance();
296  }
297  Token.reset(IsReference ? MIToken::MachineBasicBlock
299  Range.upto(C))
300  .setIntegerValue(APSInt(Number))
301  .setStringValue(Range.upto(C).drop_front(StringOffset));
302  return C;
303 }
304 
305 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
307  if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
308  return None;
309  auto Range = C;
310  C.advance(Rule.size());
311  auto NumberRange = C;
312  while (isdigit(C.peek()))
313  C.advance();
314  Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
315  return C;
316 }
317 
318 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
320  if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
321  return None;
322  auto Range = C;
323  C.advance(Rule.size());
324  auto NumberRange = C;
325  while (isdigit(C.peek()))
326  C.advance();
327  StringRef Number = NumberRange.upto(C);
328  unsigned StringOffset = Rule.size() + Number.size();
329  if (C.peek() == '.') {
330  C.advance();
331  ++StringOffset;
332  while (isIdentifierChar(C.peek()))
333  C.advance();
334  }
335  Token.reset(Kind, Range.upto(C))
336  .setIntegerValue(APSInt(Number))
337  .setStringValue(Range.upto(C).drop_front(StringOffset));
338  return C;
339 }
340 
341 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
342  return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
343 }
344 
345 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
346  return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
347 }
348 
349 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
350  return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
351 }
352 
353 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
354  return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
355 }
356 
357 static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
358  ErrorCallbackType ErrorCallback) {
359  const StringRef Rule = "%subreg.";
360  if (!C.remaining().startswith(Rule))
361  return None;
362  return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
363  ErrorCallback);
364 }
365 
366 static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
367  ErrorCallbackType ErrorCallback) {
368  const StringRef Rule = "%ir-block.";
369  if (!C.remaining().startswith(Rule))
370  return None;
371  if (isdigit(C.peek(Rule.size())))
372  return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
373  return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
374 }
375 
376 static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
377  ErrorCallbackType ErrorCallback) {
378  const StringRef Rule = "%ir.";
379  if (!C.remaining().startswith(Rule))
380  return None;
381  if (isdigit(C.peek(Rule.size())))
382  return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
383  return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
384 }
385 
386 static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
387  ErrorCallbackType ErrorCallback) {
388  if (C.peek() != '"')
389  return None;
390  return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
391  ErrorCallback);
392 }
393 
394 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
395  auto Range = C;
396  C.advance(); // Skip '%'
397  auto NumberRange = C;
398  while (isdigit(C.peek()))
399  C.advance();
400  Token.reset(MIToken::VirtualRegister, Range.upto(C))
401  .setIntegerValue(APSInt(NumberRange.upto(C)));
402  return C;
403 }
404 
405 /// Returns true for a character allowed in a register name.
406 static bool isRegisterChar(char C) {
407  return isIdentifierChar(C) && C != '.';
408 }
409 
410 static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
411  Cursor Range = C;
412  C.advance(); // Skip '%'
413  while (isRegisterChar(C.peek()))
414  C.advance();
415  Token.reset(MIToken::NamedVirtualRegister, Range.upto(C))
416  .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
417  return C;
418 }
419 
420 static Cursor maybeLexRegister(Cursor C, MIToken &Token,
421  ErrorCallbackType ErrorCallback) {
422  if (C.peek() != '%' && C.peek() != '$')
423  return None;
424 
425  if (C.peek() == '%') {
426  if (isdigit(C.peek(1)))
427  return lexVirtualRegister(C, Token);
428 
429  if (isRegisterChar(C.peek(1)))
430  return lexNamedVirtualRegister(C, Token);
431 
432  return None;
433  }
434 
435  assert(C.peek() == '$');
436  auto Range = C;
437  C.advance(); // Skip '$'
438  while (isRegisterChar(C.peek()))
439  C.advance();
440  Token.reset(MIToken::NamedRegister, Range.upto(C))
441  .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'
442  return C;
443 }
444 
445 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
446  ErrorCallbackType ErrorCallback) {
447  if (C.peek() != '@')
448  return None;
449  if (!isdigit(C.peek(1)))
450  return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
451  ErrorCallback);
452  auto Range = C;
453  C.advance(1); // Skip the '@'
454  auto NumberRange = C;
455  while (isdigit(C.peek()))
456  C.advance();
457  Token.reset(MIToken::GlobalValue, Range.upto(C))
458  .setIntegerValue(APSInt(NumberRange.upto(C)));
459  return C;
460 }
461 
462 static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
463  ErrorCallbackType ErrorCallback) {
464  if (C.peek() != '&')
465  return None;
466  return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
467  ErrorCallback);
468 }
469 
470 static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
471  ErrorCallbackType ErrorCallback) {
472  const StringRef Rule = "<mcsymbol ";
473  if (!C.remaining().startswith(Rule))
474  return None;
475  auto Start = C;
476  C.advance(Rule.size());
477 
478  // Try a simple unquoted name.
479  if (C.peek() != '"') {
480  while (isIdentifierChar(C.peek()))
481  C.advance();
482  StringRef String = Start.upto(C).drop_front(Rule.size());
483  if (C.peek() != '>') {
484  ErrorCallback(C.location(),
485  "expected the '<mcsymbol ...' to be closed by a '>'");
486  Token.reset(MIToken::Error, Start.remaining());
487  return Start;
488  }
489  C.advance();
490 
491  Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
492  return C;
493  }
494 
495  // Otherwise lex out a quoted name.
496  Cursor R = lexStringConstant(C, ErrorCallback);
497  if (!R) {
498  ErrorCallback(C.location(),
499  "unable to parse quoted string from opening quote");
500  Token.reset(MIToken::Error, Start.remaining());
501  return Start;
502  }
503  StringRef String = Start.upto(R).drop_front(Rule.size());
504  if (R.peek() != '>') {
505  ErrorCallback(R.location(),
506  "expected the '<mcsymbol ...' to be closed by a '>'");
507  Token.reset(MIToken::Error, Start.remaining());
508  return Start;
509  }
510  R.advance();
511 
512  Token.reset(MIToken::MCSymbol, Start.upto(R))
513  .setOwnedStringValue(unescapeQuotedString(String));
514  return R;
515 }
516 
/// Return true if \p C is one of the letters 'H', 'K', 'L' or 'M', which may
/// follow '0x' to mark a hexadecimal floating-point literal.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
    return true;
  default:
    return false;
  }
}
520 
521 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
522  C.advance();
523  // Skip over [0-9]*([eE][-+]?[0-9]+)?
524  while (isdigit(C.peek()))
525  C.advance();
526  if ((C.peek() == 'e' || C.peek() == 'E') &&
527  (isdigit(C.peek(1)) ||
528  ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
529  C.advance(2);
530  while (isdigit(C.peek()))
531  C.advance();
532  }
533  Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
534  return C;
535 }
536 
537 static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
538  if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
539  return None;
540  Cursor Range = C;
541  C.advance(2);
542  unsigned PrefLen = 2;
543  if (isValidHexFloatingPointPrefix(C.peek())) {
544  C.advance();
545  PrefLen++;
546  }
547  while (isxdigit(C.peek()))
548  C.advance();
549  StringRef StrVal = Range.upto(C);
550  if (StrVal.size() <= PrefLen)
551  return None;
552  if (PrefLen == 2)
553  Token.reset(MIToken::HexLiteral, Range.upto(C));
554  else // It must be 3, which means that there was a floating-point prefix.
555  Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
556  return C;
557 }
558 
559 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
560  if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
561  return None;
562  auto Range = C;
563  C.advance();
564  while (isdigit(C.peek()))
565  C.advance();
566  if (C.peek() == '.')
567  return lexFloatingPointLiteral(Range, C, Token);
568  StringRef StrVal = Range.upto(C);
569  Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
570  return C;
571 }
572 
574  return StringSwitch<MIToken::TokenKind>(Identifier)
575  .Case("!tbaa", MIToken::md_tbaa)
576  .Case("!alias.scope", MIToken::md_alias_scope)
577  .Case("!noalias", MIToken::md_noalias)
578  .Case("!range", MIToken::md_range)
579  .Case("!DIExpression", MIToken::md_diexpr)
580  .Case("!DILocation", MIToken::md_dilocation)
582 }
583 
584 static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
585  ErrorCallbackType ErrorCallback) {
586  if (C.peek() != '!')
587  return None;
588  auto Range = C;
589  C.advance(1);
590  if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
591  Token.reset(MIToken::exclaim, Range.upto(C));
592  return C;
593  }
594  while (isIdentifierChar(C.peek()))
595  C.advance();
596  StringRef StrVal = Range.upto(C);
597  Token.reset(getMetadataKeywordKind(StrVal), StrVal);
598  if (Token.isError())
599  ErrorCallback(Token.location(),
600  "use of unknown metadata keyword '" + StrVal + "'");
601  return C;
602 }
603 
605  switch (C) {
606  case ',':
607  return MIToken::comma;
608  case '.':
609  return MIToken::dot;
610  case '=':
611  return MIToken::equal;
612  case ':':
613  return MIToken::colon;
614  case '(':
615  return MIToken::lparen;
616  case ')':
617  return MIToken::rparen;
618  case '{':
619  return MIToken::lbrace;
620  case '}':
621  return MIToken::rbrace;
622  case '+':
623  return MIToken::plus;
624  case '-':
625  return MIToken::minus;
626  case '<':
627  return MIToken::less;
628  case '>':
629  return MIToken::greater;
630  default:
631  return MIToken::Error;
632  }
633 }
634 
635 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
637  unsigned Length = 1;
638  if (C.peek() == ':' && C.peek(1) == ':') {
639  Kind = MIToken::coloncolon;
640  Length = 2;
641  } else
642  Kind = symbolToken(C.peek());
643  if (Kind == MIToken::Error)
644  return None;
645  auto Range = C;
646  C.advance(Length);
647  Token.reset(Kind, Range.upto(C));
648  return C;
649 }
650 
651 static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
652  if (!isNewlineChar(C.peek()))
653  return None;
654  auto Range = C;
655  C.advance();
656  Token.reset(MIToken::Newline, Range.upto(C));
657  return C;
658 }
659 
660 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
661  ErrorCallbackType ErrorCallback) {
662  if (C.peek() != '`')
663  return None;
664  auto Range = C;
665  C.advance();
666  auto StrRange = C;
667  while (C.peek() != '`') {
668  if (C.isEOF() || isNewlineChar(C.peek())) {
669  ErrorCallback(
670  C.location(),
671  "end of machine instruction reached before the closing '`'");
672  Token.reset(MIToken::Error, Range.remaining());
673  return C;
674  }
675  C.advance();
676  }
677  StringRef Value = StrRange.upto(C);
678  C.advance();
679  Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
680  return C;
681 }
682 
684  ErrorCallbackType ErrorCallback) {
685  auto C = skipComment(skipWhitespace(Cursor(Source)));
686  if (C.isEOF()) {
687  Token.reset(MIToken::Eof, C.remaining());
688  return C.remaining();
689  }
690 
691  if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
692  return R.remaining();
693  if (Cursor R = maybeLexIdentifier(C, Token))
694  return R.remaining();
695  if (Cursor R = maybeLexJumpTableIndex(C, Token))
696  return R.remaining();
697  if (Cursor R = maybeLexStackObject(C, Token))
698  return R.remaining();
699  if (Cursor R = maybeLexFixedStackObject(C, Token))
700  return R.remaining();
701  if (Cursor R = maybeLexConstantPoolItem(C, Token))
702  return R.remaining();
703  if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
704  return R.remaining();
705  if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
706  return R.remaining();
707  if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
708  return R.remaining();
709  if (Cursor R = maybeLexRegister(C, Token, ErrorCallback))
710  return R.remaining();
711  if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
712  return R.remaining();
713  if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
714  return R.remaining();
715  if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
716  return R.remaining();
717  if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
718  return R.remaining();
719  if (Cursor R = maybeLexNumericalLiteral(C, Token))
720  return R.remaining();
721  if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback))
722  return R.remaining();
723  if (Cursor R = maybeLexSymbol(C, Token))
724  return R.remaining();
725  if (Cursor R = maybeLexNewline(C, Token))
726  return R.remaining();
727  if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
728  return R.remaining();
729  if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback))
730  return R.remaining();
731 
732  Token.reset(MIToken::Error, C.remaining());
733  ErrorCallback(C.location(),
734  Twine("unexpected character '") + Twine(C.peek()) + "'");
735  return C.remaining();
736 }
static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:386
uint64_t CallInst * C
static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:366
static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier)
Definition: MILexer.cpp:573
bool isError() const
Definition: MILexer.h:179
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static bool isRegisterChar(char C)
Returns true for a character allowed in a register name.
Definition: MILexer.cpp:406
static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token)
Definition: MILexer.cpp:410
NoneType
A simple null object to allow implicit construction of Optional<T> and similar types without having t...
Definition: None.h:23
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:160
static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback)
Lex a string constant using the following regular expression: "[^"]*".
Definition: MILexer.cpp:146
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:117
static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:660
StringRef lexMIToken(StringRef Source, MIToken &Token, function_ref< void(StringRef::iterator, const Twine &)> ErrorCallback)
Consume a single machine instruction token in the given source and return the remaining source string...
MIToken & reset(TokenKind Kind, StringRef Range)
Definition: MILexer.cpp:68
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static bool isNewlineChar(char C)
Definition: MILexer.cpp:97
static bool isValidHexFloatingPointPrefix(char C)
Definition: MILexer.cpp:517
static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token)
Definition: MILexer.cpp:559
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:270
static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token)
Definition: MILexer.cpp:521
static Cursor skipWhitespace(Cursor C)
Skip the leading whitespace characters and return the updated cursor.
Definition: MILexer.cpp:91
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token)
Definition: MILexer.cpp:258
static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:445
static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:470
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:462
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static Cursor maybeLexNewline(Cursor C, MIToken &Token)
Definition: MILexer.cpp:651
static Cursor maybeLexStackObject(Cursor C, MIToken &Token)
Definition: MILexer.cpp:345
static Cursor lexVirtualRegister(Cursor C, MIToken &Token)
Definition: MILexer.cpp:394
static MIToken::TokenKind symbolToken(char C)
Definition: MILexer.cpp:604
MIToken & setOwnedStringValue(std::string StrVal)
Definition: MILexer.cpp:79
LLVM_NODISCARD char back() const
back - Get the last character in the string.
Definition: StringRef.h:149
static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token)
Definition: MILexer.cpp:349
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
Definition: MILexer.cpp:100
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:645
MIToken & setStringValue(StringRef StrVal)
Definition: MILexer.cpp:74
static Cursor maybeLexSymbol(Cursor C, MIToken &Token)
Definition: MILexer.cpp:635
StringRef::iterator location() const
Definition: MILexer.h:208
static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, MIToken::TokenKind Kind)
Definition: MILexer.cpp:318
static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token)
Definition: MILexer.cpp:537
static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token)
Definition: MILexer.cpp:341
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
Definition: StringExtras.h:69
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:70
static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token)
Definition: MILexer.cpp:353
#define I(x, y, z)
Definition: MD5.cpp:58
const unsigned Kind
static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, MIToken::TokenKind Kind)
Definition: MILexer.cpp:305
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM_NODISCARD char front() const
front - Get the first character in the string.
Definition: StringRef.h:142
static Cursor maybeLexExlaim(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:584
LLVM Value Representation.
Definition: Value.h:73
A token produced by the machine instruction lexer.
Definition: MILexer.h:28
static Cursor maybeLexRegister(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:420
static Cursor maybeLexIRValue(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:376
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
MIToken & setIntegerValue(APSInt IntVal)
Definition: MILexer.cpp:85
static MIToken::TokenKind getIdentifierKind(StringRef Identifier)
Definition: MILexer.cpp:182
static bool isIdentifierChar(char C)
Return true if the given character satisfies the following regular expression: [-a-zA-Z$._0-9].
Definition: MILexer.cpp:110
static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback)
Definition: MILexer.cpp:357
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:439
static std::string unescapeQuotedString(StringRef Value)
Unescapes the given string value.
Definition: MILexer.cpp:118