LLVM  8.0.1
AsmLexer.cpp
Go to the documentation of this file.
1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class implements the lexer for assembly files.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/MC/MCAsmInfo.h"
22 #include "llvm/Support/SMLoc.h"
24 #include <cassert>
25 #include <cctype>
26 #include <cstdio>
27 #include <cstring>
28 #include <string>
29 #include <tuple>
30 #include <utility>
31 
32 using namespace llvm;
33 
34 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
36 }
37 
// The destructor is explicitly defaulted here, out of line, rather than in
// the class declaration.
38 AsmLexer::~AsmLexer() = default;
39 
40 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
41  CurBuf = Buf;
42 
43  if (ptr)
44  CurPtr = ptr;
45  else
46  CurPtr = CurBuf.begin();
47 
48  TokStart = nullptr;
49 }
50 
51 /// ReturnError - Set the error to the specified string at the specified
52 /// location. This is defined to always return AsmToken::Error.
53 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
55 
56  return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
57 }
58 
59 int AsmLexer::getNextChar() {
60  if (CurPtr == CurBuf.end())
61  return EOF;
62  return (unsigned char)*CurPtr++;
63 }
64 
65 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
66 ///
67 /// The leading integral digit sequence and dot should have already been
68 /// consumed, some or all of the fractional digit sequence *can* have been
69 /// consumed.
70 AsmToken AsmLexer::LexFloatLiteral() {
71  // Skip the fractional digit sequence.
72  while (isDigit(*CurPtr))
73  ++CurPtr;
74 
75  // Check for exponent; we intentionally accept a slighlty wider set of
76  // literals here and rely on the upstream client to reject invalid ones (e.g.,
77  // "1e+").
78  if (*CurPtr == 'e' || *CurPtr == 'E') {
79  ++CurPtr;
80  if (*CurPtr == '-' || *CurPtr == '+')
81  ++CurPtr;
82  while (isDigit(*CurPtr))
83  ++CurPtr;
84  }
85 
86  return AsmToken(AsmToken::Real,
87  StringRef(TokStart, CurPtr - TokStart));
88 }
89 
90 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
91 /// while making sure there are enough actual digits around for the constant to
92 /// be valid.
93 ///
94 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
95 /// before we get here.
96 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
97  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
98  "unexpected parse state in floating hex");
99  bool NoFracDigits = true;
100 
101  // Skip the fractional part if there is one
102  if (*CurPtr == '.') {
103  ++CurPtr;
104 
105  const char *FracStart = CurPtr;
106  while (isHexDigit(*CurPtr))
107  ++CurPtr;
108 
109  NoFracDigits = CurPtr == FracStart;
110  }
111 
112  if (NoIntDigits && NoFracDigits)
113  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
114  "expected at least one significand digit");
115 
116  // Make sure we do have some kind of proper exponent part
117  if (*CurPtr != 'p' && *CurPtr != 'P')
118  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
119  "expected exponent part 'p'");
120  ++CurPtr;
121 
122  if (*CurPtr == '+' || *CurPtr == '-')
123  ++CurPtr;
124 
125  // N.b. exponent digits are *not* hex
126  const char *ExpStart = CurPtr;
127  while (isDigit(*CurPtr))
128  ++CurPtr;
129 
130  if (CurPtr == ExpStart)
131  return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
132  "expected at least one exponent digit");
133 
134  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
135 }
136 
137 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
138 static bool IsIdentifierChar(char c, bool AllowAt) {
139  return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
140  (c == '@' && AllowAt) || c == '?';
141 }
142 
143 AsmToken AsmLexer::LexIdentifier() {
144  // Check for floating point literals.
145  if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
146  // Disambiguate a .1243foo identifier from a floating literal.
147  while (isDigit(*CurPtr))
148  ++CurPtr;
149  if (*CurPtr == 'e' || *CurPtr == 'E' ||
151  return LexFloatLiteral();
152  }
153 
154  while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
155  ++CurPtr;
156 
157  // Handle . as a special case.
158  if (CurPtr == TokStart+1 && TokStart[0] == '.')
160 
162 }
163 
164 /// LexSlash: Slash: /
165 /// C-Style Comment: /* ... */
166 AsmToken AsmLexer::LexSlash() {
167  switch (*CurPtr) {
168  case '*':
169  IsAtStartOfStatement = false;
170  break; // C style comment.
171  case '/':
172  ++CurPtr;
173  return LexLineComment();
174  default:
175  IsAtStartOfStatement = false;
177  }
178 
179  // C Style comment.
180  ++CurPtr; // skip the star.
181  const char *CommentTextStart = CurPtr;
182  while (CurPtr != CurBuf.end()) {
183  switch (*CurPtr++) {
184  case '*':
185  // End of the comment?
186  if (*CurPtr != '/')
187  break;
188  // If we have a CommentConsumer, notify it about the comment.
189  if (CommentConsumer) {
191  SMLoc::getFromPointer(CommentTextStart),
192  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
193  }
194  ++CurPtr; // End the */.
196  StringRef(TokStart, CurPtr - TokStart));
197  }
198  }
199  return ReturnError(TokStart, "unterminated comment");
200 }
201 
202 /// LexLineComment: Comment: #[^\n]*
203 /// : //[^\n]*
204 AsmToken AsmLexer::LexLineComment() {
205  // Mark This as an end of statement with a body of the
206  // comment. While it would be nicer to leave this two tokens,
207  // backwards compatibility with TargetParsers makes keeping this in this form
208  // better.
209  const char *CommentTextStart = CurPtr;
210  int CurChar = getNextChar();
211  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
212  CurChar = getNextChar();
213  if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
214  ++CurPtr;
215 
216  // If we have a CommentConsumer, notify it about the comment.
217  if (CommentConsumer) {
219  SMLoc::getFromPointer(CommentTextStart),
220  StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
221  }
222 
223  IsAtStartOfLine = true;
224  // This is a whole line comment. leave newline
225  if (IsAtStartOfStatement)
227  StringRef(TokStart, CurPtr - TokStart));
228  IsAtStartOfStatement = true;
229 
231  StringRef(TokStart, CurPtr - 1 - TokStart));
232 }
233 
/// Advance \p CurPtr past an ignored integer type suffix, if one is present.
///
/// The accepted suffixes are ULL, UL, U, L and LL: an optional 'U' followed
/// by at most two 'L' characters. Only uppercase letters are recognised.
/// \p CurPtr is left unchanged when no suffix is present.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Cursor = CurPtr;

  if (*Cursor == 'U')
    ++Cursor;

  // At most two consecutive 'L's belong to the suffix.
  for (int Remaining = 2; Remaining > 0 && *Cursor == 'L'; --Remaining)
    ++Cursor;

  CurPtr = Cursor;
}
243 
244 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
245 // integer as a hexadecimal, possibly with leading zeroes.
246 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
247  bool LexHex) {
248  const char *FirstNonDec = nullptr;
249  const char *LookAhead = CurPtr;
250  while (true) {
251  if (isDigit(*LookAhead)) {
252  ++LookAhead;
253  } else {
254  if (!FirstNonDec)
255  FirstNonDec = LookAhead;
256 
257  // Keep going if we are looking for a 'h' suffix.
258  if (LexHex && isHexDigit(*LookAhead))
259  ++LookAhead;
260  else
261  break;
262  }
263  }
264  bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
265  CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
266  if (isHex)
267  return 16;
268  return DefaultRadix;
269 }
270 
272 {
273  if (Value.isIntN(64))
274  return AsmToken(AsmToken::Integer, Ref, Value);
275  return AsmToken(AsmToken::BigNum, Ref, Value);
276 }
277 
278 /// LexDigit: First character is [0-9].
279 /// Local Label: [0-9][:]
280 /// Forward/Backward Label: [0-9][fb]
281 /// Binary integer: 0b[01]+
282 /// Octal integer: 0[0-7]+
283 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
284 /// Decimal integer: [1-9][0-9]*
285 AsmToken AsmLexer::LexDigit() {
286  // MASM-flavor binary integer: [01]+[bB]
287  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
288  if (LexMasmIntegers && isdigit(CurPtr[-1])) {
289  const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
290  CurPtr - 1 : nullptr;
291  const char *OldCurPtr = CurPtr;
292  while (isHexDigit(*CurPtr)) {
293  if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
294  FirstNonBinary = CurPtr;
295  ++CurPtr;
296  }
297 
298  unsigned Radix = 0;
299  if (*CurPtr == 'h' || *CurPtr == 'H') {
300  // hexadecimal number
301  ++CurPtr;
302  Radix = 16;
303  } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
304  (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
305  Radix = 2;
306 
307  if (Radix == 2 || Radix == 16) {
308  StringRef Result(TokStart, CurPtr - TokStart);
309  APInt Value(128, 0, true);
310 
311  if (Result.drop_back().getAsInteger(Radix, Value))
312  return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
313  "invalid hexdecimal number");
314 
315  // MSVC accepts and ignores type suffices on integer literals.
316  SkipIgnoredIntegerSuffix(CurPtr);
317 
318  return intToken(Result, Value);
319  }
320 
321  // octal/decimal integers, or floating point numbers, fall through
322  CurPtr = OldCurPtr;
323  }
324 
325  // Decimal integer: [1-9][0-9]*
326  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
327  unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
328  bool isHex = Radix == 16;
329  // Check for floating point literals.
330  if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
331  ++CurPtr;
332  return LexFloatLiteral();
333  }
334 
335  StringRef Result(TokStart, CurPtr - TokStart);
336 
337  APInt Value(128, 0, true);
338  if (Result.getAsInteger(Radix, Value))
339  return ReturnError(TokStart, !isHex ? "invalid decimal number" :
340  "invalid hexdecimal number");
341 
342  // Consume the [hH].
343  if (LexMasmIntegers && Radix == 16)
344  ++CurPtr;
345 
346  // The darwin/x86 (and x86-64) assembler accepts and ignores type
347  // suffices on integer literals.
348  SkipIgnoredIntegerSuffix(CurPtr);
349 
350  return intToken(Result, Value);
351  }
352 
353  if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
354  ++CurPtr;
355  // See if we actually have "0b" as part of something like "jmp 0b\n"
356  if (!isDigit(CurPtr[0])) {
357  --CurPtr;
358  StringRef Result(TokStart, CurPtr - TokStart);
359  return AsmToken(AsmToken::Integer, Result, 0);
360  }
361  const char *NumStart = CurPtr;
362  while (CurPtr[0] == '0' || CurPtr[0] == '1')
363  ++CurPtr;
364 
365  // Requires at least one binary digit.
366  if (CurPtr == NumStart)
367  return ReturnError(TokStart, "invalid binary number");
368 
369  StringRef Result(TokStart, CurPtr - TokStart);
370 
371  APInt Value(128, 0, true);
372  if (Result.substr(2).getAsInteger(2, Value))
373  return ReturnError(TokStart, "invalid binary number");
374 
375  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
376  // suffixes on integer literals.
377  SkipIgnoredIntegerSuffix(CurPtr);
378 
379  return intToken(Result, Value);
380  }
381 
382  if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
383  ++CurPtr;
384  const char *NumStart = CurPtr;
385  while (isHexDigit(CurPtr[0]))
386  ++CurPtr;
387 
388  // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
389  // diagnosed by LexHexFloatLiteral).
390  if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
391  return LexHexFloatLiteral(NumStart == CurPtr);
392 
393  // Otherwise requires at least one hex digit.
394  if (CurPtr == NumStart)
395  return ReturnError(CurPtr-2, "invalid hexadecimal number");
396 
397  APInt Result(128, 0);
398  if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
399  return ReturnError(TokStart, "invalid hexadecimal number");
400 
401  // Consume the optional [hH].
402  if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
403  ++CurPtr;
404 
405  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
406  // suffixes on integer literals.
407  SkipIgnoredIntegerSuffix(CurPtr);
408 
409  return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
410  }
411 
412  // Either octal or hexadecimal.
413  APInt Value(128, 0, true);
414  unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
415  bool isHex = Radix == 16;
416  StringRef Result(TokStart, CurPtr - TokStart);
417  if (Result.getAsInteger(Radix, Value))
418  return ReturnError(TokStart, !isHex ? "invalid octal number" :
419  "invalid hexdecimal number");
420 
421  // Consume the [hH].
422  if (Radix == 16)
423  ++CurPtr;
424 
425  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
426  // suffixes on integer literals.
427  SkipIgnoredIntegerSuffix(CurPtr);
428 
429  return intToken(Result, Value);
430 }
431 
432 /// LexSingleQuote: Integer: 'b'
433 AsmToken AsmLexer::LexSingleQuote() {
434  int CurChar = getNextChar();
435 
436  if (CurChar == '\\')
437  CurChar = getNextChar();
438 
439  if (CurChar == EOF)
440  return ReturnError(TokStart, "unterminated single quote");
441 
442  CurChar = getNextChar();
443 
444  if (CurChar != '\'')
445  return ReturnError(TokStart, "single quote way too long");
446 
447  // The idea here being that 'c' is basically just an integral
448  // constant.
449  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
450  long long Value;
451 
452  if (Res.startswith("\'\\")) {
453  char theChar = Res[2];
454  switch (theChar) {
455  default: Value = theChar; break;
456  case '\'': Value = '\''; break;
457  case 't': Value = '\t'; break;
458  case 'n': Value = '\n'; break;
459  case 'b': Value = '\b'; break;
460  }
461  } else
462  Value = TokStart[1];
463 
464  return AsmToken(AsmToken::Integer, Res, Value);
465 }
466 
467 /// LexQuote: String: "..."
468 AsmToken AsmLexer::LexQuote() {
469  int CurChar = getNextChar();
470  // TODO: does gas allow multiline string constants?
471  while (CurChar != '"') {
472  if (CurChar == '\\') {
473  // Allow \", etc.
474  CurChar = getNextChar();
475  }
476 
477  if (CurChar == EOF)
478  return ReturnError(TokStart, "unterminated string constant");
479 
480  CurChar = getNextChar();
481  }
482 
484 }
485 
487  TokStart = CurPtr;
488 
489  while (!isAtStartOfComment(CurPtr) && // Start of line comment.
490  !isAtStatementSeparator(CurPtr) && // End of statement marker.
491  *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
492  ++CurPtr;
493  }
494  return StringRef(TokStart, CurPtr-TokStart);
495 }
496 
497 StringRef AsmLexer::LexUntilEndOfLine() {
498  TokStart = CurPtr;
499 
500  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
501  ++CurPtr;
502  }
503  return StringRef(TokStart, CurPtr-TokStart);
504 }
505 
507  bool ShouldSkipSpace) {
508  SaveAndRestore<const char *> SavedTokenStart(TokStart);
509  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
510  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
511  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
512  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
513  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
514  std::string SavedErr = getErr();
515  SMLoc SavedErrLoc = getErrLoc();
516 
517  size_t ReadCount;
518  for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
519  AsmToken Token = LexToken();
520 
521  Buf[ReadCount] = Token;
522 
523  if (Token.is(AsmToken::Eof))
524  break;
525  }
526 
527  SetError(SavedErrLoc, SavedErr);
528  return ReadCount;
529 }
530 
531 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
532  StringRef CommentString = MAI.getCommentString();
533 
534  if (CommentString.size() == 1)
535  return CommentString[0] == Ptr[0];
536 
537  // Allow # preprocessor commments also be counted as comments for "##" cases
538  if (CommentString[1] == '#')
539  return CommentString[0] == Ptr[0];
540 
541  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
542 }
543 
544 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
545  return strncmp(Ptr, MAI.getSeparatorString(),
546  strlen(MAI.getSeparatorString())) == 0;
547 }
548 
550  TokStart = CurPtr;
551  // This always consumes at least one character.
552  int CurChar = getNextChar();
553 
554  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
555  // If this starts with a '#', this may be a cpp
556  // hash directive and otherwise a line comment.
557  AsmToken TokenBuf[2];
558  MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
559  size_t num = peekTokens(Buf, true);
560  // There cannot be a space preceding this
561  if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
562  TokenBuf[1].is(AsmToken::String)) {
563  CurPtr = TokStart; // reset curPtr;
564  StringRef s = LexUntilEndOfLine();
565  UnLex(TokenBuf[1]);
566  UnLex(TokenBuf[0]);
568  }
569  return LexLineComment();
570  }
571 
572  if (isAtStartOfComment(TokStart))
573  return LexLineComment();
574 
575  if (isAtStatementSeparator(TokStart)) {
576  CurPtr += strlen(MAI.getSeparatorString()) - 1;
577  IsAtStartOfLine = true;
578  IsAtStartOfStatement = true;
580  StringRef(TokStart, strlen(MAI.getSeparatorString())));
581  }
582 
583  // If we're missing a newline at EOF, make sure we still get an
584  // EndOfStatement token before the Eof token.
585  if (CurChar == EOF && !IsAtStartOfStatement) {
586  IsAtStartOfLine = true;
587  IsAtStartOfStatement = true;
589  }
590  IsAtStartOfLine = false;
591  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
592  IsAtStartOfStatement = false;
593  switch (CurChar) {
594  default:
595  // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
596  if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
597  return LexIdentifier();
598 
599  // Unknown character, emit an error.
600  return ReturnError(TokStart, "invalid character in input");
601  case EOF:
602  IsAtStartOfLine = true;
603  IsAtStartOfStatement = true;
605  case 0:
606  case ' ':
607  case '\t':
608  IsAtStartOfStatement = OldIsAtStartOfStatement;
609  while (*CurPtr == ' ' || *CurPtr == '\t')
610  CurPtr++;
611  if (SkipSpace)
612  return LexToken(); // Ignore whitespace.
613  else
614  return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
615  case '\r': {
616  IsAtStartOfLine = true;
617  IsAtStartOfStatement = true;
618  // If this is a CR followed by LF, treat that as one token.
619  if (CurPtr != CurBuf.end() && *CurPtr == '\n')
620  ++CurPtr;
622  StringRef(TokStart, CurPtr - TokStart));
623  }
624  case '\n':
625  IsAtStartOfLine = true;
626  IsAtStartOfStatement = true;
628  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
629  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
630  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
631  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
632  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
633  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
634  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
635  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
636  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
637  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
638  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
639  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
640  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
641  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
642  case '=':
643  if (*CurPtr == '=') {
644  ++CurPtr;
646  }
648  case '-':
649  if (*CurPtr == '>') {
650  ++CurPtr;
652  }
654  case '|':
655  if (*CurPtr == '|') {
656  ++CurPtr;
658  }
660  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
661  case '&':
662  if (*CurPtr == '&') {
663  ++CurPtr;
665  }
667  case '!':
668  if (*CurPtr == '=') {
669  ++CurPtr;
671  }
673  case '%':
674  if (MAI.hasMipsExpressions()) {
676  unsigned OperatorLength;
677 
678  std::tie(Operator, OperatorLength) =
680  StringRef(CurPtr))
681  .StartsWith("call16", {AsmToken::PercentCall16, 7})
682  .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
683  .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
684  .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
685  .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
686  .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
687  .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
688  .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
689  .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
690  .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
691  .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
692  .StartsWith("got", {AsmToken::PercentGot, 4})
693  .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
694  .StartsWith("higher", {AsmToken::PercentHigher, 7})
695  .StartsWith("highest", {AsmToken::PercentHighest, 8})
696  .StartsWith("hi", {AsmToken::PercentHi, 3})
697  .StartsWith("lo", {AsmToken::PercentLo, 3})
698  .StartsWith("neg", {AsmToken::PercentNeg, 4})
699  .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
700  .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
701  .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
702  .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
703  .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
704  .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
705  .Default({AsmToken::Percent, 1});
706 
707  if (Operator != AsmToken::Percent) {
708  CurPtr += OperatorLength - 1;
709  return AsmToken(Operator, StringRef(TokStart, OperatorLength));
710  }
711  }
713  case '/':
714  IsAtStartOfStatement = OldIsAtStartOfStatement;
715  return LexSlash();
716  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
717  case '\'': return LexSingleQuote();
718  case '"': return LexQuote();
719  case '0': case '1': case '2': case '3': case '4':
720  case '5': case '6': case '7': case '8': case '9':
721  return LexDigit();
722  case '<':
723  switch (*CurPtr) {
724  case '<':
725  ++CurPtr;
727  case '=':
728  ++CurPtr;
730  case '>':
731  ++CurPtr;
733  default:
735  }
736  case '>':
737  switch (*CurPtr) {
738  case '>':
739  ++CurPtr;
741  case '=':
742  ++CurPtr;
744  default:
746  }
747 
748  // TODO: Quoted identifiers (objc methods etc)
749  // local labels: [0-9][:]
750  // Forward/backward labels: [0-9][fb]
751  // Integers, fp constants, character constants.
752  }
753 }
AsmCommentConsumer * CommentConsumer
Definition: MCAsmLexer.h:54
This class represents lattice values for constants.
Definition: AllocatorList.h:24
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:136
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
Definition: AsmLexer.cpp:246
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
Definition: StringExtras.h:89
void setBuffer(StringRef Buf, const char *ptr=nullptr)
Definition: AsmLexer.cpp:40
The access may reference the value stored in memory.
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:30
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
Definition: AsmLexer.cpp:506
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:60
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
bool isHexDigit(char C)
Checks if character C is a hexadecimal numeric character.
Definition: StringExtras.h:80
const char * getSeparatorString() const
Definition: MCAsmInfo.h:480
static AsmToken intToken(StringRef Ref, APInt &Value)
Definition: AsmLexer.cpp:271
const char * TokStart
Definition: MCAsmLexer.h:49
StringRef getCommentString() const
Definition: MCAsmInfo.h:486
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:128
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:450
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:654
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:497
bool hasMipsExpressions() const
Definition: MCAsmInfo.h:637
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:88
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:31
StringRef LexUntilEndOfStatement() override
Definition: AsmLexer.cpp:486
bool is(TokenKind K) const
Definition: MCAsmMacro.h:83
Class for arbitrary precision integers.
Definition: APInt.h:70
iterator begin() const
Definition: StringRef.h:106
A utility class that uses RAII to save and restore the value of a variable.
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:37
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
Definition: StringExtras.h:77
~AsmLexer() override
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Definition: AsmLexer.cpp:549
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:123
This file provides utility classes that use RAII to save and restore values.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
Definition: AsmLexer.cpp:234
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Represents a location in source code.
Definition: SMLoc.h:24
static bool IsIdentifierChar(char c, bool AllowAt)
LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*.
Definition: AsmLexer.cpp:138
bool LexMasmIntegers
Definition: MCAsmLexer.h:53
iterator end() const
Definition: StringRef.h:108
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:51
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
AsmLexer(const MCAsmInfo &MAI)
Definition: AsmLexer.cpp:34