LLVM  8.0.1
MCAsmLexer.h
Go to the documentation of this file.
1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/ArrayRef.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/MC/MCAsmMacro.h"
16 #include <algorithm>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <string>
21 
22 namespace llvm {
23 
24 /// A callback class which is notified of each comment in an assembly file as
25 /// it is lexed.
27 public:
28  virtual ~AsmCommentConsumer() = default;
29 
30  /// Callback function for when a comment is lexed. Loc is the start of the
31  /// comment text (excluding the comment-start marker). CommentText is the text
32  /// of the comment, excluding the comment start and end markers, and the
33  /// newline for single-line comments.
34  virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
35 };
36 
37 
38 /// Generic assembler lexer interface, for use by target specific assembly
39 /// lexers.
40 class MCAsmLexer {
41  /// The current token, stored in the base class for faster access.
43 
44  /// The location and description of the current error
45  SMLoc ErrLoc;
46  std::string Err;
47 
48 protected: // Can only create subclasses.
49  const char *TokStart = nullptr;
50  bool SkipSpace = true;
52  bool IsAtStartOfStatement = true;
53  bool LexMasmIntegers = false;
54  AsmCommentConsumer *CommentConsumer = nullptr;
55 
56  MCAsmLexer();
57 
58  virtual AsmToken LexToken() = 0;
59 
60  void SetError(SMLoc errLoc, const std::string &err) {
61  ErrLoc = errLoc;
62  Err = err;
63  }
64 
65 public:
66  MCAsmLexer(const MCAsmLexer &) = delete;
67  MCAsmLexer &operator=(const MCAsmLexer &) = delete;
68  virtual ~MCAsmLexer();
69 
70  /// Consume the next token from the input stream and return it.
71  ///
72  /// The lexer will continuously return the end-of-file token once the end of
73  /// the main input file has been reached.
74  const AsmToken &Lex() {
75  assert(!CurTok.empty());
76  // Mark if we parsing out a EndOfStatement.
77  IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
78  CurTok.erase(CurTok.begin());
79  // LexToken may generate multiple tokens via UnLex but will always return
80  // the first one. Place returned value at head of CurTok vector.
81  if (CurTok.empty()) {
82  AsmToken T = LexToken();
83  CurTok.insert(CurTok.begin(), T);
84  }
85  return CurTok.front();
86  }
87 
88  void UnLex(AsmToken const &Token) {
89  IsAtStartOfStatement = false;
90  CurTok.insert(CurTok.begin(), Token);
91  }
92 
93  bool isAtStartOfStatement() { return IsAtStartOfStatement; }
94 
95  virtual StringRef LexUntilEndOfStatement() = 0;
96 
97  /// Get the current source location.
98  SMLoc getLoc() const;
99 
100  /// Get the current (last) lexed token.
101  const AsmToken &getTok() const {
102  return CurTok[0];
103  }
104 
105  /// Look ahead at the next token to be lexed.
106  const AsmToken peekTok(bool ShouldSkipSpace = true) {
107  AsmToken Tok;
108 
109  MutableArrayRef<AsmToken> Buf(Tok);
110  size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
111 
112  assert(ReadCount == 1);
113  (void)ReadCount;
114 
115  return Tok;
116  }
117 
118  /// Look ahead an arbitrary number of tokens.
119  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
120  bool ShouldSkipSpace = true) = 0;
121 
122  /// Get the current error location
124  return ErrLoc;
125  }
126 
127  /// Get the current error string
128  const std::string &getErr() {
129  return Err;
130  }
131 
132  /// Get the kind of current token.
133  AsmToken::TokenKind getKind() const { return getTok().getKind(); }
134 
135  /// Check if the current token has kind \p K.
136  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
137 
138  /// Check if the current token has kind \p K.
139  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
140 
141  /// Set whether spaces should be ignored by the lexer
142  void setSkipSpace(bool val) { SkipSpace = val; }
143 
144  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
145  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
146 
147  void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
148  this->CommentConsumer = CommentConsumer;
149  }
150 
151  /// Set whether to lex masm-style binary and hex literals. They look like
152  /// 0b1101 and 0ABCh respectively.
153  void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
154 };
155 
156 } // end namespace llvm
157 
158 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H
const AsmToken & getTok() const
Get the current (last) lexed token.
Definition: MCAsmLexer.h:101
This class represents lattice values for constants.
Definition: AllocatorList.h:24
void setSkipSpace(bool val)
Set whether spaces should be ignored by the lexer.
Definition: MCAsmLexer.h:142
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
Definition: MCAsmLexer.h:136
Generic assembler lexer interface, for use by target specific assembly lexers.
Definition: MCAsmLexer.h:40
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
#define T
void setLexMasmIntegers(bool V)
Set whether to lex masm-style binary and hex literals.
Definition: MCAsmLexer.h:153
void SetError(SMLoc errLoc, const std::string &err)
Definition: MCAsmLexer.h:60
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const std::string & getErr()
Get the current error string.
Definition: MCAsmLexer.h:128
const AsmToken peekTok(bool ShouldSkipSpace=true)
Look ahead at the next token to be lexed.
Definition: MCAsmLexer.h:106
bool getAllowAtInIdentifier()
Definition: MCAsmLexer.h:144
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
void setCommentConsumer(AsmCommentConsumer *CommentConsumer)
Definition: MCAsmLexer.h:147
AsmToken::TokenKind getKind() const
Get the kind of current token.
Definition: MCAsmLexer.h:133
void UnLex(AsmToken const &Token)
Definition: MCAsmLexer.h:88
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
virtual ~AsmCommentConsumer()=default
const AsmToken & Lex()
Consume the next token from the input stream and return it.
Definition: MCAsmLexer.h:74
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool isNot(AsmToken::TokenKind K) const
Check if the current token does not have kind K.
Definition: MCAsmLexer.h:139
A callback class which is notified of each comment in an assembly file as it is lexed.
Definition: MCAsmLexer.h:26
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SMLoc getErrLoc()
Get the current error location.
Definition: MCAsmLexer.h:123
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Represents a location in source code.
Definition: SMLoc.h:24
bool isAtStartOfStatement()
Definition: MCAsmLexer.h:93
bool AllowAtInIdentifier
Definition: MCAsmLexer.h:51
virtual void HandleComment(SMLoc Loc, StringRef CommentText)=0
Callback function for when a comment is lexed.
void setAllowAtInIdentifier(bool v)
Definition: MCAsmLexer.h:145