LLVM  8.0.1
TGLexer.h
Go to the documentation of this file.
1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class represents the Lexer for tablegen files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
15 #define LLVM_LIB_TABLEGEN_TGLEXER_H
16 
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/DataTypes.h"
21 #include "llvm/Support/SMLoc.h"
22 #include <cassert>
23 #include <map>
24 #include <memory>
25 #include <string>
26 
27 namespace llvm {
28 class SourceMgr;
29 class SMLoc;
30 class Twine;
31 
32 namespace tgtok {
33  enum TokKind {
34  // Markers
36 
37  // Tokens with no info.
38  minus, plus, // - +
39  l_square, r_square, // [ ]
40  l_brace, r_brace, // { }
41  l_paren, r_paren, // ( )
42  less, greater, // < >
43  colon, semi, // : ;
44  comma, period, // , .
45  equal, question, // = ?
46  paste, // #
47 
48  // Keywords.
51 
52  // !keywords.
55  XNe, XLe, XLt, XGe, XGt,
56 
57  // Integer value.
59 
60  // Binary constant. Note that these are sized according to the number of
61  // bits given.
63 
64  // String valued tokens.
66 
67  // Preprocessing tokens for internal usage by the lexer.
68  // They are never returned as a result of Lex().
70  };
71 }
72 
73 /// TGLexer - TableGen Lexer class.
74 class TGLexer {
76 
77  const char *CurPtr;
78  StringRef CurBuf;
79 
80  // Information about the current token.
81  const char *TokStart;
82  tgtok::TokKind CurCode;
83  std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
84  int64_t CurIntVal; // This is valid for INTVAL.
85 
86  /// CurBuffer - This is the current buffer index we're lexing from as managed
87  /// by the SourceMgr object.
88  unsigned CurBuffer;
89 
90 public:
91  typedef std::map<std::string, SMLoc> DependenciesMapTy;
92 private:
93  /// Dependencies - This is the list of all included files.
94  DependenciesMapTy Dependencies;
95 
96 public:
97  TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
98 
100  return CurCode = LexToken(CurPtr == CurBuf.begin());
101  }
102 
103  const DependenciesMapTy &getDependencies() const {
104  return Dependencies;
105  }
106 
107  tgtok::TokKind getCode() const { return CurCode; }
108 
109  const std::string &getCurStrVal() const {
110  assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
111  CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
112  "This token doesn't have a string value");
113  return CurStrVal;
114  }
115  int64_t getCurIntVal() const {
116  assert(CurCode == tgtok::IntVal && "This token isn't an integer");
117  return CurIntVal;
118  }
119  std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
120  assert(CurCode == tgtok::BinaryIntVal &&
121  "This token isn't a binary integer");
122  return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
123  }
124 
125  SMLoc getLoc() const;
126 
127 private:
128  /// LexToken - Read the next token and return its code.
129  tgtok::TokKind LexToken(bool FileOrLineStart = false);
130 
131  tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
132  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
133 
134  int getNextChar();
135  int peekNextChar(int Index) const;
136  void SkipBCPLComment();
137  bool SkipCComment();
138  tgtok::TokKind LexIdentifier();
139  bool LexInclude();
140  tgtok::TokKind LexString();
141  tgtok::TokKind LexVarName();
142  tgtok::TokKind LexNumber();
143  tgtok::TokKind LexBracket();
144  tgtok::TokKind LexExclaim();
145 
146  // Process EOF encountered in LexToken().
147  // If EOF is met in an include file, then the method will update
148  // CurPtr, CurBuf and preprocessing include stack, and return true.
149  // If EOF is met in the top-level file, then the method will
150  // update and check the preprocessing include stack, and return false.
151  bool processEOF();
152 
153  // *** Structures and methods for preprocessing support ***
154 
155  // A set of macro names that are defined either via command line or
156  // by using:
157  // #define NAME
158  StringSet<> DefinedMacros;
159 
160  // Each of #ifdef and #else directives has a descriptor associated
161  // with it.
162  //
163  // An ordered list of preprocessing controls defined by #ifdef/#else
164  // directives that are in effect currently is called preprocessing
165  // control stack. It is represented as a vector of PreprocessorControlDesc's.
166  //
167  // The control stack is updated according to the following rules:
168  //
169  // For each #ifdef we add an element to the control stack.
170  // For each #else we replace the top element with a descriptor
171  // with an inverted IsDefined value.
172  // For each #endif we pop the top element from the control stack.
173  //
174  // When CurPtr reaches the current buffer's end, the control stack
175  // must be empty, i.e. #ifdef and the corresponding #endif
176  // must be located in the same file.
177  struct PreprocessorControlDesc {
178  // Either tgtok::Ifdef or tgtok::Else.
180 
181  // True, if the condition for this directive is true, false - otherwise.
182  // Examples:
183  // #ifdef NAME : true, if NAME is defined, false - otherwise.
184  // ...
185  // #else : false, if NAME is defined, true - otherwise.
186  bool IsDefined;
187 
188  // Pointer into CurBuf to the beginning of the preprocessing directive
189  // word, e.g.:
190  // #ifdef NAME
191  // ^ - SrcPos
192  SMLoc SrcPos;
193  };
194 
195  // We want to disallow code like this:
196  // file1.td:
197  // #define NAME
198  // #ifdef NAME
199  // include "file2.td"
200  // EOF
201  // file2.td:
202  // #endif
203  // EOF
204  //
205  // To do this, we clear the preprocessing control stack on entry
206  // to each of the included files. PrepIncludeStack is used to store
207  // preprocessing control stacks for the current file and all its
208  // parent files. The back() element is the preprocessing control
209  // stack for the current file.
210  std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
211  PrepIncludeStack;
212 
213  // Validate that the current preprocessing control stack is empty,
214  // since we are about to exit a file, and pop the include stack.
215  //
216  // If IncludeStackMustBeEmpty is true, the include stack must be empty
217  // after the popping, otherwise, the include stack must not be empty
218  // after the popping. Basically, the include stack must be empty
219  // only if we exit the "top-level" file (i.e. finish lexing).
220  //
221  // The method returns false, if the current preprocessing control stack
222  // is not empty (e.g. there is an unterminated #ifdef/#else),
223  // true - otherwise.
224  bool prepExitInclude(bool IncludeStackMustBeEmpty);
225 
226  // Look ahead for a preprocessing directive starting from CurPtr. The caller
227  // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
228  // a preprocessing directive word followed by a whitespace, then it returns
229  // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
230  //
231  // CurPtr is not adjusted by this method.
232  tgtok::TokKind prepIsDirective() const;
233 
234  // Given a preprocessing token kind, adjusts CurPtr to the end
235  // of the preprocessing directive word. Returns true, unless
236  // an unsupported token kind is passed in.
237  //
238  // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
239  // to avoid adjusting CurPtr before we are sure that '#' is followed
240  // by a preprocessing directive. If it is not, then we fall back to
241  // tgtok::paste interpretation of '#'.
242  bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
243 
244  // The main "exit" point from the token parsing to preprocessor.
245  //
246  // The method is called for CurPtr, when prepIsDirective() recognizes a
247  // preprocessing directive. The first parameter matches the result of prepIsDirective(),
248  // denoting the actual preprocessor directive to be processed.
249  //
250  // If the preprocessing directive disables the tokens processing, e.g.:
251  // #ifdef NAME // NAME is undefined
252  // then lexPreprocessor() enters the lines-skipping mode.
253  // In this mode, it does not parse any tokens, because the code under
254  // the #ifdef may not even be a correct tablegen code. The preprocessor
255  // looks for lines containing other preprocessing directives, which
256  // may be prepended with whitespaces and C-style comments. If the line
257  // does not contain a preprocessing directive, it is skipped completely.
258  // Otherwise, the preprocessing directive is processed by recursively
259  // calling lexPreprocessor(). The processing of the encountered
260  // preprocessing directives includes updating preprocessing control stack
261  // and adding new macros into DefinedMacros set.
262  //
263  // The second parameter controls whether lexPreprocessor() is called from
264  // LexToken() (true) or recursively from lexPreprocessor() (false).
265  //
266  // If ReturnNextLiveToken is true, the method returns the next
267  // LEX token following the current directive or following the end
268  // of the disabled preprocessing region corresponding to this directive.
269  // If ReturnNextLiveToken is false, the method returns the first parameter,
270  // unless there were errors encountered in the disabled preprocessing
271  // region - in this case, it returns tgtok::Error.
272  tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
273  bool ReturnNextLiveToken = true);
274 
275  // Worker method for lexPreprocessor() to skip lines after some
276  // preprocessing directive up to the buffer end or to the directive
277  // that re-enables token processing. The method returns true
278  // upon processing the next directive that re-enables tokens
279  // processing. False is returned if an error was encountered.
280  //
281  // Note that prepSkipRegion() calls lexPreprocessor() to process
282  // encountered preprocessing directives. In this case, the second
283  // parameter to lexPreprocessor() is set to false. Being passed
284  // false ReturnNextLiveToken, lexPreprocessor() must never call
285  // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
286  // to prepSkipRegion() and checking that it is never set to false.
287  bool prepSkipRegion(bool MustNeverBeFalse);
288 
289  // Lex name of the macro after either #ifdef or #define. We could have used
290  // LexIdentifier(), but it has special handling of "include" word, which
291  // could result in awkward diagnostic errors. Consider:
292  // ----
293  // #ifdef include
294  // class ...
295  // ----
296  // LexIdentifier() will engage LexInclude(), which will complain about
297  // missing file with name "class". Instead, prepLexMacroName() will treat
298  // "include" as a normal macro name.
299  //
300  // On entry, CurPtr points to the end of a preprocessing directive word.
301  // The method allows for whitespaces between the preprocessing directive
302  // and the macro name. The allowed whitespaces are ' ' and '\t'.
303  //
304  // If the first non-whitespace symbol after the preprocessing directive
305  // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
306  // the method updates TokStart to the position of the first non-whitespace
307  // symbol, sets CurPtr to the position of the macro name's last symbol,
308  // and returns a string reference to the macro name. Otherwise,
309  // TokStart is set to the first non-whitespace symbol after the preprocessing
310  // directive, and the method returns an empty string reference.
311  //
312  // In all cases, TokStart may be used to point to the word following
313  // the preprocessing directive.
314  StringRef prepLexMacroName();
315 
316  // Skip any whitespaces starting from CurPtr. The method is used
317  // only in the lines-skipping mode to find the first non-whitespace
318  // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
319  // and '\r'. The method skips C-style comments as well, because
320  // it is used to find the beginning of the preprocessing directive.
321  // If we do not handle C-style comments the following code would
322  // result in incorrect detection of a preprocessing directive:
323  // /*
324  // #ifdef NAME
325  // */
326  // As long as we skip C-style comments, the following code is correctly
327  // recognized as a preprocessing directive:
328  // /* first line comment
329  // second line comment */ #ifdef NAME
330  //
331  // The method returns true upon reaching the first non-whitespace symbol
332  // or EOF, CurPtr is set to point to this symbol. The method returns false,
333  // if an error occurred during skipping of a C-style comment.
334  bool prepSkipLineBegin();
335 
336  // Skip any whitespaces or comments after a preprocessing directive.
337  // The method returns true upon reaching either end of the line
338  // or end of the file. If there is a multiline C-style comment
339  // after the preprocessing directive, the method skips
340  // the comment, so the final CurPtr may point to one of the next lines.
341  // The method returns false, if an error occurred during skipping
342  // C- or C++-style comment, or a non-whitespace symbol appears
343  // after the preprocessing directive.
344  //
345  // The method may be called both during lines-skipping and tokens
346  // processing. It actually verifies that only whitespaces or/and
347  // comments follow a preprocessing directive.
348  //
349  // After the execution of this method, CurPtr points either to new line
350  // symbol, buffer end or non-whitespace symbol following the preprocessing
351  // directive.
352  bool prepSkipDirectiveEnd();
353 
354  // Skip all symbols to the end of the line/file.
355  // The method adjusts CurPtr, so that it points to either new line
356  // symbol in the current line or the buffer end.
357  void prepSkipToLineEnd();
358 
359  // Return true, if the current preprocessor control stack is such that
360  // we should allow lexer to process the next token, false - otherwise.
361  //
362  // In particular, the method returns true, if all the #ifdef/#else
363  // controls on the stack have their IsDefined member set to true.
364  bool prepIsProcessingEnabled();
365 
366  // Report an error, if we reach EOF with non-empty preprocessing control
367  // stack. This means there is no matching #endif for the previous
368  // #ifdef/#else.
369  void prepReportPreprocessorStackError();
370 };
371 
372 } // end namespace llvm
373 
374 #endif
This class represents lattice values for constants.
Definition: AllocatorList.h:24
SourceMgr SrcMgr
Definition: Error.cpp:24
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
const std::string & getCurStrVal() const
Definition: TGLexer.h:109
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling...
Definition: SourceMgr.h:42
tgtok::TokKind Lex()
Definition: TGLexer.h:99
iterator begin() const
Definition: StringRef.h:106
std::map< std::string, SMLoc > DependenciesMapTy
Definition: TGLexer.h:91
std::pair< int64_t, unsigned > getCurBinaryIntVal() const
Definition: TGLexer.h:119
tgtok::TokKind getCode() const
Definition: TGLexer.h:107
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const DependenciesMapTy & getDependencies() const
Definition: TGLexer.h:103
int64_t getCurIntVal() const
Definition: TGLexer.h:115
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:28
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Represents a location in source code.
Definition: SMLoc.h:24
TGLexer - TableGen Lexer class.
Definition: TGLexer.h:74