LLVM  8.0.1
MCTargetAsmParser.h
Go to the documentation of this file.
1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
12 
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/Support/SMLoc.h"
21 #include <cstdint>
22 #include <memory>
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCParsedAsmOperand;
28 class MCStreamer;
29 class MCSubtargetInfo;
30 template <typename T> class SmallVectorImpl;
31 
33 
35  AOK_Align, // Rewrite align as .align.
36  AOK_EVEN, // Rewrite even as .even.
37  AOK_Emit, // Rewrite _emit as .byte.
38  AOK_Input, // Rewrite in terms of $N.
39  AOK_Output, // Rewrite in terms of $N.
40  AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr).
41  AOK_Label, // Rewrite local labels.
42  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
43  AOK_Skip, // Skip emission (e.g., offset/type operators).
44  AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
45 };
46 
// Precedence table for asm rewrites. The trailing comment on each entry names
// the AsmRewriteKind enumerator that the value belongs to.
// NOTE(review): whether a larger or a smaller value takes priority is decided
// by the code that consumes this table, which is not visible in this listing.
47 const char AsmRewritePrecedence [] = {
48  2, // AOK_Align
49  2, // AOK_EVEN
50  2, // AOK_Emit
51  3, // AOK_Input
52  3, // AOK_Output
53  5, // AOK_SizeDirective
54  1, // AOK_Label
55  5, // AOK_EndOfStatement
56  2, // AOK_Skip
57  2 // AOK_IntelExpr
58 };
59 
60 // Represent the various parts which make up an Intel expression,
61 // used for emitting compound Intel expressions.
// Value type holding the pieces of an expression of the form
// [BaseReg + IndexReg * Scale + Imm], plus a flag saying whether brackets are
// needed.
// NOTE(review): the constructors and predicates below use BaseReg and IndexReg,
// but their declarations are absent from this listing (the listing numbers jump
// from 64 to 67). They are assigned from StringRef values; confirm the exact
// declarations against the real header.
62 struct IntelExpr {
63  bool NeedBracs;
64  int64_t Imm;
67  unsigned Scale;
68 
// Base constructor every other constructor delegates to: zero immediate, empty
// register names and a scale of 1.
69  IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
70  BaseReg(StringRef()), IndexReg(StringRef()),
71  Scale(1) {}
72  // Compound immediate expression
73  IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
74  Imm = imm;
75  }
76  // [Reg + ImmediateExpression]
77  // We don't bother to emit an immediate expression evaluated to zero
// The single register is stored in IndexReg (BaseReg stays empty). A scale
// argument of 0 keeps the default Scale of 1.
78  IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
79  bool needBracs = true) :
80  IntelExpr(imm, needBracs) {
81  IndexReg = reg;
82  if (scale)
83  Scale = scale;
84  }
85  // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
// Note the parameter order differs from the constructor above: here scale
// comes before imm.
86  IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
87  int64_t imm = 0, bool needBracs = true) :
88  IntelExpr(indexReg, imm, scale, needBracs) {
89  BaseReg = baseReg;
90  }
// True when BaseReg is non-empty (its size() is converted to bool).
91  bool hasBaseReg() const {
92  return BaseReg.size();
93  }
// True when IndexReg is non-empty (its size() is converted to bool).
94  bool hasIndexReg() const {
95  return IndexReg.size();
96  }
// True when at least one of the two registers is set.
97  bool hasRegs() const {
98  return hasBaseReg() || hasIndexReg();
99  }
// A Scale of 1 is always accepted; any other value is accepted only when an
// index register is present and the value is 2, 4 or 8.
100  bool isValid() const {
101  return (Scale == 1) ||
102  (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
103  }
104 };
105 
// Record describing one rewrite: a kind, a source location, a length and an
// optional integer value, label or IntelExpr payload.
// NOTE(review): the constructors below write Kind, Loc, Label and IntelExp, but
// those member declarations are absent from this listing (listing numbers
// 107-108 and 111-112 are missing); confirm them against the real header.
106 struct AsmRewrite {
109  unsigned Len;
110  int64_t Val;
113 
114 public:
// General form; Len and Val default to 0.
115  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
116  : Kind(kind), Loc(loc), Len(len), Val(val) {}
// Label form; delegates to the general form (so Val is 0) and then sets Label.
117  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
118  : AsmRewrite(kind, loc, len) { Label = label; }
// Intel-expression form; the kind is always AOK_IntelExpr and exp is copied
// into IntelExp.
119  AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
120  : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
121 };
122 
124  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
125 
126  ParseInstructionInfo() = default;
128  : AsmRewrites(rewrites) {}
129 };
130 
132  MatchOperand_Success, // operand matched successfully
133  MatchOperand_NoMatch, // operand did not match
134  MatchOperand_ParseFail // operand matched but had errors
135 };
136 
138  Match,
139  NearMatch,
140  NoMatch,
141 };
142 
143 // When an operand is parsed, the assembler will try to iterate through a set of
144 // possible operand classes that the operand might match and call the
145 // corresponding PredicateMethod to determine that.
146 //
147 // If there are two AsmOperands that would give a specific diagnostic if there
148 // is no match, there is currently no mechanism to distinguish which operand is
149 // a closer match. The DiagnosticPredicate distinguishes between 'completely
150 // no match' and 'near match', so the assembler can decide whether to give a
151 // specific diagnostic, or use 'InvalidOperand' and continue to find a
152 // 'better matching' diagnostic.
153 //
154 // For example:
155 // opcode opnd0, opnd1, opnd2
156 //
157 // where:
158 // opnd2 could be an 'immediate of range [-8, 7]'
159 // opnd2 could be a 'register + shift/extend'.
160 //
161 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
162 // little sense to give a diagnostic that the operand should be an immediate
163 // in range [-8, 7].
164 //
165 // This is a light-weight alternative to the 'NearMissInfo' approach
166 // below which collects *all* possible diagnostics. This alternative
167 // is optional and fully backward compatible with existing
168 // PredicateMethods that return a 'bool' (match or no match).
171 
172  explicit DiagnosticPredicate(bool Match)
173  : Type(Match ? DiagnosticPredicateTy::Match
176  DiagnosticPredicate(const DiagnosticPredicate &) = default;
177 
// Queries on the stored predicate result. The bool conversion is true only for
// Match, so both NearMatch and NoMatch convert to false; use isNearMatch() or
// isNoMatch() to tell those two apart.
178  operator bool() const { return Type == DiagnosticPredicateTy::Match; }
179  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
180  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
181  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
182 };
183 
184 // When matching of an assembly instruction fails, there may be multiple
185 // encodings that are close to being a match. It's often ambiguous which one
186 // the programmer intended to use, so we want to report an error which mentions
187 // each of these "near-miss" encodings. This struct contains information about
188 // one such encoding, and why it did not match the parsed instruction.
190 public:
197  };
198 
199  // The encoding is valid for the parsed assembly string. This is only used
200  // internally to the table-generated assembly matcher.
201  static NearMissInfo getSuccess() { return NearMissInfo(); } // default-constructed, i.e. Kind == NoNearMiss
202 
203  // The instruction encoding is not valid because it requires some target
204  // features that are not currently enabled. MissingFeatures has a bit set for
205  // each feature that the encoding needs but which is not enabled.
206  static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
207  NearMissInfo Result;
208  Result.Kind = NearMissFeature; // tags which union member is in use
209  Result.Features = MissingFeatures; // stored in the Features union member
210  return Result;
211  }
212 
213  // The instruction encoding is not valid because the target-specific
214  // predicate function returned an error code. FailureCode is the
215  // target-specific error code returned by the predicate.
216  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
217  NearMissInfo Result;
218  Result.Kind = NearMissPredicate; // tags which union member is in use
219  Result.PredicateError = FailureCode; // stored in the PredicateError union member
220  return Result;
221  }
222 
223  // The instruction encoding is not valid because one (and only one) parsed
224  // operand is not of the correct type. OperandError is the error code
225  // relating to the operand class expected by the encoding. OperandClass is
226  // the type of the expected operand. Opcode is the opcode of the encoding.
227  // OperandIndex is the index into the parsed operand list.
228  static NearMissInfo getMissedOperand(unsigned OperandError,
229  unsigned OperandClass, unsigned Opcode,
230  unsigned OperandIndex) {
231  NearMissInfo Result;
232  Result.Kind = NearMissOperand; // tags which union member is in use
// All four fields of the MissedOperand union member are filled in.
233  Result.MissedOperand.Error = OperandError;
234  Result.MissedOperand.Class = OperandClass;
235  Result.MissedOperand.Opcode = Opcode;
236  Result.MissedOperand.Index = OperandIndex;
237  return Result;
238  }
239 
240  // The instruction encoding is not valid because it expects more operands
241  // than were parsed. OperandClass is the class of the expected operand that
242  // was not provided. Opcode is the instruction encoding.
243  static NearMissInfo getTooFewOperands(unsigned OperandClass,
244  unsigned Opcode) {
245  NearMissInfo Result;
246  Result.Kind = NearMissTooFewOperands; // tags which union member is in use
// Written through the TooFewOperands union member; getOperandClass() and
// getOpcode() later read these values back through MissedOperand.
247  Result.TooFewOperands.Class = OperandClass;
248  Result.TooFewOperands.Opcode = Opcode;
249  return Result;
250  }
251 
// True when this object describes a near miss, false for the NoNearMiss value
// produced by getSuccess().
252  operator bool() const { return Kind != NoNearMiss; }
253 
// Returns the tag saying which kind of near miss (if any) is recorded.
254  NearMissKind getKind() const { return Kind; }
255 
256  // Feature flags required by the instruction, that the current target does
257  // not have.
258  uint64_t getFeatures() const {
259  assert(Kind == NearMissFeature); // Features is the active union member only for this kind
260  return Features;
261  }
262  // Error code returned by the target predicate when validating this
263  // instruction encoding.
264  unsigned getPredicateError() const {
265  assert(Kind == NearMissPredicate); // PredicateError is the active union member only for this kind
266  return PredicateError;
267  }
268  // MatchClassKind of the operand that we expected to see.
269  unsigned getOperandClass() const {
270  assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
// Read through MissedOperand for both kinds: for NearMissTooFewOperands the
// value was written via TooFewOperands.Class, which is the first field of its
// struct just as Class is the first field of MissedOpInfo.
271  return MissedOperand.Class;
272  }
273  // Opcode of the encoding we were trying to match.
274  unsigned getOpcode() const {
275  assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
// Read through MissedOperand for both kinds: for NearMissTooFewOperands the
// value was written via TooFewOperands.Opcode, which is the second field of its
// struct just as Opcode is the second field of MissedOpInfo.
276  return MissedOperand.Opcode;
277  }
278  // Error code returned when validating the operand.
279  unsigned getOperandError() const {
280  assert(Kind == NearMissOperand); // Error exists only in MissedOpInfo, not in TooFewOperandsInfo
281  return MissedOperand.Error;
282  }
283  // Index of the actual operand we were trying to match in the list of parsed
284  // operands.
285  unsigned getOperandIndex() const {
286  assert(Kind == NearMissOperand); // Index exists only in MissedOpInfo, not in TooFewOperandsInfo
287  return MissedOperand.Index;
288  }
289 
290 private:
292 
293  // These two structs share a common prefix, so we can safely rely on the fact
294  // that they overlap in the union.
// Payload for a near miss caused by one operand of the wrong type.
295  struct MissedOpInfo {
296  unsigned Class; // first field, same position as TooFewOperandsInfo::Class
297  unsigned Opcode; // second field, same position as TooFewOperandsInfo::Opcode
298  unsigned Error;
299  unsigned Index;
300  };
301 
// Payload for a near miss caused by too few parsed operands. Its two fields
// mirror the first two fields of MissedOpInfo, in the same order.
302  struct TooFewOperandsInfo {
303  unsigned Class;
304  unsigned Opcode;
305  };
306 
// Storage for the payload; the Kind tag records which member was written by
// the static factory that created the object.
307  union {
308  uint64_t Features;
309  unsigned PredicateError;
310  MissedOpInfo MissedOperand;
311  TooFewOperandsInfo TooFewOperands;
312  };
313 
// Declared in the private section, so objects are obtained through the static
// get* factories. Only Kind is initialized here; the union payload is not.
314  NearMissInfo() : Kind(NoNearMiss) {}
315 };
316 
317 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
319 public:
327  FIRST_TARGET_MATCH_RESULT_TY
328  };
329 
330 protected: // Can only create subclasses.
332  const MCInstrInfo &MII);
333 
334  /// Create a copy of STI and return a non-const reference to it.
335  MCSubtargetInfo &copySTI();
336 
337  /// AvailableFeatures - The current set of available features.
338  uint64_t AvailableFeatures = 0;
339 
340  /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
341  bool ParsingInlineAsm = false;
342 
343  /// SemaCallback - The Sema callback implementation. Must be set when parsing
344  /// ms-style inline assembly.
346 
347  /// Set of options which affects instrumentation of inline assembly.
349 
350  /// Current STI.
352 
353  const MCInstrInfo &MII;
354 
355 public:
356  MCTargetAsmParser(const MCTargetAsmParser &) = delete;
357  MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
358 
359  ~MCTargetAsmParser() override;
360 
361  const MCSubtargetInfo &getSTI() const;
362 
// Plain getter/setter pair for the AvailableFeatures member.
363  uint64_t getAvailableFeatures() const { return AvailableFeatures; }
364  void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
365 
// Plain getter/setter pair for the ParsingInlineAsm flag.
// NOTE(review): unlike getAvailableFeatures(), this getter is not
// const-qualified, so it cannot be called through a const reference.
366  bool isParsingInlineAsm () { return ParsingInlineAsm; }
367  void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
368 
// Returns MCOptions by value, i.e. the caller receives a copy.
369  MCTargetOptions getTargetOptions() const { return MCOptions; }
370 
372  SemaCallback = Callback;
373  }
374 
375  // Target-specific parsing of expression.
// Default implementation: forwards both arguments unchanged to the generic
// parser obtained from getParser() and returns its result.
376  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
377  return getParser().parsePrimaryExpr(Res, EndLoc);
378  }
379 
380  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
381  SMLoc &EndLoc) = 0;
382 
383  /// Sets frame register corresponding to the current MachineFunction.
384  virtual void SetFrameRegister(unsigned RegNo) {} // default: does nothing, RegNo is ignored
385 
386  /// ParseInstruction - Parse one assembly instruction.
387  ///
388  /// The parser is positioned following the instruction name. The target
389  /// specific instruction parser should parse the entire instruction and
390  /// construct the appropriate MCInst, or emit an error. On success, the entire
391  /// line should be parsed up to and including the end-of-statement token. On
392  /// failure, the parser is not required to read to the end of the line.
393  ///
394  /// \param Name - The instruction name.
395  /// \param NameLoc - The source location of the name.
396  /// \param Operands [out] - The list of parsed operands, this returns
397  /// ownership of them to the caller.
398  /// \return True on failure.
399  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
400  SMLoc NameLoc, OperandVector &Operands) = 0;
402  AsmToken Token, OperandVector &Operands) {
403  return ParseInstruction(Info, Name, Token.getLoc(), Operands);
404  }
405 
406  /// ParseDirective - Parse a target specific assembler directive
407  ///
408  /// The parser is positioned following the directive name. The target
409  /// specific directive parser should parse the entire directive doing or
410  /// recording any target specific work, or return true and do nothing if the
411  /// directive is not target specific. If the directive is specific for
412  /// the target, the entire line is parsed up to and including the
413  /// end-of-statement token and false is returned.
414  ///
415  /// \param DirectiveID - the identifier token of the directive.
416  virtual bool ParseDirective(AsmToken DirectiveID) = 0;
417 
418  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
419  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
420  /// This returns false on success and returns true on failure to match.
421  ///
422  /// On failure, the target parser is responsible for emitting a diagnostic
423  /// explaining the match failure.
424  virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
425  OperandVector &Operands, MCStreamer &Out,
426  uint64_t &ErrorInfo,
427  bool MatchingInlineAsm) = 0;
428 
429  /// Allows targets to let registers opt out of clobber lists.
430  virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } // default: false for every RegNo
431 
432  /// Allow a target to add special case operand matching for things that
433  /// tblgen doesn't/can't handle effectively. For example, literal
434  /// immediates on ARM. TableGen expects a token operand, but the parser
435  /// will recognize them as immediates.
437  unsigned Kind) {
438  return Match_InvalidOperand;
439  }
440 
441  /// Validate the instruction match against any complex target predicates
442  /// before rendering any operands to it.
443  virtual unsigned
445  return Match_Success;
446  }
447 
448  /// checkTargetMatchPredicate - Validate the instruction match against
449  /// any complex target predicates not expressible via match classes.
// Default implementation ignores Inst and reports Match_Success.
450  virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
451  return Match_Success;
452  }
453 
454  virtual void convertToMapAndConstraints(unsigned Kind,
455  const OperandVector &Operands) = 0;
456 
457  /// Returns whether two registers are equal and is used by the tied-operands
458  /// checks in the AsmMatcher. This method can be overridden to allow e.g. a
459  /// sub- or super-register as the tied operand.
// Default implementation: both operands must be registers (asserted), and they
// are considered equal exactly when their getReg() values are equal.
460  virtual bool regsEqual(const MCParsedAsmOperand &Op1,
461  const MCParsedAsmOperand &Op2) const {
462  assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
463  return Op1.getReg() == Op2.getReg();
464  }
465 
466  // Return whether this parser uses assignment statements with equals tokens
// Default: true. (The semicolon after the closing brace is an empty
// declaration and has no effect.)
467  virtual bool equalIsAsmAssignment() { return true; };
468  // Return whether this start-of-statement identifier is a label.
// Default: true for every token; Token is not inspected.
469  virtual bool isLabel(AsmToken &Token) { return true; };
470  // Return whether this parser accepts a star as the start of a statement.
// Default: false.
471  virtual bool starIsStartOfStatement() { return false; };
472 
473  virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
475  MCContext &Ctx) {
476  return nullptr;
477  }
478 
479  // For actions that have to be performed before a label is emitted
481 
// Hook with an empty default body. Judging by its name it is invoked once a
// label has been parsed - TODO confirm against the caller.
482  virtual void onLabelParsed(MCSymbol *Symbol) {}
483 
484  /// Ensure that all previously parsed instructions have been emitted to the
485  /// output streamer, if the target does not emit them immediately.
486  virtual void flushPendingInstructions(MCStreamer &Out) {} // default: does nothing, Out is not used
487 
// Hook receiving an expression, the kind of the operator token and the
// MCContext. The default implementation ignores all three and returns nullptr.
// NOTE(review): presumably nullptr signals that the target provides no
// expression for this operator - confirm at the call site.
488  virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
489  AsmToken::TokenKind OperatorToken,
490  MCContext &Ctx) {
491  return nullptr;
492  }
493 
494  // For any checks or cleanups at the end of parsing.
495  virtual void onEndOfFile() {} // default: does nothing
496 };
497 
498 } // end namespace llvm
499 
500 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
virtual bool isLabel(AsmToken &Token)
bool hasIndexReg() const
MCAsmParserSemaCallback * SemaCallback
SemaCallback - The Sema callback implementation.
This class represents lattice values for constants.
Definition: AllocatorList.h:24
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:42
virtual unsigned checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands)
Validate the instruction match against any complex target predicates before rendering any operands to...
IntelExpr(StringRef reg, int64_t imm=0, unsigned scale=0, bool needBracs=true)
virtual bool regsEqual(const MCParsedAsmOperand &Op1, const MCParsedAsmOperand &Op2) const
Returns whether two registers are equal and is used by the tied-operands checks in the AsmMatcher...
MCTargetAsmParser - Generic interface to target specific assembly parsers.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
const MCSubtargetInfo * STI
Current STI.
const FeatureBitset Features
uint64_t getAvailableFeatures() const
static NearMissInfo getMissedOperand(unsigned OperandError, unsigned OperandClass, unsigned Opcode, unsigned OperandIndex)
ParseInstructionInfo(SmallVectorImpl< AsmRewrite > *rewrites)
virtual void onLabelParsed(MCSymbol *Symbol)
unsigned getOperandClass() const
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
MCTargetOptions MCOptions
Set of options which affects instrumentation of inline assembly.
bool hasRegs() const
amdgpu Simplify well known AMD library false Value Value const Twine & Name
unsigned getOpcode() const
IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale=0, int64_t imm=0, bool needBracs=true)
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:36
DiagnosticPredicate(DiagnosticPredicateTy T)
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
IntelExpr(bool needBracs=false)
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand...
static NearMissInfo getTooFewOperands(unsigned OperandClass, unsigned Opcode)
Context object for machine code objects.
Definition: MCContext.h:63
unsigned getPredicateError() const
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:28
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:161
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
virtual void SetFrameRegister(unsigned RegNo)
Sets frame register corresponding to the current MachineFunction.
uint64_t getFeatures() const
Streaming machine code generation interface.
Definition: MCStreamer.h:189
virtual bool OmitRegisterFromClobberLists(unsigned RegNo)
Allows targets to let registers opt out of clobber lists.
MissedOpInfo MissedOperand
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static NearMissInfo getMissedPredicate(unsigned FailureCode)
void setSemaCallback(MCAsmParserSemaCallback *Callback)
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:24
bool hasBaseReg() const
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
void setParsingInlineAsm(bool Value)
static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D)
Generic Sema callback for assembly parser.
Definition: MCAsmParser.h:95
static NearMissInfo getMissedFeature(uint64_t MissingFeatures)
virtual bool starIsStartOfStatement()
virtual unsigned getReg() const =0
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken Token, OperandVector &Operands)
virtual bool equalIsAsmAssignment()
IntelExpr(int64_t imm, bool needBracs)
virtual const MCExpr * createTargetUnaryExpr(const MCExpr *E, AsmToken::TokenKind OperatorToken, MCContext &Ctx)
const MCInstrInfo & MII
DiagnosticPredicateTy Type
static NearMissInfo getSuccess()
AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len=0, int64_t val=0)
Base class for user error types.
Definition: Error.h:345
virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc)
void setAvailableFeatures(uint64_t Value)
bool isValid() const
Generic base class for all target subtargets.
unsigned getOperandError() const
virtual const MCExpr * applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind, MCContext &Ctx)
const unsigned Kind
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
NearMissKind getKind() const
unsigned getOperandIndex() const
virtual void flushPendingInstructions(MCStreamer &Out)
Ensure that all previously parsed instructions have been emitted to the output streamer, if the target does not emit them immediately.
Generic interface for extending the MCAsmParser, which is implemented by target and object file assem...
const char AsmRewritePrecedence[]
TooFewOperandsInfo TooFewOperands
LLVM Value Representation.
Definition: Value.h:73
virtual void doBeforeLabelEmit(MCSymbol *Symbol)
AsmRewriteKind Kind
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
Represents a location in source code.
Definition: SMLoc.h:24
MCTargetOptions getTargetOptions() const
virtual bool isReg() const =0
isReg - Is this a register operand?