LLVM  8.0.1
BPFAsmParser.cpp
Go to the documentation of this file.
1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TargetRegistry.h"
24 
25 using namespace llvm;
26 
27 namespace {
28 struct BPFOperand;
29 
30 class BPFAsmParser : public MCTargetAsmParser {
31 
32  SMLoc getLoc() const { return getParser().getTok().getLoc(); }
33 
34  bool PreMatchCheck(OperandVector &Operands);
35 
36  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
37  OperandVector &Operands, MCStreamer &Out,
38  uint64_t &ErrorInfo,
39  bool MatchingInlineAsm) override;
40 
41  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
42 
43  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
44  SMLoc NameLoc, OperandVector &Operands) override;
45 
46  bool ParseDirective(AsmToken DirectiveID) override;
47 
48  // "=" is used as assignment operator for assembly statment, so can't be used
49  // for symbol assignment.
50  bool equalIsAsmAssignment() override { return false; }
51  // "*" is used for dereferencing memory that it will be the start of
52  // statement.
53  bool starIsStartOfStatement() override { return true; }
54 
55 #define GET_ASSEMBLER_HEADER
56 #include "BPFGenAsmMatcher.inc"
57 
59  OperandMatchResultTy parseRegister(OperandVector &Operands);
60  OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
61 
62 public:
63  enum BPFMatchResultTy {
64  Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
65 #define GET_OPERAND_DIAGNOSTIC_TYPES
66 #include "BPFGenAsmMatcher.inc"
67 #undef GET_OPERAND_DIAGNOSTIC_TYPES
68  };
69 
70  BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
71  const MCInstrInfo &MII, const MCTargetOptions &Options)
72  : MCTargetAsmParser(Options, STI, MII) {
73  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
74  }
75 };
76 
77 /// BPFOperand - Instances of this class represent a parsed machine
78 /// instruction
79 struct BPFOperand : public MCParsedAsmOperand {
80 
81  enum KindTy {
82  Token,
83  Register,
84  Immediate,
85  } Kind;
86 
87  struct RegOp {
88  unsigned RegNum;
89  };
90 
91  struct ImmOp {
92  const MCExpr *Val;
93  };
94 
95  SMLoc StartLoc, EndLoc;
96  union {
97  StringRef Tok;
98  RegOp Reg;
99  ImmOp Imm;
100  };
101 
102  BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
103 
104 public:
105  BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
106  Kind = o.Kind;
107  StartLoc = o.StartLoc;
108  EndLoc = o.EndLoc;
109 
110  switch (Kind) {
111  case Register:
112  Reg = o.Reg;
113  break;
114  case Immediate:
115  Imm = o.Imm;
116  break;
117  case Token:
118  Tok = o.Tok;
119  break;
120  }
121  }
122 
123  bool isToken() const override { return Kind == Token; }
124  bool isReg() const override { return Kind == Register; }
125  bool isImm() const override { return Kind == Immediate; }
126  bool isMem() const override { return false; }
127 
128  bool isConstantImm() const {
129  return isImm() && dyn_cast<MCConstantExpr>(getImm());
130  }
131 
132  int64_t getConstantImm() const {
133  const MCExpr *Val = getImm();
134  return static_cast<const MCConstantExpr *>(Val)->getValue();
135  }
136 
137  bool isSImm12() const {
138  return (isConstantImm() && isInt<12>(getConstantImm()));
139  }
140 
141  /// getStartLoc - Gets location of the first token of this operand
142  SMLoc getStartLoc() const override { return StartLoc; }
143  /// getEndLoc - Gets location of the last token of this operand
144  SMLoc getEndLoc() const override { return EndLoc; }
145 
146  unsigned getReg() const override {
147  assert(Kind == Register && "Invalid type access!");
148  return Reg.RegNum;
149  }
150 
151  const MCExpr *getImm() const {
152  assert(Kind == Immediate && "Invalid type access!");
153  return Imm.Val;
154  }
155 
156  StringRef getToken() const {
157  assert(Kind == Token && "Invalid type access!");
158  return Tok;
159  }
160 
161  void print(raw_ostream &OS) const override {
162  switch (Kind) {
163  case Immediate:
164  OS << *getImm();
165  break;
166  case Register:
167  OS << "<register x";
168  OS << getReg() << ">";
169  break;
170  case Token:
171  OS << "'" << getToken() << "'";
172  break;
173  }
174  }
175 
176  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
177  assert(Expr && "Expr shouldn't be null!");
178 
179  if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
180  Inst.addOperand(MCOperand::createImm(CE->getValue()));
181  else
182  Inst.addOperand(MCOperand::createExpr(Expr));
183  }
184 
185  // Used by the TableGen Code
186  void addRegOperands(MCInst &Inst, unsigned N) const {
187  assert(N == 1 && "Invalid number of operands!");
189  }
190 
191  void addImmOperands(MCInst &Inst, unsigned N) const {
192  assert(N == 1 && "Invalid number of operands!");
193  addExpr(Inst, getImm());
194  }
195 
196  static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
197  auto Op = make_unique<BPFOperand>(Token);
198  Op->Tok = Str;
199  Op->StartLoc = S;
200  Op->EndLoc = S;
201  return Op;
202  }
203 
204  static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
205  SMLoc E) {
206  auto Op = make_unique<BPFOperand>(Register);
207  Op->Reg.RegNum = RegNo;
208  Op->StartLoc = S;
209  Op->EndLoc = E;
210  return Op;
211  }
212 
213  static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
214  SMLoc E) {
215  auto Op = make_unique<BPFOperand>(Immediate);
216  Op->Imm.Val = Val;
217  Op->StartLoc = S;
218  Op->EndLoc = E;
219  return Op;
220  }
221 
222  // Identifiers that can be used at the start of a statment.
223  static bool isValidIdAtStart(StringRef Name) {
224  return StringSwitch<bool>(Name.lower())
225  .Case("if", true)
226  .Case("call", true)
227  .Case("goto", true)
228  .Case("*", true)
229  .Case("exit", true)
230  .Case("lock", true)
231  .Case("ld_pseudo", true)
232  .Default(false);
233  }
234 
235  // Identifiers that can be used in the middle of a statment.
236  static bool isValidIdInMiddle(StringRef Name) {
237  return StringSwitch<bool>(Name.lower())
238  .Case("u64", true)
239  .Case("u32", true)
240  .Case("u16", true)
241  .Case("u8", true)
242  .Case("be64", true)
243  .Case("be32", true)
244  .Case("be16", true)
245  .Case("le64", true)
246  .Case("le32", true)
247  .Case("le16", true)
248  .Case("goto", true)
249  .Case("ll", true)
250  .Case("skb", true)
251  .Case("s", true)
252  .Default(false);
253  }
254 };
255 } // end anonymous namespace.
256 
257 #define GET_REGISTER_MATCHER
258 #define GET_MATCHER_IMPLEMENTATION
259 #include "BPFGenAsmMatcher.inc"
260 
261 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
262 
263  if (Operands.size() == 4) {
264  // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
265  // reg1 must be the same as reg2
266  BPFOperand &Op0 = (BPFOperand &)*Operands[0];
267  BPFOperand &Op1 = (BPFOperand &)*Operands[1];
268  BPFOperand &Op2 = (BPFOperand &)*Operands[2];
269  BPFOperand &Op3 = (BPFOperand &)*Operands[3];
270  if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
271  && Op1.getToken() == "="
272  && (Op2.getToken() == "-" || Op2.getToken() == "be16"
273  || Op2.getToken() == "be32" || Op2.getToken() == "be64"
274  || Op2.getToken() == "le16" || Op2.getToken() == "le32"
275  || Op2.getToken() == "le64")
276  && Op0.getReg() != Op3.getReg())
277  return true;
278  }
279 
280  return false;
281 }
282 
283 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
284  OperandVector &Operands,
285  MCStreamer &Out, uint64_t &ErrorInfo,
286  bool MatchingInlineAsm) {
287  MCInst Inst;
288  SMLoc ErrorLoc;
289 
290  if (PreMatchCheck(Operands))
291  return Error(IDLoc, "additional inst constraint not met");
292 
293  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
294  default:
295  break;
296  case Match_Success:
297  Inst.setLoc(IDLoc);
298  Out.EmitInstruction(Inst, getSTI());
299  return false;
300  case Match_MissingFeature:
301  return Error(IDLoc, "instruction use requires an option to be enabled");
302  case Match_MnemonicFail:
303  return Error(IDLoc, "unrecognized instruction mnemonic");
304  case Match_InvalidOperand:
305  ErrorLoc = IDLoc;
306 
307  if (ErrorInfo != ~0U) {
308  if (ErrorInfo >= Operands.size())
309  return Error(ErrorLoc, "too few operands for instruction");
310 
311  ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
312 
313  if (ErrorLoc == SMLoc())
314  ErrorLoc = IDLoc;
315  }
316 
317  return Error(ErrorLoc, "invalid operand for instruction");
318  }
319 
320  llvm_unreachable("Unknown match type detected!");
321 }
322 
323 bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
324  SMLoc &EndLoc) {
325  const AsmToken &Tok = getParser().getTok();
326  StartLoc = Tok.getLoc();
327  EndLoc = Tok.getEndLoc();
328  RegNo = 0;
329  StringRef Name = getLexer().getTok().getIdentifier();
330 
331  if (!MatchRegisterName(Name)) {
332  getParser().Lex(); // Eat identifier token.
333  return false;
334  }
335 
336  return Error(StartLoc, "invalid register name");
337 }
338 
340 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
341  SMLoc S = getLoc();
342 
343  if (getLexer().getKind() == AsmToken::Identifier) {
344  StringRef Name = getLexer().getTok().getIdentifier();
345 
346  if (BPFOperand::isValidIdInMiddle(Name)) {
347  getLexer().Lex();
348  Operands.push_back(BPFOperand::createToken(Name, S));
349  return MatchOperand_Success;
350  }
351 
352  return MatchOperand_NoMatch;
353  }
354 
355  switch (getLexer().getKind()) {
356  case AsmToken::Minus:
357  case AsmToken::Plus: {
358  if (getLexer().peekTok().is(AsmToken::Integer))
359  return MatchOperand_NoMatch;
361  }
362 
363  case AsmToken::Equal:
364  case AsmToken::Greater:
365  case AsmToken::Less:
366  case AsmToken::Pipe:
367  case AsmToken::Star:
368  case AsmToken::LParen:
369  case AsmToken::RParen:
370  case AsmToken::LBrac:
371  case AsmToken::RBrac:
372  case AsmToken::Slash:
373  case AsmToken::Amp:
374  case AsmToken::Percent:
375  case AsmToken::Caret: {
376  StringRef Name = getLexer().getTok().getString();
377  getLexer().Lex();
378  Operands.push_back(BPFOperand::createToken(Name, S));
379 
380  return MatchOperand_Success;
381  }
382 
387  case AsmToken::LessEqual:
388  case AsmToken::LessLess: {
389  Operands.push_back(BPFOperand::createToken(
390  getLexer().getTok().getString().substr(0, 1), S));
391  Operands.push_back(BPFOperand::createToken(
392  getLexer().getTok().getString().substr(1, 1), S));
393  getLexer().Lex();
394 
395  return MatchOperand_Success;
396  }
397 
398  default:
399  break;
400  }
401 
402  return MatchOperand_NoMatch;
403 }
404 
405 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
406  SMLoc S = getLoc();
408 
409  switch (getLexer().getKind()) {
410  default:
411  return MatchOperand_NoMatch;
413  StringRef Name = getLexer().getTok().getIdentifier();
414  unsigned RegNo = MatchRegisterName(Name);
415 
416  if (RegNo == 0)
417  return MatchOperand_NoMatch;
418 
419  getLexer().Lex();
420  Operands.push_back(BPFOperand::createReg(RegNo, S, E));
421  }
422  return MatchOperand_Success;
423 }
424 
426  switch (getLexer().getKind()) {
427  default:
428  return MatchOperand_NoMatch;
429  case AsmToken::LParen:
430  case AsmToken::Minus:
431  case AsmToken::Plus:
432  case AsmToken::Integer:
433  case AsmToken::String:
435  break;
436  }
437 
438  const MCExpr *IdVal;
439  SMLoc S = getLoc();
440 
441  if (getParser().parseExpression(IdVal))
442  return MatchOperand_ParseFail;
443 
445  Operands.push_back(BPFOperand::createImm(IdVal, S, E));
446 
447  return MatchOperand_Success;
448 }
449 
450 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
451 /// format.
452 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
453  SMLoc NameLoc, OperandVector &Operands) {
454  // The first operand could be either register or actually an operator.
455  unsigned RegNo = MatchRegisterName(Name);
456 
457  if (RegNo != 0) {
458  SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
459  Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
460  } else if (BPFOperand::isValidIdAtStart (Name))
461  Operands.push_back(BPFOperand::createToken(Name, NameLoc));
462  else
463  return Error(NameLoc, "invalid register/token name");
464 
465  while (!getLexer().is(AsmToken::EndOfStatement)) {
466  // Attempt to parse token as operator
467  if (parseOperandAsOperator(Operands) == MatchOperand_Success)
468  continue;
469 
470  // Attempt to parse token as register
471  if (parseRegister(Operands) == MatchOperand_Success)
472  continue;
473 
474  // Attempt to parse token as an immediate
475  if (parseImmediate(Operands) != MatchOperand_Success) {
476  SMLoc Loc = getLexer().getLoc();
477  return Error(Loc, "unexpected token");
478  }
479  }
480 
481  if (getLexer().isNot(AsmToken::EndOfStatement)) {
482  SMLoc Loc = getLexer().getLoc();
483 
484  getParser().eatToEndOfStatement();
485 
486  return Error(Loc, "unexpected token");
487  }
488 
489  // Consume the EndOfStatement.
490  getParser().Lex();
491  return false;
492 }
493 
494 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
495 
496 extern "C" void LLVMInitializeBPFAsmParser() {
500 }
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This class represents lattice values for constants.
Definition: AllocatorList.h:24
static unsigned MatchRegisterName(StringRef Name)
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:110
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:137
MCTargetAsmParser - Generic interface to target specific assembly parsers.
void push_back(const T &Elt)
Definition: SmallVector.h:218
unsigned Reg
virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool PrintSchedInfo=false)
Emit the given Instruction into the current section.
Definition: MCStreamer.cpp:956
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
amdgpu Simplify well known AMD library false Value Value const Twine & Name
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:116
const FeatureBitset & getFeatureBits() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:36
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:22
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:161
zlib-gnu style compression
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand...
std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \\\)
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
RegisterMCAsmParser - Helper template for registering a target specific assembly parser, for use in the target machine initialization function.
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:28
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:161
Analysis containing CSE Info
Definition: CSEInfo.cpp:21
const char * getPointer() const
Definition: SMLoc.h:35
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
Streaming machine code generation interface.
Definition: MCStreamer.h:189
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:32
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:24
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_t size() const
Definition: SmallVector.h:53
void setLoc(SMLoc loc)
Definition: MCInst.h:179
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void LLVMInitializeBPFAsmParser()
Promote Memory to Register
Definition: Mem2Reg.cpp:110
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
Base class for user error types.
Definition: Error.h:345
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:70
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:37
#define N
Generic base class for all target subtargets.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
const unsigned Kind
LLVM_NODISCARD std::string lower() const
Definition: StringRef.cpp:108
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target & getTheBPFleTarget()
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
void addOperand(const MCOperand &Op)
Definition: MCInst.h:186
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
Target & getTheBPFbeTarget()
Represents a location in source code.
Definition: SMLoc.h:24
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:123
Target & getTheBPFTarget()
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)