LLVM  8.0.1
X86DisassemblerDecoder.h
Go to the documentation of this file.
1 //===-- X86DisassemblerDecoder.h - Disassembler decoder ---------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the X86 Disassembler.
11 // It contains the public interface of the instruction decoder.
12 // Documentation for the disassembler can be found in X86Disassembler.h.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
17 #define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
18 
19 #include "llvm/ADT/ArrayRef.h"
21 
22 namespace llvm {
23 namespace X86Disassembler {
24 
25 // Accessor functions for various fields of an Intel instruction
26 #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
27 #define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
28 #define rmFromModRM(modRM) ((modRM) & 0x7)
29 #define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
30 #define indexFromSIB(sib) (((sib) & 0x38) >> 3)
31 #define baseFromSIB(sib) ((sib) & 0x7)
32 #define wFromREX(rex) (((rex) & 0x8) >> 3)
33 #define rFromREX(rex) (((rex) & 0x4) >> 2)
34 #define xFromREX(rex) (((rex) & 0x2) >> 1)
35 #define bFromREX(rex) ((rex) & 0x1)
36 
37 #define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)
38 #define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
39 #define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
40 #define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
41 #define mmFromEVEX2of4(evex) ((evex) & 0x3)
42 #define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
43 #define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
44 #define ppFromEVEX3of4(evex) ((evex) & 0x3)
45 #define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)
46 #define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)
47 #define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5)
48 #define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)
// Extracts the complemented bit 3 of the fourth EVEX byte. The argument is
// parenthesized before the complement so that an expression argument such as
// `a | b` is inverted as a whole rather than only its first operand.
#define v2FromEVEX4of4(evex) (((~(evex)) & 0x8) >> 3)
50 #define aaaFromEVEX4of4(evex) ((evex) & 0x7)
51 
52 #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
53 #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
54 #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
55 #define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
56 #define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
57 #define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
58 #define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
59 #define ppFromVEX3of3(vex) ((vex) & 0x3)
60 
61 #define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
62 #define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
63 #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
64 #define ppFromVEX2of2(vex) ((vex) & 0x3)
65 
66 #define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
67 #define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
68 #define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
69 #define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
70 #define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
// Extracts the complemented vvvv field (bits 6:3) of the third XOP byte.
// The parameter is named `xop` to match the other XOP accessors.
#define vvvvFromXOP3of3(xop) (((~(xop)) & 0x78) >> 3)
72 #define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
73 #define ppFromXOP3of3(xop) ((xop) & 0x3)
74 
75 // These enums represent Intel registers for use by the decoder.
76 #define REGS_8BIT \
77  ENTRY(AL) \
78  ENTRY(CL) \
79  ENTRY(DL) \
80  ENTRY(BL) \
81  ENTRY(AH) \
82  ENTRY(CH) \
83  ENTRY(DH) \
84  ENTRY(BH) \
85  ENTRY(R8B) \
86  ENTRY(R9B) \
87  ENTRY(R10B) \
88  ENTRY(R11B) \
89  ENTRY(R12B) \
90  ENTRY(R13B) \
91  ENTRY(R14B) \
92  ENTRY(R15B) \
93  ENTRY(SPL) \
94  ENTRY(BPL) \
95  ENTRY(SIL) \
96  ENTRY(DIL)
97 
98 #define EA_BASES_16BIT \
99  ENTRY(BX_SI) \
100  ENTRY(BX_DI) \
101  ENTRY(BP_SI) \
102  ENTRY(BP_DI) \
103  ENTRY(SI) \
104  ENTRY(DI) \
105  ENTRY(BP) \
106  ENTRY(BX) \
107  ENTRY(R8W) \
108  ENTRY(R9W) \
109  ENTRY(R10W) \
110  ENTRY(R11W) \
111  ENTRY(R12W) \
112  ENTRY(R13W) \
113  ENTRY(R14W) \
114  ENTRY(R15W)
115 
116 #define REGS_16BIT \
117  ENTRY(AX) \
118  ENTRY(CX) \
119  ENTRY(DX) \
120  ENTRY(BX) \
121  ENTRY(SP) \
122  ENTRY(BP) \
123  ENTRY(SI) \
124  ENTRY(DI) \
125  ENTRY(R8W) \
126  ENTRY(R9W) \
127  ENTRY(R10W) \
128  ENTRY(R11W) \
129  ENTRY(R12W) \
130  ENTRY(R13W) \
131  ENTRY(R14W) \
132  ENTRY(R15W)
133 
134 #define EA_BASES_32BIT \
135  ENTRY(EAX) \
136  ENTRY(ECX) \
137  ENTRY(EDX) \
138  ENTRY(EBX) \
139  ENTRY(sib) \
140  ENTRY(EBP) \
141  ENTRY(ESI) \
142  ENTRY(EDI) \
143  ENTRY(R8D) \
144  ENTRY(R9D) \
145  ENTRY(R10D) \
146  ENTRY(R11D) \
147  ENTRY(R12D) \
148  ENTRY(R13D) \
149  ENTRY(R14D) \
150  ENTRY(R15D)
151 
152 #define REGS_32BIT \
153  ENTRY(EAX) \
154  ENTRY(ECX) \
155  ENTRY(EDX) \
156  ENTRY(EBX) \
157  ENTRY(ESP) \
158  ENTRY(EBP) \
159  ENTRY(ESI) \
160  ENTRY(EDI) \
161  ENTRY(R8D) \
162  ENTRY(R9D) \
163  ENTRY(R10D) \
164  ENTRY(R11D) \
165  ENTRY(R12D) \
166  ENTRY(R13D) \
167  ENTRY(R14D) \
168  ENTRY(R15D)
169 
170 #define EA_BASES_64BIT \
171  ENTRY(RAX) \
172  ENTRY(RCX) \
173  ENTRY(RDX) \
174  ENTRY(RBX) \
175  ENTRY(sib64) \
176  ENTRY(RBP) \
177  ENTRY(RSI) \
178  ENTRY(RDI) \
179  ENTRY(R8) \
180  ENTRY(R9) \
181  ENTRY(R10) \
182  ENTRY(R11) \
183  ENTRY(R12) \
184  ENTRY(R13) \
185  ENTRY(R14) \
186  ENTRY(R15)
187 
188 #define REGS_64BIT \
189  ENTRY(RAX) \
190  ENTRY(RCX) \
191  ENTRY(RDX) \
192  ENTRY(RBX) \
193  ENTRY(RSP) \
194  ENTRY(RBP) \
195  ENTRY(RSI) \
196  ENTRY(RDI) \
197  ENTRY(R8) \
198  ENTRY(R9) \
199  ENTRY(R10) \
200  ENTRY(R11) \
201  ENTRY(R12) \
202  ENTRY(R13) \
203  ENTRY(R14) \
204  ENTRY(R15)
205 
206 #define REGS_MMX \
207  ENTRY(MM0) \
208  ENTRY(MM1) \
209  ENTRY(MM2) \
210  ENTRY(MM3) \
211  ENTRY(MM4) \
212  ENTRY(MM5) \
213  ENTRY(MM6) \
214  ENTRY(MM7)
215 
216 #define REGS_XMM \
217  ENTRY(XMM0) \
218  ENTRY(XMM1) \
219  ENTRY(XMM2) \
220  ENTRY(XMM3) \
221  ENTRY(XMM4) \
222  ENTRY(XMM5) \
223  ENTRY(XMM6) \
224  ENTRY(XMM7) \
225  ENTRY(XMM8) \
226  ENTRY(XMM9) \
227  ENTRY(XMM10) \
228  ENTRY(XMM11) \
229  ENTRY(XMM12) \
230  ENTRY(XMM13) \
231  ENTRY(XMM14) \
232  ENTRY(XMM15) \
233  ENTRY(XMM16) \
234  ENTRY(XMM17) \
235  ENTRY(XMM18) \
236  ENTRY(XMM19) \
237  ENTRY(XMM20) \
238  ENTRY(XMM21) \
239  ENTRY(XMM22) \
240  ENTRY(XMM23) \
241  ENTRY(XMM24) \
242  ENTRY(XMM25) \
243  ENTRY(XMM26) \
244  ENTRY(XMM27) \
245  ENTRY(XMM28) \
246  ENTRY(XMM29) \
247  ENTRY(XMM30) \
248  ENTRY(XMM31)
249 
250 #define REGS_YMM \
251  ENTRY(YMM0) \
252  ENTRY(YMM1) \
253  ENTRY(YMM2) \
254  ENTRY(YMM3) \
255  ENTRY(YMM4) \
256  ENTRY(YMM5) \
257  ENTRY(YMM6) \
258  ENTRY(YMM7) \
259  ENTRY(YMM8) \
260  ENTRY(YMM9) \
261  ENTRY(YMM10) \
262  ENTRY(YMM11) \
263  ENTRY(YMM12) \
264  ENTRY(YMM13) \
265  ENTRY(YMM14) \
266  ENTRY(YMM15) \
267  ENTRY(YMM16) \
268  ENTRY(YMM17) \
269  ENTRY(YMM18) \
270  ENTRY(YMM19) \
271  ENTRY(YMM20) \
272  ENTRY(YMM21) \
273  ENTRY(YMM22) \
274  ENTRY(YMM23) \
275  ENTRY(YMM24) \
276  ENTRY(YMM25) \
277  ENTRY(YMM26) \
278  ENTRY(YMM27) \
279  ENTRY(YMM28) \
280  ENTRY(YMM29) \
281  ENTRY(YMM30) \
282  ENTRY(YMM31)
283 
284 #define REGS_ZMM \
285  ENTRY(ZMM0) \
286  ENTRY(ZMM1) \
287  ENTRY(ZMM2) \
288  ENTRY(ZMM3) \
289  ENTRY(ZMM4) \
290  ENTRY(ZMM5) \
291  ENTRY(ZMM6) \
292  ENTRY(ZMM7) \
293  ENTRY(ZMM8) \
294  ENTRY(ZMM9) \
295  ENTRY(ZMM10) \
296  ENTRY(ZMM11) \
297  ENTRY(ZMM12) \
298  ENTRY(ZMM13) \
299  ENTRY(ZMM14) \
300  ENTRY(ZMM15) \
301  ENTRY(ZMM16) \
302  ENTRY(ZMM17) \
303  ENTRY(ZMM18) \
304  ENTRY(ZMM19) \
305  ENTRY(ZMM20) \
306  ENTRY(ZMM21) \
307  ENTRY(ZMM22) \
308  ENTRY(ZMM23) \
309  ENTRY(ZMM24) \
310  ENTRY(ZMM25) \
311  ENTRY(ZMM26) \
312  ENTRY(ZMM27) \
313  ENTRY(ZMM28) \
314  ENTRY(ZMM29) \
315  ENTRY(ZMM30) \
316  ENTRY(ZMM31)
317 
318 #define REGS_MASKS \
319  ENTRY(K0) \
320  ENTRY(K1) \
321  ENTRY(K2) \
322  ENTRY(K3) \
323  ENTRY(K4) \
324  ENTRY(K5) \
325  ENTRY(K6) \
326  ENTRY(K7)
327 
328 #define REGS_SEGMENT \
329  ENTRY(ES) \
330  ENTRY(CS) \
331  ENTRY(SS) \
332  ENTRY(DS) \
333  ENTRY(FS) \
334  ENTRY(GS)
335 
336 #define REGS_DEBUG \
337  ENTRY(DR0) \
338  ENTRY(DR1) \
339  ENTRY(DR2) \
340  ENTRY(DR3) \
341  ENTRY(DR4) \
342  ENTRY(DR5) \
343  ENTRY(DR6) \
344  ENTRY(DR7) \
345  ENTRY(DR8) \
346  ENTRY(DR9) \
347  ENTRY(DR10) \
348  ENTRY(DR11) \
349  ENTRY(DR12) \
350  ENTRY(DR13) \
351  ENTRY(DR14) \
352  ENTRY(DR15)
353 
354 #define REGS_CONTROL \
355  ENTRY(CR0) \
356  ENTRY(CR1) \
357  ENTRY(CR2) \
358  ENTRY(CR3) \
359  ENTRY(CR4) \
360  ENTRY(CR5) \
361  ENTRY(CR6) \
362  ENTRY(CR7) \
363  ENTRY(CR8) \
364  ENTRY(CR9) \
365  ENTRY(CR10) \
366  ENTRY(CR11) \
367  ENTRY(CR12) \
368  ENTRY(CR13) \
369  ENTRY(CR14) \
370  ENTRY(CR15)
371 
372 #define REGS_BOUND \
373  ENTRY(BND0) \
374  ENTRY(BND1) \
375  ENTRY(BND2) \
376  ENTRY(BND3)
377 
378 #define ALL_EA_BASES \
379  EA_BASES_16BIT \
380  EA_BASES_32BIT \
381  EA_BASES_64BIT
382 
383 #define ALL_SIB_BASES \
384  REGS_32BIT \
385  REGS_64BIT
386 
387 #define ALL_REGS \
388  REGS_8BIT \
389  REGS_16BIT \
390  REGS_32BIT \
391  REGS_64BIT \
392  REGS_MMX \
393  REGS_XMM \
394  REGS_YMM \
395  REGS_ZMM \
396  REGS_MASKS \
397  REGS_SEGMENT \
398  REGS_DEBUG \
399  REGS_CONTROL \
400  REGS_BOUND \
401  ENTRY(RIP)
402 
403 /// All possible values of the base field for effective-address
404 /// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
405 /// We distinguish between bases (EA_BASE_*) and registers that just happen
406 /// to be referred to when Mod == 0b11 (EA_REG_*).
407 enum EABase {
409 #define ENTRY(x) EA_BASE_##x,
411 #undef ENTRY
412 #define ENTRY(x) EA_REG_##x,
413  ALL_REGS
414 #undef ENTRY
416 };
417 
418 /// All possible values of the SIB index field.
419 /// Borrows entries from ALL_EA_BASES with the special case that
420 /// sib is synonymous with NONE.
421 /// Vector SIB: index can be XMM, YMM, or ZMM.
422 enum SIBIndex {
424 #define ENTRY(x) SIB_INDEX_##x,
426  REGS_XMM
427  REGS_YMM
428  REGS_ZMM
429 #undef ENTRY
431 };
432 
433 /// All possible values of the SIB base field.
434 enum SIBBase {
436 #define ENTRY(x) SIB_BASE_##x,
438 #undef ENTRY
440 };
441 
442 /// Possible displacement types for effective-address computations.
443 typedef enum {
449 
450 /// All possible values of the reg field in the ModR/M byte.
451 enum Reg {
452 #define ENTRY(x) MODRM_REG_##x,
453  ALL_REGS
454 #undef ENTRY
456 };
457 
458 /// All possible segment overrides.
468 };
469 
470 /// Possible values for the VEX.m-mmmm field
472  VEX_LOB_0F = 0x1,
475 };
476 
481 };
482 
483 /// Possible values for the VEX.pp/EVEX.pp field
489 };
490 
493  TYPE_VEX_2B = 0x1,
494  TYPE_VEX_3B = 0x2,
495  TYPE_EVEX = 0x3,
496  TYPE_XOP = 0x4
497 };
498 
499 /// Type for the byte reader that the consumer must provide to
500 /// the decoder. Reads a single byte from the instruction's address space.
501 /// \param arg A baton that the consumer can associate with any internal
502 /// state that it needs.
503 /// \param byte A pointer to a single byte in memory that should be set to
504 /// contain the value at address.
505 /// \param address The address in the instruction's address space that should
506 /// be read from.
507 /// \return -1 if the byte cannot be read for any reason; 0 otherwise.
508 typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);
509 
510 /// Type for the logging function that the consumer can provide to
511 /// get debugging output from the decoder.
512 /// \param arg A baton that the consumer can associate with any internal
513 /// state that it needs.
514 /// \param log A string that contains the message. Will be reused after
515 /// the logger returns.
516 typedef void (*dlog_t)(void *arg, const char *log);
517 
518 /// The specification for how to extract and interpret a full instruction and
519 /// its operands.
521  uint16_t operands;
522 };
523 
524 /// The x86 internal instruction, which is produced by the decoder.
526  // Reader interface (C)
528  // Opaque value passed to the reader
529  const void* readerArg;
530  // The address of the next byte to read via the reader
531  uint64_t readerCursor;
532 
533  // Logger interface (C)
535  // Opaque value passed to the logger
536  void* dlogArg;
537 
538  // General instruction information
539 
540  // The mode to disassemble for (64-bit, protected, real)
542  // The start of the instruction, usable with the reader
543  uint64_t startLocation;
544  // The length of the instruction, in bytes
545  size_t length;
546 
547  // Prefix state
548 
549  // The possible mandatory prefix
551  // The value of the vector extension prefix(EVEX/VEX/XOP), if present
552  uint8_t vectorExtensionPrefix[4];
553  // The type of the vector extension prefix
555  // The value of the REX prefix, if present
556  uint8_t rexPrefix;
557  // The segment override type
559  // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
561 
562  // Address-size override
563  bool hasAdSize;
564  // Operand-size override
565  bool hasOpSize;
566  // Lock prefix
568  // The repeat prefix if any
569  uint8_t repeatPrefix;
570 
571  // Sizes of various critical pieces of data, in bytes
572  uint8_t registerSize;
573  uint8_t addressSize;
575  uint8_t immediateSize;
576 
577  // Offsets from the start of the instruction to the pieces of data, which is
578  // needed to find relocation entries for adding symbolic operands.
581 
582  // opcode state
583 
584  // The last byte of the opcode, not counting any ModR/M extension
585  uint8_t opcode;
586 
587  // decode state
588 
589  // The type of opcode, used for indexing into the array of decode tables
591  // The instruction ID, extracted from the decode table
592  uint16_t instructionID;
593  // The specifier for the instruction, from the instruction info table
595 
596  // state for additional bytes, consumed during operand decode. Pattern:
597  // consumed___ indicates that the byte was already consumed and does not
598  // need to be consumed again.
599 
600  // The VEX.vvvv field, which contains a third register operand for some AVX
601  // instructions.
603 
604  // The writemask for AVX-512 instructions which is contained in EVEX.aaa
606 
607  // The ModR/M byte, which contains most register operands and some portion of
608  // all memory operands.
610  uint8_t modRM;
611 
612  // The SIB byte, used for more complex 32- or 64-bit memory operands
614  uint8_t sib;
615 
616  // The displacement, used for memory operands
618  int32_t displacement;
619 
620  // Immediates. There can be two in some cases
623  uint64_t immediates[2];
624 
625  // A register or immediate operand encoded into the opcode
627 
628  // Portions of the ModR/M byte
629 
630  // These fields determine the allowable values for the ModR/M fields, which
631  // depend on operand and address widths.
634 
635  // The Mod and R/M fields can encode a base for an effective address, or a
636  // register. These are separated into two fields here.
638  EADisplacement eaDisplacement;
639  // The reg field always encodes a register
641 
642  // SIB state
645  uint8_t sibScale;
647 
648  // Embedded rounding control.
649  uint8_t RC;
650 
652 };
653 
654 /// Decode one instruction and store the decoding results in
655 /// a buffer provided by the consumer.
656 /// \param insn The buffer to store the instruction in. Allocated by the
657 /// consumer.
658 /// \param reader The byteReader_t for the bytes to be read.
659 /// \param readerArg An argument to pass to the reader for storing context
660 /// specific to the consumer. May be NULL.
661 /// \param logger The dlog_t to be used in printing status messages from the
662 /// disassembler. May be NULL.
663 /// \param loggerArg An argument to pass to the logger for storing context
664 /// specific to the logger. May be NULL.
665 /// \param startLoc The address (in the reader's address space) of the first
666 /// byte in the instruction.
667 /// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
668 /// \return Nonzero if there was an error during decode, 0 otherwise.
670  byteReader_t reader,
671  const void *readerArg,
672  dlog_t logger,
673  void *loggerArg,
674  const void *miiArg,
675  uint64_t startLoc,
677 
678 /// Print a message to debugs()
679 /// \param file The name of the file printing the debug message.
680 /// \param line The line number that printed the debug message.
681 /// \param s The message to print.
682 void Debug(const char *file, unsigned line, const char *s);
683 
684 StringRef GetInstrName(unsigned Opcode, const void *mii);
685 
686 } // namespace X86Disassembler
687 } // namespace llvm
688 
689 #endif
*ViewGraph Emit a dot run run gv on the postscript file
Definition: GraphWriter.h:363
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
This class represents lattice values for constants.
Definition: AllocatorList.h:24
The specification for how to extract and interpret a full instruction and its operands.
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
#define ALL_REGS
EADisplacement
Possible displacement types for effective-address computations.
amode Optimize addressing mode
SIBIndex
All possible values of the SIB index field.
Reg
All possible values of the reg field in the ModR/M byte.
#define ALL_EA_BASES
VEXLeadingOpcodeByte
Possible values for the VEX.m-mmmm field.
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
EABase
All possible values of the base field for effective-address computations, a.k.a.
#define ALL_SIB_BASES
void Debug(const char *file, unsigned line, const char *s)
Print a message to debugs()
The x86 internal instruction, which is produced by the decoder.
VEXPrefixCode
Possible values for the VEX.pp/EVEX.pp field.
StringRef GetInstrName(unsigned Opcode, const void *mii)
SegmentOverride
All possible segment overrides.
#define REGS_ZMM
SIBBase
All possible values of the SIB base field.
#define REGS_XMM
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
#define REGS_YMM
DisassemblerMode
Decoding mode for the Intel disassembler.