48 #include "X86GenDisassemblerTables.inc" 51 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0) 53 #define debug(s) do { } while (0) 112 modrm_type != MODRM_ONEENTRY;
133 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
151 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
160 debug(
"Corrupt table! Unknown modrm_type");
172 case MODRM_SPLITMISC:
227 #define CONSUME_FUNC(name, type) \ 228 static int name(struct InternalInstruction* insn, type* ptr) { \ 231 for (offset = 0; offset < sizeof(type); ++offset) { \ 233 int ret = insn->reader(insn->readerArg, \ 235 insn->readerCursor + offset); \ 238 combined = combined | ((uint64_t)byte << (offset * 8)); \ 241 insn->readerCursor += sizeof(type); \ 279 va_start(ap, format);
280 (void)vsnprintf(buffer,
sizeof(buffer),
format, ap);
283 insn->dlog(insn->dlogArg, buffer);
288 return prefix >= 0x40 && prefix <= 0x4f;
314 if (
isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
357 if ((byte == 0xf2 || byte == 0xf3) && !
lookAtByte(insn, &nextByte)) {
365 if (((nextByte == 0xf0) ||
366 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
368 if (!(byte == 0xf3 && nextByte == 0x90))
377 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
378 nextByte == 0xc6 || nextByte == 0xc7)) {
380 if (nextByte != 0x90)
383 if (
isREX(insn, nextByte)) {
427 debug(
"Unhandled override");
446 dbgprintf(insn,
"Found prefix 0x%hhx", byte);
452 uint8_t byte1, byte2;
455 dbgprintf(insn,
"Couldn't read second byte of EVEX prefix");
460 dbgprintf(insn,
"Couldn't read third byte of EVEX prefix");
465 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
476 dbgprintf(insn,
"Couldn't read third byte of EVEX prefix");
480 dbgprintf(insn,
"Couldn't read fourth byte of EVEX prefix");
493 dbgprintf(insn,
"Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
497 }
else if (byte == 0xc4) {
501 dbgprintf(insn,
"Couldn't read second byte of VEX");
524 dbgprintf(insn,
"Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
528 }
else if (byte == 0xc5) {
532 dbgprintf(insn,
"Couldn't read second byte of VEX");
557 dbgprintf(insn,
"Found VEX prefix 0x%hhx 0x%hhx",
561 }
else if (byte == 0x8f) {
565 dbgprintf(insn,
"Couldn't read second byte of XOP");
569 if ((byte1 & 0x38) != 0x0)
596 dbgprintf(insn,
"Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
600 }
else if (
isREX(insn, byte)) {
604 dbgprintf(insn,
"Found REX prefix 0x%hhx", byte);
656 dbgprintf(insn,
"Unhandled mm field for instruction (0x%hhx)",
672 dbgprintf(insn,
"Unhandled m-mmmm field for instruction (0x%hhx)",
691 dbgprintf(insn,
"Unhandled m-mmmm field for instruction (0x%hhx)",
709 if (current == 0x0f) {
710 dbgprintf(insn,
"Found a two-byte escape prefix (0x%hhx)", current);
715 if (current == 0x38) {
716 dbgprintf(insn,
"Found a three-byte escape prefix (0x%hhx)", current);
722 }
else if (current == 0x3a) {
723 dbgprintf(insn,
"Found a three-byte escape prefix (0x%hhx)", current);
729 }
else if (current == 0x0f) {
730 dbgprintf(insn,
"Found a 3dnow escape prefix (0x%hhx)", current);
741 dbgprintf(insn,
"Didn't find a three-byte escape prefix");
775 bool hasModRMExtension;
783 if (hasModRMExtension) {
812 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
814 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
816 if (orig[i] != equiv[i]) {
817 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
819 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
821 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
839 if (name[i] ==
'6' && name[i+1] ==
'4')
855 uint16_t instructionID;
859 attrMask = ATTR_NONE;
862 attrMask |= ATTR_64BIT;
870 attrMask |= ATTR_OPSIZE;
881 attrMask |= ATTR_EVEXKZ;
883 attrMask |= ATTR_EVEXB;
885 attrMask |= ATTR_EVEXK;
887 attrMask |= ATTR_EVEXL;
889 attrMask |= ATTR_EVEXL2;
893 attrMask |= ATTR_OPSIZE;
904 attrMask |= ATTR_VEXL;
908 attrMask |= ATTR_OPSIZE;
919 attrMask |= ATTR_VEXL;
923 attrMask |= ATTR_OPSIZE;
934 attrMask |= ATTR_VEXL;
941 attrMask |= ATTR_OPSIZE;
943 attrMask |= ATTR_ADSIZE;
964 attrMask |= ATTR_OPSIZE;
967 attrMask |= ATTR_ADSIZE;
974 attrMask |= ATTR_REXW;
975 attrMask &= ~ATTR_ADSIZE;
984 attrMask ^= ATTR_ADSIZE;
992 attrMask |= ATTR_OPSIZE;
997 attrMask |= ATTR_OPSIZE;
1017 uint16_t instructionIDWithREXW;
1019 insn, attrMask | ATTR_REXW)) {
1025 auto SpecName =
GetInstrName(instructionIDWithREXW, miiArg);
1027 if (!
is64Bit(SpecName.data())) {
1047 attrMask |= ATTR_ADSIZE;
1049 attrMask |= ATTR_OPSIZE;
1053 attrMask ^= ATTR_ADSIZE;
1057 attrMask ^= ATTR_OPSIZE;
1069 !(attrMask & ATTR_OPSIZE)) {
1079 uint16_t instructionIDWithOpsize;
1086 attrMask | ATTR_OPSIZE)) {
1098 specWithOpSizeName =
GetInstrName(instructionIDWithOpsize, miiArg);
1119 uint16_t instructionIDWithNewOpcode;
1143 insn->
spec = specWithNewOpcode;
1163 uint8_t index, base;
1174 dbgprintf(insn,
"SIB-based addressing doesn't work in 16-bit mode");
1178 sibBaseBase = SIB_BASE_EAX;
1182 sibBaseBase = SIB_BASE_RAX;
1218 debug(
"Cannot have Mod = 0b11 and a SIB byte");
1255 if (consumeInt8(insn, &d8))
1260 if (consumeInt16(insn, &d16))
1265 if (consumeInt32(insn, &d32))
1283 uint8_t mod, rm, reg, evexrm;
1309 insn->
regBase = MODRM_REG_EAX;
1313 insn->
regBase = MODRM_REG_RAX;
1331 EABase eaBaseBase = EA_BASE_BX_SI;
1379 EA_BASE_sib : EA_BASE_sib64);
1401 insn->
eaBase = EA_BASE_sib;
1424 #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \ 1425 static uint16_t name(struct InternalInstruction *insn, \ 1432 debug("Unhandled register type"); \ 1436 return base + index; \ 1441 if (insn->rexPrefix && \ 1442 index >= 4 && index <= 7) { \ 1443 return prefix##_SPL + (index - 4); \ 1445 return prefix##_AL + index; \ 1451 return prefix##_AX + index; \ 1456 return prefix##_EAX + index; \ 1461 return prefix##_RAX + index; \ 1463 return prefix##_ZMM0 + index; \ 1465 return prefix##_YMM0 + index; \ 1467 return prefix##_XMM0 + index; \ 1472 return prefix##_K0 + index; \ 1474 return prefix##_MM0 + (index & 0x7); \ 1475 case TYPE_SEGMENTREG: \ 1476 if ((index & 7) > 5) \ 1478 return prefix##_ES + (index & 7); \ 1479 case TYPE_DEBUGREG: \ 1480 return prefix##_DR0 + index; \ 1481 case TYPE_CONTROLREG: \ 1482 return prefix##_CR0 + index; \ 1486 return prefix##_BND0 + index; \ 1488 return prefix##_XMM0 + index; \ 1490 return prefix##_YMM0 + index; \ 1492 return prefix##_ZMM0 + index; \ 1529 debug(
"Expected a REG or R/M encoding in fixupReg");
1532 insn->vvvv = (
Reg)fixupRegValue(insn,
1540 insn->reg = (
Reg)fixupRegValue(insn,
1542 insn->reg - insn->regBase,
1548 if (insn->eaBase >= insn->eaRegBase) {
1549 insn->eaBase = (
EABase)fixupRMValue(insn,
1551 insn->eaBase - insn->eaRegBase,
1574 dbgprintf(insn,
"readOpcodeRegister()");
1629 debug(
"Already consumed two immediates");
1646 if (consumeUInt16(insn, &imm16))
1651 if (consumeUInt32(insn, &imm32))
1656 if (consumeUInt64(insn, &imm64))
1693 insn->
vvvv =
static_cast<Reg>(vvvv);
1723 int hasVVVV, needVVVV;
1731 needVVVV = hasVVVV && (insn->
vvvv != 0);
1734 switch (
Op.encoding) {
1742 needVVVV = hasVVVV & ((insn->
vvvv & 0xf) != 0);
1747 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1762 debug(
"Unhandled VSIB index type");
1803 if (
Op.type == TYPE_XMM ||
Op.type == TYPE_YMM)
1861 case ENCODING_WRITEMASK:
1868 dbgprintf(insn,
"Encountered an operand with an unknown encoding.");
1874 if (needVVVV)
return -1;
1901 const void *readerArg,
dlog_t logger,
void *loggerArg,
const void *miiArg,
1916 getID(insn, miiArg) ||
1925 dbgprintf(insn,
"Read from 0x%llx to 0x%llx: length %zu",
1929 dbgprintf(insn,
"Instruction exceeds 15-byte limit");
#define bFromEVEX4of4(evex)
static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix)
bool consumedDisplacement
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
VectorExtensionType vectorExtensionType
#define wFromEVEX3of4(evex)
The specification for how to extract and interpret a full instruction and its operands.
#define bFromVEX2of3(vex)
static int consumeByte(struct InternalInstruction *insn, uint8_t *byte)
int(* byteReader_t)(const void *arg, uint8_t *byte, uint64_t address)
Type for the byte reader that the consumer must provide to the decoder.
#define rmFromModRM(modRM)
static int readSIB(struct InternalInstruction *insn)
#define zFromEVEX4of4(evex)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
#define vvvvFromVEX2of2(vex)
#define vvvvFromEVEX3of4(evex)
static int readDisplacement(struct InternalInstruction *insn)
#define r2FromEVEX2of4(evex)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
#define aaaFromEVEX4of4(evex)
amode Optimize addressing mode
#define bFromEVEX2of4(evex)
SIBIndex
All possible values of the SIB index field.
static int readOpcode(struct InternalInstruction *insn)
#define rFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define rFromVEX2of2(vex)
Reg
All possible values of the reg field in the ModR/M byte.
#define lFromVEX2of2(vex)
static int readVVVV(struct InternalInstruction *insn)
static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte)
#define xFromXOP2of3(xop)
#define v2FromEVEX4of4(evex)
SegmentOverride segmentOverride
static int getID(struct InternalInstruction *insn, const void *miiArg)
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
#define THREEDNOW_MAP_SYM
ModRMDecision modRMDecisions[256]
static void unconsumeByte(struct InternalInstruction *insn)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
#define ppFromVEX3of3(vex)
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
#define mmmmmFromXOP2of3(xop)
EABase
All possible values of the base field for effective-address computations, a.k.a.
#define vvvvFromVEX3of3(vex)
The specification for how to extract and interpret one operand.
static bool is64Bit(const char *name)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
#define bFromXOP2of3(xop)
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
#define lFromVEX3of3(vex)
#define ppFromVEX2of2(vex)
static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint16_t opcode)
The x86 internal instruction, which is produced by the decoder.
static int readPrefixes(struct InternalInstruction *insn)
static void dbgprintf(struct InternalInstruction *insn, const char *format,...)
StringRef GetInstrName(unsigned Opcode, const void *mii)
#define lFromXOP3of3(xop)
EADisplacement eaDisplacement
#define mmmmmFromVEX2of3(vex)
#define ppFromEVEX3of4(evex)
#define scaleFromSIB(sib)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
static int readOperands(struct InternalInstruction *insn)
ArrayRef< OperandSpecifier > operands
const InstructionSpecifier * spec
#define xFromVEX2of3(vex)
static bool is16BitEquivalent(const char *orig, const char *equiv)
#define rFromXOP2of3(xop)
#define CONSUME_FUNC(name, type)
#define CASE_ENCODING_VSIB
#define lFromEVEX4of4(evex)
static InstructionContext contextForAttrs(uint16_t attrMask)
#define l2FromEVEX4of4(evex)
#define wFromVEX3of3(vex)
#define wFromXOP3of3(xop)
uint8_t displacementOffset
#define indexFromSIB(sib)
SIBBase
All possible values of the SIB base field.
uint8_t numImmediatesConsumed
static int readModRM(struct InternalInstruction *insn)
#define mmFromEVEX2of4(evex)
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
#define rFromVEX2of3(vex)
Specifies which opcode->instruction tables to look at given a particular context (set of attributes)...
#define regFromModRM(modRM)
static const struct InstructionSpecifier * specifierForUID(InstrUID uid)
#define modFromModRM(modRM)
OpcodeDecision opcodeDecisions[IC_max]
static int readMaskRegister(struct InternalInstruction *insn)
uint8_t vectorExtensionPrefix[4]
StringRef - Represent a constant reference to a string, i.e.
#define vvvvFromXOP3of3(vex)
static int getIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
#define ppFromXOP3of3(xop)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Specifies which set of ModR/M->instruction tables to look at given a particular opcode.
DisassemblerMode
Decoding mode for the Intel disassembler.
Specifies whether a ModR/M byte is needed and (if so) which instruction each possible value of the Mo...