/* * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the * instruction as having them. Also sets the instruction's default operand, * address, and other relevant data sizes to report operands correctly. * * @param insn - The instruction whose prefixes are to be read. * @return - 0 if the instruction could be read until the end of the prefix * bytes, and no prefixes conflicted; nonzero otherwise. */ static int readPrefixes(struct InternalInstruction* insn) { BOOL isPrefix = TRUE; BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; uint8_t byte = 0; BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; dbgprintf(insn, "readPrefixes()"); while (isPrefix) { prefixLocation = insn->readerCursor; if (consumeByte(insn, &byte)) return -1; switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ if (prefixGroups[0]) dbgprintf(insn, "Redundant Group 1 prefix"); prefixGroups[0] = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x2e: /* CS segment override -OR- Branch not taken */ case 0x36: /* SS segment override -OR- Branch taken */ case 0x3e: /* DS segment override */ case 0x26: /* ES segment override */ case 0x64: /* FS segment override */ case 0x65: /* GS segment override */ switch (byte) { case 0x2e: insn->segmentOverride = SEG_OVERRIDE_CS; break; case 0x36: insn->segmentOverride = SEG_OVERRIDE_SS; break; case 0x3e: insn->segmentOverride = SEG_OVERRIDE_DS; break; case 0x26: insn->segmentOverride = SEG_OVERRIDE_ES; break; case 0x64: insn->segmentOverride = SEG_OVERRIDE_FS; break; case 0x65: insn->segmentOverride = SEG_OVERRIDE_GS; break; default: debug("Unhandled override"); return -1; } if (prefixGroups[1]) dbgprintf(insn, "Redundant Group 2 prefix"); prefixGroups[1] = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x66: /* Operand-size override */ if (prefixGroups[2]) dbgprintf(insn, "Redundant Group 3 prefix"); prefixGroups[2] = TRUE; hasOpSize = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x67: /* Address-size override */ if (prefixGroups[3]) dbgprintf(insn, "Redundant Group 4 prefix"); prefixGroups[3] = TRUE; hasAdSize = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; default: /* Not a prefix byte */ isPrefix = FALSE; break; } if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } insn->vexSize = 0; if (byte == 0xc4) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } if (insn->vexSize == 3) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) | (rFromVEX2of3(insn->vexPrefix[1]) << 2) | (xFromVEX2of3(insn->vexPrefix[1]) << 1) | (bFromVEX2of3(insn->vexPrefix[1]) << 0); } switch (ppFromVEX3of3(insn->vexPrefix[2])) { default: break; case VEX_PREFIX_66: hasOpSize = TRUE; break; } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); } } else if (byte == 0xc5) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { unconsumeByte(insn); } if (insn->vexSize == 2) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); } switch (ppFromVEX2of2(insn->vexPrefix[1])) { default: break; case VEX_PREFIX_66: hasOpSize = TRUE; break; } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); } } else { if (insn->mode == MODE_64BIT) { if ((byte & 0xf0) == 0x40) { uint8_t opcodeByte; if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { dbgprintf(insn, "Redundant REX prefix"); return -1; } insn->rexPrefix = byte; insn->necessaryPrefixLocation = insn->readerCursor - 2; dbgprintf(insn, "Found REX prefix 0x%hhx", byte); } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } } if (insn->mode == MODE_16BIT) { insn->registerSize = (hasOpSize ? 4 : 2); insn->addressSize = (hasAdSize ? 4 : 2); insn->displacementSize = (hasAdSize ? 4 : 2); insn->immediateSize = (hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 2 : 4); insn->displacementSize = (hasAdSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; } else if (insn->rexPrefix) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = (hasOpSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } else { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = (hasOpSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } } return 0; }
/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. * * @param insn - The instruction whose ID is to be determined. * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ static int getID(struct InternalInstruction* insn, void *miiArg) { uint8_t attrMask; uint16_t instructionID; dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; if (insn->vexSize) { attrMask |= ATTR_VEX; if (insn->vexSize == 3) { switch (ppFromVEX3of3(insn->vexPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } else if (insn->vexSize == 2) { switch (ppFromVEX2of2(insn->vexPrefix[1])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (lFromVEX2of2(insn->vexPrefix[1])) attrMask |= ATTR_VEXL; } else { return -1; } } else { if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) attrMask |= ATTR_ADSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; } if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. */ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && !(attrMask & ATTR_OPSIZE)) { /* * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit * has precedence since there are no L-bit with W-bit entries in the tables. * So if the L-bit isn't significant we should use the W-bit instead. * We only need to do this if the instruction doesn't specify OpSize since * there is a VEX_L_W_OPSIZE table. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithWBit; const struct InstructionSpecifier *specWithWBit; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithWBit, insn, (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithWBit = specifierForUID(instructionIDWithWBit); if (instructionID != instructionIDWithWBit) { insn->instructionID = instructionIDWithWBit; insn->spec = specWithWBit; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a * particular spot (i.e., many MMX operations). In general we're * conservative, but in the specific case where OpSize is present but not * in the right place we check if there's a 16-bit operation. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; const char *specName, *specWithOpSizeName; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { /* * ModRM required with OpSize but not present; give up and return version * without OpSize set */ insn->instructionID = instructionID; insn->spec = spec; return 0; } specName = x86DisassemblerGetInstrName(instructionID, miiArg); specWithOpSizeName = x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); if (is16BitEquvalent(specName, specWithOpSizeName)) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && insn->rexPrefix & 0x01) { /* * NOOP shouldn't decode as NOOP if REX.b is set. Instead * it should decode as XCHG %r8, %eax. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithNewOpcode; const struct InstructionSpecifier *specWithNewOpcode; spec = specifierForUID(instructionID); /* Borrow opcode from one of the other XCHGar opcodes */ insn->opcode = 0x91; if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) { insn->opcode = 0x90; insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); /* Change back */ insn->opcode = 0x90; insn->instructionID = instructionIDWithNewOpcode; insn->spec = specWithNewOpcode; return 0; } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); return 0; }
/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. * * @param insn - The instruction whose ID is to be determined. * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ static int getID(struct InternalInstruction* insn) { uint8_t attrMask; uint16_t instructionID; dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; if (insn->vexSize) { attrMask |= ATTR_VEX; if (insn->vexSize == 3) { switch (ppFromVEX3of3(insn->vexPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (wFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_REXW; if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } else if (insn->vexSize == 2) { switch (ppFromVEX2of2(insn->vexPrefix[1])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (lFromVEX2of2(insn->vexPrefix[1])) attrMask |= ATTR_VEXL; } else { return -1; } } else { if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; } if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. */ if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { /* * Although for SSE instructions it is usually necessary to treat REX.W+F2 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is * an occasional instruction where F2 is incidental and REX.W is the more * significant. If the decoded instruction is 32-bit and adding REX.W * instead of F2 changes a 32 to a 64, we adopt the new encoding. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithREXw; const struct InstructionSpecifier *specWithREXw; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithREXw, insn, attrMask & (~ATTR_XD))) { /* * Decoding with REX.w would yield nothing; give up and return original * decode. */ insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithREXw = specifierForUID(instructionIDWithREXw); if (is64BitEquivalent(spec->name, specWithREXw->name)) { insn->instructionID = instructionIDWithREXw; insn->spec = specWithREXw; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a * particular spot (i.e., many MMX operations). In general we're * conservative, but in the specific case where OpSize is present but not * in the right place we check if there's a 16-bit operation. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; const struct InstructionSpecifier *specWithOpsize; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { /* * ModRM required with OpSize but not present; give up and return version * without OpSize set */ insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithOpsize = specifierForUID(instructionIDWithOpsize); if (is16BitEquvalent(spec->name, specWithOpsize->name)) { insn->instructionID = instructionIDWithOpsize; insn->spec = specWithOpsize; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); return 0; }