/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. * * @param insn - The instruction whose ID is to be determined. * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ static int getID(struct InternalInstruction* insn, void *miiArg) { uint8_t attrMask; uint16_t instructionID; dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; if (insn->vexSize) { attrMask |= ATTR_VEX; if (insn->vexSize == 3) { switch (ppFromVEX3of3(insn->vexPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } else if (insn->vexSize == 2) { switch (ppFromVEX2of2(insn->vexPrefix[1])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (lFromVEX2of2(insn->vexPrefix[1])) attrMask |= ATTR_VEXL; } else { return -1; } } else { if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) attrMask |= ATTR_ADSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; } if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. */ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && !(attrMask & ATTR_OPSIZE)) { /* * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit * has precedence since there are no L-bit with W-bit entries in the tables. * So if the L-bit isn't significant we should use the W-bit instead. * We only need to do this if the instruction doesn't specify OpSize since * there is a VEX_L_W_OPSIZE table. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithWBit; const struct InstructionSpecifier *specWithWBit; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithWBit, insn, (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithWBit = specifierForUID(instructionIDWithWBit); if (instructionID != instructionIDWithWBit) { insn->instructionID = instructionIDWithWBit; insn->spec = specWithWBit; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a * particular spot (i.e., many MMX operations). In general we're * conservative, but in the specific case where OpSize is present but not * in the right place we check if there's a 16-bit operation. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; const char *specName, *specWithOpSizeName; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { /* * ModRM required with OpSize but not present; give up and return version * without OpSize set */ insn->instructionID = instructionID; insn->spec = spec; return 0; } specName = x86DisassemblerGetInstrName(instructionID, miiArg); specWithOpSizeName = x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); if (is16BitEquvalent(specName, specWithOpSizeName)) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && insn->rexPrefix & 0x01) { /* * NOOP shouldn't decode as NOOP if REX.b is set. Instead * it should decode as XCHG %r8, %eax. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithNewOpcode; const struct InstructionSpecifier *specWithNewOpcode; spec = specifierForUID(instructionID); /* Borrow opcode from one of the other XCHGar opcodes */ insn->opcode = 0x91; if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) { insn->opcode = 0x90; insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); /* Change back */ insn->opcode = 0x90; insn->instructionID = instructionIDWithNewOpcode; insn->spec = specWithNewOpcode; return 0; } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); return 0; }
/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. * * @param insn - The instruction whose ID is to be determined. * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ static int getID(struct InternalInstruction* insn) { uint8_t attrMask; uint16_t instructionID; dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; if (insn->vexSize) { attrMask |= ATTR_VEX; if (insn->vexSize == 3) { switch (ppFromVEX3of3(insn->vexPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (wFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_REXW; if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } else if (insn->vexSize == 2) { switch (ppFromVEX2of2(insn->vexPrefix[1])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; break; case VEX_PREFIX_F2: attrMask |= ATTR_XD; break; } if (lFromVEX2of2(insn->vexPrefix[1])) attrMask |= ATTR_VEXL; } else { return -1; } } else { if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; } if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. */ if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { /* * Although for SSE instructions it is usually necessary to treat REX.W+F2 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is * an occasional instruction where F2 is incidental and REX.W is the more * significant. If the decoded instruction is 32-bit and adding REX.W * instead of F2 changes a 32 to a 64, we adopt the new encoding. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithREXw; const struct InstructionSpecifier *specWithREXw; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithREXw, insn, attrMask & (~ATTR_XD))) { /* * Decoding with REX.w would yield nothing; give up and return original * decode. */ insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithREXw = specifierForUID(instructionIDWithREXw); if (is64BitEquivalent(spec->name, specWithREXw->name)) { insn->instructionID = instructionIDWithREXw; insn->spec = specWithREXw; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a * particular spot (i.e., many MMX operations). In general we're * conservative, but in the specific case where OpSize is present but not * in the right place we check if there's a 16-bit operation. */ const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; const struct InstructionSpecifier *specWithOpsize; spec = specifierForUID(instructionID); if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { /* * ModRM required with OpSize but not present; give up and return version * without OpSize set */ insn->instructionID = instructionID; insn->spec = spec; return 0; } specWithOpsize = specifierForUID(instructionIDWithOpsize); if (is16BitEquvalent(spec->name, specWithOpsize->name)) { insn->instructionID = instructionIDWithOpsize; insn->spec = specWithOpsize; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); return 0; }