/* * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of * extended or escape opcodes). * * @param insn - The instruction whose opcode is to be read. * @return - 0 if the opcode could be read successfully; nonzero otherwise. */ static int readOpcode(struct InternalInstruction* insn) { /* Determine the length of the primary opcode */ uint8_t current; dbgprintf(insn, "readOpcode()"); insn->opcodeType = ONEBYTE; if (consumeByte(insn, ¤t)) return -1; if (current == 0x0f) { dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); insn->twoByteEscape = current; if (consumeByte(insn, ¤t)) return -1; if (current == 0x38) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); insn->threeByteEscape = current; if (consumeByte(insn, ¤t)) return -1; insn->opcodeType = THREEBYTE_38; } else if (current == 0x3a) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); insn->threeByteEscape = current; if (consumeByte(insn, ¤t)) return -1; insn->opcodeType = THREEBYTE_3A; } else { dbgprintf(insn, "Didn't find a three-byte escape prefix"); insn->opcodeType = TWOBYTE; } } /* * At this point we have consumed the full opcode. * Anything we consume from here on must be unconsumed. */ insn->opcode = current; return 0; }
/*
 * readImmediate - Consumes an immediate operand from an instruction, given the
 *   desired operand size.
 *
 * A size of 0 means "use insn->immediateSize"; any other size is also written
 * back to insn->immediateSize.  The offset of the immediate relative to the
 * start of the instruction is recorded in insn->immediateOffset before the
 * bytes are consumed.  At most two immediates may be read per instruction.
 *
 * @param insn  - The instruction whose operand is to be read.
 * @param size  - The width (in bytes) of the operand.
 * @return      - 0 if the immediate was successfully consumed; nonzero
 *                otherwise.
 */
static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
  dbgprintf(insn, "readImmediate()");

  /* Both immediate slots are already filled. */
  if (insn->numImmediatesConsumed == 2) {
    debug("Already consumed two immediates");
    return -1;
  }

  /* An explicit size overrides the default; zero selects the default. */
  if (size != 0)
    insn->immediateSize = size;
  else
    size = insn->immediateSize;

  insn->immediateOffset = insn->readerCursor - insn->startLocation;

  if (size == 1) {
    uint8_t byteValue;

    if (consumeByte(insn, &byteValue))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = byteValue;
  } else if (size == 2) {
    uint16_t wordValue;

    if (consumeUInt16(insn, &wordValue))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = wordValue;
  } else if (size == 4) {
    uint32_t dwordValue;

    if (consumeUInt32(insn, &dwordValue))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = dwordValue;
  } else if (size == 8) {
    uint64_t qwordValue;

    if (consumeUInt64(insn, &qwordValue))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = qwordValue;
  }
  /* Any other size stores nothing but still counts as a consumed slot. */

  insn->numImmediatesConsumed++;

  return 0;
}
/* * readSIB - Consumes the SIB byte to determine addressing information for an * instruction. * * @param insn - The instruction whose SIB byte is to be read. * @return - 0 if the SIB byte was successfully read; nonzero otherwise. */ static int readSIB(struct InternalInstruction* insn) { SIBIndex sibIndexBase = 0; SIBBase sibBaseBase = 0; uint8_t index, base; dbgprintf(insn, "readSIB()"); if (insn->consumedSIB) return 0; insn->consumedSIB = TRUE; switch (insn->addressSize) { case 2: dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); return -1; break; case 4: sibIndexBase = SIB_INDEX_EAX; sibBaseBase = SIB_BASE_EAX; break; case 8: sibIndexBase = SIB_INDEX_RAX; sibBaseBase = SIB_BASE_RAX; break; } if (consumeByte(insn, &insn->sib)) return -1; index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); switch (index) { case 0x4: insn->sibIndex = SIB_INDEX_NONE; break; default: insn->sibIndex = (SIBIndex)(sibIndexBase + index); if (insn->sibIndex == SIB_INDEX_sib || insn->sibIndex == SIB_INDEX_sib64) insn->sibIndex = SIB_INDEX_NONE; break; } switch (scaleFromSIB(insn->sib)) { case 0: insn->sibScale = 1; break; case 1: insn->sibScale = 2; break; case 2: insn->sibScale = 4; break; case 3: insn->sibScale = 8; break; } base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); switch (base) { case 0x5: switch (modFromModRM(insn->modRM)) { case 0x0: insn->eaDisplacement = EA_DISP_32; insn->sibBase = SIB_BASE_NONE; break; case 0x1: insn->eaDisplacement = EA_DISP_8; insn->sibBase = (insn->addressSize == 4 ? SIB_BASE_EBP : SIB_BASE_RBP); break; case 0x2: insn->eaDisplacement = EA_DISP_32; insn->sibBase = (insn->addressSize == 4 ? SIB_BASE_EBP : SIB_BASE_RBP); break; case 0x3: debug("Cannot have Mod = 0b11 and a SIB byte"); return -1; } break; default: insn->sibBase = (SIBBase)(sibBaseBase + base); break; } return 0; }
/* * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of * extended or escape opcodes). * * @param insn - The instruction whose opcode is to be read. * @return - 0 if the opcode could be read successfully; nonzero otherwise. */ static int readOpcode(struct InternalInstruction* insn) { /* Determine the length of the primary opcode */ uint8_t current; dbgprintf(insn, "readOpcode()"); insn->opcodeType = ONEBYTE; if (insn->vexSize == 3) { switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); return -1; case 0: break; case VEX_LOB_0F: insn->twoByteEscape = 0x0f; insn->opcodeType = TWOBYTE; return consumeByte(insn, &insn->opcode); case VEX_LOB_0F38: insn->twoByteEscape = 0x0f; insn->threeByteEscape = 0x38; insn->opcodeType = THREEBYTE_38; return consumeByte(insn, &insn->opcode); case VEX_LOB_0F3A: insn->twoByteEscape = 0x0f; insn->threeByteEscape = 0x3a; insn->opcodeType = THREEBYTE_3A; return consumeByte(insn, &insn->opcode); } } else if (insn->vexSize == 2) { insn->twoByteEscape = 0x0f; insn->opcodeType = TWOBYTE; return consumeByte(insn, &insn->opcode); } if (consumeByte(insn, ¤t)) return -1; if (current == 0x0f) { dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); insn->twoByteEscape = current; if (consumeByte(insn, ¤t)) return -1; if (current == 0x38) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); insn->threeByteEscape = current; if (consumeByte(insn, ¤t)) return -1; insn->opcodeType = THREEBYTE_38; } else if (current == 0x3a) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); insn->threeByteEscape = current; if (consumeByte(insn, ¤t)) return -1; insn->opcodeType = THREEBYTE_3A; } else if (current == 0xa6) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); insn->threeByteEscape = current; if (consumeByte(insn, ¤t)) return -1; insn->opcodeType = 
THREEBYTE_A6; } else if (current == 0xa7) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); insn->threeByteEscape = current; if (consumeByte(insn, ¤t)) return -1; insn->opcodeType = THREEBYTE_A7; } else { dbgprintf(insn, "Didn't find a three-byte escape prefix"); insn->opcodeType = TWOBYTE; } } /* * At this point we have consumed the full opcode. * Anything we consume from here on must be unconsumed. */ insn->opcode = current; return 0; }
/* * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the * instruction as having them. Also sets the instruction's default operand, * address, and other relevant data sizes to report operands correctly. * * @param insn - The instruction whose prefixes are to be read. * @return - 0 if the instruction could be read until the end of the prefix * bytes, and no prefixes conflicted; nonzero otherwise. */ static int readPrefixes(struct InternalInstruction* insn) { BOOL isPrefix = TRUE; BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; uint8_t byte = 0; BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; dbgprintf(insn, "readPrefixes()"); while (isPrefix) { prefixLocation = insn->readerCursor; if (consumeByte(insn, &byte)) return -1; switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ if (prefixGroups[0]) dbgprintf(insn, "Redundant Group 1 prefix"); prefixGroups[0] = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x2e: /* CS segment override -OR- Branch not taken */ case 0x36: /* SS segment override -OR- Branch taken */ case 0x3e: /* DS segment override */ case 0x26: /* ES segment override */ case 0x64: /* FS segment override */ case 0x65: /* GS segment override */ switch (byte) { case 0x2e: insn->segmentOverride = SEG_OVERRIDE_CS; break; case 0x36: insn->segmentOverride = SEG_OVERRIDE_SS; break; case 0x3e: insn->segmentOverride = SEG_OVERRIDE_DS; break; case 0x26: insn->segmentOverride = SEG_OVERRIDE_ES; break; case 0x64: insn->segmentOverride = SEG_OVERRIDE_FS; break; case 0x65: insn->segmentOverride = SEG_OVERRIDE_GS; break; default: debug("Unhandled override"); return -1; } if (prefixGroups[1]) dbgprintf(insn, "Redundant Group 2 prefix"); prefixGroups[1] = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x66: /* Operand-size override */ if (prefixGroups[2]) dbgprintf(insn, "Redundant Group 3 prefix"); prefixGroups[2] = TRUE; hasOpSize = TRUE; 
setPrefixPresent(insn, byte, prefixLocation); break; case 0x67: /* Address-size override */ if (prefixGroups[3]) dbgprintf(insn, "Redundant Group 4 prefix"); prefixGroups[3] = TRUE; hasAdSize = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; default: /* Not a prefix byte */ isPrefix = FALSE; break; } if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } insn->vexSize = 0; if (byte == 0xc4) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } if (insn->vexSize == 3) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) | (rFromVEX2of3(insn->vexPrefix[1]) << 2) | (xFromVEX2of3(insn->vexPrefix[1]) << 1) | (bFromVEX2of3(insn->vexPrefix[1]) << 0); } switch (ppFromVEX3of3(insn->vexPrefix[2])) { default: break; case VEX_PREFIX_66: hasOpSize = TRUE; break; } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); } } else if (byte == 0xc5) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { unconsumeByte(insn); } if (insn->vexSize == 2) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); } switch (ppFromVEX2of2(insn->vexPrefix[1])) { default: break; case VEX_PREFIX_66: hasOpSize = TRUE; break; } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", 
insn->vexPrefix[0], insn->vexPrefix[1]); } } else { if (insn->mode == MODE_64BIT) { if ((byte & 0xf0) == 0x40) { uint8_t opcodeByte; if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { dbgprintf(insn, "Redundant REX prefix"); return -1; } insn->rexPrefix = byte; insn->necessaryPrefixLocation = insn->readerCursor - 2; dbgprintf(insn, "Found REX prefix 0x%hhx", byte); } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } } if (insn->mode == MODE_16BIT) { insn->registerSize = (hasOpSize ? 4 : 2); insn->addressSize = (hasAdSize ? 4 : 2); insn->displacementSize = (hasAdSize ? 4 : 2); insn->immediateSize = (hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 2 : 4); insn->displacementSize = (hasAdSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; } else if (insn->rexPrefix) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = (hasOpSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } else { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = (hasOpSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } } return 0; }
/* * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and * displacement) for an instruction and interprets it. * * @param insn - The instruction whose addressing information is to be read. * @return - 0 if the information was successfully read; nonzero otherwise. */ static int readModRM(struct InternalInstruction* insn) { uint8_t mod, rm, reg; dbgprintf(insn, "readModRM()"); if (insn->consumedModRM) return 0; if (consumeByte(insn, &insn->modRM)) return -1; insn->consumedModRM = TRUE; mod = modFromModRM(insn->modRM); rm = rmFromModRM(insn->modRM); reg = regFromModRM(insn->modRM); /* * This goes by insn->registerSize to pick the correct register, which messes * up if we're using (say) XMM or 8-bit register operands. That gets fixed in * fixupReg(). */ switch (insn->registerSize) { case 2: insn->regBase = MODRM_REG_AX; insn->eaRegBase = EA_REG_AX; break; case 4: insn->regBase = MODRM_REG_EAX; insn->eaRegBase = EA_REG_EAX; break; case 8: insn->regBase = MODRM_REG_RAX; insn->eaRegBase = EA_REG_RAX; break; } reg |= rFromREX(insn->rexPrefix) << 3; rm |= bFromREX(insn->rexPrefix) << 3; insn->reg = (Reg)(insn->regBase + reg); switch (insn->addressSize) { case 2: insn->eaBaseBase = EA_BASE_BX_SI; switch (mod) { case 0x0: if (rm == 0x6) { insn->eaBase = EA_BASE_NONE; insn->eaDisplacement = EA_DISP_16; if (readDisplacement(insn)) return -1; } else { insn->eaBase = (EABase)(insn->eaBaseBase + rm); insn->eaDisplacement = EA_DISP_NONE; } break; case 0x1: insn->eaBase = (EABase)(insn->eaBaseBase + rm); insn->eaDisplacement = EA_DISP_8; if (readDisplacement(insn)) return -1; break; case 0x2: insn->eaBase = (EABase)(insn->eaBaseBase + rm); insn->eaDisplacement = EA_DISP_16; if (readDisplacement(insn)) return -1; break; case 0x3: insn->eaBase = (EABase)(insn->eaRegBase + rm); if (readDisplacement(insn)) return -1; break; } break; case 4: case 8: insn->eaBaseBase = (insn->addressSize == 4 ? 
EA_BASE_EAX : EA_BASE_RAX); switch (mod) { case 0x0: insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ switch (rm) { case 0x4: case 0xc: /* in case REXW.b is set */ insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64); readSIB(insn); if (readDisplacement(insn)) return -1; break; case 0x5: insn->eaBase = EA_BASE_NONE; insn->eaDisplacement = EA_DISP_32; if (readDisplacement(insn)) return -1; break; default: insn->eaBase = (EABase)(insn->eaBaseBase + rm); break; } break; case 0x1: case 0x2: insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); switch (rm) { case 0x4: case 0xc: /* in case REXW.b is set */ insn->eaBase = EA_BASE_sib; readSIB(insn); if (readDisplacement(insn)) return -1; break; default: insn->eaBase = (EABase)(insn->eaBaseBase + rm); if (readDisplacement(insn)) return -1; break; } break; case 0x3: insn->eaDisplacement = EA_DISP_NONE; insn->eaBase = (EABase)(insn->eaRegBase + rm); break; } break; } /* switch (insn->addressSize) */ return 0; }
/* * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the * instruction as having them. Also sets the instruction's default operand, * address, and other relevant data sizes to report operands correctly. * * @param insn - The instruction whose prefixes are to be read. * @return - 0 if the instruction could be read until the end of the prefix * bytes, and no prefixes conflicted; nonzero otherwise. */ static int readPrefixes(struct InternalInstruction* insn) { BOOL isPrefix = TRUE; BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; uint8_t byte; BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; dbgprintf(insn, "readPrefixes()"); while (isPrefix) { prefixLocation = insn->readerCursor; if (consumeByte(insn, &byte)) return -1; switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ if (prefixGroups[0]) dbgprintf(insn, "Redundant Group 1 prefix"); prefixGroups[0] = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x2e: /* CS segment override -OR- Branch not taken */ case 0x36: /* SS segment override -OR- Branch taken */ case 0x3e: /* DS segment override */ case 0x26: /* ES segment override */ case 0x64: /* FS segment override */ case 0x65: /* GS segment override */ switch (byte) { case 0x2e: insn->segmentOverride = SEG_OVERRIDE_CS; break; case 0x36: insn->segmentOverride = SEG_OVERRIDE_SS; break; case 0x3e: insn->segmentOverride = SEG_OVERRIDE_DS; break; case 0x26: insn->segmentOverride = SEG_OVERRIDE_ES; break; case 0x64: insn->segmentOverride = SEG_OVERRIDE_FS; break; case 0x65: insn->segmentOverride = SEG_OVERRIDE_GS; break; default: unreachable("Unhandled override"); } if (prefixGroups[1]) dbgprintf(insn, "Redundant Group 2 prefix"); prefixGroups[1] = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; case 0x66: /* Operand-size override */ if (prefixGroups[2]) dbgprintf(insn, "Redundant Group 3 prefix"); prefixGroups[2] = TRUE; hasOpSize = TRUE; setPrefixPresent(insn, 
byte, prefixLocation); break; case 0x67: /* Address-size override */ if (prefixGroups[3]) dbgprintf(insn, "Redundant Group 4 prefix"); prefixGroups[3] = TRUE; hasAdSize = TRUE; setPrefixPresent(insn, byte, prefixLocation); break; default: /* Not a prefix byte */ isPrefix = FALSE; break; } if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } if (insn->mode == MODE_64BIT) { if ((byte & 0xf0) == 0x40) { uint8_t opcodeByte; if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { dbgprintf(insn, "Redundant REX prefix"); return -1; } insn->rexPrefix = byte; insn->necessaryPrefixLocation = insn->readerCursor - 2; dbgprintf(insn, "Found REX prefix 0x%hhx", byte); } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } } else { unconsumeByte(insn); } if (insn->mode == MODE_16BIT) { insn->registerSize = (hasOpSize ? 4 : 2); insn->addressSize = (hasAdSize ? 4 : 2); insn->displacementSize = (hasAdSize ? 4 : 2); insn->immediateSize = (hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 2 : 4); insn->displacementSize = (hasAdSize ? 2 : 4); insn->immediateSize = (hasAdSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; } else if (insn->rexPrefix) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = (hasOpSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } else { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 4 : 8); insn->displacementSize = (hasOpSize ? 2 : 4); insn->immediateSize = (hasOpSize ? 2 : 4); } } return 0; }