Exemplo n.º 1
0
/*
 * This function is responsible for returning the instruction information of the first instruction found in code.
 * It returns the _InstInfo of the found instruction, otherwise NULL.
 * code should point to the ModR/M byte upon exit (if used), or after the instruction binary code itself.
 * This function is NOT decoding-type dependent; it is up to the caller to check whether the instruction is valid.
 * Get the instruction info, using a Trie data structure.
 * I call it "raw", because it simply locates an instruction, it doesn't care what bytes it's using, such as prefixes.
 */
static _InstInfo* locate_raw_inst(const uint8_t** code0, int* codeLen0, _OffsetType* codeOffset0, _WString* instructionHex, int isREXPrefixValid, _DecodeType dt)
{
	const uint8_t* code = *code0;
	int codeLen = *codeLen0;
	_OffsetType codeOffset = *codeOffset0;

	unsigned int tmpIndex0 = 0, tmpIndex1 = 0, tmpIndex2 = 0, tmpIndex3 = 0;
	_InstNode* in = NULL;
	_InstInfo* ii = NULL;

	/* Precaution. */
	if (codeLen <= 0) return NULL;

	tmpIndex0 = *code;

	/* Check for NULL node for index 0. */
	in = (_InstNode*)Instructions.list[Instructions.ids[tmpIndex0]];
	if (in == NULL) return NULL;

	/* Single byte instruction (OCST_1BYTE). */
	if (in->type == INT_INFO) {
		str_hex_b(instructionHex, tmpIndex0);

		codeLen -= 1;
		if (codeLen < 0) return NULL;
		code += 1;
		codeOffset += 1;
		*code0 = code;
		*codeLen0 = codeLen;
		*codeOffset0 = codeOffset;

		/*
		 * ARPL/MOVSXD share the same instruction number, and both have different operands and mnemonics, of course.
		 * Practically, I couldn't come up with a comfortable way to merge the operands' types of ARPL/MOVSXD.
		 * And since the DB can't be patched dynamically, because the DB has to be multi-threaded compliant,
		 * I have no choice but to check for ARPL/MOVSXD right here - "right about now, the funk soul brother, check it out now, the funk soul brother...", fatboy slim
		 */
		if (tmpIndex0 == INST_ARPL_INDEX) return dt == Decode64Bits ? (_InstInfo*)&II_movsxd : &II_arpl;

		return (_InstInfo*)in;
	}

	/* Single byte instruction + reg bits (OCST_13BYTES). */
	if (in->type == INT_LIST_GROUP) {
		str_hex_b(instructionHex, tmpIndex0);

		codeLen -= 1;
		if (codeLen <= 0) return NULL;
		code += 1;
		codeOffset += 1;
		*code0 = code;
		*codeLen0 = codeLen;
		*codeOffset0 = codeOffset;
		return (_InstInfo*)in->list[in->ids[(*code >> 3) & 7]];
	}
Exemplo n.º 2
0
// This function is responsible for returning the instruction information of the first instruction found in code.
// It returns the _InstInfo of the found instruction, otherwise NULL.
// code should point to the ModR/M byte upon exit (if used), or after the instruction binary code itself.
// This function is NOT decoding-type dependent; it is up to the caller to check whether the instruction is valid.
// Get the instruction info, using a Trie data structure.
// I call it "basic", because it simply locates an instruction, it doesn't care what bytes it's using, such as prefixes.
//
_InstInfo* locate_basic_inst(const unsigned char** code0, long* codeLen0, _OffsetType* codeOffset0, _WString* instructionHex, int isERXPrefixValid, _DecodeType dt)
{
	const unsigned char* code = *code0;
	long codeLen = *codeLen0;
	_OffsetType codeOffset = *codeOffset0;

	unsigned char tmpIndex0 = *code, tmpIndex1 = 0, tmpIndex2 = 0;
	_InstNode *in = NULL;
	_InstInfo *ii = NULL;

	// Single byte instruction (OCST_1BYTE).
	if (Instructions[tmpIndex0].type == INT_INFO) {
		str_hex_b(instructionHex, tmpIndex0);

		codeLen -= 1;
		if (codeLen < 0) return NULL;
		code += 1;
		codeOffset += 1;
		*code0 = code;
		*codeLen0 = codeLen;
		*codeOffset0 = codeOffset;

		// ARPL/MOVSXD share the same instruction number, and both have different operands and mnemonics, of course.
		// Practically, I couldn't come up with a comfortable way to merge the operands' types of ARPL/MOVSXD.
		// And since the DB can't be patched dynamically, because the DB has to be multi-threaded compliant,
		// I have no choice but to check for ARPL/MOVSXD right here - "right about now, the funk soul brother, check it out now, the funk soul brother...", fatboy slim
		if (tmpIndex0 == INST_ARPL_INDEX) return dt == Decode64Bits ? &II_movsxd : &II_arpl;

		return Instructions[tmpIndex0].ii;
	}

	// Single byte instruction + reg bits (OCST_13BYTES).
	if (Instructions[tmpIndex0].type == INT_LIST_GROUP) {
		str_hex_b(instructionHex, tmpIndex0);

		codeLen -= 1;
		if (codeLen <= 0) return NULL;
		code += 1;
		codeOffset += 1;
		*code0 = code;
		*codeLen0 = codeLen;
		*codeOffset0 = codeOffset;
		return Instructions[tmpIndex0].list[(*code >> 3) & 7].ii;
	}
Exemplo n.º 3
0
_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
#endif
{
    /*
     * Renders the decoded instruction 'di' as text into 'result'.
     *
     * Fills in, in this order:
     *   - result->size and result->offset (the address masked by addrMask),
     *   - result->instructionHex: hex dump of the instruction bytes read from ci->code,
     *   - result->mnemonic: optional "LOCK "/"REP "/"REPNZ " prefix plus the mnemonic text,
     *   - result->operands: the operands separated by ", ", plus an optional branch hint.
     *
     * 'ci' supplies the code buffer, its base offset (codeOffset), the decoding
     * type (dt) and the feature flags that select the address mask.
     * An instruction flagged FLAG_NOT_DECODABLE is rendered as "DB <byte>".
     */
    uint64_t addrMask;
    int64_t maskedDisp;
    unsigned int idx, segIsDefault, isStringOp;
    uint8_t segReg;
    const _WMnemonic* mnem;
    _WString* out;

    /* Full-width mask by default; narrowed when a 32 or 16 bit maximum is requested. */
    if (ci->features & DF_MAXIMUM_ADDR32) {
        addrMask = 0xffffffff;
    } else if (ci->features & DF_MAXIMUM_ADDR16) {
        addrMask = 0xffff;
    } else {
        addrMask = (uint64_t)-1;
    }

    /* Plain field copies. */
    result->offset = di->addr & addrMask;
    result->size = di->size;

    /* Undecodable byte: mnemonic is "DB <byte>", operands are empty, hex is that byte. */
    if (di->flags == FLAG_NOT_DECODABLE) {
        out = &result->mnemonic;
        strclear_WS(&result->operands);
        strcpy_WSN(out, "DB ");
        str_code_hb(out, di->imm.byte);
        strclear_WS(&result->instructionHex);
        str_hex_b(&result->instructionHex, di->imm.byte);
        return;
    }

    /* Hex dump: the unmasked address is used to index into the code buffer. */
    out = &result->instructionHex;
    strclear_WS(out);
    idx = 0;
    while (idx < di->size) {
        str_hex_b(out, ci->code[(unsigned int)(di->addr - ci->codeOffset + idx)]);
        idx++;
    }

    /* Mnemonic: either starts with a prefix string or is reset to empty. */
    out = &result->mnemonic;
    switch (FLAG_GET_PREFIX(di->flags))
    {
    case FLAG_REPNZ:
        strcpy_WSN(out, "REPNZ ");
        break;
    case FLAG_REP:
        strcpy_WSN(out, "REP ");
        break;
    case FLAG_LOCK:
        strcpy_WSN(out, "LOCK ");
        break;
    default:
        /* The mnemonic is appended below, so the string must start out empty. */
        strclear_WS(out);
        break;
    }

    /* Append the mnemonic text (including its terminator) right after the prefix. */
    mnem = (const _WMnemonic*)&_MNEMONICS[di->opcode];
    memcpy((int8_t*)&out->p[out->length], mnem->p, mnem->length + 1);
    out->length += mnem->length;

    /* From here on the operands string is being built. */
    out = &result->operands;
    strclear_WS(out);

    /* Is this one of the string instructions that can be shown in the short form? */
    switch (di->opcode)
    {
    case I_MOVS:
    case I_CMPS:
    case I_STOS:
    case I_LODS:
    case I_SCAS:
        isStringOp = 1;
        break;
    default:
        isStringOp = 0;
        break;
    }

    /*
     * Short form of an integer string instruction: when the address size equals
     * the decoding type and the segment is the default one, no operands are
     * printed; a size letter is appended to the mnemonic instead and we are done.
     */
    if ((META_GET_ISC(di->meta) == ISC_INTEGER) && isStringOp &&
        (FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) {
        out = &result->mnemonic;
        if (di->ops[0].size == 8) {
            chrcat_WS(out, 'B');
        } else if (di->ops[0].size == 16) {
            chrcat_WS(out, 'W');
        } else if (di->ops[0].size == 32) {
            chrcat_WS(out, 'D');
        } else if (di->ops[0].size == 64) {
            chrcat_WS(out, 'Q');
        }
        return;
    }

    /* Emit every operand up to the first O_NONE slot. */
    for (idx = 0; idx < OPERANDS_NO; idx++) {
        if (di->ops[idx].type == O_NONE) break;

        if (idx != 0) {
            strcat_WSN(out, ", ");
        }

        switch (di->ops[idx].type)
        {
        case O_REG:
            strcat_WS(out, (const _WString*)&_REGISTERS[di->ops[idx].index]);
            break;

        case O_IMM:
            /* PUSH gets an explicit size, unless the immediate is a byte. */
            if (di->opcode == I_PUSH && di->ops[idx].size != 8) {
                distorm_format_size(out, di, idx);
            }
            if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[idx].size == 8) && (di->imm.sbyte < 0)) {
                /* Negative signed byte: print a minus sign followed by the magnitude. */
                chrcat_WS(out, MINUS_DISP_CHR);
                str_code_hb(out, -di->imm.sbyte);
            } else if (di->ops[idx].size == 64) {
                str_code_hqw(out, (uint8_t*)&di->imm.qword);
            } else {
                str_code_hdw(out, di->imm.dword);
            }
            break;

        case O_IMM1:
            str_code_hdw(out, di->imm.ex.i1);
            break;

        case O_IMM2:
            str_code_hdw(out, di->imm.ex.i2);
            break;

        case O_DISP:
            /* Size, then [seg:disp] where the segment appears only when it is not the default. */
            distorm_format_size(out, di, idx);
            chrcat_WS(out, OPEN_CHR);
            if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
                strcat_WS(out, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
                chrcat_WS(out, SEG_OFF_CHR);
            }
            maskedDisp = di->disp & addrMask;
            str_code_hqw(out, (uint8_t*)&maskedDisp);
            chrcat_WS(out, CLOSE_CHR);
            break;

        case O_SMEM:
            distorm_format_size(out, di, idx);
            chrcat_WS(out, OPEN_CHR);

            /*
             * String instructions reaching this point are shown with explicit
             * operands. MOVS and CMPS have two memory operands while 'di' holds
             * a single segment, so operand 0 of MOVS and operand 1 of CMPS are
             * given ES here. For all listed string opcodes the segment is
             * printed even if it is flagged as the default one.
             */
            segReg = SEGMENT_GET(di->segment);
            segIsDefault = SEGMENT_IS_DEFAULT(di->segment);
            if (di->opcode == I_MOVS) {
                segIsDefault = FALSE;
                if (idx == 0) segReg = R_ES;
            } else if (di->opcode == I_CMPS) {
                segIsDefault = FALSE;
                if (idx == 1) segReg = R_ES;
            } else if ((di->opcode == I_INS) || (di->opcode == I_LODS) ||
                       (di->opcode == I_STOS) || (di->opcode == I_SCAS)) {
                segIsDefault = FALSE;
            }
            if (!segIsDefault && (segReg != R_NONE)) {
                strcat_WS(out, (const _WString*)&_REGISTERS[segReg]);
                chrcat_WS(out, SEG_OFF_CHR);
            }

            strcat_WS(out, (const _WString*)&_REGISTERS[di->ops[idx].index]);

            distorm_format_signed_disp(out, di, addrMask);
            chrcat_WS(out, CLOSE_CHR);
            break;

        case O_MEM:
            /* Size, then [seg:base+index*scale<disp>]. */
            distorm_format_size(out, di, idx);
            chrcat_WS(out, OPEN_CHR);
            if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
                strcat_WS(out, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
                chrcat_WS(out, SEG_OFF_CHR);
            }
            if (di->base != R_NONE) {
                strcat_WS(out, (const _WString*)&_REGISTERS[di->base]);
                chrcat_WS(out, PLUS_DISP_CHR);
            }
            strcat_WS(out, (const _WString*)&_REGISTERS[di->ops[idx].index]);
            if (di->scale != 0) {
                chrcat_WS(out, '*');
                switch (di->scale)
                {
                case 2:
                    chrcat_WS(out, '2');
                    break;
                case 4:
                    chrcat_WS(out, '4');
                    break;
                default:
                    /* Any other non-zero scale is printed as 8. */
                    chrcat_WS(out, '8');
                    break;
                }
            }

            distorm_format_signed_disp(out, di, addrMask);
            chrcat_WS(out, CLOSE_CHR);
            break;

        case O_PC:
            /* Relative target: immediate + instruction address + instruction size, masked. */
#ifdef SUPPORT_64BIT_OFFSET
            str_off64(out, (di->imm.sqword + di->addr + di->size) & addrMask);
#else
            str_code_hdw(out, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
#endif
            break;

        case O_PTR:
            /* Far pointer printed as seg:off. */
            str_code_hdw(out, di->imm.ptr.seg);
            chrcat_WS(out, SEG_OFF_CHR);
            str_code_hdw(out, di->imm.ptr.off);
            break;
        }
    }

    /* Branch hint, if any, is appended after the operands. */
    if (di->flags & FLAG_HINT_TAKEN) {
        strcat_WSN(out, " ;TAKEN");
    } else if (di->flags & FLAG_HINT_NOT_TAKEN) {
        strcat_WSN(out, " ;NOT TAKEN");
    }
}