Esempio n. 1
0
/*
 * Render one decoded instruction (di) as text into result.
 *
 * Fills result->size, result->offset, result->instructionHex,
 * result->mnemonic and result->operands.
 *
 * ci supplies the decoding context read here: the feature flags (used to
 * pick the address mask), the raw code buffer and its base offset (used for
 * the hex dump), and the decoding mode dt (compared against the
 * instruction's address size for string instructions).
 *
 * NOTE(review): the #endif right after the signature closes a preprocessor
 * conditional that starts above this definition; presumably it selects
 * between a 32-bit and a 64-bit-offset variant of the function name - confirm.
 */
_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result)
#endif
{
    _WString* str;
    unsigned int i, isDefault;
    int64_t tmpDisp64;
    /* Default mask keeps all 64 bits of an address. */
    uint64_t addrMask = (uint64_t)-1;
    uint8_t segment;
    const _WMnemonic* mnemonic;

    /* Set address mask, when default is for 64bits addresses. */
    if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff;
    else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff;

    /* Copy other fields. */
    result->size = di->size;
    result->offset = di->addr & addrMask;

    /*
     * Undecodable byte: emit it as "DB <byte>" with no operands, and use the
     * same single byte as the hex dump.
     */
    if (di->flags == FLAG_NOT_DECODABLE) {
        str = &result->mnemonic;
        strclear_WS(&result->operands);
        strcpy_WSN(str, "DB ");
        str_code_hb(str, di->imm.byte);
        strclear_WS(&result->instructionHex);
        str_hex_b(&result->instructionHex, di->imm.byte);
        return; /* Skip to next instruction. */
    }

    /*
     * Hex dump of the instruction's raw bytes. The index into ci->code is the
     * instruction address relative to ci->codeOffset, plus the byte position.
     */
    str = &result->instructionHex;
    strclear_WS(str);
    for (i = 0; i < di->size; i++)
        str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]);

    /* Start the mnemonic with the prefix text, if the flags carry one. */
    str = &result->mnemonic;
    switch (FLAG_GET_PREFIX(di->flags))
    {
    case FLAG_LOCK:
        strcpy_WSN(str, "LOCK ");
        break;
    case FLAG_REP:
        strcpy_WSN(str, "REP ");
        break;
    case FLAG_REPNZ:
        strcpy_WSN(str, "REPNZ ");
        break;
    default:
        /* Init mnemonic string, cause next touch is concatenation. */
        strclear_WS(str);
        break;
    }

    /*
     * Append the mnemonic text looked up by opcode right after the prefix.
     * length + 1 bytes are copied but only length is added to the string
     * length - presumably the extra byte is the terminator; confirm against
     * the _WMnemonic layout.
     */
    mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode];
    memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1);
    str->length += mnemonic->length;

    /* Format operands: */
    str = &result->operands;
    strclear_WS(str);

    /* Special treatment for String instructions. */
    if ((META_GET_ISC(di->meta) == ISC_INTEGER) &&
            ((di->opcode == I_MOVS) ||
             (di->opcode == I_CMPS) ||
             (di->opcode == I_STOS) ||
             (di->opcode == I_LODS) ||
             (di->opcode == I_SCAS)))
    {
        /*
         * No operands are needed if the address size is the default one,
         * and no segment is overridden, so add the suffix letter,
         * to indicate size of operation and continue to next instruction.
         */
        if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) {
            str = &result->mnemonic;
            /* The suffix is chosen from the first operand's size in bits; other sizes add nothing. */
            switch (di->ops[0].size)
            {
            case 8:
                chrcat_WS(str, 'B');
                break;
            case 16:
                chrcat_WS(str, 'W');
                break;
            case 32:
                chrcat_WS(str, 'D');
                break;
            case 64:
                chrcat_WS(str, 'Q');
                break;
            }
            /* Note: this early return leaves operands empty and skips the branch-hint suffix below. */
            return;
        }
    }

    /* Emit operands in order, comma separated, stopping at the first O_NONE slot. */
    for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) {
        if (i > 0) strcat_WSN(str, ", ");
        switch (di->ops[i].type)
        {
        case O_REG:
            /* Register operand: print the register name selected by the operand index. */
            strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
            break;
        case O_IMM:
            /* If the instruction is 'push', show explicit size (except byte imm). */
            if (di->opcode == I_PUSH && di->ops[i].size != 8) distorm_format_size(str, di, i);
            /* Special fix for negative sign extended immediates. */
            if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) {
                if (di->imm.sbyte < 0) {
                    /* Print the sign character followed by the magnitude, then leave the switch. */
                    chrcat_WS(str, MINUS_DISP_CHR);
                    str_code_hb(str, -di->imm.sbyte);
                    break;
                }
            }
            /* Non-negative or unsigned immediates: 64-bit operands print the qword, everything else the dword. */
            if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword);
            else str_code_hdw(str, di->imm.dword);
            break;
        case O_IMM1:
            /* First of the two extra immediates. */
            str_code_hdw(str, di->imm.ex.i1);
            break;
        case O_IMM2:
            /* Second of the two extra immediates. */
            str_code_hdw(str, di->imm.ex.i2);
            break;
        case O_DISP:
            /* Displacement-only memory operand: size, optional non-default segment, masked displacement. */
            distorm_format_size(str, di, i);
            chrcat_WS(str, OPEN_CHR);
            if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
                strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
                chrcat_WS(str, SEG_OFF_CHR);
            }
            tmpDisp64 = di->disp & addrMask;
            str_code_hqw(str, (uint8_t*)&tmpDisp64);
            chrcat_WS(str, CLOSE_CHR);
            break;
        case O_SMEM:
            /* Memory operand addressed through a single register (taken from the operand index). */
            distorm_format_size(str, di, i);
            chrcat_WS(str, OPEN_CHR);

            /*
             * This is where we need to take special care for String instructions.
             * If we got here, it means we need to explicitly show their operands.
             * The problem with CMPS and MOVS is that they have two(!) memory operands.
             * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden.
             * And make the rest of the String operations explicit.
             */
            segment = SEGMENT_GET(di->segment);
            isDefault = SEGMENT_IS_DEFAULT(di->segment);
            switch (di->opcode)
            {
            case I_MOVS:
                /* Always show a segment; the first operand is forced to ES. */
                isDefault = FALSE;
                if (i == 0) segment = R_ES;
                break;
            case I_CMPS:
                /* Always show a segment; the second operand is forced to ES. */
                isDefault = FALSE;
                if (i == 1) segment = R_ES;
                break;
            case I_INS:
            case I_LODS:
            case I_STOS:
            case I_SCAS:
                /* Always show the segment carried by the instruction. */
                isDefault = FALSE;
                break;
            }
            if (!isDefault && (segment != R_NONE)) {
                strcat_WS(str, (const _WString*)&_REGISTERS[segment]);
                chrcat_WS(str, SEG_OFF_CHR);
            }

            strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);

            distorm_format_signed_disp(str, di, addrMask);
            chrcat_WS(str, CLOSE_CHR);
            break;
        case O_MEM:
            /* Full memory operand: [segment:][base+]index[*scale] followed by the displacement. */
            distorm_format_size(str, di, i);
            chrcat_WS(str, OPEN_CHR);
            if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) {
                strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]);
                chrcat_WS(str, SEG_OFF_CHR);
            }
            if (di->base != R_NONE) {
                strcat_WS(str, (const _WString*)&_REGISTERS[di->base]);
                chrcat_WS(str, PLUS_DISP_CHR);
            }
            strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]);
            if (di->scale != 0) {
                /* Any non-zero scale other than 2 or 4 is printed as 8. */
                chrcat_WS(str, '*');
                if (di->scale == 2) chrcat_WS(str, '2');
                else if (di->scale == 4) chrcat_WS(str, '4');
                else /* if (di->scale == 8) */ chrcat_WS(str, '8');
            }

            distorm_format_signed_disp(str, di, addrMask);
            chrcat_WS(str, CLOSE_CHR);
            break;
        case O_PC:
            /* Relative target: immediate + instruction address + instruction size, masked to the address width. */
#ifdef SUPPORT_64BIT_OFFSET
            str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask);
#else
            str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask);
#endif
            break;
        case O_PTR:
            /* Far pointer printed as segment, separator, offset. */
            str_code_hdw(str, di->imm.ptr.seg);
            chrcat_WS(str, SEG_OFF_CHR);
            str_code_hdw(str, di->imm.ptr.off);
            break;
        }
    }

    /* Branch hints are appended to the operands text as a trailing remark. */
    if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN");
    else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN");
}
/*
 * Store the unsigned integer `value` under `key` in the dictionary `dict`.
 *
 * Returns 0 on success and -1 on failure. The temporary Python integer is
 * always released here, because PyDict_SetItemString() takes its own
 * reference to the value rather than stealing the caller's.
 */
static int decompose_set_uint_item(PyObject* dict, const char* key, unsigned long long value)
{
	int rc;
	PyObject* num = PyLong_FromUnsignedLongLong(value);

	if (num == NULL) return -1;
	rc = PyDict_SetItemString(dict, key, num);
	Py_DECREF(num);
	return rc;
}

/*
 * Python entry point: decode a code buffer into a list of dictionaries,
 * one per instruction.
 *
 * Arguments (parsed from pArgs): code offset, code string, optional decoding
 * type (defaults to Decode32Bits) and optional feature flags (defaults to 0).
 *
 * Each dictionary carries the keys "addr", "size", "flags", "segment",
 * "isSegmentDefault", "opcode", "base", "scale", "dispSize", "disp",
 * "unusedPrefixesMask" and "meta"; "ops" is added when the instruction has
 * operands, and "imm"/"imm1"/"imm2"/"seg"/"off" according to operand types.
 *
 * Returns a new reference to the list, or NULL with a Python exception set.
 * If the decoder produces no instructions on a pass, decoding stops and the
 * list built so far is returned.
 *
 * NOTE(review): raise_exc is defined outside this block; it is assumed to
 * release both objects, set an exception and return NULL - confirm.
 */
PyObject* distorm_Decompose(PyObject* pSelf, PyObject* pArgs)
{
	_CodeInfo ci;
	_DecodeResult res = DECRES_NONE;

	_DInst decodedInstructions[MAX_INSTRUCTIONS];
	unsigned int decodedInstructionsCount = 0, i = 0, j = 0, next = 0;

	PyObject *ret = NULL, *pyObj = NULL, *dtObj = NULL, *featObj = NULL, *opsObj = NULL;

	(void)pSelf; /* UNREFERENCED_PARAMETER */

	/* Decompose(int32/64 offset, string code, int type=Decode32Bits, int features=0) */
	if (!PyArg_ParseTuple(pArgs, _PY_OFF_INT_SIZE_ "s#|OO", &ci.codeOffset, &ci.code, &ci.codeLen, &dtObj, &featObj)) return NULL;

	if (ci.code == NULL) {
		PyErr_SetString(PyExc_IOError, "Error while reading code buffer.");
		return NULL;
	}

	if (ci.codeLen < 0) {
		PyErr_SetString(PyExc_OverflowError, "Code length is too big.");
		return NULL;
	}

	/* Default parameter. */
	if (dtObj == NULL) ci.dt = Decode32Bits;
	else if (!PyInt_Check(dtObj)) {
		PyErr_SetString(PyExc_IndexError, "Third parameter must be either Decode16Bits, Decode32Bits or Decode64Bits (integer type).");
		return NULL;
	} else ci.dt = (_DecodeType)PyInt_AsUnsignedLongMask(dtObj);

	if ((ci.dt != Decode16Bits) && (ci.dt != Decode32Bits) && (ci.dt != Decode64Bits)) {
		PyErr_SetString(PyExc_IndexError, "Decoding-type must be either Decode16Bits, Decode32Bits or Decode64Bits.");
		return NULL;
	}

	/* Default parameter. The type check must look at featObj itself, not at dtObj. */
	if (featObj == NULL) ci.features = 0;
	else if (!PyInt_Check(featObj)) {
		PyErr_SetString(PyExc_IndexError, "Fourth parameter must be either features flags (integer type).");
		return NULL;
	} else ci.features = (unsigned int)PyInt_AsUnsignedLongMask(featObj);


	/* Construct an empty list, which later will be filled with one dictionary per instruction. */
	ret = PyList_New(0);
	if (ret == NULL) {
		PyErr_SetString(PyExc_MemoryError, "Not enough memory to initialize a list.");
		return NULL;
	}

	while (res != DECRES_SUCCESS) {
		res = decode_internal(&ci, FALSE, decodedInstructions, MAX_INSTRUCTIONS, &decodedInstructionsCount);

		/*
		 * Nothing was produced (memory error, empty buffer, ...): stop here.
		 * Going on would index decodedInstructions[count - 1] with count == 0.
		 */
		if (decodedInstructionsCount == 0) break;

		for (i = 0; i < decodedInstructionsCount; i++) {
			/* Build the operands list lazily; it stays NULL when there are no operands. */
			opsObj = NULL;
			for (j = 0; j < OPERANDS_NO && decodedInstructions[i].flags != FLAG_NOT_DECODABLE; j++) {
				if (decodedInstructions[i].ops[j].type != O_NONE) {
					if (opsObj == NULL) {
						opsObj = PyList_New(0);
						if (opsObj == NULL) {
							PyErr_SetString(PyExc_MemoryError, "Not enough memory to allocate operands list.");
							Py_DECREF(ret);
							return NULL;
						}
					}
					pyObj = Py_BuildValue("{s:Bs:Hs:B}",
						"type", decodedInstructions[i].ops[j].type,
						"size", decodedInstructions[i].ops[j].size,
						"index", decodedInstructions[i].ops[j].index);
					if ((pyObj == NULL) || (PyList_Append(opsObj, pyObj) == -1)) {
						PyErr_SetString(PyExc_MemoryError, "Not enough memory to append an operand into the list.");
						/* pyObj is still ours when only the append failed. */
						Py_XDECREF(pyObj);
						Py_DECREF(ret);
						Py_DECREF(opsObj);
						return NULL;
					}
					Py_DECREF(pyObj);
				} else break;
			}
			pyObj = Py_BuildValue("{s:" _PY_OFF_INT_SIZE_ "s:Bs:Hs:Bs:is:Hs:Bs:Bs:Bs:Ks:Hs:B}",
			                      "addr",
			                      decodedInstructions[i].addr,
			                      "size",
			                      decodedInstructions[i].size,
			                      "flags",
			                      decodedInstructions[i].flags,
			                      "segment",
			                      SEGMENT_GET(decodedInstructions[i].segment),
			                      "isSegmentDefault",
			                      SEGMENT_IS_DEFAULT(decodedInstructions[i].segment),
			                      "opcode",
			                      decodedInstructions[i].opcode,
			                      "base",
			                      decodedInstructions[i].base,
			                      "scale",
			                      decodedInstructions[i].scale,
			                      "dispSize",
			                      decodedInstructions[i].dispSize,
			                      "disp",
			                      decodedInstructions[i].disp,
			                      "unusedPrefixesMask",
			                      decodedInstructions[i].unusedPrefixesMask,
			                      "meta",
			                      decodedInstructions[i].meta);
			/* Check the dictionary before anything is stored into it. */
			if (pyObj == NULL) {
				Py_XDECREF(opsObj);
				Py_DECREF(ret);
				PyErr_SetString(PyExc_MemoryError, "Not enough memory to allocate an instruction.");
				return NULL;
			}
			if (opsObj != NULL) {
				int rc = PyDict_SetItemString(pyObj, "ops", opsObj);
				/* The dictionary holds its own reference on success; ours is dropped either way. */
				Py_DECREF(opsObj);
				if (rc == -1) raise_exc(pyObj, ret);
			}
			/* Handle the special case where the instruction wasn't decoded. */
			if (decodedInstructions[i].flags == FLAG_NOT_DECODABLE) {
				if (decompose_set_uint_item(pyObj, "imm", decodedInstructions[i].imm.byte) == -1) raise_exc(pyObj, ret);
			}
			for (j = 0; j < OPERANDS_NO; j++) {
				/* Put dynamic immediate type. */
				switch (decodedInstructions[i].ops[j].type)
				{
					case O_IMM:
						if (decompose_set_uint_item(pyObj, "imm", decodedInstructions[i].imm.qword) == -1) raise_exc(pyObj, ret);
					break;
					case O_IMM1:
						if (decompose_set_uint_item(pyObj, "imm1", decodedInstructions[i].imm.ex.i1) == -1) raise_exc(pyObj, ret);
					break;
					case O_IMM2:
						if (decompose_set_uint_item(pyObj, "imm2", decodedInstructions[i].imm.ex.i2) == -1) raise_exc(pyObj, ret);
					break;
					case O_PTR:
						if (decompose_set_uint_item(pyObj, "seg", decodedInstructions[i].imm.ptr.seg) == -1) raise_exc(pyObj, ret);
						if (decompose_set_uint_item(pyObj, "off", decodedInstructions[i].imm.ptr.off) == -1) raise_exc(pyObj, ret);
					break;
					case O_PC:
						if (decompose_set_uint_item(pyObj, "imm", decodedInstructions[i].imm.qword) == -1) raise_exc(pyObj, ret);
					break;
					default:
					break;
				}
			}
			if (PyList_Append(ret, pyObj) == -1) {
				Py_DECREF(pyObj);
				Py_DECREF(ret);
				PyErr_SetString(PyExc_MemoryError, "Not enough memory to append an instruction into the list.");
				return NULL;
			}
			Py_DECREF(pyObj);
		}

		/* Get offset difference: end of the last decoded instruction relative to the current buffer start. */
		next = (unsigned int)(decodedInstructions[decodedInstructionsCount-1].addr - ci.codeOffset);
		next += decodedInstructions[decodedInstructionsCount-1].size;

		/* Advance ptr and recalc offset. */
		ci.code += next;
		ci.codeLen -= next;
		ci.codeOffset += next;
	}

	return ret;
}