_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result) #endif { _WString* str; unsigned int i, isDefault; int64_t tmpDisp64; uint64_t addrMask = (uint64_t)-1; uint8_t segment; const _WMnemonic* mnemonic; /* Set address mask, when default is for 64bits addresses. */ if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff; else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff; /* Copy other fields. */ result->size = di->size; result->offset = di->addr & addrMask; if (di->flags == FLAG_NOT_DECODABLE) { str = &result->mnemonic; strclear_WS(&result->operands); strcpy_WSN(str, "DB "); str_code_hb(str, di->imm.byte); strclear_WS(&result->instructionHex); str_hex_b(&result->instructionHex, di->imm.byte); return; /* Skip to next instruction. */ } str = &result->instructionHex; strclear_WS(str); for (i = 0; i < di->size; i++) str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]); str = &result->mnemonic; switch (FLAG_GET_PREFIX(di->flags)) { case FLAG_LOCK: strcpy_WSN(str, "LOCK "); break; case FLAG_REP: strcpy_WSN(str, "REP "); break; case FLAG_REPNZ: strcpy_WSN(str, "REPNZ "); break; default: /* Init mnemonic string, cause next touch is concatenation. */ strclear_WS(str); break; } mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode]; memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1); str->length += mnemonic->length; /* Format operands: */ str = &result->operands; strclear_WS(str); /* Special treatment for String instructions. */ if ((META_GET_ISC(di->meta) == ISC_INTEGER) && ((di->opcode == I_MOVS) || (di->opcode == I_CMPS) || (di->opcode == I_STOS) || (di->opcode == I_LODS) || (di->opcode == I_SCAS))) { /* * No operands are needed if the address size is the default one, * and no segment is overridden, so add the suffix letter, * to indicate size of operation and continue to next instruction. 
*/ if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) { str = &result->mnemonic; switch (di->ops[0].size) { case 8: chrcat_WS(str, 'B'); break; case 16: chrcat_WS(str, 'W'); break; case 32: chrcat_WS(str, 'D'); break; case 64: chrcat_WS(str, 'Q'); break; } return; } } for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) { if (i > 0) strcat_WSN(str, ", "); switch (di->ops[i].type) { case O_REG: strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); break; case O_IMM: /* If the instruction is 'push', show explicit size (except byte imm). */ if (di->opcode == I_PUSH && di->ops[i].size != 8) distorm_format_size(str, di, i); /* Special fix for negative sign extended immediates. */ if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) { if (di->imm.sbyte < 0) { chrcat_WS(str, MINUS_DISP_CHR); str_code_hb(str, -di->imm.sbyte); break; } } if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword); else str_code_hdw(str, di->imm.dword); break; case O_IMM1: str_code_hdw(str, di->imm.ex.i1); break; case O_IMM2: str_code_hdw(str, di->imm.ex.i2); break; case O_DISP: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) { strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]); chrcat_WS(str, SEG_OFF_CHR); } tmpDisp64 = di->disp & addrMask; str_code_hqw(str, (uint8_t*)&tmpDisp64); chrcat_WS(str, CLOSE_CHR); break; case O_SMEM: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); /* * This is where we need to take special care for String instructions. * If we got here, it means we need to explicitly show their operands. * The problem with CMPS and MOVS is that they have two(!) memory operands. * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden. * And make the rest of the String operations explicit. 
*/ segment = SEGMENT_GET(di->segment); isDefault = SEGMENT_IS_DEFAULT(di->segment); switch (di->opcode) { case I_MOVS: isDefault = FALSE; if (i == 0) segment = R_ES; break; case I_CMPS: isDefault = FALSE; if (i == 1) segment = R_ES; break; case I_INS: case I_LODS: case I_STOS: case I_SCAS: isDefault = FALSE; break; } if (!isDefault && (segment != R_NONE)) { strcat_WS(str, (const _WString*)&_REGISTERS[segment]); chrcat_WS(str, SEG_OFF_CHR); } strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); distorm_format_signed_disp(str, di, addrMask); chrcat_WS(str, CLOSE_CHR); break; case O_MEM: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) { strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]); chrcat_WS(str, SEG_OFF_CHR); } if (di->base != R_NONE) { strcat_WS(str, (const _WString*)&_REGISTERS[di->base]); chrcat_WS(str, PLUS_DISP_CHR); } strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); if (di->scale != 0) { chrcat_WS(str, '*'); if (di->scale == 2) chrcat_WS(str, '2'); else if (di->scale == 4) chrcat_WS(str, '4'); else /* if (di->scale == 8) */ chrcat_WS(str, '8'); } distorm_format_signed_disp(str, di, addrMask); chrcat_WS(str, CLOSE_CHR); break; case O_PC: #ifdef SUPPORT_64BIT_OFFSET str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask); #else str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask); #endif break; case O_PTR: str_code_hdw(str, di->imm.ptr.seg); chrcat_WS(str, SEG_OFF_CHR); str_code_hdw(str, di->imm.ptr.off); break; } } if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN"); else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN"); }
/*
 * Store `value` in `dict` under `key`, then drop the caller's reference to `value`.
 *
 * PyDict_SetItemString() takes its own reference, so the temporary must be
 * released whether or not the insertion succeeds.
 * A NULL `value` (a failed allocation by the caller) is reported as a failure.
 *
 * Returns 0 on success, -1 on failure.
 */
static int decompose_set_item(PyObject* dict, const char* key, PyObject* value)
{
	int rc;

	if (value == NULL) return -1;
	rc = PyDict_SetItemString(dict, key, value);
	Py_DECREF(value);
	return rc;
}

/*
 * Python entry point: Decompose(int32/64 offset, string code, int type=Decode32Bits, int features=0)
 *
 * Decodes the given code buffer and returns a list with one dictionary per instruction.
 * Each dictionary holds the raw fields of the decoded instruction ("addr", "size", "flags",
 * "segment", "isSegmentDefault", "opcode", "base", "scale", "dispSize", "disp",
 * "unusedPrefixesMask", "meta"), an optional "ops" list of {"type", "size", "index"}
 * dictionaries, and whichever immediate keys apply ("imm", "imm1", "imm2", "seg", "off").
 *
 * Returns NULL with a Python exception set on bad arguments or when an allocation fails.
 */
PyObject* distorm_Decompose(PyObject* pSelf, PyObject* pArgs)
{
	_CodeInfo ci;
	_DecodeResult res = DECRES_NONE;
	_DInst decodedInstructions[MAX_INSTRUCTIONS];
	const _DInst* inst = NULL;
	unsigned int decodedInstructionsCount = 0, i = 0, j = 0, next = 0;
	PyObject *ret = NULL, *pyObj = NULL, *dtObj = NULL, *featObj = NULL, *opsObj = NULL;

	(void)pSelf; /* Unreferenced parameter. */

	if (!PyArg_ParseTuple(pArgs, _PY_OFF_INT_SIZE_ "s#|OO", &ci.codeOffset, &ci.code, &ci.codeLen, &dtObj, &featObj)) return NULL;

	if (ci.code == NULL) {
		PyErr_SetString(PyExc_IOError, "Error while reading code buffer.");
		return NULL;
	}

	if (ci.codeLen < 0) {
		PyErr_SetString(PyExc_OverflowError, "Code length is too big.");
		return NULL;
	}

	/* Default parameter. */
	if (dtObj == NULL) ci.dt = Decode32Bits;
	else if (!PyInt_Check(dtObj)) {
		PyErr_SetString(PyExc_IndexError, "Third parameter must be either Decode16Bits, Decode32Bits or Decode64Bits (integer type).");
		return NULL;
	} else ci.dt = (_DecodeType)PyInt_AsUnsignedLongMask(dtObj);

	if ((ci.dt != Decode16Bits) && (ci.dt != Decode32Bits) && (ci.dt != Decode64Bits)) {
		PyErr_SetString(PyExc_IndexError, "Decoding-type must be either Decode16Bits, Decode32Bits or Decode64Bits.");
		return NULL;
	}

	/* Default parameter. The type check must look at the features object itself, not at dtObj. */
	if (featObj == NULL) ci.features = 0;
	else if (!PyInt_Check(featObj)) {
		PyErr_SetString(PyExc_IndexError, "Fourth parameter must be either features flags (integer type).");
		return NULL;
	} else ci.features = (unsigned int)PyInt_AsUnsignedLongMask(featObj); /* A bit mask, not a _DecodeType. */

	/* Construct an empty list, which later will be filled with one dictionary per instruction. */
	ret = PyList_New(0);
	if (ret == NULL) {
		PyErr_SetString(PyExc_MemoryError, "Not enough memory to initialize a list.");
		return NULL;
	}

	while (res != DECRES_SUCCESS) {
		res = decode_internal(&ci, FALSE, decodedInstructions, MAX_INSTRUCTIONS, &decodedInstructionsCount);

		/*
		 * Nothing was decoded in this round (empty buffer or a decoder failure).
		 * Stop here: the advance code below indexes the last decoded instruction,
		 * which would read out of bounds, and no progress could be made anyway.
		 */
		if (decodedInstructionsCount == 0) break;

		for (i = 0; i < decodedInstructionsCount; i++) {
			inst = &decodedInstructions[i];

			/* Build the operands list lazily, only if the instruction has at least one operand. */
			opsObj = NULL;
			for (j = 0; j < OPERANDS_NO && inst->flags != FLAG_NOT_DECODABLE; j++) {
				if (inst->ops[j].type == O_NONE) break;

				if (opsObj == NULL) {
					opsObj = PyList_New(0);
					if (opsObj == NULL) {
						PyErr_SetString(PyExc_MemoryError, "Not enough memory to allocate operands list.");
						Py_DECREF(ret);
						return NULL;
					}
				}

				pyObj = Py_BuildValue("{s:Bs:Hs:B}",
				                      "type", inst->ops[j].type,
				                      "size", inst->ops[j].size,
				                      "index", inst->ops[j].index);
				if ((pyObj == NULL) || (PyList_Append(opsObj, pyObj) == -1)) {
					PyErr_SetString(PyExc_MemoryError, "Not enough memory to append an operand into the list.");
					Py_XDECREF(pyObj); /* Still ours if the append failed. */
					Py_DECREF(opsObj);
					Py_DECREF(ret);
					return NULL;
				}
				Py_DECREF(pyObj);
			}

			pyObj = Py_BuildValue("{s:" _PY_OFF_INT_SIZE_ "s:Bs:Hs:Bs:is:Hs:Bs:Bs:Bs:Ks:Hs:B}",
			                      "addr", inst->addr,
			                      "size", inst->size,
			                      "flags", inst->flags,
			                      "segment", SEGMENT_GET(inst->segment),
			                      "isSegmentDefault", SEGMENT_IS_DEFAULT(inst->segment),
			                      "opcode", inst->opcode,
			                      "base", inst->base,
			                      "scale", inst->scale,
			                      "dispSize", inst->dispSize,
			                      "disp", inst->disp,
			                      "unusedPrefixesMask", inst->unusedPrefixesMask,
			                      "meta", inst->meta);
			/* Must be verified before the dictionary is touched below. */
			if (pyObj == NULL) {
				Py_XDECREF(opsObj);
				Py_DECREF(ret);
				PyErr_SetString(PyExc_MemoryError, "Not enough memory to allocate an instruction.");
				return NULL;
			}

			/*
			 * NOTE(review): raise_exc is defined elsewhere in this file; it is used here exactly
			 * as before and is presumed to release both objects and return NULL - confirm.
			 */
			if ((opsObj != NULL) && (decompose_set_item(pyObj, "ops", opsObj) == -1)) raise_exc(pyObj, ret);
			opsObj = NULL; /* The reference was released by decompose_set_item. */

			/* Handle the special case where the instruction wasn't decoded. */
			if (inst->flags == FLAG_NOT_DECODABLE) {
				if (decompose_set_item(pyObj, "imm", PyLong_FromUnsignedLongLong(inst->imm.byte)) == -1) raise_exc(pyObj, ret);
			}

			for (j = 0; j < OPERANDS_NO; j++) {
				/* Put dynamic immediate type. */
				switch (inst->ops[j].type)
				{
					case O_IMM:
					case O_PC:
						if (decompose_set_item(pyObj, "imm", PyLong_FromUnsignedLongLong(inst->imm.qword)) == -1) raise_exc(pyObj, ret);
					break;
					case O_IMM1:
						if (decompose_set_item(pyObj, "imm1", PyLong_FromUnsignedLong(inst->imm.ex.i1)) == -1) raise_exc(pyObj, ret);
					break;
					case O_IMM2:
						if (decompose_set_item(pyObj, "imm2", PyLong_FromUnsignedLong(inst->imm.ex.i2)) == -1) raise_exc(pyObj, ret);
					break;
					case O_PTR:
						if (decompose_set_item(pyObj, "seg", PyLong_FromUnsignedLong(inst->imm.ptr.seg)) == -1) raise_exc(pyObj, ret);
						if (decompose_set_item(pyObj, "off", PyLong_FromUnsignedLong(inst->imm.ptr.off)) == -1) raise_exc(pyObj, ret);
					break;
					default:
						/* Other operand types carry no immediate value. */
					break;
				}
			}

			if (PyList_Append(ret, pyObj) == -1) {
				Py_DECREF(pyObj);
				Py_DECREF(ret);
				PyErr_SetString(PyExc_MemoryError, "Not enough memory to append an instruction into the list.");
				return NULL;
			}
			/* The list holds its own reference now. */
			Py_DECREF(pyObj);
		}

		/* Get offset difference: distance from the current start to the end of the last decoded instruction. */
		next = (unsigned int)(decodedInstructions[decodedInstructionsCount - 1].addr - ci.codeOffset);
		next += decodedInstructions[decodedInstructionsCount - 1].size;

		/* Advance ptr and recalc offset. */
		ci.code += next;
		ci.codeLen -= next;
		ci.codeOffset += next;
	}

	return ret;
}