/* * This function is reponsible to return the instruction information of the first found in code. * It returns the _InstInfo of the found instruction, otherwise NULL. * code should point to the ModR/M byte upon exit (if used), or after the instruction binary code itself. * This function is NOT decoding-type dependant, it is up to the caller to see whether the instruction is valid. * Get the instruction info, using a Trie data structure. * I call it "raw", because it simply locates an instruction, it doesn't care what bytes it's using, such as prefixes. */ static _InstInfo* locate_raw_inst(const uint8_t** code0, int* codeLen0, _OffsetType* codeOffset0, _WString* instructionHex, int isREXPrefixValid, _DecodeType dt) { const uint8_t* code = *code0; int codeLen = *codeLen0; _OffsetType codeOffset = *codeOffset0; unsigned int tmpIndex0 = 0, tmpIndex1 = 0, tmpIndex2 = 0, tmpIndex3 = 0; _InstNode* in = NULL; _InstInfo* ii = NULL; /* Precaution. */ if (codeLen <= 0) return NULL; tmpIndex0 = *code; /* Check for NULL node for index 0. */ in = (_InstNode*)Instructions.list[Instructions.ids[tmpIndex0]]; if (in == NULL) return NULL; /* Single byte instruction (OCST_1BYTE). */ if (in->type == INT_INFO) { str_hex_b(instructionHex, tmpIndex0); codeLen -= 1; if (codeLen < 0) return NULL; code += 1; codeOffset += 1; *code0 = code; *codeLen0 = codeLen; *codeOffset0 = codeOffset; /* * ARPL/MOVSXD share the same instruction number, and both have different operands and mnemonics, of course. * Practically, I couldn't come up with a comfortable way to merge the operands' types of ARPL/MOVSXD. * And since the DB can't be patched dynamically, because the DB has to be multi-threaded compliant, * I have no choice but to check for ARPL/MOVSXD right here - "right about now, the funk soul brother, check it out now, the funk soul brother...", fatboy slim */ if (tmpIndex0 == INST_ARPL_INDEX) return dt == Decode64Bits ? (_InstInfo*)&II_movsxd : &II_arpl; return (_InstInfo*)in; } /* Single byte instruction + reg bits (OCST_13BYTES). */ if (in->type == INT_LIST_GROUP) { str_hex_b(instructionHex, tmpIndex0); codeLen -= 1; if (codeLen <= 0) return NULL; code += 1; codeOffset += 1; *code0 = code; *codeLen0 = codeLen; *codeOffset0 = codeOffset; return (_InstInfo*)in->list[in->ids[(*code >> 3) & 7]]; }
// This function is reponsible to return the instruction information of the first found in code. // It returns the _InstInfo of the found instruction, otherwise NULL. // code should point to the ModR/M byte upon exit (if used), or after the instruction binary code itself. // This function is NOT decoding-type dependant, it is up to the caller to see whether the instruction is valid. // Get the instruction info, using a Trie data structure. // I call it "basic", because it simply locates an instruction, it doesn't care what bytes it's using, such as prefixes. // _InstInfo* locate_basic_inst(const unsigned char** code0, long* codeLen0, _OffsetType* codeOffset0, _WString* instructionHex, int isERXPrefixValid, _DecodeType dt) { const unsigned char* code = *code0; long codeLen = *codeLen0; _OffsetType codeOffset = *codeOffset0; unsigned char tmpIndex0 = *code, tmpIndex1 = 0, tmpIndex2 = 0; _InstNode *in = NULL; _InstInfo *ii = NULL; // Single byte instruction (OCST_1BYTE). if (Instructions[tmpIndex0].type == INT_INFO) { str_hex_b(instructionHex, tmpIndex0); codeLen -= 1; if (codeLen < 0) return NULL; code += 1; codeOffset += 1; *code0 = code; *codeLen0 = codeLen; *codeOffset0 = codeOffset; // ARPL/MOVSXD share the same instruction number, and both have differenct operands and mnemonics, of course. // Practically, I couldn't come up with a comfortable way to merge the operands' types of ARPL/MOVSXD. // And since the DB can't be patched dynamically, because the DB has to be multi-threaded compliant, // I have no choice but to check for ARPL/MOVSXD right here - "right about now, the funk soul brother, check it out now, the funk soul brother...", fatboy slim if (tmpIndex0 == INST_ARPL_INDEX) return dt == Decode64Bits ? &II_movsxd : &II_arpl; return Instructions[tmpIndex0].ii; } // Single byte instruction + reg bits (OCST_13BYTES). if (Instructions[tmpIndex0].type == INT_LIST_GROUP) { str_hex_b(instructionHex, tmpIndex0); codeLen -= 1; if (codeLen <= 0) return NULL; code += 1; codeOffset += 1; *code0 = code; *codeLen0 = codeLen; *codeOffset0 = codeOffset; return Instructions[tmpIndex0].list[(*code >> 3) & 7].ii; }
_DLLEXPORT_ void distorm_format32(const _CodeInfo* ci, const _DInst* di, _DecodedInst* result) #endif { _WString* str; unsigned int i, isDefault; int64_t tmpDisp64; uint64_t addrMask = (uint64_t)-1; uint8_t segment; const _WMnemonic* mnemonic; /* Set address mask, when default is for 64bits addresses. */ if (ci->features & DF_MAXIMUM_ADDR32) addrMask = 0xffffffff; else if (ci->features & DF_MAXIMUM_ADDR16) addrMask = 0xffff; /* Copy other fields. */ result->size = di->size; result->offset = di->addr & addrMask; if (di->flags == FLAG_NOT_DECODABLE) { str = &result->mnemonic; strclear_WS(&result->operands); strcpy_WSN(str, "DB "); str_code_hb(str, di->imm.byte); strclear_WS(&result->instructionHex); str_hex_b(&result->instructionHex, di->imm.byte); return; /* Skip to next instruction. */ } str = &result->instructionHex; strclear_WS(str); for (i = 0; i < di->size; i++) str_hex_b(str, ci->code[(unsigned int)(di->addr - ci->codeOffset + i)]); str = &result->mnemonic; switch (FLAG_GET_PREFIX(di->flags)) { case FLAG_LOCK: strcpy_WSN(str, "LOCK "); break; case FLAG_REP: strcpy_WSN(str, "REP "); break; case FLAG_REPNZ: strcpy_WSN(str, "REPNZ "); break; default: /* Init mnemonic string, cause next touch is concatenation. */ strclear_WS(str); break; } mnemonic = (const _WMnemonic*)&_MNEMONICS[di->opcode]; memcpy((int8_t*)&str->p[str->length], mnemonic->p, mnemonic->length + 1); str->length += mnemonic->length; /* Format operands: */ str = &result->operands; strclear_WS(str); /* Special treatment for String instructions. */ if ((META_GET_ISC(di->meta) == ISC_INTEGER) && ((di->opcode == I_MOVS) || (di->opcode == I_CMPS) || (di->opcode == I_STOS) || (di->opcode == I_LODS) || (di->opcode == I_SCAS))) { /* * No operands are needed if the address size is the default one, * and no segment is overridden, so add the suffix letter, * to indicate size of operation and continue to next instruction. */ if ((FLAG_GET_ADDRSIZE(di->flags) == ci->dt) && (SEGMENT_IS_DEFAULT(di->segment))) { str = &result->mnemonic; switch (di->ops[0].size) { case 8: chrcat_WS(str, 'B'); break; case 16: chrcat_WS(str, 'W'); break; case 32: chrcat_WS(str, 'D'); break; case 64: chrcat_WS(str, 'Q'); break; } return; } } for (i = 0; ((i < OPERANDS_NO) && (di->ops[i].type != O_NONE)); i++) { if (i > 0) strcat_WSN(str, ", "); switch (di->ops[i].type) { case O_REG: strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); break; case O_IMM: /* If the instruction is 'push', show explicit size (except byte imm). */ if (di->opcode == I_PUSH && di->ops[i].size != 8) distorm_format_size(str, di, i); /* Special fix for negative sign extended immediates. */ if ((di->flags & FLAG_IMM_SIGNED) && (di->ops[i].size == 8)) { if (di->imm.sbyte < 0) { chrcat_WS(str, MINUS_DISP_CHR); str_code_hb(str, -di->imm.sbyte); break; } } if (di->ops[i].size == 64) str_code_hqw(str, (uint8_t*)&di->imm.qword); else str_code_hdw(str, di->imm.dword); break; case O_IMM1: str_code_hdw(str, di->imm.ex.i1); break; case O_IMM2: str_code_hdw(str, di->imm.ex.i2); break; case O_DISP: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) { strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]); chrcat_WS(str, SEG_OFF_CHR); } tmpDisp64 = di->disp & addrMask; str_code_hqw(str, (uint8_t*)&tmpDisp64); chrcat_WS(str, CLOSE_CHR); break; case O_SMEM: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); /* * This is where we need to take special care for String instructions. * If we got here, it means we need to explicitly show their operands. * The problem with CMPS and MOVS is that they have two(!) memory operands. * So we have to complete it ourselves, since the structure supplies only the segment that can be overridden. * And make the rest of the String operations explicit. */ segment = SEGMENT_GET(di->segment); isDefault = SEGMENT_IS_DEFAULT(di->segment); switch (di->opcode) { case I_MOVS: isDefault = FALSE; if (i == 0) segment = R_ES; break; case I_CMPS: isDefault = FALSE; if (i == 1) segment = R_ES; break; case I_INS: case I_LODS: case I_STOS: case I_SCAS: isDefault = FALSE; break; } if (!isDefault && (segment != R_NONE)) { strcat_WS(str, (const _WString*)&_REGISTERS[segment]); chrcat_WS(str, SEG_OFF_CHR); } strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); distorm_format_signed_disp(str, di, addrMask); chrcat_WS(str, CLOSE_CHR); break; case O_MEM: distorm_format_size(str, di, i); chrcat_WS(str, OPEN_CHR); if ((SEGMENT_GET(di->segment) != R_NONE) && !SEGMENT_IS_DEFAULT(di->segment)) { strcat_WS(str, (const _WString*)&_REGISTERS[SEGMENT_GET(di->segment)]); chrcat_WS(str, SEG_OFF_CHR); } if (di->base != R_NONE) { strcat_WS(str, (const _WString*)&_REGISTERS[di->base]); chrcat_WS(str, PLUS_DISP_CHR); } strcat_WS(str, (const _WString*)&_REGISTERS[di->ops[i].index]); if (di->scale != 0) { chrcat_WS(str, '*'); if (di->scale == 2) chrcat_WS(str, '2'); else if (di->scale == 4) chrcat_WS(str, '4'); else /* if (di->scale == 8) */ chrcat_WS(str, '8'); } distorm_format_signed_disp(str, di, addrMask); chrcat_WS(str, CLOSE_CHR); break; case O_PC: #ifdef SUPPORT_64BIT_OFFSET str_off64(str, (di->imm.sqword + di->addr + di->size) & addrMask); #else str_code_hdw(str, ((_OffsetType)di->imm.sdword + di->addr + di->size) & (uint32_t)addrMask); #endif break; case O_PTR: str_code_hdw(str, di->imm.ptr.seg); chrcat_WS(str, SEG_OFF_CHR); str_code_hdw(str, di->imm.ptr.off); break; } } if (di->flags & FLAG_HINT_TAKEN) strcat_WSN(str, " ;TAKEN"); else if (di->flags & FLAG_HINT_NOT_TAKEN) strcat_WSN(str, " ;NOT TAKEN"); }