//Function parsing needs all of the instructions, and translating into C needs all of the instructions, but storing all of the instructions between those two points in time //takes up an enourmous amount of memory. So we need a seperate function from the init function to disassemble all of the instructions in a jump block a second time. void parse_instructions (jump_block* to_parse) { uint8_t* current = file_buf + addr_to_index (to_parse->start); size_t size = to_parse->end - to_parse->start; cs_disasm (handle, current, size, 0x0000, 0, &(to_parse->instructions)); }
void get_dyn_syms64 (void) { Elf64_Ehdr* header = (Elf64_Ehdr*)file_buf; Elf64_Phdr* segment_table = (Elf64_Phdr*)(file_buf + header->e_phoff); Elf64_Dyn* dynamic_table; int i = 0; int j = 0; dynamic_string_table = NULL; dynamic_symbol_table.arch2 = NULL; relocation_table.arch2 = NULL; num_dynamic_symbols = 0; for (i; i < header->e_phnum; i ++) { if (segment_table [i].p_type == PT_DYNAMIC) break; } if (i >= header->e_phnum) { printf ("Error: No dynamic linking information\n"); return; } dynamic_table = (Elf64_Dyn*)(file_buf + segment_table [i].p_offset); j = 0; while (dynamic_table [j].d_tag != DT_NULL) { if (dynamic_table [j].d_tag == DT_STRTAB) dynamic_string_table = (char*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr)); if (dynamic_table [j].d_tag == DT_SYMTAB) dynamic_symbol_table.arch2 = (Elf64_Sym*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr)); if (dynamic_table [j].d_tag == DT_RELASZ) num_dynamic_symbols += dynamic_table [j].d_un.d_ptr /sizeof (Elf64_Rela); if (dynamic_table [j].d_tag == DT_PLTRELSZ) num_dynamic_symbols = dynamic_table [j].d_un.d_ptr / sizeof (Elf64_Rela); if (dynamic_table [j].d_tag == DT_RELA) relocation_table.arch2 = (Elf64_Rela*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr)); j ++; } }
void get_dyn_syms64 (void) { Elf64_Ehdr* header = (Elf64_Ehdr*)file_buf; Elf64_Phdr* segment_table = (Elf64_Phdr*)(file_buf + header->e_phoff); Elf64_Dyn* dynamic_table; int i = 0; int j = 0; dynamic_string_table = NULL; dynamic_symbol_table.arch2 = NULL; relocation_table.arch2 = NULL; num_dynamic_symbols = 0; for (i; i < header->e_phnum; i ++) { if (segment_table [i].p_type == PT_DYNAMIC) break; } if (i >= header->e_phnum) { printf ("Error: No dynamic linking information\n"); return; } dynamic_table = (Elf64_Dyn*)(file_buf + segment_table [i].p_offset); j = 0; while (&(dynamic_table [j]) < (Elf64_Dyn*)(dynamic_table + segment_table[i].p_filesz)) { if (dynamic_string_table && dynamic_symbol_table.arch2 && num_dynamic_symbols && relocation_table.arch2) break; if (dynamic_table [j].d_tag == DT_STRTAB) dynamic_string_table = (char*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr)); if (dynamic_table [j].d_tag == DT_SYMTAB) dynamic_symbol_table.arch2 = (Elf64_Sym*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr)); if (dynamic_table [j].d_tag == DT_GNU_HASH) num_dynamic_symbols = *(int*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr) + 4); if (dynamic_table [j].d_tag == DT_JMPREL) relocation_table.arch2 = (Elf64_Rel*)(file_buf + addr_to_index (dynamic_table [j].d_un.d_ptr)); j ++; } }
void get_text64 (void) { text_addr = entry_point; text_offset = addr_to_index (text_addr); Elf64_Phdr* segment_table = (Elf64_Phdr*)(file_buf + ((Elf64_Ehdr*)file_buf)->e_phoff); int i; for (i = 0; i < ((Elf64_Ehdr*)file_buf)->e_phnum; i ++) { if (segment_table [i].p_vaddr <= text_addr && segment_table [i].p_vaddr + segment_table [i].p_memsz > text_addr) { end_of_text = segment_table [i].p_vaddr + segment_table [i].p_memsz; break; } } if (i >= ((Elf64_Ehdr*)file_buf)->e_phnum) { printf ("ERROR: entry point not in loadable segment\n"); exit (-1); } }
//Allocate (buf == NULL) or grow a heap buffer; prints an error and exits on
//allocation failure so callers never see a NULL buffer.
static void* jump_block_xrealloc (void* buf, size_t new_size)
{
	void* grown = realloc (buf, new_size);

	if (!grown)
	{
		printf ("ERROR: out of memory while disassembling jump block\n");
		exit (-1);
	}
	return grown;
}

//Disassemble one jump block starting at start_addr and fill in to_init.
//
//Instructions are decoded one at a time and copied (together with a private
//copy of their detail record) into to_init->instructions. Targets of call
//instructions whose first operand is not a plain register are collected in
//to_init->calls. Decoding stops after the first instruction whose mnemonic
//starts with 'j', when the global num_push_ebp counter reaches 2 (a second
//"push ebp/rbp" is taken as the start of the adjacent function), when the
//address moves past stop_addr, or when Capstone cannot decode any further.
//
//to_init:    block to initialise; its flags are taken from the global
//            next_flags, which is then cleared.
//start_addr: address of the first instruction of the block.
//stop_addr:  address beyond which disassembly must not continue.
//
//Returns to_init, so calls can be chained like
//"example (init_jump_block (malloc (sizeof (jump_block)), some_addr, block))".
//Exits the program if memory cannot be allocated.
//
//NOTE(review): num_push_ebp is a global that is not reset here - presumably
//the caller resets it between functions; confirm.
jump_block* init_jump_block (jump_block* to_init, unsigned int start_addr, unsigned int stop_addr)
{
	to_init->instructions = NULL;
	to_init->calls = NULL;
	to_init->conditional_jumps = NULL;
	to_init->flags = next_flags;
	next_flags = 0;
	to_init->next = NULL;
	to_init->start = start_addr;

	//Locals to cut down on dereference operators
	int num_instructions = 0;
	int num_calls = 0;
	int num_conditional_jumps = 0;
	int decoded = 0;
	uint64_t relative_address = 0;
	unsigned int current_addr = start_addr;
	uint8_t* current = (uint8_t*)(file_buf + addr_to_index (current_addr));

	//cs_disasm_iter decrements the size it is given, so hand it a local holding
	//the bytes left after the block start instead of the global file_size.
	size_t offset = (size_t)((char*)current - (char*)file_buf);
	size_t size = offset < (size_t)file_size ? (size_t)file_size - offset : 0;

	cs_insn* instruction = cs_malloc (handle);
	if (!instruction)
	{
		printf ("ERROR: out of memory while disassembling jump block\n");
		exit (-1);
	}

	do
	{
		//Make sure there is room for one more instruction. The buffer is
		//doubled rather than grown by one to avoid reallocating every iteration.
		if (num_instructions == 0)
		{
			to_init->instructions_buf_size = 256 * sizeof (cs_insn);
			to_init->instructions = jump_block_xrealloc (NULL, to_init->instructions_buf_size);
		}
		else if ((num_instructions + 1) * sizeof (cs_insn) > to_init->instructions_buf_size)
		{
			to_init->instructions_buf_size *= 2;
			to_init->instructions = jump_block_xrealloc (to_init->instructions, to_init->instructions_buf_size);
		}

		//Partially disassemble the instruction into machine readable format.
		//On failure the scratch instruction holds nothing new, so stop here
		//instead of storing stale data or spinning forever.
		decoded = cs_disasm_iter (handle, (const uint8_t**)&current, &size, &relative_address, instruction);
		if (!decoded)
			break;
		num_instructions ++;

		//The scratch instruction is reused, so each stored copy needs its own detail record
		to_init->instructions [num_instructions-1] = *instruction;
		to_init->instructions [num_instructions-1].detail = jump_block_xrealloc (NULL, sizeof (cs_detail));
		*(to_init->instructions [num_instructions-1].detail) = *(instruction->detail);

		current_addr = index_to_addr ((char*)current - (char*)file_buf);

		//Identify function calls for later disassembly. Calls through a plain
		//register operand are skipped (please don't go chasing rax...).
		if (instruction->detail->x86.op_count && instruction->detail->x86.operands [0].type > X86_OP_REG)
		{
			if (instruction->id == X86_INS_CALL)
			{
				num_calls ++;
				if (num_calls == 1)
				{
					to_init->calls_buf_size = 8 * sizeof (unsigned int);
					to_init->calls = jump_block_xrealloc (NULL, to_init->calls_buf_size);
				}
				else if (num_calls * sizeof (unsigned int) > to_init->calls_buf_size)
				{
					to_init->calls_buf_size *= 2;
					to_init->calls = jump_block_xrealloc (to_init->calls, to_init->calls_buf_size);
				}

				//Add operand address to call buffer
				to_init->calls [num_calls-1] = relative_insn (instruction, current_addr);
			}
		}

		//Keep track of how many times we've seen "push %ebp" / "push %rbp". One too
		//many and we've started on the adjacent function. The operand is only
		//inspected when it exists and really is a register, since the reg field
		//shares storage with immediate and memory operands.
		if ((instruction->id >= X86_INS_PUSH && instruction->id <= X86_INS_PUSHFQ)
			&& instruction->detail->x86.op_count
			&& instruction->detail->x86.operands [0].type == X86_OP_REG
			&& (instruction->detail->x86.operands [0].reg == X86_REG_EBP || instruction->detail->x86.operands [0].reg == X86_REG_RBP))
			num_push_ebp ++;

		//If we're past stop_addr we should be done; force the loop to end
		if (current_addr > stop_addr)
			num_push_ebp = 2;
	} while (instruction->mnemonic [0] != 'j' && num_push_ebp != 2);

	//Synchronize the jump block with locals
	to_init->end = current_addr;
	to_init->num_conditional_jumps = num_conditional_jumps;
	to_init->num_calls = num_calls;
	to_init->num_instructions = num_instructions;

	//A jump in the JAE..JS id range other than JMP whose target lies before the
	//jump itself marks this block as a loop and the next block as following one.
	//Skipped when the last decode failed, because the scratch instruction is
	//then not meaningful.
	if (decoded && instruction->id >= X86_INS_JAE && instruction->id <= X86_INS_JS && instruction->id != X86_INS_JMP)
	{
		if (relative_insn (instruction, current_addr) < current_addr - instruction->size)
		{
			to_init->flags |= IS_LOOP;
			next_flags |= IS_AFTER_LOOP;
		}
	}

	cs_free (instruction, 1);

	//Print jump block start address; uncomment for debugging information
	//printf ("%p\n", to_init->start);

	return to_init;
}