// Debug helper: reads a 32-bit guest pointer stored at guest virtual address
// va+o, prints it, then reads the 16 bytes that pointer refers to and prints
// them as a string (the last byte is always overwritten with NUL).
//
//   env - CPU whose guest address space is read
//   o   - byte offset added to va
//   va  - base guest virtual address
//
// If either guest read fails (panda_virtual_memory_rw returns -1) a short
// diagnostic is printed instead of the contents of uninitialized memory.
void prstr(CPUState *env, uint32_t o, uint32_t va) {
    uint32_t ptr = 0;
    if (-1 == panda_virtual_memory_rw(env, va+o, (uint8_t *) &ptr, sizeof(ptr), false)) {
        printf ("o=%u : can't read pointer at 0x%x\n", o, va+o);
        return;
    }
    printf ("ptr=0x%x\n", ptr);

    uint8_t buf[16] = {0};
    if (-1 == panda_virtual_memory_rw(env, ptr, buf, sizeof(buf), false)) {
        printf ("o=%u : can't read string at 0x%x\n", o, ptr);
        return;
    }
    // force termination: at most 15 characters are shown
    buf[sizeof(buf) - 1] = 0;
    // o is unsigned, so print it with %u
    printf ("o=%u : [%s]\n", o, buf);
}
// 179 NTSTATUS NtOpenFile (PHANDLE FileHandle, ACCESS_MASK DesiredAccess, POBJECT_ATTRIBUTES ObjectAttributes, PIO_STATUS_BLOCK IoStatusBlock, ULONG ShareAccess, ULONG OpenOptions); // typedef void (*on_NtOpenFile_enter_t)(CPUState *cpu,target_ulong pc,uint32_t FileHandle,uint32_t DesiredAccess,uint32_t ObjectAttributes,uint32_t IoStatusBlock,uint32_t ShareAccess,uint32_t OpenOptions); void windows_open_enter(CPUState *cpu, target_ulong pc, uint32_t FileHandle, uint32_t DesiredAccess, uint32_t ObjectAttributes, uint32_t IoStatusBlock, uint32_t ShareAccess, uint32_t OpenOptions) { char the_filename[MAX_FILENAME]; OBJECT_ATTRIBUTES obj_attrs; UNICODE_STRING unicode_string; panda_virtual_memory_rw(cpu, ObjectAttributes, (uint8_t *)&obj_attrs, sizeof(obj_attrs), 0); panda_virtual_memory_rw(cpu, obj_attrs.ObjectName, (uint8_t *)&unicode_string, sizeof(unicode_string), 0); guest_wstrncpy(cpu, the_filename, MAX_FILENAME, unicode_string.Buffer); char *trunc_filename = the_filename; if (strncmp("\\??\\", the_filename, 4) == 0) { trunc_filename += 4; } the_windows_filename = std::string(trunc_filename); open_enter(cpu, pc, trunc_filename, 0, DesiredAccess); }
// Fills *p with the current program point: address space (user mode only),
// caller and guest pc. Does nothing when p is null.
void get_prog_point(CPUState* cpu, prog_point *p) {
    CPUArchState* env = (CPUArchState*)cpu->env_ptr;
    if (p == NULL) {
        return;
    }

    // Address space identifier. All kernel-mode CR3s are lumped together,
    // so p->cr3 is only written when we are not in kernel space.
    const target_ulong current_asid = panda_current_asid(ENV_GET_CPU(env));
    if (!in_kernelspace(env)) {
        p->cr3 = current_asid;
    }

    // Try to get the caller
    const int found = get_callers(&p->caller, 1, cpu);
    if (found == 0) {
#ifdef TARGET_I386
        // fall back to EBP on x86: the word just above the saved frame pointer
        const int word_size = (env->hflags & HF_LMA_MASK) ? 8 : 4;
        panda_virtual_memory_rw(cpu, env->regs[R_EBP]+word_size, (uint8_t *)&p->caller, word_size, 0);
#endif
#ifdef TARGET_ARM
        // LR
        p->caller = env->regs[14];
#endif
    }

    p->pc = cpu->panda_guest_pc;
}
//void pfun(VarType var_ty, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args){ void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args){ //const char *var_ty = (const char *) var_ty_void; // restore args struct args *args = (struct args *) in_args; CPUState *pfun_env = args->env; //const char *src_filename = args->src_filename; //uint64_t src_linenum = args->src_linenum; target_ulong guest_dword; switch (loc_t){ case LocReg: printf("VAR REG: %s in Reg %d", var_nm, loc); printf(" => 0x%x\n", pfun_env->regs[loc]); break; case LocMem: printf("VAR MEM: %s @ 0x%x", var_nm, loc); panda_virtual_memory_rw(pfun_env, loc, (uint8_t *)&guest_dword, sizeof(guest_dword), 0); printf(" => 0x%x\n", guest_dword); break; case LocConst: //printf("VAR CONST: %s %s as 0x%x\n", var_ty, var_nm, loc); break; case LocErr: printf("VAR does not have a location we could determine. Most likely because the var is split among multiple locations\n"); break; } }
// hypercall-initiated taint query of some src-level extent void lava_taint_query (PandaHypercallStruct phs) { extern CPUState *cpu_single_env; CPUState *env = cpu_single_env; if (taintEnabled && (taint2_num_labels_applied() > 0)){ // okay, taint is on and some labels have actually been applied // is there *any* taint on this extent uint32_t num_tainted = 0; for (uint32_t offset=0; offset<phs.len; offset++) { uint32_t va = phs.buf + offset; uint32_t pa = panda_virt_to_phys(env, va); if ((int) pa != -1) { Addr a = make_maddr(pa); if (taint2_query(a)) { num_tainted ++; } } } if (num_tainted) { // ok at least one byte in the extent is tainted // 1. write the pandalog entry that tells us something was tainted on this extent Panda__TaintQueryHypercall *tqh = (Panda__TaintQueryHypercall *) malloc (sizeof (Panda__TaintQueryHypercall)); *tqh = PANDA__TAINT_QUERY_HYPERCALL__INIT; tqh->buf = phs.buf; tqh->len = phs.len; tqh->num_tainted = num_tainted; // obtain the actual data out of memory // NOTE: first 32 bytes only! uint32_t data[32]; uint32_t n = phs.len; if (32 < phs.len) n = 32; for (uint32_t i=0; i<n; i++) { data[i] = 0; uint8_t c; panda_virtual_memory_rw(env, phs.buf+i, &c, 1, false); data[i] = c; } tqh->n_data = n; tqh->data = data; Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; ple.taint_query_hypercall = tqh; pandalog_write_entry(&ple); free(tqh); // 2. write out src-level info lava_src_info_pandalog(phs); // 3. write out callstack info callstack_pandalog(); // 4. iterate over the bytes in the extent and pandalog detailed info about taint for (uint32_t offset=0; offset<phs.len; offset++) { uint32_t va = phs.buf + offset; uint32_t pa = panda_virt_to_phys(env, va); if ((int) pa != -1) { Addr a = make_maddr(pa); if (taint2_query(a)) { __taint2_query_pandalog(a); } } } } } }
// typedef void (*on_NtOpenFile_return_t)(CPUState *cpu,target_ulong pc,uint32_t FileHandle,uint32_t DesiredAccess,uint32_t ObjectAttributes,uint32_t IoStatusBlock,uint32_t ShareAccess,uint32_t OpenOptions); void windows_open_return(CPUState *cpu, target_ulong pc, uint32_t FileHandle, uint32_t DesiredAccess, uint32_t ObjectAttributes, uint32_t IoStatusBlock, uint32_t ShareAccess, uint32_t OpenOptions) { uint32_t Handle; panda_virtual_memory_rw(cpu, FileHandle, (uint8_t *)&Handle, 4, 0); if (debug) printf ("asid=0x%x filehandle=%d filename=[%s]\n", (uint)panda_current_asid(cpu), FileHandle, the_windows_filename.c_str()); windows_filenames[panda_current_asid(cpu)][FileHandle] = the_windows_filename; open_return(cpu, Handle); }
// Copies a NUL-terminated string from guest virtual address vaddr into str.
//
// At most PANDA_MAX_STRING_READ bytes are written, so str must have room for
// that many; the final byte is always set to 0. Copying stops at the first
// NUL byte, or at the first byte that cannot be read from the guest
// (panda_virtual_memory_rw returns -1), in which case the string is
// terminated at that position.
void panda_virtual_string_read(CPUState *env, target_ulong vaddr, char *str) {
    for (uint32_t i=0; i<PANDA_MAX_STRING_READ; i++) {
        uint8_t c = 0;
        if (-1 == panda_virtual_memory_rw(env, vaddr + i, &c, 1, false)) {
            // unreadable guest memory ends the string here
            str[i] = 0;
            break;
        }
        str[i] = c;
        if (c==0) break;
    }
    str[PANDA_MAX_STRING_READ-1] = 0;
}
// Returns 32-bit system call argument number nr, read from guest memory.
// On targets other than x86 the result is always 0.
static uint32_t get_win_syscall_arg(CPUState* env, int nr) {
#if defined(TARGET_I386)
    // At sysenter on Windows7, args start at EDX+8
    uint32_t value = 0;
    panda_virtual_memory_rw(env, env->regs[R_EDX] + 8 + (4*nr), (uint8_t *) &value, 4, false);
    return value;
#else
    return 0;
#endif
}
// Return address calculations target_ulong calc_retaddr_windows_x86(CPUState* env, target_ulong pc) { #if defined(TARGET_I386) target_ulong retaddr = 0; panda_virtual_memory_rw(env, EDX, (uint8_t *) &retaddr, 4, false); return retaddr; #else // shouldn't happen assert (1==0); #endif }
// Check if the instruction is sysenter (0F 34) bool translate_callback(CPUState *env, target_ulong pc) { #if defined(TARGET_I386) unsigned char buf[2]; panda_virtual_memory_rw(env, pc, buf, 2, 0); // Check if the instruction is syscall (0F 05) if (buf[0]== 0x0F && buf[1] == 0x05) { return true; } // Check if the instruction is sysenter (0F 34) else if (buf[0]== 0x0F && buf[1] == 0x34) { return true; } else { return false; } #elif defined(TARGET_ARM) unsigned char buf[4]; // Check for ARM mode syscall if(env->thumb == 0) { panda_virtual_memory_rw(env, pc, buf, 4, 0); // EABI if ( ((buf[3] & 0x0F) == 0x0F) && (buf[2] == 0) && (buf[1] == 0) && (buf[0] == 0) ) { return true; } #if defined(CAPTURE_ARM_OABI) else if (((buf[3] & 0x0F) == 0x0F) && (buf[2] == 0x90)) { // old ABI return true; } #endif } else { panda_virtual_memory_rw(env, pc, buf, 2, 0); // check for Thumb mode syscall if (buf[1] == 0xDF && buf[0] == 0){ return true; } } return false; #endif }
void windows_read_return(CPUState *cpu, target_ulong pc, uint32_t FileHandle, uint32_t Event, uint32_t UserApcRoutine, uint32_t UserApcContext, uint32_t IoStatusBlock, uint32_t Buffer, uint32_t BufferLength, uint32_t ByteOffset, uint32_t Key) { CPUArchState *env = (CPUArchState*)cpu->env_ptr; if (env->regs[R_EAX] != STATUS_SUCCESS) return; IO_STATUS_BLOCK io_status_block; uint32_t actual_count = BufferLength; if (panda_virtual_memory_rw(cpu, IoStatusBlock, (uint8_t *)&io_status_block, sizeof(io_status_block), 0) != -1) { actual_count = io_status_block.Information; } else { if (debug) printf("file_taint: failed to read IoStatusBlock @ %x\n", IoStatusBlock); } read_return(cpu, pc, Buffer, actual_count); }
// Copies a guest string of 2-byte characters into a host char buffer,
// keeping only the first byte of each 2-byte unit.
//
//   cpu        - CPU whose guest address space is read
//   buf        - destination, at least maxlen bytes
//   maxlen     - capacity of buf; nothing is written when it is 0
//   guest_addr - guest virtual address of the first character
//
// Returns the index at which copying stopped: the position of the
// terminator, or maxlen if none was found (buf is then truncated and still
// NUL-terminated). A guest byte that cannot be read ends the string.
uint32_t guest_wstrncpy(CPUState *cpu, char *buf, size_t maxlen, target_ulong guest_addr) {
    if (maxlen == 0) {
        // no room even for the terminator; buf[0] and buf[maxlen-1] would be out of bounds
        return 0;
    }
    buf[0] = 0;
    unsigned i;
    for (i=0; i<maxlen; i++) {
        uint8_t c = 0;
        if (-1 == panda_virtual_memory_rw(cpu, guest_addr + 2 * i, &c, 1, 0)) {
            buf[i] = 0;
            break;
        }
        buf[i] = (char) c;
        if (c == 0) {
            break;
        }
    }
    buf[maxlen-1] = 0;
    return i;
}
// Reads a NUL-terminated string from guest virtual memory at vaddr into str.
// Writes at most PANDA_MAX_STRING_READ bytes and always terminates the last
// one. An unreadable byte is reported on stdout and ends the string.
void panda_virtual_string_read(CPUState *env, target_ulong vaddr, char *str) {
    uint32_t idx = 0;
    while (idx < PANDA_MAX_STRING_READ) {
        uint8_t byte = 0;
        const bool unreadable = (-1 == panda_virtual_memory_rw(env, vaddr + idx, &byte, 1, false));
        if (unreadable) {
            printf("Can't access memory at " TARGET_FMT_lx "\n", vaddr + idx);
            str[idx] = 0;
            break;
        }
        str[idx] = byte;
        if (byte == 0) {
            break;
        }
        idx++;
    }
    str[PANDA_MAX_STRING_READ-1] = 0;
}
// Memory-read callback: attributes `size` bytes to the program point
// (caller, cr3, pc) that performed the read. Always returns 1.
int mem_read_callback(CPUState *env, target_ulong pc, target_ulong addr, target_ulong size, void *buf) {
    prog_point point = {};
#ifdef TARGET_I386
    // caller = the 4 bytes stored at EBP+4
    panda_virtual_memory_rw(env, env->regs[R_EBP]+4, (uint8_t *)&point.caller, 4, 0);
    // Lump all kernel-mode CR3s together: cr3 stays 0 unless CPL is non-zero
    const bool nonzero_cpl = (env->hflags & HF_CPL_MASK) != 0;
    if (nonzero_cpl) {
        point.cr3 = env->cr[3];
    }
#endif
    point.pc = pc;
    read_tracker[point] += size;
    return 1;
}
// i.e. return pointer to the object represented by this handle static uint32_t get_handle_table_entry(CPUState *cpu, uint32_t pHandleTable, uint32_t handle) { uint32_t tableCode, tableLevels; // get tablecode panda_virtual_memory_rw(cpu, pHandleTable, (uint8_t *)&tableCode, 4, false); // extract levels tableLevels = tableCode & LEVEL_MASK; if (tableLevels > 2) { return 0; } uint32_t pEntry=0; if (tableLevels == 0) { uint32_t index = (handle & HANDLE_MASK1) >> HANDLE_SHIFT1; pEntry = handle_table_L1_entry(cpu, pHandleTable, index); }
// Copies a NUL-terminated byte string from guest virtual memory into buf.
//
//   cpu        - CPU whose guest address space is read
//   buf        - destination, at least maxlen bytes
//   maxlen     - capacity of buf; nothing is written when it is 0
//   guest_addr - guest virtual address of the first byte
//
// Returns the index at which copying stopped: the position of the
// terminator, or maxlen if none was found (buf is then truncated and still
// NUL-terminated). A guest byte that cannot be read ends the string.
uint32_t guest_strncpy(CPUState *cpu, char *buf, size_t maxlen, target_ulong guest_addr) {
    if (maxlen == 0) {
        // no room even for the terminator; buf[0] and buf[maxlen-1] would be out of bounds
        return 0;
    }
    buf[0] = 0;
    unsigned i;
    for (i=0; i<maxlen; i++) {
        uint8_t c = 0;
        if (-1 == panda_virtual_memory_rw(cpu, guest_addr+i, &c, 1, 0)) {
            buf[i] = 0;
            break;
        }
        buf[i] = c;
        if (c==0) {
            break;
        }
    }
    buf[maxlen-1] = 0;
    return i;
}
// this is called from stringsearch upon a match void tstringsearch_match(CPUState *env, target_ulong pc, target_ulong addr, uint8_t *matched_string, uint32_t matched_string_length, bool is_write) { // determine if the search string is sitting in memory, starting at addr - (strlen-1) // first, grab that string out of memory target_ulong p = addr - (matched_string_length-1); uint8_t thestring[MAX_STRLEN*2]; panda_virtual_memory_rw(env, p, thestring, matched_string_length, 0); printf ("tstringsearch: thestring = ["); for (unsigned i=0; i<matched_string_length; i++) { if (isprint(thestring[i])) { printf("%c", thestring[i]); } else { printf("."); } } printf ("]\ntstringsearch: "); for (unsigned i=0; i<matched_string_length; i++) { printf ("%02x ", thestring[i]); } printf ("\n"); // now compare it to the search string // NOTE: this is a write, so the final byte of the string hasn't yet been // written to memory since write callback is at start of fn. // thus, the matched_string_length - 1. // yes, we can get this right. but, meh. if ((memcmp((char *)thestring, (char *)matched_string, matched_string_length-1)) == 0) { printf ("tstringsearch: string in memory @ 0x%lx\n", (long unsigned int) p); // ok this is ugly. save pc, buffer addr and len the_pc = pc; the_buf = p; the_len = matched_string_length; // this should enable tstringsearch_label_on = true; if (first_time) { first_time = false; // add a callback for taint processor st panda_cb pcb; pcb.phys_mem_before_read = tstringsearch_label; panda_register_callback(plugin_self, PANDA_CB_PHYS_MEM_BEFORE_READ, pcb); pcb.phys_mem_after_write = tstringsearch_label_write; panda_register_callback(plugin_self, PANDA_CB_PHYS_MEM_AFTER_WRITE, pcb); } } }
// Returns the guest pointer stored in slot fd of the task's fds array, or
// NULL when that slot cannot be read (a slot holding NULL also yields NULL).
static PTR get_file_struct_ptr(CPUState *env, PTR task_struct, int fd) {
    const PTR files = get_files(env, task_struct);
    const PTR fds = get_files_fds(env, files);

    // fds is a flat array with struct file pointers.
    // Calculate the address of the nth pointer and read it.
    const PTR slot_addr = fds + fd*sizeof(PTR);
    PTR file_ptr;
    const bool unreadable =
        (-1 == panda_virtual_memory_rw(env, slot_addr, (uint8_t *)&file_ptr, sizeof(PTR), 0));
    return unreadable ? (PTR)NULL : file_ptr;
}
// Computes the address a Linux x86 system call instruction at pc returns to:
// pc+11 for syscall (0F 05) / sysenter (0F 34), pc+2 for int 0x80 (CD 80).
//
// Any other instruction, or a non-x86 target, is a caller bug and asserts.
// When asserts are compiled out those paths return 0 rather than falling
// off the end of a non-void function.
target_ulong calc_retaddr_linux_x86(CPUState* env, target_ulong pc) {
#if defined(TARGET_I386)
    unsigned char buf[2] = {};
    panda_virtual_memory_rw(env, pc, buf, 2, 0);
    // Check if the instruction is syscall (0F 05) or sysenter (0F 34)
    if ((buf[0]== 0x0F && buf[1] == 0x05) || (buf[0]== 0x0F && buf[1] == 0x34)) {
        return pc+11;
    }
    // Check if the instruction is int 0x80 (CD 80)
    else if (buf[0]== 0xCD && buf[1] == 0x80) {
        return pc+2;
    }
    // shouldn't happen
    else {
        assert(1==0);
        return 0;
    }
#else
    // shouldn't happen
    assert (1==0);
    return 0;
#endif
}
void windows_read_enter(CPUState *cpu, target_ulong pc, uint32_t FileHandle, uint32_t Event, uint32_t UserApcRoutine, uint32_t UserApcContext, uint32_t IoStatusBlock, uint32_t Buffer, uint32_t BufferLength, uint32_t ByteOffset, uint32_t Key) { int64_t offset = -1; if (ByteOffset != 0) { // Byte offset into file is specified (pointer to LARGE_INTEGER). Read and interpret. panda_virtual_memory_rw(cpu, ByteOffset, (uint8_t *)&offset, sizeof(offset), 0); //printf("NtReadFile: %lu[%ld]\n", (unsigned long)FileHandle, offset); } else { //printf("NtReadFile: %lu[]\n", (unsigned long)FileHandle); } char *filename = get_handle_name(cpu, get_current_proc(cpu), FileHandle); if (ByteOffset && (offset >= 0 && offset < (1L << 48))) { read_enter(cpu, pc, filename, offset, Buffer, BufferLength); } else { offset = get_file_handle_pos(cpu, get_current_proc(cpu), FileHandle); if (offset != -1) read_enter(cpu, pc, filename, offset, Buffer, BufferLength); else // last resort. just assume last_pos. read_enter(cpu, pc, filename, last_pos, Buffer, BufferLength); } }
static target_ulong calc_retaddr(CPUState* env, target_ulong pc){ #if defined(TARGET_ARM) // Normal syscalls: return addr is stored in LR // Except that we haven't run the SWI instruction yet! LR is where libc will return to! //return mask_retaddr_to_pc(env->regs[14]); // Fork, exec uint8_t offset = 0; if(env->thumb == 0){ offset = 4; } else { offset = 2; } return mask_retaddr_to_pc(pc + offset); #elif defined(TARGET_I386) // syscall and sysenter x86 instructions are both 2 bytes //return pc+2; // ABI from http://wiki.osdev.org/SYSENTER // Return address is set by user code before the syscall/sysenter instr is executed unsigned char buf[2]; panda_virtual_memory_rw(env, pc, buf, 2, 0); // Check if the instruction is syscall (0F 05) if (buf[0]== 0x0F && buf[1] == 0x05) { return ECX; } // Check if the instruction is sysenter (0F 34) else if (buf[0]== 0x0F && buf[1] == 0x34) { return EDX; } else { // Not a syscall or sysenter!? assert(0); } #else #error "return address calculation not implemented for this architecture in fdtracker" #endif }
// Classifies a translation block by its last valid instruction.
//
//   env  - CPU state (selects 32/64-bit disassembly on x86)
//   pc   - guest virtual address of the block
//   size - block size in bytes
//
// Returns INSTR_CALL or INSTR_RET when the last instruction not flagged
// CS_GRP_INVALID belongs to the corresponding Capstone group, otherwise
// INSTR_UNKNOWN. INSTR_UNKNOWN is also returned when the block is empty,
// the buffer cannot be allocated, or guest memory cannot be read, so
// uninitialized bytes are never disassembled.
instr_type disas_block(CPUArchState* env, target_ulong pc, int size) {
    instr_type res = INSTR_UNKNOWN;
    if (size <= 0) {
        return res;
    }
    unsigned char *buf = (unsigned char *) malloc(size);
    if (buf == NULL) {
        return res;
    }
    if (panda_virtual_memory_rw(ENV_GET_CPU(env), pc, buf, size, 0) == -1) {
        printf("Couldn't read TB memory!\n");
        free(buf);
        return res;
    }
#if defined(TARGET_I386)
    csh handle = (env->hflags & HF_LMA_MASK) ? cs_handle_64 : cs_handle_32;
#elif defined(TARGET_ARM) || defined(TARGET_PPC)
    csh handle = cs_handle_32;
#endif
    cs_insn *insn = NULL;
    size_t count = cs_disasm(handle, buf, size, pc, 0, &insn);
    if (count > 0) {
        // Walk back from the end to the last instruction that is not invalid.
        // Index-based so no pointer is ever formed before the array start.
        size_t n_valid = count;
        while (n_valid > 0 && cs_insn_group(handle, &insn[n_valid - 1], CS_GRP_INVALID)) {
            n_valid--;
        }
        if (n_valid > 0) {
            cs_insn *last = &insn[n_valid - 1];
            if (cs_insn_group(handle, last, CS_GRP_CALL)) {
                res = INSTR_CALL;
            }
            else if (cs_insn_group(handle, last, CS_GRP_RET)) {
                res = INSTR_RET;
            }
            else {
                res = INSTR_UNKNOWN;
            }
        }
        cs_free(insn, count);
    }
    free(buf);
    return res;
}
// open() entry hook: reads the file name from guest memory, prints it, and
// when it equals taint_filename records that the open was seen (saw_open)
// together with the current asid (the_asid).
//
//   filename - guest virtual address of the NUL-terminated path
//   flags, mode - not used here
//
// A guest byte that cannot be read ends the name at that position, rather
// than copying an uninitialized byte into it.
void open_enter(CPUState* env,target_ulong pc,target_ulong filename,int32_t flags,int32_t mode) {
    uint32_t i;
    char the_filename[MAX_FILENAME];
    the_filename[0] = 0;
    for (i=0; i<MAX_FILENAME; i++) {
        uint8_t c = 0;
        if (-1 == panda_virtual_memory_rw(env, filename+i, &c, 1, 0)) {
            the_filename[i] = 0;
            break;
        }
        the_filename[i] = c;
        if (c==0) {
            break;
        }
    }
    the_filename[MAX_FILENAME-1] = 0;
    if (i != 0 ) {
        printf ("saw open of [%s]\n", the_filename);
    }
    // same length first, then compare contents
    if (i == strlen(taint_filename)) {
        if (strncmp(the_filename, taint_filename, strlen(the_filename)) == 0) {
            saw_open = true;
            printf ("saw open of file we want to taint: [%s]\n", taint_filename);
            the_asid = panda_current_asid(env);
        }
    }
}
// Support all features of label and query program void i386_hypercall_callback(CPUState *env){ #if 0 if (EAX == 0xabcd) { printf ("\n hypercall pc=0x%x\n", (int) panda_current_pc(env)); for (uint32_t i=0; i<8; i++) { printf ("reg[%d] = 0x%x\n", i, (int) env->regs[i]); } } #endif //printf("taint2: Hypercall! B " TARGET_FMT_lx " C " TARGET_FMT_lx " D " TARGET_FMT_lx "\n", // env->regs[R_EBX], env->regs[R_ECX], env->regs[R_EDX]); #if 0 // Label op. // EBX contains addr of that data // ECX contains size of data // EDX contains the label; ~0UL for autoenc. if ((env->regs[R_EAX] == 7 || env->regs[R_EAX] == 8)) { printf ("hypercall -- EAX=0x%x\n", EAX); target_ulong addr = panda_virt_to_phys(env, env->regs[R_EBX]); target_ulong size = env->regs[R_ECX]; target_ulong label = env->regs[R_EDX]; if (!taintEnabled){ printf("taint2: Label operation detected @ %lu\n", rr_get_guest_instr_count()); printf("taint2: Labeling " TARGET_FMT_lx " to " TARGET_FMT_lx " with label " TARGET_FMT_lx ".\n", addr, addr + size, label); __taint2_enable_taint(); } LabelSetP ls = NULL; if (label != (target_ulong)~0UL) { ls = label_set_singleton(label); } // otherwise autoinc. qemu_log_mask(CPU_LOG_TAINT_OPS, "label: %lx[%lx+%lx] <- %lx (%lx)\n", (uint64_t)shadow->ram, (uint64_t)addr, (uint64_t)size, (uint64_t)label, (uint64_t)ls); for (unsigned i = 0; i < size; i++) { //printf("label %u\n", i); shadow->ram->set(addr + i, label_set_singleton(i)); } } #endif if (pandalog && env->regs[R_EAX] == 0xabcd) { // LAVA Hypercall target_ulong addr = panda_virt_to_phys(env, ECX); if ((int)addr == -1) { printf ("panda hypercall with ptr to invalid PandaHypercallStruct: vaddr=0x%x paddr=0x%x\n", (uint32_t) ECX, (uint32_t) addr); } else { PandaHypercallStruct phs; panda_virtual_memory_rw(env, ECX, (uint8_t *) &phs, sizeof(phs), false); if (phs.action == 11) { // it's a lava query lava_taint_query(phs); } if (phs.action == 12) { // it's an attack point sighting lava_attack_point(phs); } } } }
// hypercall-initiated taint query of some src-level extent void lava_taint_query (PandaHypercallStruct phs) { extern CPUState *cpu_single_env; CPUState *env = cpu_single_env; if (pandalog && taintEnabled && (taint2_num_labels_applied() > 0)){ // okay, taint is on and some labels have actually been applied // is there *any* taint on this extent uint32_t num_tainted = 0; for (uint32_t offset=0; offset<phs.len; offset++) { uint32_t va = phs.buf + offset; uint32_t pa = panda_virt_to_phys(env, va); if ((int) pa != -1) { Addr a = make_maddr(pa); if (taint2_query(a)) { num_tainted ++; } } } if (num_tainted) { // ok at least one byte in the extent is tainted // 1. write the pandalog entry that tells us something was tainted on this extent Panda__TaintQueryHypercall *tqh = (Panda__TaintQueryHypercall *) malloc (sizeof (Panda__TaintQueryHypercall)); *tqh = PANDA__TAINT_QUERY_HYPERCALL__INIT; tqh->buf = phs.buf; tqh->len = phs.len; tqh->num_tainted = num_tainted; // obtain the actual data out of memory // NOTE: first 32 bytes only! uint32_t data[32]; uint32_t n = phs.len; // grab at most 32 bytes from memory to pandalog // this is just a snippet. we dont want to write 1M buffer if (32 < phs.len) n = 32; for (uint32_t i=0; i<n; i++) { data[i] = 0; uint8_t c; panda_virtual_memory_rw(env, phs.buf+i, &c, 1, false); data[i] = c; } tqh->n_data = n; tqh->data = data; // 2. write out src-level info Panda__SrcInfo *si = pandalog_src_info_create(phs); tqh->src_info = si; // 3. write out callstack info Panda__CallStack *cs = pandalog_callstack_create(); tqh->call_stack = cs; // 4. 
iterate over the bytes in the extent and pandalog detailed info about taint std::vector<Panda__TaintQuery *> tq; for (uint32_t offset=0; offset<phs.len; offset++) { uint32_t va = phs.buf + offset; uint32_t pa = panda_virt_to_phys(env, va); if ((int) pa != -1) { Addr a = make_maddr(pa); if (taint2_query(a)) { tq.push_back(__taint2_query_pandalog(a, offset)); } } } tqh->n_taint_query = tq.size(); tqh->taint_query = (Panda__TaintQuery **) malloc(sizeof(Panda__TaintQuery *) * tqh->n_taint_query); for (uint32_t i=0; i<tqh->n_taint_query; i++) { tqh->taint_query[i] = tq[i]; } Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; ple.taint_query_hypercall = tqh; pandalog_write_entry(&ple); free(tqh->src_info); pandalog_callstack_free(tqh->call_stack); for (uint32_t i=0; i<tqh->n_taint_query; i++) { __pandalog_taint_query_free(tqh->taint_query[i]); } free(tqh); } } }
// hypercall-initiated taint query of some src-level extent void lava_taint_query (Panda__SrcInfoPri *si, target_ulong buf, target_ulong buf_len) { extern CPUState *cpu_single_env; CPUState *env = cpu_single_env; //if (pandalog && taintEnabled && (taint2_num_labels_applied() > 0)){ if (pandalog && taint2_enabled() && (taint2_num_labels_applied() > 0)){ // okay, taint is on and some labels have actually been applied // is there *any* taint on this extent uint32_t num_tainted = 0; bool is_strnlen = false; //bool is_strnlen = ((int) phs.len == -1); uint32_t offset=0; while (true) { // for (uint32_t offset=0; offset<phs.len; offset++) { uint32_t va = buf + offset; //uint32_t va = phs.buf + offset; uint32_t pa = panda_virt_to_phys(env, va); if (is_strnlen) { uint8_t c; panda_virtual_memory_rw(env, pa, &c, 1, false); // null terminator if (c==0) break; } if ((int) pa != -1) { Addr a = make_maddr(pa); if (taint2_query(a)) { num_tainted ++; } } offset ++; // end of query by length or max string length if (!is_strnlen && offset == buf_len) break; //if (!is_strnlen && offset == phs.len) break; if (is_strnlen && (offset == LAVA_TAINT_QUERY_MAX_LEN)) break; } uint32_t len = offset; if (num_tainted) { printf("logging lava query\n"); // ok at least one byte in the extent is tainted // 1. write the pandalog entry that tells us something was tainted on this extent Panda__TaintQueryPri *tqh = (Panda__TaintQueryPri *) malloc (sizeof (Panda__TaintQueryPri)); *tqh = PANDA__TAINT_QUERY_PRI__INIT; tqh->buf = buf; //tqh->buf = phs.buf; tqh->len = len; tqh->num_tainted = num_tainted; // obtain the actual data out of memory // NOTE: first X bytes only! uint32_t data[LAVA_TAINT_QUERY_MAX_LEN]; uint32_t n = len; // grab at most X bytes from memory to pandalog // this is just a snippet. 
we dont want to write 1M buffer if (LAVA_TAINT_QUERY_MAX_LEN < len) n = LAVA_TAINT_QUERY_MAX_LEN; for (uint32_t i=0; i<n; i++) { data[i] = 0; uint8_t c; panda_virtual_memory_rw(env, buf+i, &c, 1, false); //panda_virtual_memory_rw(env, phs.buf+i, &c, 1, false); data[i] = c; } tqh->n_data = n; tqh->data = data; // 2. write out src-level info //Panda__SrcInfoPri *si = pandalog_src_info_create(phs); tqh->src_info = si; // 3. write out callstack info Panda__CallStack *cs = pandalog_callstack_create(); tqh->call_stack = cs; // 4. iterate over the bytes in the extent and pandalog detailed info about taint std::vector<Panda__TaintQuery *> tq; for (uint32_t offset=0; offset<len; offset++) { uint32_t va = buf + offset; //uint32_t va = phs.buf + offset; uint32_t pa = panda_virt_to_phys(env, va); if ((int) pa != -1) { Addr a = make_maddr(pa); if (taint2_query(a)) { tq.push_back(taint2_query_pandalog(a, offset)); } } } tqh->n_taint_query = tq.size(); tqh->taint_query = (Panda__TaintQuery **) malloc(sizeof(Panda__TaintQuery *) * tqh->n_taint_query); for (uint32_t i=0; i<tqh->n_taint_query; i++) { tqh->taint_query[i] = tq[i]; } Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; ple.taint_query_pri = tqh; printf("about to write out taint query entry\n"); pandalog_write_entry(&ple); free(tqh->src_info); pandalog_callstack_free(tqh->call_stack); for (uint32_t i=0; i<tqh->n_taint_query; i++) { pandalog_taint_query_free(tqh->taint_query[i]); } free(tqh); } } }
void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args){ //void pfun(const char *var_ty, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args){ // restore args const char *var_ty = (const char *) var_ty_void; struct args *args = (struct args *) in_args; CPUState *pfun_env = args->env; const char *src_filename = args->src_filename; uint64_t src_linenum = args->src_linenum; target_ulong guest_dword; std::string ty_string = std::string(var_ty); size_t num_derefs = std::count(ty_string.begin(), ty_string.end(), '*'); size_t i; //Panda__SrcInfoPri *si = pandalog_src_info_pri_create(const char *src_filename, uint64_t src_linenum, const char *src_ast_node_name); Panda__SrcInfoPri *si = pandalog_src_info_pri_create(src_filename, src_linenum, var_nm); switch (loc_t){ case LocReg: guest_dword = pfun_env->regs[loc]; if (num_derefs > 0) { for (i = 0; i < num_derefs; i++) { int rc = panda_virtual_memory_rw(pfun_env, guest_dword, (uint8_t *)&guest_dword, sizeof(guest_dword), 0); if (0 != rc) break; } if (0 != taint2_query_ram(panda_virt_to_phys(pfun_env, guest_dword))) { printf("VAR REG: %s %s in Reg %d\n", var_ty, var_nm, loc); printf(" => 0x%x, derefs: %ld\n", guest_dword, i); printf(" ==Location is tainted!==\n"); lava_taint_query(si, guest_dword, 1); } } else { // only query reg taint if the reg number is less than the number of registers if (loc < CPU_NB_REGS) { if (0 != taint2_query_reg(loc, 0)) { printf("VAR REG: %s %s in Reg %d\n", var_ty, var_nm, loc); printf(" => 0x%x, derefs: %d\n", guest_dword, 0); printf(" ==Reg is tainted!==\n"); } } } break; case LocMem: guest_dword = loc; for (i = 0; i < num_derefs; i++) { if (0 != panda_virtual_memory_rw(pfun_env, guest_dword, (uint8_t *)&guest_dword, sizeof(guest_dword), 0)){ break; } } if (0 != taint2_query_ram(panda_virt_to_phys(pfun_env, guest_dword))) { printf("VAR MEM: %s %s @ 0x%x\n", var_ty, var_nm, loc); printf(" => 0x%x, derefs: %ld\n", guest_dword, i); printf(" 
==Location is tainted!==\n"); lava_taint_query(si, guest_dword, 1); } break; case LocConst: //printf("VAR CONST: %s %s as 0x%x\n", var_ty, var_nm, loc); break; case LocErr: //printf("VAR does not have a location we could determine. Most likely because the var is split among multiple locations\n"); break; // should not get here default: assert(1==0); } }
// This will only be called for instructions where the
// translate_callback returned true
//
// Runs every registered pre-exec callback, (re)installs the logging hooks
// (record_syscall, log_string, log_pointer, log_32, log_s32, log_64) and then
// falls into the code textually included from gen_syscall_printer.cpp.
// Returns 0 on every path visible here.
//
// The hooks are lambdas that capture env and pc BY REFERENCE, so they refer
// to this call's parameters.
// NOTE(review): presumably they are only invoked by the included printer
// code while this call is active; confirm nothing calls them afterwards.
int exec_callback(CPUState *env, target_ulong pc) {
    // run any code we need to update our state
    for(const auto callback : preExecCallbacks){
        callback(env, pc);
    }
#if defined(TARGET_I386)
    // On Windows, the system call id is in EAX
    record_syscall = [&env, &pc](const char* callname){
        syscall_fprintf(env, "CALL=%s, PC=" TARGET_FMT_lx ", SYSCALL=" TARGET_FMT_lx ", CR3=" TARGET_FMT_lx "\n", callname, pc, env->regs[R_EAX], env->cr[3]);
    };
#elif defined(TARGET_ARM)
#if defined(CAPTURE_ARM_OABI)
#if (1)
    if(env->thumb == 0){ //Old ABI not possible with Thumb
        // read 4 bytes, number may be in instruction.
        unsigned char buf[4];
        panda_virtual_memory_rw(env, pc, buf, 4, 0);
        // third instruction byte 0x90: logged as an old-ABI call, and the
        // generated printer below is skipped
        if (buf[2] == 0x90) {
            fprintf(plugin_log, "PC=" TARGET_FMT_lx ", SYSCALL=" TARGET_FMT_lx ", OLD ABI \n", pc, *(unsigned int*)&buf);
            return 0;
        }
    }
#else
    syscall_fprintf(env, "SKIPPING OABI\n");
#endif
#endif// OABI
    if (env->regs[7] == 0xf0){ //skip sys_futex
        return 0;
    }
    record_syscall = [&env, &pc](const char* callname){
        syscall_fprintf(env, "CALL=%s, PC=" TARGET_FMT_lx ", SYSCALL=" TARGET_FMT_lx ", thumb=" TARGET_FMT_lx "\n", callname, pc, env->regs[7], env->thumb);
    };
#endif
    // Builds a syscalls::string from the guest address src, logs its value
    // and returns it.
    log_string = [&env, &pc](target_ulong src, const char* argname) -> syscalls::string{
        syscalls::string arg(env, pc, src);
        syscall_fprintf(env, "STRING, NAME=%s, VALUE=%s\n", argname, arg.value().c_str());
        return arg;
    };
    // Logs a pointer argument and returns it unchanged.
    log_pointer = [&env, &pc](target_ulong addr, const char* argname) -> target_ulong {
        syscall_fprintf(env, "PTR, NAME=%s, VALUE=" TARGET_FMT_lx"\n",argname, addr);
        return addr;
    };
    // Logs an argument tagged U32 and returns it unchanged.
    log_32 = [&env,&pc](target_ulong value, const char* argname){
        syscall_fprintf(env, "U32, NAME=%s, VALUE=" TARGET_FMT_lx"\n", argname, value);
        return value;
    };
    // Logs an argument tagged S32 (printed with the same format as U32) and
    // returns it unchanged.
    log_s32 = [&env,&pc](target_long value, const char* argname){
        syscall_fprintf(env, "S32, NAME=%s, VALUE=" TARGET_FMT_lx"\n", argname, value);
        return value;
    };
    // Combines a high and a low half into one 64-bit value
    // ((high << 32) | low), logs it and returns it.
    log_64 = [&env,&pc](target_ulong high, target_ulong low, const char* argname){
        syscall_fprintf(env, "I64, NAME=%s, VALUE=%llx\n", argname, ((unsigned long long)high << 32) | low );
        return ((unsigned long long)high << 32) | low;
    };
    // syscall is in R7
    //syscall_fprintf(env, "PC=" TARGET_FMT_lx ", SYSCALL=" TARGET_FMT_lx ", thumb=" TARGET_FMT_lx "\n", pc, env->regs[7], env->thumb);
    // The included file is pasted into this function body at this point.
#include "gen_syscall_printer.cpp"
    return 0;
}
// Support all features of label and query program void i386_hypercall_callback(CPUState *env){ if (taintEnabled && pandalog) { // LAVA Hypercall #ifdef TAINT_LEGACY_HYPERCALL target_ulong buf_start = EBX; target_ulong buf_len = ECX; long label = EDI; // call to label data // EBX contains addr of that data // ECX contains size of data // EDI is the label integer // EDX = starting offset (for positional labels only) // -mostly not used, this is managed in pirate_utils if (EAX == 7 || EAX == 8){ if (!taintEnabled){ printf("Taint plugin: Label operation detected\n"); printf("Enabling taint processing\n"); __taint2_enable_taint(); } if (EAX == 7){ // Standard buffer label printf("taint2: single taint label\n"); taint2_add_taint_ram_single_label(env, (uint64_t)buf_start, (int)buf_len, label); } else if (EAX == 8){ // Positional buffer label printf("taint2: positional taint label\n"); taint2_add_taint_ram_pos(env, (uint64_t)buf_start, (int)buf_len); } } /* //mz Query taint on this buffer //mz EBX = start of buffer (VA) //mz ECX = size of buffer (bytes) // EDX = starting offset - for file queries // -mostly not used, this is managed in pirate_utils else if (env->regs[R_EAX] == 9){ //Query taint on label if (taintEnabled){ printf("Taint plugin: Query operation detected\n"); Addr a = make_maddr(buf_start); bufplot(env, shadow, &a, (int)buf_len); } //printf("Disabling taint processing\n"); //taintEnabled = false; //taintJustDisabled = true; //printf("Label occurrences on HD: %d\n", shad_dir_occ_64(shadow->hd)); } else if (env->regs[R_EAX] == 10){ // Guest util done - reset positional label counter taint_pos_count = 0; } */ #else target_ulong addr = panda_virt_to_phys(env, EAX); if ((int)addr == -1) { printf ("panda hypercall with ptr to invalid PandaHypercallStruct: vaddr=0x%x paddr=0x%x\n", (uint32_t) EAX, (uint32_t) addr); } else { PandaHypercallStruct phs; panda_virtual_memory_rw(env, EAX, (uint8_t *) &phs, sizeof(phs), false); if (phs.magic == 0xabcd) { if (phs.action == 
11) { // it's a lava query lava_taint_query(phs); } else if (phs.action == 12) { // it's an attack point sighting lava_attack_point(phs); } else { printf("Unknown hypercall action %d\n", phs.action); } } else { printf ("Invalid magic value in PHS struct: %x != 0xabcd.\n", phs.magic); } } #endif // TAINT_LEGACY_HYPERCALL } }
/**
 * @brief Hypercall-initiated taint query of some src-level extent.
 *
 * Counts the tainted bytes of the guest extent starting at phs.buf. The
 * extent is phs.len bytes long, or, when phs.len is -1, runs up to the first
 * NUL byte (at most QUERY_HYPERCALL_MAX_LEN bytes). When at least one byte
 * is tainted, a single TaintQueryHypercall pandalog entry is written
 * carrying a snippet of up to QUERY_HYPERCALL_MAX_LEN bytes, source info,
 * the call stack, and one taint record per tainted byte.
 *
 * Fixes relative to the earlier form:
 *  - the string-mode NUL probe reads through the virtual address; it used
 *    to pass the translated physical address to a virtual-memory read;
 *  - an unreadable byte ends a string-mode extent;
 *  - an empty extent (phs.len == 0) scans nothing, instead of looping until
 *    the 32-bit offset wraps around;
 *  - the message lives on the stack and the taint record array is borrowed
 *    from the vector, so nothing is leaked.
 *
 * @param phs hypercall descriptor read from the guest.
 */
void taint_query_hypercall(PandaHypercallStruct phs) {
    CPUState *cpu = first_cpu;
    if (pandalog && taintEnabled && (taint2_num_labels_applied() > 0)) {
        // okay, taint is on and some labels have actually been applied
        // is there *any* taint on this extent
        uint32_t num_tainted = 0;
        bool is_strnlen = ((int) phs.len == -1);
        uint32_t offset=0;
        // end of query by length or max string length
        while (is_strnlen ? (offset < QUERY_HYPERCALL_MAX_LEN) : (offset < phs.len)) {
            uint32_t va = phs.buf + offset;
            if (is_strnlen) {
                uint8_t c = 0;
                // null terminator (an unreadable byte also ends the string)
                if (-1 == panda_virtual_memory_rw(cpu, va, &c, 1, false) || c==0) break;
            }
            uint32_t pa = panda_virt_to_phys(cpu, va);
            if ((int) pa != -1) {
                Addr a = make_maddr(pa);
                if (taint2_query(a)) {
                    num_tainted ++;
                }
            }
            offset ++;
        }
        uint32_t len = offset;
        if (num_tainted) {
            // ok at least one byte in the extent is tainted
            // 1. write the pandalog entry that tells us something was tainted on this extent
            Panda__TaintQueryHypercall tqh = PANDA__TAINT_QUERY_HYPERCALL__INIT;
            tqh.buf = phs.buf;
            tqh.len = len;
            tqh.num_tainted = num_tainted;
            // obtain the actual data out of memory
            // NOTE: first X bytes only!
            uint32_t data[QUERY_HYPERCALL_MAX_LEN];
            uint32_t n = len;
            // grab at most X bytes from memory to pandalog
            // this is just a snippet. we dont want to write 1M buffer
            if (QUERY_HYPERCALL_MAX_LEN < len) n = QUERY_HYPERCALL_MAX_LEN;
            for (uint32_t i=0; i<n; i++) {
                // stays 0 if the guest byte cannot be read
                uint8_t c = 0;
                panda_virtual_memory_rw(cpu, phs.buf+i, &c, 1, false);
                data[i] = c;
            }
            tqh.n_data = n;
            tqh.data = data;
            // 2. write out src-level info
            Panda__SrcInfo *si = pandalog_src_info_create(phs);
            tqh.src_info = si;
            // 3. write out callstack info
            Panda__CallStack *cs = pandalog_callstack_create();
            tqh.call_stack = cs;
            // 4. one detailed taint record per tainted byte
            std::vector<Panda__TaintQuery *> tq;
            for (uint32_t offset=0; offset<len; offset++) {
                uint32_t va = phs.buf + offset;
                uint32_t pa = panda_virt_to_phys(cpu, va);
                if ((int) pa != -1) {
                    Addr a = make_maddr(pa);
                    if (taint2_query(a)) {
                        tq.push_back(taint2_query_pandalog(a, offset));
                    }
                }
            }
            // the vector outlives the write, so its storage is used directly
            tqh.n_taint_query = tq.size();
            tqh.taint_query = tq.data();
            Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT;
            ple.taint_query_hypercall = &tqh;
            pandalog_write_entry(&ple);
            free(tqh.src_info);
            pandalog_callstack_free(tqh.call_stack);
            for (size_t i=0; i<tq.size(); i++) {
                pandalog_taint_query_free(tq[i]);
            }
        }
    }
}