void uninit_plugin(void *self) {
    // Save any that we haven't flushed yet
    for (auto &kvp : read_text_tracker) {
        if (kvp.second.nch > min_strlen) {
            gzprintf(mem_report, "%llu:%.*s\n", rr_get_guest_instr_count(),
                     kvp.second.nch, kvp.second.ch);
        }
    }
    for (auto &kvp : write_text_tracker) {
        if (kvp.second.nch > min_strlen) {
            gzprintf(mem_report, "%llu:%.*s\n", rr_get_guest_instr_count(),
                     kvp.second.nch, kvp.second.ch);
        }
    }
    for (auto &kvp : read_utext_tracker) {
        if (kvp.second.nch > min_strlen) {
            gsize bytes_written = 0;
            gchar *out_str = g_convert((gchar *)kvp.second.ch, kvp.second.nch*2,
                                       "UTF-8", "UTF-16LE", NULL, &bytes_written, NULL);
            gzprintf(mem_report, "%llu:%s\n", rr_get_guest_instr_count(), out_str);
            g_free(out_str);
        }
    }
    for (auto &kvp : write_utext_tracker) {
        if (kvp.second.nch > min_strlen) {
            gsize bytes_written = 0;
            gchar *out_str = g_convert((gchar *)kvp.second.ch, kvp.second.nch*2,
                                       "UTF-8", "UTF-16LE", NULL, &bytes_written, NULL);
            gzprintf(mem_report, "%llu:%s\n", rr_get_guest_instr_count(), out_str);
            g_free(out_str);
        }
    }
    gzclose(mem_report);
}
// R0 is command (label or query)
// R1 is buf_start
// R2 is length
// R3 is offset (not currently implemented)
void arm_hypercall_callback(CPUState *env) {
    //target_ulong buf_start = env->regs[1];
    //target_ulong buf_len = env->regs[2];

    if (env->regs[0] == 7 || env->regs[0] == 8) { // Taint label
        if (!taintEnabled) {
            printf("Taint plugin: Label operation detected @ %lu\n",
                   rr_get_guest_instr_count());
            printf("Enabling taint processing\n");
            __taint2_enable_taint();
        }
        // FIXME: do labeling here.
    } else if (env->regs[0] == 9) { // Query taint on label
        if (taintEnabled) {
            printf("Taint plugin: Query operation detected @ %lu\n",
                   rr_get_guest_instr_count());
            //Addr a = make_maddr(buf_start);
            //bufplot(env, shadow, &a, (int)buf_len);
        }
        //printf("Disabling taint processing\n");
        //taintEnabled = false;
        //taintJustDisabled = true;
        //printf("Label occurrences on HD: %d\n", shad_dir_occ_64(shadow->hd));
    }
}
// compress current chunk and write it to file,
// also update directory map
void write_current_chunk(void) {
    // uncompressed chunk size
    unsigned long cs = thePandalog->chunk.buf_p - thePandalog->chunk.buf;
    unsigned long ccs = thePandalog->chunk.zsize;
    int ret;
    // loop allows compress2 to fail and resize output buffer as needed
    // not sure why compress2 needs output buf to be bigger than input
    // even though ultimately it is smaller.  scratch space?
    // 10 is just a random guess.  shouldn't need more than 1 re-try
    uint32_t i;
    for (i = 0; i < 10; i++) {
        ret = compress2(thePandalog->chunk.zbuf, &ccs,
                        thePandalog->chunk.buf, cs, Z_BEST_COMPRESSION);
        if (ret == Z_OK) break;
        // bigger output buffer needed to perform compression?
        thePandalog->chunk.zsize *= 2;
        thePandalog->chunk.zbuf = (unsigned char *)
            realloc(thePandalog->chunk.zbuf, thePandalog->chunk.zsize);
        assert(thePandalog->chunk.zbuf != NULL);
        // compress2 updates ccs in place, so reset it to the new buffer size
        // before retrying, or the retry still sees the old (too small) limit
        ccs = thePandalog->chunk.zsize;
    }
    // ccs is final compressed chunk size
    assert(ret == Z_OK);
    assert(ccs > 0);
    assert(cs >= ccs);
    printf("writing chunk %d of pandalog %d / %d = %.2f compression\n",
           (int) thePandalog->chunk_num, (int) cs, (int) ccs,
           ((float) cs) / ((float) ccs));
    fwrite(thePandalog->chunk.zbuf, 1, ccs, thePandalog->file);
    add_dir_entry(thePandalog->chunk_num);
    // reset start instr / pos
    thePandalog->chunk.start_instr = rr_get_guest_instr_count();
    thePandalog->chunk.start_pos = ftell(thePandalog->file);
    // rewind chunk buf and inc chunk #
    thePandalog->chunk.buf_p = thePandalog->chunk.buf;
    thePandalog->chunk_num++;
    thePandalog->chunk.ind_entry = 0;
}
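// Hedged aside (not part of the original source): zlib's compressBound() answers the
// "scratch space?" question above -- deflate can expand incompressible input slightly,
// so the destination buffer must be somewhat larger than the source.  A minimal sketch
// of sizing the chunk's output buffer up front instead of the guess-and-double retry;
// alloc_zbuf_for is an illustrative helper name, not part of the pandalog code.
#include <zlib.h>
#include <stdlib.h>
#include <assert.h>

static unsigned char *alloc_zbuf_for(unsigned long src_len, unsigned long *zsize_out) {
    // compressBound() returns an upper bound on compress2() output for src_len bytes
    unsigned long bound = compressBound(src_len);
    unsigned char *zbuf = (unsigned char *) malloc(bound);
    assert(zbuf != NULL);
    *zsize_out = bound;
    return zbuf;
}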
void pandalog_write_entry(Panda__LogEntry *entry) {
    // fill in required fields.
    // NOTE: any other fields will already have been filled in
    // by the plugin that made this call.
    if (panda_in_main_loop) {
        entry->pc = panda_current_pc(cpu_single_env);
        entry->instr = rr_get_guest_instr_count();
    } else {
        entry->pc = -1;
        entry->instr = -1;
    }
    size_t n = panda__log_entry__get_packed_size(entry);
    resize_pandalog(n);
    panda__log_entry__pack(entry, pandalog_buf);
    // write size of log entry
    int x = gzwrite(pandalog_file, (void *) &n, sizeof(n));
    //printf ("x=%d\n", x);
    if (x == 0) {
        printf("gzwrite for pandalog failed\n");
    }
    // and then the entry itself
    x = gzwrite(pandalog_file, pandalog_buf, n);
    //printf ("x=%d\n", x);
    if (x == 0) {
        printf("gzwrite for pandalog failed\n");
    }
}
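// Hedged aside (not part of the original source): the writer above emits a size_t
// length prefix followed by the packed protobuf bytes, all through gzip.  A minimal
// reader sketch under that assumption, using the protobuf-c generated unpack/free
// functions for Panda__LogEntry and the same includes as the writer (zlib.h plus the
// generated pandalog protobuf header); the 64 KB entry cap is an assumption.
static void read_all_entries(const char *path) {
    gzFile f = gzopen(path, "r");
    if (!f) return;
    size_t n;
    uint8_t buf[65536];   // assumes no single packed entry exceeds this size
    while (gzread(f, &n, sizeof(n)) == (int) sizeof(n) && n <= sizeof(buf)) {
        if (gzread(f, buf, n) != (int) n) break;
        Panda__LogEntry *ple = panda__log_entry__unpack(NULL, n, buf);
        if (!ple) break;
        // ... consume ple->instr, ple->pc, and any plugin-specific fields here ...
        panda__log_entry__free_unpacked(ple, NULL);
    }
    gzclose(f);
}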
// 3 long sys_read(unsigned int fd, char __user *buf, size_t count);
// typedef void (*on_sys_read_return_t)(CPUState *cpu, target_ulong pc, uint32_t fd,
//                                      target_ulong buf, uint32_t count);
void read_return(CPUState *cpu, target_ulong pc, uint32_t buf, uint32_t actual_count) {
    if (saw_read && panda_current_asid(cpu) == the_asid) {
        // These are the start and end of the current range of labels.
        uint32_t read_start = last_pos;
        uint32_t read_end = last_pos + actual_count;
        if (debug)
            printf("returning from read of [%s] count=%u\n", taint_filename, actual_count);
        // check if we overlap the range we want to label.
        if (read_start < end_label && read_end > start_label) {
            uint32_t range_start = std::max(read_start, start_label);
            uint32_t range_end = std::min(read_end, end_label);
            printf("*** applying %s taint labels %u..%u to buffer @ %lu\n",
                   positional_labels ? "positional" : "uniform",
                   range_start, range_end - 1, rr_get_guest_instr_count());
            uint32_t num_labeled = 0;
            uint32_t i = 0;
            for (uint32_t l = range_start; l < range_end; l++) {
                if (label_byte(cpu, last_read_buf + i, positional_labels ? l : 1))
                    num_labeled++;
                i++;
            }
            printf("%u bytes labeled for this read\n", num_labeled);
        }
        last_pos += actual_count;
        // printf (" ... done applying labels\n");
        saw_read = false;
    }
}
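// Hedged aside (not part of the original source): label_byte() above is a helper from
// the same plugin.  A minimal sketch of what it plausibly does -- translate the guest
// virtual address to a physical one and apply a taint2 RAM label -- assuming the
// taint2_label_ram() API; treat this as illustrative, not the plugin's exact code.
bool label_byte(CPUState *cpu, target_ulong virt_addr, uint32_t label_num) {
    // taint2 shadow memory is keyed by physical address
    target_ulong pa = panda_virt_to_phys(cpu, virt_addr);
    if (pa == (target_ulong) -1) {
        return false;   // page not present; nothing to label
    }
    taint2_label_ram(pa, label_num);
    return true;
}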
int taint_compute_numbers_before_block_exec(CPUState *env, TranslationBlock *tb) {
    if (taint_enabled()) {
        // just store pc
        the_pc = tb->pc;
        the_instr_count = rr_get_guest_instr_count();
    }
    return 0;
}
int before_block_exec(CPUState *env, TranslationBlock *tb) {
    uint64_t count = rr_get_guest_instr_count();
    if (!snipping && count + tb->icount > start_count) {
        sassert((oldlog = fopen(rr_nondet_log->name, "r")), 8);
        sassert(fread(&orig_last_prog_point, sizeof(RR_prog_point), 1, oldlog) == 1, 9);

        printf("Original ending prog point: ");
        rr_spit_prog_point(orig_last_prog_point);

        actual_start_count = count;
        printf("Saving snapshot at instr count %lu...\n", count);

        // Force running state
        global_state_store_running();
        printf("writing snapshot:\t%s\n", snp_name);
        QIOChannelFile *ioc = qio_channel_file_new_path(snp_name,
                                                        O_WRONLY | O_CREAT, 0660, NULL);
        QEMUFile *snp = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
        qemu_savevm_state(snp, NULL);
        qemu_fclose(snp);

        printf("Beginning cut-and-paste process at prog point:\n");
        rr_spit_prog_point(rr_prog_point());
        printf("Writing entries to %s...\n", nondet_name);
        newlog = fopen(nondet_name, "w");
        sassert(newlog, 10);
        // We'll fix this up later.
        RR_prog_point prog_point = {0};
        fwrite(&prog_point.guest_instr_count,
               sizeof(prog_point.guest_instr_count), 1, newlog);

        fseek(oldlog, ftell(rr_nondet_log->fp), SEEK_SET);

        // If there are items in the queue, then start copying the log
        // from there
        RR_log_entry *item = rr_get_queue_head();
        if (item != NULL)
            fseek(oldlog, item->header.file_pos, SEEK_SET);

        while (prog_point.guest_instr_count < end_count && !feof(oldlog)) {
            prog_point = copy_entry();
        }
        if (feof(oldlog)) {
            printf("Reached end of old nondet log.\n");
        } else { // prog_point is the first one AFTER what we want
            printf("Past desired ending point for log.\n");
        }

        snipping = true;
        printf("Continuing with replay.\n");
    }

    if (snipping && !done && count > end_count) {
        end_snip();
        rr_end_replay_requested = 1;
    }

    return 0;
}
int file_taint_enable(CPUState *cpu, target_ulong pc) {
    if (!no_taint && !taint2_enabled()) {
        uint64_t ins = rr_get_guest_instr_count();
        if (ins > first_instr) {
            taint2_enable_taint();
            if (debug) printf(" enabled taint2 @ ins %" PRId64 "\n", ins);
        }
    }
    return 0;
}
// turn on taint at right instr count
int tstringsearch_enable_taint(CPUState *env, target_ulong pc) {
    // enable taint if close to instruction count
    uint64_t ic = rr_get_guest_instr_count();
    if (!taint2_enabled()) {
        if (ic + 100 > enable_taint_instr_count) {
            printf("enabling taint at instr count %" PRIu64 "\n", ic);
            taint2_enable_taint();
        }
    }
    return 0;
}
void dead_data_on_branch(Addr a, uint64_t size) {
    assert(a.typ == LADDR);
    LAddr reg_num = a.val.la;
    current_instr = rr_get_guest_instr_count();
    for (uint32_t offset = 0; offset < size; offset++) {
        if (taint2_query_llvm(reg_num, offset)) {
            // this offset of reg is tainted.
            // iterate over labels in set & update dead data
            taint2_labelset_llvm_iter(reg_num, offset, dd_each_label, NULL);
        }
    }
}
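// Hedged aside (not part of the original source): taint2_labelset_llvm_iter() above
// visits every taint label on that (reg, offset) pair and invokes a user callback for
// each.  A minimal sketch of what dd_each_label might look like, assuming an iter
// callback signature of int (*)(uint32_t el, void *stuff) and a global label-to-counter
// map; the names and return-value convention here are illustrative, not the plugin's
// actual code.
#include <map>
#include <cstdint>

extern std::map<uint32_t, uint64_t> dead_data;   // label -> times seen live at a branch

int dd_each_label(uint32_t el, void *stuff) {
    // el is one taint label from the queried label set
    dead_data[el]++;
    return 0;   // assumed: returning 0 asks the iterator to keep going
}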
int before_block_exec(CPUState *env, TranslationBlock *tb) {
    uint64_t count = rr_get_guest_instr_count();
    if (!snipping && count + tb->num_guest_insns > start_count) {
        sassert((oldlog = fopen(rr_nondet_log->name, "r")));
        sassert(fread(&orig_last_prog_point, sizeof(RR_prog_point), 1, oldlog) == 1);

        printf("Original ending prog point: ");
        rr_spit_prog_point(orig_last_prog_point);

        actual_start_count = count;
        printf("Saving snapshot at instr count %lu...\n", count);
        do_savevm_rr(get_monitor(), snp_name);

        printf("Beginning cut-and-paste process at prog point:\n");
        rr_spit_prog_point(rr_prog_point());
        printf("Writing entries to %s...\n", nondet_name);
        newlog = fopen(nondet_name, "w");
        sassert(newlog);
        // We'll fix this up later.
        RR_prog_point prog_point = {0, 0, 0};
        fwrite(&prog_point, sizeof(RR_prog_point), 1, newlog);

        fseek(oldlog, ftell(rr_nondet_log->fp), SEEK_SET);

        RR_log_entry *item = rr_get_queue_head();
        while (item != NULL && item->header.prog_point.guest_instr_count < end_count) {
            write_entry(item);
            item = item->next;
        }
        while (prog_point.guest_instr_count < end_count && !feof(oldlog)) {
            prog_point = copy_entry();
        }
        if (feof(oldlog)) {
            printf("Reached end of old nondet log.\n");
        } else { // prog_point is the first one AFTER what we want
            printf("Past desired ending point for log.\n");
        }

        snipping = true;
        printf("Continuing with replay.\n");
    }

    if (snipping && !done && count > end_count) {
        end_snip();
        rr_end_replay_requested = 1;
    }

    return 0;
}
void open_enter(CPUState *cpu, target_ulong pc, std::string filename,
                int32_t flags, int32_t mode) {
    if (!filename.empty()) {
        if (debug) printf("open_enter: saw open of [%s]\n", filename.c_str());
    }
    if (filename.find(taint_filename) != std::string::npos) {
        saw_open = true;
        printf("saw open of file we want to taint: [%s] insn %" PRId64 "\n",
               taint_filename, rr_get_guest_instr_count());
        the_asid = panda_current_asid(cpu);
        if (enable_taint_on_open && !no_taint && !taint2_enabled()) {
            uint64_t ins = rr_get_guest_instr_count();
            taint2_enable_taint();
            if (debug) printf("file_taint: enabled taint2 @ ins %" PRId64 "\n", ins);
        }
    }
}
bool before_block_exec(CPUState *env, TranslationBlock *tb) {
    static int progress = 0;
    static void *saved = NULL;
    static void *last = NULL;

    if (rr_get_guest_instr_count() / 50000 > progress) {
        progress++;
        printf("Taking panda checkpoint %u...\n", progress);
        last = panda_checkpoint();
        printf("Done.\n");
    }

    if (!saved && rr_get_guest_instr_count() > 100000) {
        printf("\n\nSaving checkpoint for restore!!\n\n");
        saved = last;
    }

    static int restart_count = 0;
    if (rr_get_guest_instr_count() > 200000 && restart_count < 3) {
        restart_count++;
        printf("Restarting...\n");
        panda_restart(saved);
        printf("Done.\n");
        return true;
    }

    return false;
}
void pandalog_write_entry(Panda__LogEntry *entry) {
    // fill in required fields.
    if (panda_in_main_loop) {
        entry->pc = panda_current_pc(first_cpu);
        entry->instr = rr_get_guest_instr_count();
    } else {
        entry->pc = -1;
        entry->instr = -1;
    }
    size_t n = panda__log_entry__get_packed_size(entry);
    // possibly compress and write current chunk and move on to next chunk,
    // but don't do so if it would spread log entries for the same instruction
    // between chunks.
    // invariant: all log entries for an instruction belong in a single chunk
    if ((instr_last_entry != -1)   // first entry written
        && (instr_last_entry != entry->instr)
        && (thePandalog->chunk.buf_p + n >= thePandalog->chunk.buf + thePandalog->chunk.size)) {
        // entry won't fit in current chunk
        // and new entry is a different instr from last entry written
        write_current_chunk();
    }
    // sanity check.  If this fails, that means a large number of pandalog entries
    // for the same instr went off the end of a chunk, which was already allocated
    // bigger than needed.  possible.  but I'd rather assert it's not and understand
    // why before adding auto realloc here.
    // TRL 2016-05-10: Ok here's a time when this legit happens.  When you pandalog in
    // uninit_plugin this can be a lot of entries for the same instr (the very last one
    // in the trace).  So no more assert.
    if (thePandalog->chunk.buf_p + sizeof(uint32_t) + n
        >= thePandalog->chunk.buf + ((int)(floor(thePandalog->chunk.size)))) {
        uint32_t offset = thePandalog->chunk.buf_p - thePandalog->chunk.buf;
        uint32_t new_size = offset * 2;
        printf("reallocing chunk.buf to %d bytes\n", new_size);
        thePandalog->chunk.buf = (unsigned char *) realloc(thePandalog->chunk.buf, new_size);
        thePandalog->chunk.buf_p = thePandalog->chunk.buf + offset;
        assert(thePandalog->chunk.buf != NULL);
    }
    // now write the entry itself to the buffer.  size then entry itself
    *((uint32_t *) thePandalog->chunk.buf_p) = n;
    thePandalog->chunk.buf_p += sizeof(uint32_t);
    // and then the entry itself (packed)
    panda__log_entry__pack(entry, thePandalog->chunk.buf_p);
    thePandalog->chunk.buf_p += n;
    // remember instr for last entry
    instr_last_entry = entry->instr;
    thePandalog->chunk.ind_entry++;
}
int mem_callback(CPUState *env, target_ulong pc, target_ulong addr,
                 target_ulong size, void *buf, bool is_write,
                 std::map<prog_point,string_pos> &text_tracker) {
    prog_point p = {};
    get_prog_point(env, &p);

    string_pos &sp = text_tracker[p];
    for (unsigned int i = 0; i < size; i++) {
        uint8_t val = ((uint8_t *)buf)[i];
        for (int str_idx = 0; str_idx < num_strings; str_idx++) {
            if (tofind[str_idx][sp.val[str_idx]] == val)
                sp.val[str_idx]++;
            else
                sp.val[str_idx] = 0;

            if (sp.val[str_idx] == strlens[str_idx]) {
                // Victory!
                printf("%s Match of str %d at: instr_count=%lu : " TARGET_FMT_lx
                       " " TARGET_FMT_lx " " TARGET_FMT_lx "\n",
                       (is_write ? "WRITE" : "READ"), str_idx,
                       rr_get_guest_instr_count(), p.caller, p.pc, p.cr3);
                matches[p].val[str_idx]++;
                sp.val[str_idx] = 0;

                // Also get the full stack here
                fullstack f = {0};
                f.n = get_callers(f.callers, 16, env);
                f.pc = p.pc;
                f.asid = p.cr3;
                matchstacks[p] = f;

                // call the i-found-a-match registered callbacks here
                PPP_RUN_CB(on_ssm, env, pc, addr, tofind[str_idx],
                           strlens[str_idx], is_write);
            }
        }
    }

    return 1;
}
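// Hedged aside (not part of the original source): PPP_RUN_CB(on_ssm, ...) above fires a
// plugin-to-plugin callback.  A minimal sketch of how a consumer plugin might hook it,
// assuming the usual PANDA PPP registration macros and an on_ssm prototype that mirrors
// the arguments passed above (the authoritative prototype lives in stringsearch's
// _int_fns header); my_on_ssm is an illustrative name.
void my_on_ssm(CPUState *env, target_ulong pc, target_ulong addr,
               uint8_t *matched_string, uint32_t matched_string_length,
               bool is_write) {
    printf("string match at pc=" TARGET_FMT_lx " (%s) @ instr %" PRIu64 "\n",
           pc, is_write ? "write" : "read", rr_get_guest_instr_count());
}

bool init_plugin(void *self) {
    panda_require("stringsearch");                  // make sure the producer is loaded
    PPP_REG_CB("stringsearch", on_ssm, my_on_ssm);  // register for match events
    return true;
}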
void pandalog_write_entry(Panda__LogEntry *entry) {
    // fill in required fields.
    if (panda_in_main_loop) {
        entry->pc = panda_current_pc(cpu_single_env);
        entry->instr = rr_get_guest_instr_count();
    } else {
        entry->pc = -1;
        entry->instr = -1;
    }
    size_t n = panda__log_entry__get_packed_size(entry);
    // possibly compress and write current chunk and move on to next chunk,
    // but don't do so if it would spread log entries for the same instruction
    // between chunks.
    // invariant: all log entries for an instruction belong in a single chunk
    if ((instr_last_entry != -1)   // first entry written
        && (instr_last_entry != entry->instr)
        && (thePandalog->chunk.buf_p + n >= thePandalog->chunk.buf + thePandalog->chunk.size)) {
        // entry won't fit in current chunk
        // and new entry is a different instr from last entry written
        write_current_chunk();
    }
    // sanity check.  If this fails, that means a large number of pandalog entries
    // for the same instr went off the end of a chunk, which was already allocated
    // bigger than needed.  possible.  but I'd rather assert it's not and understand
    // why before adding auto realloc here.
    assert(thePandalog->chunk.buf_p + sizeof(uint32_t) + n
           < thePandalog->chunk.buf + ((int)(floor(thePandalog->chunk.size * SLACK_MULT))));
    // now write the entry itself to the buffer.  size then entry itself
    *((uint32_t *) thePandalog->chunk.buf_p) = n;
    thePandalog->chunk.buf_p += sizeof(uint32_t);
    // and then the entry itself (packed)
    panda__log_entry__pack(entry, thePandalog->chunk.buf_p);
    thePandalog->chunk.buf_p += n;
    // remember instr for last entry
    instr_last_entry = entry->instr;
    thePandalog->chunk.ind_entry++;
}
int guest_hypercall_callback(CPUState *cpu) {
#if defined(TARGET_I386)
    CPUArchState *env = (CPUArchState *) cpu->env_ptr;
    if (taintEnabled) {
        if (EAX == 7 || EAX == 8) {
            target_ulong buf_start = EBX;
            target_ulong buf_len = ECX;
            long label = EDI;

            if (EAX == 7) {
                // Standard buffer label
                printf("taint2: single taint label\n");
                taint2_add_taint_ram_single_label(cpu, (uint64_t)buf_start,
                                                  (int)buf_len, label);
            } else if (EAX == 8) {
                // Positional buffer label
                printf("taint2: positional taint label\n");
                taint2_add_taint_ram_pos(cpu, (uint64_t)buf_start, (int)buf_len, label);
            }
        } else {
            // LAVA Hypercall
            target_ulong addr = panda_virt_to_phys(cpu, env->regs[R_EAX]);
            if ((int)addr == -1) {
                // if EAX is not a valid ptr, then it is unlikely that this is a
                // PandaHypercall which requires EAX to point to a block of memory
                // defined by PandaHypercallStruct
                printf("cpuid with invalid ptr in EAX: vaddr=0x%x paddr=0x%x. "
                       "Probably not a Panda Hypercall\n",
                       (uint32_t) env->regs[R_EAX], (uint32_t) addr);
            } else if (pandalog) {
                PandaHypercallStruct phs;
                panda_virtual_memory_rw(cpu, env->regs[R_EAX], (uint8_t *) &phs,
                                        sizeof(phs), false);
                if (phs.magic == 0xabcd) {
                    if (phs.action == 11) {
                        // it's a lava query
                        taint_query_hypercall(phs);
                    } else if (phs.action == 12) {
                        // it's an attack point sighting
                        lava_attack_point(phs);
                    } else if (phs.action == 13) {
                        // it's a pri taint query point
                        // do nothing and let pri_taint with hypercall
                        // option handle it
                    } else if (phs.action == 14) {
                        // reserved for taint-exploitability
                    } else {
                        printf("Unknown hypercall action %d\n", phs.action);
                    }
                } else {
                    printf("Invalid magic value in PHS struct: %x != 0xabcd.\n", phs.magic);
                }
            }
        }
    }
    return 1;
#elif defined(TARGET_ARM)
    // R0 is command (label or query)
    // R1 is buf_start
    // R2 is length
    // R3 is offset (not currently implemented)
    CPUArchState *env = (CPUArchState *) cpu->env_ptr;
    if (env->regs[0] == 7 || env->regs[0] == 8) { // Taint label
        if (!taintEnabled) {
            printf("Taint plugin: Label operation detected @ %lu\n",
                   rr_get_guest_instr_count());
            printf("Enabling taint processing\n");
            taint2_enable_taint();
        }
        // FIXME: do labeling here.
    } else if (env->regs[0] == 9) { // Query taint on label
        if (taintEnabled) {
            printf("Taint plugin: Query operation detected @ %lu\n",
                   rr_get_guest_instr_count());
        }
    }
    return 1;
#else
    // other architectures
    return 0;
#endif
}
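// Hedged aside (not part of the original source): the i386 path above is driven by the
// guest executing a cpuid instruction with registers pre-loaded as the handler expects
// (EAX = command 7/8, EBX = buffer address, ECX = length, EDI = label).  A minimal
// guest-side sketch for a 32-bit guest under that assumption; this is illustrative,
// not PANDA's official guest utility, and label_buffer_hypercall is a made-up name.
#include <stdint.h>

static inline void label_buffer_hypercall(void *buf, uint32_t len,
                                          uint32_t label, int positional) {
    uint32_t a = positional ? 8 : 7;             // 7 = uniform, 8 = positional label
    uint32_t b = (uint32_t)(uintptr_t) buf;
    uint32_t c = len;
    uint32_t d = 0;
    // cpuid overwrites eax/ebx/ecx/edx, so they are declared as in/out operands
    asm volatile("cpuid"
                 : "+a"(a), "+b"(b), "+c"(c), "+d"(d)
                 : "D"(label)
                 : "memory");
}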
int mem_callback(CPUState *env, target_ulong pc, target_ulong addr,
                 target_ulong size, void *buf, bool is_write) {
    string_pos &sp = is_write ? write_text_tracker[pc] : read_text_tracker[pc];
    ustring_pos &usp = is_write ? write_utext_tracker[pc] : read_utext_tracker[pc];

    // ASCII
    for (unsigned int i = 0; i < size; i++) {
        uint8_t val = ((uint8_t *)buf)[i];
        if (isprint(val)) {
            sp.ch[sp.nch++] = val;
            // If we max out the string, chop it
            if (sp.nch == MAX_STRLEN - 1) {
                gzprintf(mem_report, "%llu:%.*s\n", rr_get_guest_instr_count(),
                         sp.nch, sp.ch);
                sp.nch = 0;
            }
        } else {
            // Don't bother with strings shorter than min
            if (sp.nch >= min_strlen) {
                gzprintf(mem_report, "%llu:%.*s\n", rr_get_guest_instr_count(),
                         sp.nch, sp.ch);
            }
            sp.nch = 0;
        }
    }

    // Don't consider one-byte reads/writes for UTF-16
    if (size < 2) {
        return 1;
    }

    // UTF-16-LE
    for (unsigned int i = 0; i < size; i += 2) {
        uint8_t vall = ((uint8_t *)buf)[i];
        uint8_t valh = ((uint8_t *)buf)[i+1];
        uint16_t val = (valh << 8) | vall;
        if (iswprint(val)) {
            usp.ch[usp.nch++] = val;
            // If we max out the string, chop it
            if (usp.nch == MAX_STRLEN - 1) {
                gsize bytes_written = 0;
                gchar *out_str = g_convert((gchar *)usp.ch, usp.nch*2,
                                           "UTF-8", "UTF-16LE", NULL, &bytes_written, NULL);
                gzprintf(mem_report, "%llu:%s\n", rr_get_guest_instr_count(), out_str);
                g_free(out_str);
                usp.nch = 0;
            }
        } else {
            // Don't bother with strings shorter than min
            if (usp.nch >= min_strlen) {
                gsize bytes_written = 0;
                gchar *out_str = g_convert((gchar *)usp.ch, usp.nch*2,
                                           "UTF-8", "UTF-16LE", NULL, &bytes_written, NULL);
                gzprintf(mem_report, "%llu:%s\n", rr_get_guest_instr_count(), out_str);
                g_free(out_str);
            }
            usp.nch = 0;
        }
    }

    return 1;
}
// Support all features of label and query program
void i386_hypercall_callback(CPUState *env) {
#if 0
    if (EAX == 0xabcd) {
        printf("\n hypercall pc=0x%x\n", (int) panda_current_pc(env));
        for (uint32_t i = 0; i < 8; i++) {
            printf("reg[%d] = 0x%x\n", i, (int) env->regs[i]);
        }
    }
#endif

    //printf("taint2: Hypercall! B " TARGET_FMT_lx " C " TARGET_FMT_lx " D " TARGET_FMT_lx "\n",
    //       env->regs[R_EBX], env->regs[R_ECX], env->regs[R_EDX]);

#if 0
    // Label op.
    // EBX contains addr of that data
    // ECX contains size of data
    // EDX contains the label; ~0UL for autoenc.
    if ((env->regs[R_EAX] == 7 || env->regs[R_EAX] == 8)) {
        printf("hypercall -- EAX=0x%x\n", EAX);
        target_ulong addr = panda_virt_to_phys(env, env->regs[R_EBX]);
        target_ulong size = env->regs[R_ECX];
        target_ulong label = env->regs[R_EDX];
        if (!taintEnabled) {
            printf("taint2: Label operation detected @ %lu\n", rr_get_guest_instr_count());
            printf("taint2: Labeling " TARGET_FMT_lx " to " TARGET_FMT_lx
                   " with label " TARGET_FMT_lx ".\n", addr, addr + size, label);
            __taint2_enable_taint();
        }
        LabelSetP ls = NULL;
        if (label != (target_ulong)~0UL) {
            ls = label_set_singleton(label);
        } // otherwise autoinc.
        qemu_log_mask(CPU_LOG_TAINT_OPS,
                      "label: %lx[%lx+%lx] <- %lx (%lx)\n",
                      (uint64_t)shadow->ram, (uint64_t)addr, (uint64_t)size,
                      (uint64_t)label, (uint64_t)ls);
        for (unsigned i = 0; i < size; i++) {
            //printf("label %u\n", i);
            shadow->ram->set(addr + i, label_set_singleton(i));
        }
    }
#endif

    if (pandalog && env->regs[R_EAX] == 0xabcd) {
        // LAVA Hypercall
        target_ulong addr = panda_virt_to_phys(env, ECX);
        if ((int)addr == -1) {
            printf("panda hypercall with ptr to invalid PandaHypercallStruct: "
                   "vaddr=0x%x paddr=0x%x\n", (uint32_t) ECX, (uint32_t) addr);
        } else {
            PandaHypercallStruct phs;
            panda_virtual_memory_rw(env, ECX, (uint8_t *) &phs, sizeof(phs), false);
            if (phs.action == 11) {
                // it's a lava query
                lava_taint_query(phs);
            }
            if (phs.action == 12) {
                // it's an attack point sighting
                lava_attack_point(phs);
            }
        }
    }
}