/* Makes the simulator process end the same way the specimen did.
 *
 * Does nothing if the specimen has not terminated yet.  Otherwise the specimen's termination status is examined:
 *   - normal exit:        the simulator calls exit() with the same exit status;
 *   - killed by a signal: the simulator raises that same signal on itself with the default disposition installed;
 *   - stopped by a signal: likewise, using the stop signal (WSTOPSIG) rather than the termination signal.
 *
 * If raise() returns, the signal disposition that was in effect beforehand is restored.  Any other kind of status is
 * ignored and the function simply returns. */
void
RSIM_Simulator::terminate_self()
{
    RSIM_Process *process = get_process();
    if (!process->has_terminated())
        return;

    SAWYER_THREAD_TRAITS::RecursiveLockGuard lock(class_rwlock);
    int status = process->get_termination_status();
    if (WIFEXITED(status)) {
        exit(WEXITSTATUS(status));
    } else if (WIFSIGNALED(status) || WIFSTOPPED(status)) {
        // WTERMSIG is only meaningful when WIFSIGNALED is true; a stopped status must be decoded with WSTOPSIG.
        int signo = WIFSIGNALED(status) ? WTERMSIG(status) : WSTOPSIG(status);

        struct sigaction sa, old;
        memset(&sa, 0, sizeof sa);
        sa.sa_handler = SIG_DFL;

        // sigaction() fails for signals whose disposition cannot be changed (e.g., SIGKILL, SIGSTOP), in which case "old"
        // is not filled in and must not be used to restore anything.
        bool installed = 0==sigaction(signo, &sa, &old);
        raise(signo);
        if (installed)
            sigaction(signo, &old, NULL);
    }
}
// Instruction callback.  The first time it is invoked while enabled for an instruction whose address equals "when", it
// marks itself as triggered, starts a memory transaction named "MemoryTransactionTester" on the thread's process, and
// prints the process memory map before and after doing so.  The "enabled" argument is always returned unchanged.
virtual bool operator()(bool enabled, const Args &args) {
    const bool fire = enabled && !triggered && args.insn->get_address()==when;
    if (!fire)
        return enabled;

    triggered = true;                                   // one-shot: never fire again
    RTS_Message *trace = args.thread->tracing(TRACE_MISC);
    trace->mesg("MemoryTransactionTester: triggered\n");

    RSIM_Process *process = args.thread->get_process();
    process->mem_showmap(trace, "before starting transaction:\n");
    process->mem_transaction_start("MemoryTransactionTester");
    process->mem_showmap(trace, "after starting transaction:\n");
    return enabled;
}
/* Simulator-specific system call that lets the specimen start and roll back memory transactions.
 *
 * Syscall argument 0 is the command and argument 1 is the specimen address (regs_va) of a pt_regs_32 buffer.  The
 * transaction name is derived from regs_va, so a rollback refers to the transaction started with the same address.
 *
 *   command 0 (start):    The thread's current registers are written to regs_va and a transaction is started.  The
 *                         syscall returns whatever mem_transaction_start() returned, or -EFAULT if the registers could
 *                         not be written in full.
 *   command 1 (rollback): Registers are read back from regs_va and the named transaction is rolled back.  On success
 *                         the thread's registers are re-initialized from the saved copy and the syscall returns zero,
 *                         so the specimen resumes as if returning from the "start" call but can tell the two apart.
 *                         Returns -EFAULT if the registers could not be read in full, -EINVAL if the rollback reported
 *                         zero (no such transaction), or the negative value reported by the rollback.
 *   command 2:            Not implemented yet.
 *
 * Unimplemented or unrecognized commands return -ENOSYS.  Only 32-bit specimens are handled; other word sizes reach the
 * FIXME. */
static void
syscall_RSIM_transaction(RSIM_Thread *t, int callno)
{
    int cmd = t->syscall_arg(0);
    uint32_t regs_va = t->syscall_arg(1);
    int result = -ENOSYS;
    RSIM_Process *proc = t->get_process();
    std::string transaction_name = "specimen-initiated transaction " + StringUtility::addrToString(regs_va);

    switch (cmd) {
        case 0: {                                       // start
            if (32 == proc->wordSize()) {
                pt_regs_32 regs_guest = t->get_regs().get_pt_regs_32();
                if (sizeof(regs_guest)!=proc->mem_write(&regs_guest, regs_va, sizeof regs_guest)) {
                    result = -EFAULT;
                } else {
                    result = proc->mem_transaction_start(transaction_name); // total number of transactions
                    assert(-1==result || result>0);
                }
            } else {
                FIXME("[Robb P. Matzke 2015-05-28]: 64-bit not supported yet");
            }
            break;
        }

        case 1: {                                       // rollback
            if (32 == proc->wordSize()) {
                pt_regs_32 regs_guest;
                if (sizeof(regs_guest)!=proc->mem_read(&regs_guest, regs_va, sizeof regs_guest)) {
                    result = -EFAULT;
                } else if (0>=(result = proc->mem_transaction_rollback(transaction_name))) {
                    // error; don't initialize registers; this syscall will return without doing anything
                    if (0==result)
                        result = -EINVAL;               // no such transaction
                } else {
                    // success.  The syscall will return as if we had called transaction start, but with zero to
                    // distinguish it from transaction start.
                    result = 0;
                    t->init_regs(PtRegs(regs_guest));
                }
            } else {
                FIXME("[Robb P. Matzke 2015-05-28]: 64-bit not supported yet");
            }
            break;
        }

        case 2:                                         // not implemented yet
            break;

        default:                                        // unrecognized command; result is still -ENOSYS
            break;
    }

    t->syscall_return(result);
}
/* Class method. This is a signal handler -- do not use thread synchronization or functions that are not async signal safe. */ void RSIM_Simulator::signal_receiver(int signo, siginfo_t *info, void*) { /* In order for this signal handler to be installed, there must be an active simulator. This is because the activate() * method installs the signal handler and the deactivate() removes it. The active_sim is set before the signal handler is * installed and reset after it is removed. */ RSIM_Simulator *simulator = active_sim; assert(simulator!=NULL); RSIM_Process *process = simulator->get_process(); assert(process!=NULL); #if 1 /* WARNING: this is not async signal safe, but useful for debugging */ char buf[1024]; sprintf(buf, "PID %d received signal %d with info=%p\n", getpid(), signo, info); write(2, buf, strlen(buf)); sprintf(buf, " info.si_signo = %d\n", info->si_signo); write(2, buf, strlen(buf)); sprintf(buf, " info.si_errno = %d\n", info->si_errno); write(2, buf, strlen(buf)); sprintf(buf, " info.si_code = %d\n", info->si_code); write(2, buf, strlen(buf)); sprintf(buf, " info.si_pid = %d\n", info->si_pid); write(2, buf, strlen(buf)); sprintf(buf, " info.si_uid = %u\n", info->si_uid); write(2, buf, strlen(buf)); sprintf(buf, " info.si_int = %u\n", info->si_int); write(2, buf, strlen(buf)); sprintf(buf, " info.si_ptr = %p\n", info->si_ptr); write(2, buf, strlen(buf)); sprintf(buf, " info.si_status = %u\n", info->si_status); write(2, buf, strlen(buf)); sprintf(buf, " info.si_utime = %ld\n", info->si_utime); write(2, buf, strlen(buf)); sprintf(buf, " info.si_stime = %ld\n", info->si_stime); write(2, buf, strlen(buf)); sprintf(buf, " info.si_addr = %p\n", info->si_addr); write(2, buf, strlen(buf)); sprintf(buf, " info.si_band = %ld\n", info->si_band); write(2, buf, strlen(buf)); sprintf(buf, " info.si_fd = %d\n", info->si_fd); write(2, buf, strlen(buf)); #endif process->signal_enqueue(RSIM_SignalHandling::mk(info)); }
/** Main driving function for clone detection. This is the class that chooses inputs, runs each function, and looks at the
 *  outputs to decide how to partition the functions. It does this repeatedly in order to build a PartitionForest. The
 *  analyze() method is the main entry point. */
class CloneDetector {
protected:
    static const char *name;                    /**< For --debug output. */
    RSIM_Thread *thread;                        /**< Thread where analysis is running. */
    PartitionForest partition;                  /**< Partitioning of functions into similarity sets. */
    enum { MAX_ITERATIONS = 10 };               /**< Maximum number of times we run the functions; max number of input sets. */
    enum { MAX_SIMSET_SIZE = 3 };               /**< Any similarity set containing more than this many functions will be partitioned. */

public:
    /** Constructs a detector that performs its analysis in the specified thread. */
    CloneDetector(RSIM_Thread *thread): thread(thread) {}

    // Allocate a page of memory in the process address space.  Maps 4096 bytes with read/write protection and the
    // MAP_ANONYMOUS flag in the process that owns the analysis thread, passing "hint" as the requested address.  Returns
    // the address reported by mem_map(); the assertion rejects values in the range [-256, -1] when viewed as signed.
    rose_addr_t allocate_page(rose_addr_t hint=0) {
        RSIM_Process *proc = thread->get_process();
        rose_addr_t addr = proc->mem_map(hint, 4096, MemoryMap::MM_PROT_RW, MAP_ANONYMOUS, 0, -1);
        assert((int64_t)addr>=0 || (int64_t)addr<-256); // disallow error numbers
        return addr;
    }

    // Obtain a memory map for disassembly.  The result is a newly allocated shallow copy of the process memory map which
    // the caller must delete.
    MemoryMap *disassembly_map(RSIM_Process *proc) {
        MemoryMap *map = new MemoryMap(proc->get_memory(), MemoryMap::COPY_SHALLOW);
        map->prune(MemoryMap::MM_PROT_READ);    // don't let the disassembler read unreadable memory, else it will segfault

        // Removes execute permission for any segment whose debug name does not contain the name of the executable.  When
        // comparing two different executables for clones, we probably don't need to compare code that came from dynamically
        // linked libraries since they will be identical in both executables.
        struct Pruner: MemoryMap::Visitor {
            std::string exename;                // name that a segment must contain in order to stay executable
            Pruner(const std::string &exename): exename(exename) {}
            virtual bool operator()(const MemoryMap*, const Extent&, const MemoryMap::Segment &segment_) {
                // The visitor interface hands us a const segment, but we need to change its permissions.
                MemoryMap::Segment *segment = const_cast<MemoryMap::Segment*>(&segment_);
                if (segment->get_name().find(exename)==std::string::npos) {
                    unsigned p = segment->get_mapperms();
                    p &= ~MemoryMap::MM_PROT_EXEC;
                    segment->set_mapperms(p);
                }
                return true;
            }
        } pruner(proc->get_exename());
        map->traverse(pruner);
        return map;
    }

    // Get all the functions defined for this process image.  We do this by disassembling the entire process executable memory
    // and using CFG analysis to figure out where the functions are located.  The memory map used for the disassembly is
    // reported through "m" and is deleted before returning.
    Functions find_functions(RTS_Message *m, RSIM_Process *proc) {
        m->mesg("%s triggered; disassembling entire specimen image...\n", name);
        MemoryMap *map = disassembly_map(proc);
        std::ostringstream ss;
        map->dump(ss, " ");
        m->mesg("%s: using this memory map for disassembly:\n%s", name, ss.str().c_str());
        SgAsmBlock *gblk = proc->disassemble(false/*take no shortcuts*/, map);
        delete map; map=NULL;
        std::vector<SgAsmFunction*> functions = SageInterface::querySubTree<SgAsmFunction>(gblk);
#if 0 /*DEBUGGING [Robb P. Matzke 2013-02-12]*/
        // Prune the function list to contain only what we want.
        for (std::vector<SgAsmFunction*>::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
            if ((*fi)->get_name().compare("_Z1fRi")!=0)
                *fi = NULL;
        }
        functions.erase(std::remove(functions.begin(), functions.end(), (SgAsmFunction*)NULL), functions.end());
#endif
        return Functions(functions.begin(), functions.end());
    }

    // Perform a pointer-detection analysis on each function.  We'll need the results in order to determine whether a function
    // input should consume a pointer or a non-pointer from the input value set.  The return value maps each function to a
    // copy of the detector that analyzed it.
    //
    // Note that the "thread" parameter shadows the data member of the same name.
    // NOTE(review): insn_providor is allocated with "new" and never deleted in this function; the detectors are
    // constructed with it, so presumably it must outlive the returned map -- confirm who owns it.
    typedef std::map<SgAsmFunction*, CloneDetection::PointerDetector> PointerDetectors;
    PointerDetectors detect_pointers(RTS_Message *m, RSIM_Thread *thread, const Functions &functions) {
        // Choose an SMT solver.  This is completely optional.  Pointer detection still seems to work fairly well (and much,
        // much faster) without an SMT solver.
        SMTSolver *solver = NULL;
#if 0 // optional code
        if (YicesSolver::available_linkage())
            solver = new YicesSolver;
#endif
        PointerDetectors retval;
        CloneDetection::InstructionProvidor *insn_providor = new CloneDetection::InstructionProvidor(thread->get_process());
        for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
            m->mesg("%s: performing pointer detection analysis for \"%s\" at 0x%08"PRIx64,
                    name, (*fi)->get_name().c_str(), (*fi)->get_entry_va());
            CloneDetection::PointerDetector pd(insn_providor, solver);
            // Both the stack pointer and the frame pointer start out at the policy's initial stack address.
            pd.initial_state().registers.gpr[x86_gpr_sp] = SYMBOLIC_VALUE<32>(thread->policy.INITIAL_STACK);
            pd.initial_state().registers.gpr[x86_gpr_bp] = SYMBOLIC_VALUE<32>(thread->policy.INITIAL_STACK);
            //pd.set_debug(stderr);
            pd.analyze(*fi);
            retval.insert(std::make_pair(*fi, pd));
#if 1 /*DEBUGGING [Robb P. Matzke 2013-01-24]*/
            // List every pointer that was detected, but only when the message object has a file.
            if (m->get_file()) {
                const CloneDetection::PointerDetector::Pointers plist = pd.get_pointers();
                for (CloneDetection::PointerDetector::Pointers::const_iterator pi=plist.begin(); pi!=plist.end(); ++pi) {
                    std::ostringstream ss;
                    if (pi->type & BinaryAnalysis::PointerAnalysis::DATA_PTR)
                        ss <<"data ";
                    if (pi->type & BinaryAnalysis::PointerAnalysis::CODE_PTR)
                        ss <<"code ";
                    ss <<"pointer at " <<pi->address;
                    m->mesg(" %s", ss.str().c_str());
                }
            }
#endif
        }
        return retval;
    }

    // Randomly choose a set of input values.  The set will consist of the specified number of non-pointers and pointers.  The
    // non-pointer values are chosen randomly, but limited to a certain range (rand() modulo integer_modulus).  The pointers
    // are chosen randomly to be null or non-null and the non-null values each have one page allocated via simulated mmap()
    // (i.e., the non-null values themselves are not actually random).
    InputValues choose_inputs(size_t nintegers, size_t npointers) {
        static unsigned integer_modulus = 256;  // arbitrary;
        static unsigned nonnull_denom = 3;      // probability of a non-null pointer is 1/N
        CloneDetection::InputValues inputs;
        for (size_t i=0; i<nintegers; ++i)
            inputs.add_integer(rand() % integer_modulus);
        for (size_t i=0; i<npointers; ++i)
            inputs.add_pointer(rand()%nonnull_denom ? 0 : allocate_page()); // a page is allocated only when the remainder is zero
        return inputs;
    }

    // Run a single function, look at its outputs, and insert it into the correct place in the PartitionForest.
    // NOTE(review): the Outputs object returned by fuzz_test() is not released here -- confirm who owns it.
    void insert_function(SgAsmFunction *func, InputValues &inputs, CloneDetection::PointerDetector &pointers,
                         PartitionForest &partition, PartitionForest::Vertex *parent) {
        CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *outputs = fuzz_test(func, inputs, pointers);
        OutputValues concrete_outputs = outputs->get_values();
        partition.insert(func, concrete_outputs, parent);
    }

    // Analyze a single function by running it with the specified inputs and collecting its outputs.  The thread's registers
    // are saved beforehand and restored afterward, and the run is bracketed by a memory transaction that is rolled back.
    // NOTE(review): an exception other than the three caught below propagates to the caller before the registers are
    // restored and before the memory transaction is rolled back.
    CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *fuzz_test(SgAsmFunction *function, CloneDetection::InputValues &inputs,
                                                             const CloneDetection::PointerDetector &pointers) {
        RSIM_Process *proc = thread->get_process();
        RTS_Message *m = thread->tracing(TRACE_MISC);
        m->mesg("==========================================================================================");
        m->mesg("%s: fuzz testing function \"%s\" at 0x%08"PRIx64, name, function->get_name().c_str(), function->get_entry_va());

        // Not sure if saving/restoring memory state is necessary.  I don't think machine memory is adjusted by the semantic
        // policy's writeMemory() or readMemory() operations after the policy is triggered to enable our analysis.  But it
        // shouldn't hurt to save/restore anyway, and it's fast. [Robb Matzke 2013-01-14]
        proc->mem_transaction_start(name);
        pt_regs_32 saved_regs = thread->get_regs();

        // Trigger the analysis, resetting it to start executing the specified function using the input values and pointer
        // variable addresses we selected previously.
        thread->policy.trigger(function->get_entry_va(), &inputs, &pointers);

        // "Run" the function using our semantic policy.  The function will not "run" in the normal sense since: since our
        // policy has been triggered, memory access, function calls, system calls, etc. will all operate differently.  See
        // CloneDetectionSemantics.h and CloneDetectionTpl.h for details.
        try {
            thread->main();
        } catch (const Disassembler::Exception &e) {
            // Probably due to the analyzed function's RET instruction, but could be from other things as well.  In any case, we
            // stop analyzing the function when this happens.
            m->mesg("%s: function disassembly failed at 0x%08"PRIx64": %s", name, e.ip, e.mesg.c_str());
        } catch (const CloneDetection::InsnLimitException &e) {
            // The analysis might be in an infinite loop, such as when analyzing "void f() { while(1); }"
            m->mesg("%s: %s", name, e.mesg.c_str());
        } catch (const RSIM_Semantics::InnerPolicy<>::Halt &e) {
            // The x86 HLT instruction appears in some functions (like _start) as a failsafe to terminate a process.  We need
            // to intercept it and terminate only the function analysis.
            m->mesg("%s: function executed HLT instruction at 0x%08"PRIx64, name, e.ip);
        }

        // Gather the function's outputs before restoring machine state.
        bool verbose = true;
        CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *outputs = thread->policy.get_outputs(verbose);
        thread->init_regs(saved_regs);
        proc->mem_transaction_rollback(name);
        return outputs;
    }
std::string RSIM_Simulator::describe_termination() { std::ostringstream m; RSIM_Process *process = get_process(); if (process->has_terminated()) { int status = process->get_termination_status(); if (WIFEXITED(status)) { mfprintf(m)("specimen %d exited with status %d", getpid(), WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { mfprintf(m)("specimen %d exited due to signal %s %s", getpid(), strsignal(WTERMSIG(status)), WCOREDUMP(status)?" core dumped":""); } else if (WIFSTOPPED(status)) { mfprintf(m)("specimen %d is stopped due to signal %s", getpid(), strsignal(WSTOPSIG(status))); } else { mfprintf(m)("specimen %d has unknown termination status: 0x%08x", getpid(), status); } } else { mfprintf(m)("specimen %d has not exited yet", getpid()); } return m.str(); }
int RSIM_Simulator::main_loop() { RSIM_Process *process = get_process(); RSIM_Thread *thread = process->get_main_thread(); /* The simulator's main thread is executed by the calling thread because the simulator's main thread must be a thread group * leader. */ bool cb_process_status = process->get_callbacks().call_process_callbacks(RSIM_Callbacks::BEFORE, process, RSIM_Callbacks::ProcessCallback::START, true); // The process' main thread has already been created and initialized but has not started running yet. thread->start(); thread->waitForState(RSIM_Thread::TERMINATED); process->get_callbacks().call_process_callbacks(RSIM_Callbacks::AFTER, process, RSIM_Callbacks::ProcessCallback::FINISH, cb_process_status); return process->get_termination_status(); }
int main(int argc, char *argv[], char *envp[]) { rose_addr_t trigger_va = 0; // address at which disassembly is triggered std::string trigger_func = "oep"; // name of function at which disassembly is triggered (oep=original entry point) AsmUnparser::Organization org = AsmUnparser::ORGANIZED_BY_AST; // Parse arguments that we need for ourself. for (int i=1; i<argc; i++) { if (!strncmp(argv[i], "--trigger=", 10)) { // Address (or name of function) that will trigger the disassembly. When the EIP register contains this value then // disassembly will run and the specimen will be terminated. char *rest; trigger_func = ""; trigger_va = strtoull(argv[i]+10, &rest, 0); if (*rest) { trigger_va = 0; trigger_func = argv[i]+10; } memmove(argv+i, argv+i+1, (argc-- - i)*sizeof(*argv)); --i; } else if (!strcmp(argv[i], "--linear")) { org = AsmUnparser::ORGANIZED_BY_ADDRESS; memmove(argv+i, argv+i+1, (argc-- - i)*sizeof(*argv)); --i; } } // Initialize the simulator RSIM_Linux32 sim; int n = sim.configure(argc, argv, envp); sim.install_callback(new RSIM_Tools::UnhandledInstruction); sim.exec(argc-n, argv+n); RSIM_Process *process = sim.get_process(); RSIM_Thread *main_thread = process->get_main_thread(); // Find the trigger address if (0==trigger_va) { if (trigger_func.empty() || !trigger_func.compare("oep")) { trigger_va = process->get_ep_orig_va(); } else if (0==(trigger_va = RSIM_Tools::FunctionFinder().address(process->headers(), trigger_func))) { std::cerr <<argv[0] <<": unable to locate address of function: " <<trigger_func <<"\n"; exit(1); } } // Install our disassembler callback to the main thread. We don't use RSIM_Tools::MemoryDisassembler because we want to // cancel the specimen once we disassemble. 
main_thread->install_callback(new MyDisassembler(trigger_va, org)); // Allow the specimen to run until the disassembly is triggered bool disassembled = false; sim.activate(); try { sim.main_loop(); } catch (MyDisassembler*) { disassembled = true; } if (!disassembled) { std::cerr <<argv[0] <<": specimen ran to completion without triggering a disassembly.\n"; exit(1); } return 0; }
// Allocate a page of memory in the process address space. rose_addr_t allocate_page(rose_addr_t hint=0) { RSIM_Process *proc = thread->get_process(); rose_addr_t addr = proc->mem_map(hint, 4096, MemoryMap::MM_PROT_RW, MAP_ANONYMOUS, 0, -1); assert((int64_t)addr>=0 || (int64_t)addr<-256); // disallow error numbers return addr; }