// Emit a summary of table usage to the trace facility: a header line followed by one line for every
// entry of `seen` whose read count is greater than zero.  Entries that were never read are omitted.
void show_results()
{
    trace->mesg("CrcTable results: the following table entries were read by the specimen:");
    for (size_t entry = 0; entry < seen.size(); ++entry) {
        if (!(seen[entry] > 0))
            continue;                                   // never read; nothing to report
        const char *plural = (1 == seen[entry]) ? "" : "s";
        trace->mesg("CrcTable results: entry #%zu read %zu time%s", entry, seen[entry], plural);
    }
}
// Per-instruction callback.  The first time an instruction at address `when` is seen the callback arms
// itself (sets `triggered`) and calls initialize_state().  From then on every x86 instruction is traced,
// fed to `semantics`, and followed by two trace lines: the size of the policy's memory-cell list and the
// statistics reported by the `yices` solver object (which are reset after being reported).
// Returns `enabled` unchanged.
virtual bool operator()(bool enabled, const Args &args) /*overrides*/
{
    if (!enabled)
        return enabled;

    // Arm on the first visit to the trigger address.
    if (!triggered && args.insn->get_address()==when) {
        triggered = true;
        initialize_state(args.thread);
    }

    // Only x86 instructions are processed, and only once armed.
    SgAsmX86Instruction *x86 = isSgAsmX86Instruction(args.insn);
    if (!triggered || !x86)
        return enabled;

    RTS_Message *log = args.thread->tracing(TRACE_MISC);
    log->mesg("%s: %s", name, unparseInstructionWithAddress(x86).c_str());

    // Set the policy's instruction pointer to this instruction's address before processing it.
    policy.get_state().registers.ip = SymbolicSemantics::ValueType<32>(x86->get_address());
    semantics.processInstruction(x86);

    // Report resource usage for this instruction, then reset the solver statistics.
    rose::BinaryAnalysis::SMTSolver::Stats smt_stats = yices.get_stats();
    log->mesg("%s: mem-cell list size: %zu elements\n", name, policy.get_state().memory.cell_list.size());
    log->mesg("%s: SMT stats: ncalls=%zu, input=%zu bytes, output=%zu bytes\n",
              name, smt_stats.ncalls, smt_stats.input_size, smt_stats.output_size);
    yices.reset_stats();

#if 0
    std::ostringstream ss;
    ss <<policy;
    log->mesg("%s", ss.str().c_str());
#endif

    return enabled;
}
// Per-instruction callback.  Arms itself (sets `triggered` and calls initialize_register_intervals()) the
// first time an instruction at address `when` is seen.  Once armed, each x86 instruction is traced, fed to
// `semantics`, and the whole policy is then streamed into the trace output.  Returns `enabled` unchanged.
virtual bool operator()(bool enabled, const Args &args) /*overrides*/
{
    if (!enabled)
        return enabled;

    // Arm on the first visit to the trigger address.
    if (!triggered && args.insn->get_address()==when) {
        triggered = true;
        initialize_register_intervals(args.thread);
    }

    // Only x86 instructions are processed, and only once armed.
    SgAsmX86Instruction *x86 = isSgAsmX86Instruction(args.insn);
    if (!triggered || !x86)
        return enabled;

    RTS_Message *log = args.thread->tracing(TRACE_MISC);
    log->mesg("%s: %s", name, unparseInstructionWithAddress(x86).c_str());
    semantics.processInstruction(x86);

    // Dump the policy after the instruction has been processed.
    std::ostringstream dump;
    dump <<policy;
    log->mesg("%s", dump.str().c_str());

    return enabled;
}
/** Main driving function for clone detection. This is the class that chooses inputs, runs each function, and looks at the * outputs to decide how to partition the functions. It does this repeatedly in order to build a PartitionForest. The * analyze() method is the main entry point. */ class CloneDetector { protected: static const char *name; /**< For --debug output. */ RSIM_Thread *thread; /**< Thread where analysis is running. */ PartitionForest partition; /**< Partitioning of functions into similarity sets. */ enum { MAX_ITERATIONS = 10 }; /**< Maximum number of times we run the functions; max number of input sets. */ enum { MAX_SIMSET_SIZE = 3 }; /**< Any similarity set containing more than this many functions will be partitioned. */ public: CloneDetector(RSIM_Thread *thread): thread(thread) {} // Allocate a page of memory in the process address space. rose_addr_t allocate_page(rose_addr_t hint=0) { RSIM_Process *proc = thread->get_process(); rose_addr_t addr = proc->mem_map(hint, 4096, MemoryMap::MM_PROT_RW, MAP_ANONYMOUS, 0, -1); assert((int64_t)addr>=0 || (int64_t)addr<-256); // disallow error numbers return addr; } // Obtain a memory map for disassembly MemoryMap *disassembly_map(RSIM_Process *proc) { MemoryMap *map = new MemoryMap(proc->get_memory(), MemoryMap::COPY_SHALLOW); map->prune(MemoryMap::MM_PROT_READ); // don't let the disassembler read unreadable memory, else it will segfault // Removes execute permission for any segment whose debug name does not contain the name of the executable. When // comparing two different executables for clones, we probably don't need to compare code that came from dynamically // linked libraries since they will be identical in both executables. 
struct Pruner: MemoryMap::Visitor { std::string exename; Pruner(const std::string &exename): exename(exename) {} virtual bool operator()(const MemoryMap*, const Extent&, const MemoryMap::Segment &segment_) { MemoryMap::Segment *segment = const_cast<MemoryMap::Segment*>(&segment_); if (segment->get_name().find(exename)==std::string::npos) { unsigned p = segment->get_mapperms(); p &= ~MemoryMap::MM_PROT_EXEC; segment->set_mapperms(p); } return true; } } pruner(proc->get_exename()); map->traverse(pruner); return map; } // Get all the functions defined for this process image. We do this by disassembling the entire process executable memory // and using CFG analysis to figure out where the functions are located. Functions find_functions(RTS_Message *m, RSIM_Process *proc) { m->mesg("%s triggered; disassembling entire specimen image...\n", name); MemoryMap *map = disassembly_map(proc); std::ostringstream ss; map->dump(ss, " "); m->mesg("%s: using this memory map for disassembly:\n%s", name, ss.str().c_str()); SgAsmBlock *gblk = proc->disassemble(false/*take no shortcuts*/, map); delete map; map=NULL; std::vector<SgAsmFunction*> functions = SageInterface::querySubTree<SgAsmFunction>(gblk); #if 0 /*DEBUGGING [Robb P. Matzke 2013-02-12]*/ // Prune the function list to contain only what we want. for (std::vector<SgAsmFunction*>::iterator fi=functions.begin(); fi!=functions.end(); ++fi) { if ((*fi)->get_name().compare("_Z1fRi")!=0) *fi = NULL; } functions.erase(std::remove(functions.begin(), functions.end(), (SgAsmFunction*)NULL), functions.end()); #endif return Functions(functions.begin(), functions.end()); } // Perform a pointer-detection analysis on each function. We'll need the results in order to determine whether a function // input should consume a pointer or a non-pointer from the input value set. 
typedef std::map<SgAsmFunction*, CloneDetection::PointerDetector> PointerDetectors; PointerDetectors detect_pointers(RTS_Message *m, RSIM_Thread *thread, const Functions &functions) { // Choose an SMT solver. This is completely optional. Pointer detection still seems to work fairly well (and much, // much faster) without an SMT solver. SMTSolver *solver = NULL; #if 0 // optional code if (YicesSolver::available_linkage()) solver = new YicesSolver; #endif PointerDetectors retval; CloneDetection::InstructionProvidor *insn_providor = new CloneDetection::InstructionProvidor(thread->get_process()); for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) { m->mesg("%s: performing pointer detection analysis for \"%s\" at 0x%08"PRIx64, name, (*fi)->get_name().c_str(), (*fi)->get_entry_va()); CloneDetection::PointerDetector pd(insn_providor, solver); pd.initial_state().registers.gpr[x86_gpr_sp] = SYMBOLIC_VALUE<32>(thread->policy.INITIAL_STACK); pd.initial_state().registers.gpr[x86_gpr_bp] = SYMBOLIC_VALUE<32>(thread->policy.INITIAL_STACK); //pd.set_debug(stderr); pd.analyze(*fi); retval.insert(std::make_pair(*fi, pd)); #if 1 /*DEBUGGING [Robb P. Matzke 2013-01-24]*/ if (m->get_file()) { const CloneDetection::PointerDetector::Pointers plist = pd.get_pointers(); for (CloneDetection::PointerDetector::Pointers::const_iterator pi=plist.begin(); pi!=plist.end(); ++pi) { std::ostringstream ss; if (pi->type & BinaryAnalysis::PointerAnalysis::DATA_PTR) ss <<"data "; if (pi->type & BinaryAnalysis::PointerAnalysis::CODE_PTR) ss <<"code "; ss <<"pointer at " <<pi->address; m->mesg(" %s", ss.str().c_str()); } } #endif } return retval; } // Randomly choose a set of input values. The set will consist of the specified number of non-pointers and pointers. The // non-pointer values are chosen randomly, but limited to a certain range. 
The pointers are chosen randomly to be null or // non-null and the non-null values each have one page allocated via simulated mmap() (i.e., the non-null values themselves // are not actually random). InputValues choose_inputs(size_t nintegers, size_t npointers) { static unsigned integer_modulus = 256; // arbitrary; static unsigned nonnull_denom = 3; // probability of a non-null pointer is 1/N CloneDetection::InputValues inputs; for (size_t i=0; i<nintegers; ++i) inputs.add_integer(rand() % integer_modulus); for (size_t i=0; i<npointers; ++i) inputs.add_pointer(rand()%nonnull_denom ? 0 : allocate_page()); return inputs; } // Run a single function, look at its outputs, and insert it into the correct place in the PartitionForest void insert_function(SgAsmFunction *func, InputValues &inputs, CloneDetection::PointerDetector &pointers, PartitionForest &partition, PartitionForest::Vertex *parent) { CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *outputs = fuzz_test(func, inputs, pointers); OutputValues concrete_outputs = outputs->get_values(); partition.insert(func, concrete_outputs, parent); } // Analyze a single function by running it with the specified inputs and collecting its outputs. */ CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *fuzz_test(SgAsmFunction *function, CloneDetection::InputValues &inputs, const CloneDetection::PointerDetector &pointers) { RSIM_Process *proc = thread->get_process(); RTS_Message *m = thread->tracing(TRACE_MISC); m->mesg("=========================================================================================="); m->mesg("%s: fuzz testing function \"%s\" at 0x%08"PRIx64, name, function->get_name().c_str(), function->get_entry_va()); // Not sure if saving/restoring memory state is necessary. I don't thing machine memory is adjusted by the semantic // policy's writeMemory() or readMemory() operations after the policy is triggered to enable our analysis. But it // shouldn't hurt to save/restore anyway, and it's fast. 
[Robb Matzke 2013-01-14] proc->mem_transaction_start(name); pt_regs_32 saved_regs = thread->get_regs(); // Trigger the analysis, resetting it to start executing the specified function using the input values and pointer // variable addresses we selected previously. thread->policy.trigger(function->get_entry_va(), &inputs, &pointers); // "Run" the function using our semantic policy. The function will not "run" in the normal sense since: since our // policy has been triggered, memory access, function calls, system calls, etc. will all operate differently. See // CloneDetectionSemantics.h and CloneDetectionTpl.h for details. try { thread->main(); } catch (const Disassembler::Exception &e) { // Probably due to the analyzed function's RET instruction, but could be from other things as well. In any case, we // stop analyzing the function when this happens. m->mesg("%s: function disassembly failed at 0x%08"PRIx64": %s", name, e.ip, e.mesg.c_str()); } catch (const CloneDetection::InsnLimitException &e) { // The analysis might be in an infinite loop, such as when analyzing "void f() { while(1); }" m->mesg("%s: %s", name, e.mesg.c_str()); } catch (const RSIM_Semantics::InnerPolicy<>::Halt &e) { // The x86 HLT instruction appears in some functions (like _start) as a failsafe to terminate a process. We need // to intercept it and terminate only the function analysis. m->mesg("%s: function executed HLT instruction at 0x%08"PRIx64, name, e.ip); } // Gather the function's outputs before restoring machine state. bool verbose = true; CloneDetection::Outputs<RSIM_SEMANTICS_VTYPE> *outputs = thread->policy.get_outputs(verbose); thread->init_regs(saved_regs); proc->mem_transaction_rollback(name); return outputs; }
// Instruction callback that fires once: the first time an enabled callback sees an instruction at address
// `when`, it records that it has triggered, shows the process memory map, starts a memory transaction named
// "MemoryTransactionTester", and shows the memory map again.  Returns `enabled` unchanged.
virtual bool operator()(bool enabled, const Args &args)
{
    const bool fire = enabled && !triggered && args.insn->get_address()==when;
    if (!fire)
        return enabled;

    triggered = true;
    RTS_Message *log = args.thread->tracing(TRACE_MISC);
    log->mesg("MemoryTransactionTester: triggered\n");

    RSIM_Process *process = args.thread->get_process();
    process->mem_showmap(log, "before starting transaction:\n");
    process->mem_transaction_start("MemoryTransactionTester");
    process->mem_showmap(log, "after starting transaction:\n");

    return enabled;
}
// Memory-access callback that services reads of the CRC table.  The callback acts only when it is enabled
// and the access is a 4-byte read (args.how==MemoryMap::READABLE) whose address lies inside
// [table_va, table_va+sizeof(table)) at a multiple of four bytes from table_va.  In that case it traces the
// entry, increments the entry's counter in `seen`, copies the four bytes of the entry into args.buffer,
// reports four bytes transferred via args.nbytes_xfer, and returns false.  Otherwise `enabled` is returned
// unchanged.
virtual bool operator()(bool enabled, const Args &args) {
    // Trigger only if we're reading a table entry
    if (enabled && args.how==MemoryMap::READABLE && 4==args.nbytes &&
        args.va>=table_va && args.va<table_va+sizeof(table) && 0==(args.va-table_va)%4) {
        size_t idx = (args.va-table_va)/4;
        // A space is required between the string literal and PRIx32: in C++11 and later the adjacent form is
        // parsed as a user-defined-literal suffix.
        trace->mesg("CrcTable: read entry %zu = 0x%08" PRIx32, idx, table[idx]);
        seen[idx] += 1;
        memcpy(args.buffer, table+idx, 4);
        args.nbytes_xfer = 4;
        enabled = false;
    }
    return enabled;
}
// Instruction callback that, when enabled and the instruction address equals `trigger_va`, deactivates the
// simulator, disassembles the process, unparses the result to std::cout using the organization stored in
// `org`, and then throws `this` to terminate the specimen.  For every other instruction it returns
// `enabled` unchanged.
virtual bool operator()(bool enabled, const Args &args)
{
    if (!enabled || args.insn->get_address()!=trigger_va)
        return enabled;

    args.thread->get_process()->get_simulator()->deactivate();

    RTS_Message *log = args.thread->tracing(TRACE_MISC);
    log->mesg("disassembly triggered; disassembling now...\n");

    // full disassembly with partitioning
    SgAsmBlock *gblk = args.thread->get_process()->disassemble(false);

    AsmUnparser unparser;
    unparser.set_organization(org);
    unparser.unparse(std::cout, gblk);

    throw this; // to terminate specimen
}
// The actual analysis, triggered when we reach the specified execution address... virtual bool operator()(bool enabled, const Args &args) { using namespace rose::BinaryAnalysis::InstructionSemantics; if (enabled && args.insn->get_address()==trigger_addr) { RTS_Message *trace = args.thread->tracing(TRACE_MISC); trace->mesg("Analysis triggered: analyzing function at 0x%08"PRIx64, analysis_addr); // An SMT solver is necessary for this example to work correctly. ROSE uses the SMT solver to try to figure out // when memory address expressions might be aliases. Since we're initializing some memory (the function argument) // using an address expression that we build here, ROSE needs to be able to figure out when the program also tries // to access the same memory but using an address expression that is generated by the analysis itself. ROSE should // have been configured with "--with-yices=/full/path/to/yices/installation". If not, you'll get a failed // assertion when ROSE tries to use the solver. rose::BinaryAnalysis::YicesSolver smt_solver; smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE); //smt_solver.set_debug(stdout); // We deactive the simulator while we're doing this analysis. If the simulator remains activated, then the SIGCHLD // that are generated from running the Yices executable will be sent to the specimen. That probably wouldn't cause // problems for the specimen, but the messages are anoying. args.thread->get_process()->get_simulator()->deactivate(); // Create the policy that holds the analysis state which is modified by each instruction. Then plug the policy // into the X86InstructionSemantics to which we'll feed each instruction. 
SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver); X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>, SymbolicSemantics::ValueType> semantics(policy); // The top of the stack contains the (unknown) return address. The value above that (in memory) is the address of // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a // concrete value). The contents of the buffer are unknown. Process memory is maintained by the policy we created // above, so none of these memory writes are actually affecting the specimen's state in the simulator. policy.writeRegister("esp", policy.number<32>(4000)); SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4)); SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4)); policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_()); // ptr to buffer policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_()); // bytes in buffer // Run the analysis until we can't figure out what instruction is next. If we set things up correctly, the // simulation will stop when we hit the RET instruction to return from this function. policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr)); while (policy.readRegister<32>("eip").is_known()) { uint64_t va = policy.readRegister<32>("eip").known_value(); SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va)); assert(insn!=NULL); //std::cout <<policy <<unparseInstructionWithAddress(insn) <<"\n"; semantics.processInstruction(insn); } // Show the value of the EAX register since this is where GCC puts the function's return value. If we did things // right, the return value should depend only on the unknown bytes from the beginning of the buffer. 
SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax"); std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables(); { std::ostringstream s; s <<"Analysis: symbolic return value is " <<result <<"\n" <<"Analysis: return value has " <<vars.size() <<" variables:"; for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) s <<" " <<*vi; trace->mesg("%s", s.str().c_str()); } // Now give values to those two bytes and solve the equation for the result using an SMT solver. if (!result.is_known()) { trace->mesg("Analysis: setting variables (buffer bytes) to 'x' and evaluating the function symbolically..."); using namespace rose::BinaryAnalysis::InsnSemanticsExpr; std::vector<TreeNodePtr> exprs; LeafNodePtr result_var = LeafNode::create_variable(32); TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var); exprs.push_back(expr); for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x')); exprs.push_back(expr); } if (rose::BinaryAnalysis::SMTSolver::SAT_YES==smt_solver.satisfiable(exprs)) { LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode(); if (!result_value) { trace->mesg("Analysis: evaluation result could not be determined. ERROR!"); } else if (!result_value->is_known()) { trace->mesg("Analysis: evaluation result is not constant. ERROR!"); } else { trace->mesg("Analysis: evaluation result is 0x%08"PRIx64, result_value->get_value()); } } else { trace->mesg("Analysis: expression is not satisfiable (or unknown). ERROR!"); } } // Now try going the other direction. Set the return expression to a value and try to discover what two bytes // would satisfy the equation. 
if (!result.is_known()) { trace->mesg("Analysis: setting result equal to 0xff015e7c and trying to find inputs..."); using namespace rose::BinaryAnalysis::InsnSemanticsExpr; TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), LeafNode::create_integer(32, 0xff015e7c)); if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(expr)) { for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode(); if (var_val && var_val->is_known()) trace->mesg("Analysis: v%"PRIu64" = %"PRIu64" %c", (*vi)->get_name(), var_val->get_value(), isprint(var_val->get_value())?(char)var_val->get_value():' '); } } else { trace->mesg("Analysis: expression is not satisfiable (or unknown). No solutions."); } } // Reactivate the simulator in case we want to continue simulating. args.thread->get_process()->get_simulator()->activate(); throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator } return enabled; }
// Detect functions that are semantically similar by running multiple iterations of partition_functions(). void analyze() { RTS_Message *m = thread->tracing(TRACE_MISC); Functions functions = find_functions(m, thread->get_process()); PointerDetectors pointers = detect_pointers(m, thread, functions); PartitionForest partition; while (partition.nlevels()<MAX_ITERATIONS) { InputValues inputs = choose_inputs(3, 3); size_t level = partition.new_level(inputs); m->mesg("####################################################################################################"); m->mesg("%s: fuzz testing %zu function%s at level %zu", name, functions.size(), 1==functions.size()?"":"s", level); m->mesg("%s: using these input values:\n%s", name, inputs.toString().c_str()); if (0==level) { partition_functions(m, partition, functions, pointers, inputs, NULL); } else { const PartitionForest::Vertices &parent_vertices = partition.vertices_at_level(level-1); for (PartitionForest::Vertices::const_iterator pvi=parent_vertices.begin(); pvi!=parent_vertices.end(); ++pvi) { PartitionForest::Vertex *parent_vertex = *pvi; if (parent_vertex->functions.size()>MAX_SIMSET_SIZE) partition_functions(m, partition, parent_vertex->functions, pointers, inputs, parent_vertex); } } // If the new level doesn't contain any vertices then we must not have needed to repartition anything and we're all // done. 
if (partition.vertices_at_level(level).empty()) break; } m->mesg("=========================================================================================="); m->mesg("%s: The entire partition forest follows...", name); m->mesg("%s", StringUtility::prefixLines(partition.toString(), std::string(name)+": ").c_str()); m->mesg("=========================================================================================="); m->mesg("%s: Final function similarity sets are:", name); PartitionForest::Vertices leaves = partition.get_leaves(); size_t setno=0; for (PartitionForest::Vertices::iterator vi=leaves.begin(); vi!=leaves.end(); ++vi, ++setno) { PartitionForest::Vertex *leaf = *vi; const Functions &functions = leaf->get_functions(); m->mesg("%s: set #%zu at level %zu has %zu function%s:", name, setno, leaf->get_level(), functions.size(), 1==functions.size()?"":"s"); for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi) m->mesg("%s: 0x%08"PRIx64" <%s>", name, (*fi)->get_entry_va(), (*fi)->get_name().c_str()); } m->mesg("%s: dumping final similarity sets to clones.sql", name); partition.dump("clones.sql", "NO_USER", "NO_PASSWD"); }
// The actual analysis, triggered when we reach the specified execution address... virtual bool operator()(bool enabled, const Args &args) try { using namespace rose::BinaryAnalysis::InstructionSemantics; static const char *name = "Analysis"; using namespace rose::BinaryAnalysis::InsnSemanticsExpr; if (enabled && args.insn->get_address()==trigger_addr) { RTS_Message *trace = args.thread->tracing(TRACE_MISC); trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr); // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with // "--with-yices=/full/path/to/yices/installation". If not, you'll get a failed assertion when ROSE tries to use // the solver. rose::BinaryAnalysis::YicesSolver smt_solver; smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE); //smt_solver.set_debug(stdout); // We deactive the simulator while we're doing this analysis. If the simulator remains activated, then the SIGCHLD // that are generated from running the Yices executable will be sent to the specimen. That probably wouldn't cause // problems for the specimen, but the messages are annoying. args.thread->get_process()->get_simulator()->deactivate(); // Create the policy that holds the analysis state which is modified by each instruction. Then plug the policy // into the X86InstructionSemantics to which we'll feed each instruction. SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver); X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>, SymbolicSemantics::ValueType> semantics(policy); // The top of the stack contains the (unknown) return address. The value above that (in memory) is the address of // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a // concrete value). The contents of the buffer are unknown. 
Process memory is maintained by the policy we created // above, so none of these memory writes are actually affecting the specimen's state in the simulator. policy.writeRegister("esp", policy.number<32>(4000)); SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4)); SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4)); policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_()); // ptr to buffer policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_()); // bytes in buffer policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr)); // branch to analysis address #if 1 { // This is a kludge. If the first instruction is an indirect JMP then assume we're executing through a dynamic // linker thunk and execute the instruction concretely to advance the instruction pointer. SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(analysis_addr)); if (x86_jmp==insn->get_kind()) { PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> p; X86InstructionSemantics<PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType>, PartialSymbolicSemantics::ValueType> sem(p); MemoryMap p_map = args.thread->get_process()->get_memory(); BOOST_FOREACH (MemoryMap::Segment &segment, p_map.segments()) segment.buffer()->copyOnWrite(true); p.set_map(&p_map); // won't be thread safe sem.processInstruction(insn); policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value())); trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64, name, analysis_addr, p.readRegister<32>("eip").known_value()); } } #endif // Run the analysis until we can't figure out what instruction is next. 
If we set things up correctly, the // simulation will stop when we hit the RET instruction to return from this function. size_t nbranches = 0; std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver while (policy.readRegister<32>("eip").is_known()) { uint64_t va = policy.readRegister<32>("eip").known_value(); SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va)); assert(insn!=NULL); trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str()); semantics.processInstruction(insn); if (policy.readRegister<32>("eip").is_known()) continue; bool complete; std::set<rose_addr_t> succs = insn->getSuccessors(&complete); if (complete && 2==succs.size()) { if (nbranches>=take_branch.size()) { std::ostringstream s; s<<policy.readRegister<32>("eip"); trace->mesg("%s: EIP = %s", name, s.str().c_str()); trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name); throw this; } // Decide whether we should take the branch or not. bool take = take_branch[nbranches++]; rose_addr_t target = 0; for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) { if ((take && *si!=insn->get_address()+insn->get_size()) || (!take && *si==insn->get_address()+insn->get_size())) target = *si; } assert(target!=0); trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target); // Is this path feasible? We don't really need to check it now; we could wait until the end. 
TreeNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(), LeafNode::create_integer(32, target)); constraints.push_back(c); // shouldn't really have to do this again if we could save some state if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(constraints)) { policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target)); } else { trace->mesg("%s: chosen control flow path is not feasible (or unknown).", name); break; } } } // Show the value of the EAX register since this is where GCC puts the function's return value. If we did things // right, the return value should depend only on the unknown bytes from the beginning of the buffer. SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax"); std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables(); { std::ostringstream s; s <<name <<": symbolic return value is " <<result <<"\n" <<name <<": return value has " <<vars.size() <<" variables:"; for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) s <<" " <<*vi; s <<"\n"; if (!constraints.empty()) { s <<name <<": path constraints:\n"; for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci) s <<name <<": " <<*ci <<"\n"; } trace->mesg("%s", s.str().c_str()); } // Now give values to those bytes and solve the equation for the result using an SMT solver. 
if (!result.is_known()) { trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name); std::vector<TreeNodePtr> exprs = constraints; LeafNodePtr result_var = LeafNode::create_variable(32); TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var); exprs.push_back(expr); for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x')); exprs.push_back(expr); } if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) { LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode(); if (!result_value) { trace->mesg("%s: evaluation result could not be determined. ERROR!", name); } else if (!result_value->is_known()) { trace->mesg("%s: evaluation result is not constant. ERROR!", name); } else { trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value()); } } else { trace->mesg("%s: expression is not satisfiable. (or unknown)", name); } } // Now try going the other direction. Set the return expression to a value and try to discover what two bytes // would satisfy the equation. 
if (!result.is_known()) { trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name); std::vector<TreeNodePtr> exprs = constraints; TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), LeafNode::create_integer(32, 0xff015e7c)); exprs.push_back(expr); if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) { for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode(); if (var_val && var_val->is_known()) trace->mesg("%s: v%"PRIu64" = %"PRIu64" %c", name, (*vi)->get_name(), var_val->get_value(), isprint(var_val->get_value())?(char)var_val->get_value():' '); } } else { trace->mesg("%s: expression is not satisfiable (or unknown). No solutions.", name); } } // Reactivate the simulator in case we want to continue simulating. args.thread->get_process()->get_simulator()->activate(); throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator } return enabled; } catch (const Analysis*) { args.thread->get_process()->get_simulator()->activate(); throw; }