// see base class bool SgAsmX86Instruction::isFunctionCallFast(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va) { if (insns.empty()) return false; SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back()); if (!last) return false; // Quick method based only on the kind of instruction if (x86_call==last->get_kind() || x86_farcall==last->get_kind()) { last->getBranchTarget(target); if (return_va) *return_va = last->get_address() + last->get_size(); return true; } return false; }
// see base class bool SgAsmX86Instruction::isFunctionCallSlow(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va) { if (isFunctionCallFast(insns, target, return_va)) return true; // The following stuff works only if we have a relatively complete AST. static const size_t EXECUTION_LIMIT = 10; // max size of basic blocks for expensive analyses if (insns.empty()) return false; SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back()); if (!last) return false; SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(last); SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func); // Slow method: Emulate the instructions and then look at the EIP and stack. If the EIP points outside the current // function and the top of the stack holds an address of an instruction within the current function, then this must be a // function call. if (interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; const InstructionMap &imap = interp->get_instruction_map(); const RegisterDictionary *regdict = RegisterDictionary::dictionary_for_isa(interp); SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); ASSERT_not_null(ops); const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp); DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits()); SValuePtr orig_esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP)); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // If the next instruction address is concrete but does not point to a function entry point, then this is not a call. SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP)); if (eip->is_number()) { rose_addr_t target_va = eip->get_number(); SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(target_va, NULL)); if (!target_func || target_va!=target_func->get_entry_va()) return false; } // If nothing was pushed onto the stack, then this isn't a function call. const size_t spWidth = dispatcher->REG_anySP.get_nbits(); SValuePtr esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP)); SValuePtr stack_delta = SValue::promote(ops->add(esp, ops->negate(orig_esp))); SValuePtr stack_delta_sign = SValue::promote(ops->extract(stack_delta, spWidth-1, spWidth)); if (stack_delta_sign->is_number() && 0==stack_delta_sign->get_number()) return false; // If the top of the stack does not contain a concrete value or the top of the stack does not point to an instruction // in this basic block's function, then this is not a function call. const size_t ipWidth = dispatcher->REG_anyIP.get_nbits(); SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, esp, esp->undefined_(ipWidth), esp->boolean_(true))); if (top->is_number()) { rose_addr_t va = top->get_number(); SgAsmFunction *return_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(va, NULL)); if (!return_func || return_func!=func) { return false; } } else { return false; } // Since EIP might point to a function entry address and since the top of the stack contains a pointer to an // instruction in this function, we assume that this is a function call. if (target && eip->is_number()) *target = eip->get_number(); if (return_va && top->is_number()) *return_va = top->get_number(); return true; } // Similar to the above method, but works when all we have is the basic block (e.g., this case gets hit quite a bit from // the Partitioner). Returns true if, after executing the basic block, the top of the stack contains the fall-through // address of the basic block. We depend on our caller to figure out if EIP is reasonably a function entry address. if (!interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(insns.front()); ASSERT_not_null(x86insn); #if 1 // [Robb P. Matzke 2015-03-03]: FIXME[Robb P. Matzke 2015-03-03]: not ready yet; x86-64 semantics still under construction if (x86insn->get_addressSize() != x86_insnsize_32) return false; #endif const RegisterDictionary *regdict = registersForInstructionSize(x86insn->get_addressSize()); const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits()); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // Look at the top of the stack const size_t ipWidth = dispatcher->REG_anyIP.get_nbits(); SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, ops->readRegister(SP), ops->protoval()->undefined_(ipWidth), ops->protoval()->boolean_(true))); if (top->is_number() && top->get_number() == last->get_address()+last->get_size()) { if (target) { SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP)); if (eip->is_number()) *target = eip->get_number(); } if (return_va) *return_va = top->get_number(); return true; } } return false; }
// The actual analysis, triggered when we reach the specified execution address... virtual bool operator()(bool enabled, const Args &args) try { using namespace rose::BinaryAnalysis::InstructionSemantics; static const char *name = "Analysis"; using namespace rose::BinaryAnalysis::InsnSemanticsExpr; if (enabled && args.insn->get_address()==trigger_addr) { RTS_Message *trace = args.thread->tracing(TRACE_MISC); trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr); // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with // "--with-yices=/full/path/to/yices/installation". If not, you'll get a failed assertion when ROSE tries to use // the solver. rose::BinaryAnalysis::YicesSolver smt_solver; smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE); //smt_solver.set_debug(stdout); // We deactive the simulator while we're doing this analysis. If the simulator remains activated, then the SIGCHLD // that are generated from running the Yices executable will be sent to the specimen. That probably wouldn't cause // problems for the specimen, but the messages are annoying. args.thread->get_process()->get_simulator()->deactivate(); // Create the policy that holds the analysis state which is modified by each instruction. Then plug the policy // into the X86InstructionSemantics to which we'll feed each instruction. SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver); X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>, SymbolicSemantics::ValueType> semantics(policy); // The top of the stack contains the (unknown) return address. The value above that (in memory) is the address of // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a // concrete value). The contents of the buffer are unknown. Process memory is maintained by the policy we created // above, so none of these memory writes are actually affecting the specimen's state in the simulator. policy.writeRegister("esp", policy.number<32>(4000)); SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4)); SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4)); policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_()); // ptr to buffer policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_()); // bytes in buffer policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr)); // branch to analysis address #if 1 { // This is a kludge. If the first instruction is an indirect JMP then assume we're executing through a dynamic // linker thunk and execute the instruction concretely to advance the instruction pointer. SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(analysis_addr)); if (x86_jmp==insn->get_kind()) { PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> p; X86InstructionSemantics<PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType>, PartialSymbolicSemantics::ValueType> sem(p); MemoryMap p_map = args.thread->get_process()->get_memory(); BOOST_FOREACH (MemoryMap::Segment &segment, p_map.segments()) segment.buffer()->copyOnWrite(true); p.set_map(&p_map); // won't be thread safe sem.processInstruction(insn); policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value())); trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64, name, analysis_addr, p.readRegister<32>("eip").known_value()); } } #endif // Run the analysis until we can't figure out what instruction is next. If we set things up correctly, the // simulation will stop when we hit the RET instruction to return from this function. size_t nbranches = 0; std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver while (policy.readRegister<32>("eip").is_known()) { uint64_t va = policy.readRegister<32>("eip").known_value(); SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va)); assert(insn!=NULL); trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str()); semantics.processInstruction(insn); if (policy.readRegister<32>("eip").is_known()) continue; bool complete; std::set<rose_addr_t> succs = insn->getSuccessors(&complete); if (complete && 2==succs.size()) { if (nbranches>=take_branch.size()) { std::ostringstream s; s<<policy.readRegister<32>("eip"); trace->mesg("%s: EIP = %s", name, s.str().c_str()); trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name); throw this; } // Decide whether we should take the branch or not. bool take = take_branch[nbranches++]; rose_addr_t target = 0; for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) { if ((take && *si!=insn->get_address()+insn->get_size()) || (!take && *si==insn->get_address()+insn->get_size())) target = *si; } assert(target!=0); trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target); // Is this path feasible? We don't really need to check it now; we could wait until the end. TreeNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(), LeafNode::create_integer(32, target)); constraints.push_back(c); // shouldn't really have to do this again if we could save some state if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(constraints)) { policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target)); } else { trace->mesg("%s: chosen control flow path is not feasible (or unknown).", name); break; } } } // Show the value of the EAX register since this is where GCC puts the function's return value. If we did things // right, the return value should depend only on the unknown bytes from the beginning of the buffer. SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax"); std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables(); { std::ostringstream s; s <<name <<": symbolic return value is " <<result <<"\n" <<name <<": return value has " <<vars.size() <<" variables:"; for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) s <<" " <<*vi; s <<"\n"; if (!constraints.empty()) { s <<name <<": path constraints:\n"; for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci) s <<name <<": " <<*ci <<"\n"; } trace->mesg("%s", s.str().c_str()); } // Now give values to those bytes and solve the equation for the result using an SMT solver. if (!result.is_known()) { trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name); std::vector<TreeNodePtr> exprs = constraints; LeafNodePtr result_var = LeafNode::create_variable(32); TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var); exprs.push_back(expr); for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x')); exprs.push_back(expr); } if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) { LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode(); if (!result_value) { trace->mesg("%s: evaluation result could not be determined. ERROR!", name); } else if (!result_value->is_known()) { trace->mesg("%s: evaluation result is not constant. ERROR!", name); } else { trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value()); } } else { trace->mesg("%s: expression is not satisfiable. (or unknown)", name); } } // Now try going the other direction. Set the return expression to a value and try to discover what two bytes // would satisfy the equation. if (!result.is_known()) { trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name); std::vector<TreeNodePtr> exprs = constraints; TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), LeafNode::create_integer(32, 0xff015e7c)); exprs.push_back(expr); if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) { for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode(); if (var_val && var_val->is_known()) trace->mesg("%s: v%"PRIu64" = %"PRIu64" %c", name, (*vi)->get_name(), var_val->get_value(), isprint(var_val->get_value())?(char)var_val->get_value():' '); } } else { trace->mesg("%s: expression is not satisfiable (or unknown). No solutions.", name); } } // Reactivate the simulator in case we want to continue simulating. args.thread->get_process()->get_simulator()->activate(); throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator } return enabled; } catch (const Analysis*) { args.thread->get_process()->get_simulator()->activate(); throw; }