// see base class
bool
SgAsmX86Instruction::isFunctionCallFast(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va)
{
    if (insns.empty())
        return false;
    SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back());
    if (!last)
        return false;

    // Quick method based only on the kind of instruction
    if (x86_call==last->get_kind() || x86_farcall==last->get_kind()) {
        last->getBranchTarget(target);
        if (return_va)
            *return_va = last->get_address() + last->get_size();
        return true;
    }

    return false;
}
// see base class
bool
SgAsmX86Instruction::isFunctionCallSlow(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va)
{
    if (isFunctionCallFast(insns, target, return_va))
        return true;

    // The following stuff works only if we have a relatively complete AST.
    static const size_t EXECUTION_LIMIT = 10; // max size of basic blocks for expensive analyses
    if (insns.empty())
        return false;
    SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back());
    if (!last)
        return false;
    SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(last);
    SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func);

    // Slow method: Emulate the instructions and then look at the EIP and stack.  If the EIP points outside the current
    // function and the top of the stack holds an address of an instruction within the current function, then this must be a
    // function call.
    if (interp && insns.size()<=EXECUTION_LIMIT) {
        using namespace Rose::BinaryAnalysis;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics;
        const InstructionMap &imap = interp->get_instruction_map();
        const RegisterDictionary *regdict = RegisterDictionary::dictionary_for_isa(interp);
        SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver);
        BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver);
        ASSERT_not_null(ops);
        const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp);
        DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits());
        SValuePtr orig_esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP));
        try {
            for (size_t i=0; i<insns.size(); ++i)
                dispatcher->processInstruction(insns[i]);
        } catch (const BaseSemantics::Exception &e) {
            return false;
        }

        // If the next instruction address is concrete but does not point to a function entry point, then this is not a call.
        SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP));
        if (eip->is_number()) {
            rose_addr_t target_va = eip->get_number();
            SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(target_va, NULL));
            if (!target_func || target_va!=target_func->get_entry_va())
                return false;
        }

        // If nothing was pushed onto the stack, then this isn't a function call.
        const size_t spWidth = dispatcher->REG_anySP.get_nbits();
        SValuePtr esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP));
        SValuePtr stack_delta = SValue::promote(ops->add(esp, ops->negate(orig_esp)));
        SValuePtr stack_delta_sign = SValue::promote(ops->extract(stack_delta, spWidth-1, spWidth));
        if (stack_delta_sign->is_number() && 0==stack_delta_sign->get_number())
            return false;

        // If the top of the stack does not contain a concrete value or the top of the stack does not point to an instruction
        // in this basic block's function, then this is not a function call.
        const size_t ipWidth = dispatcher->REG_anyIP.get_nbits();
        SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, esp, esp->undefined_(ipWidth), esp->boolean_(true)));
        if (top->is_number()) {
            rose_addr_t va = top->get_number();
            SgAsmFunction *return_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(va, NULL));
            if (!return_func || return_func!=func) {
                return false;
            }
        } else {
            return false;
        }

        // Since EIP might point to a function entry address and since the top of the stack contains a pointer to an
        // instruction in this function, we assume that this is a function call.
        if (target && eip->is_number())
            *target = eip->get_number();
        if (return_va && top->is_number())
            *return_va = top->get_number();
        return true;
    }

    // Similar to the above method, but works when all we have is the basic block (e.g., this case gets hit quite a bit from
    // the Partitioner).  Returns true if, after executing the basic block, the top of the stack contains the fall-through
    // address of the basic block. We depend on our caller to figure out if EIP is reasonably a function entry address.
    if (!interp && insns.size()<=EXECUTION_LIMIT) {
        using namespace Rose::BinaryAnalysis;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics;
        SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver);
        SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(insns.front());
        ASSERT_not_null(x86insn);
#if 1 // [Robb P. Matzke 2015-03-03]: FIXME[Robb P. Matzke 2015-03-03]: not ready yet; x86-64 semantics still under construction
        if (x86insn->get_addressSize() != x86_insnsize_32)
            return false;
#endif
        const RegisterDictionary *regdict = registersForInstructionSize(x86insn->get_addressSize());
        const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp);
        BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver);
        DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits());
        try {
            for (size_t i=0; i<insns.size(); ++i)
                dispatcher->processInstruction(insns[i]);
        } catch (const BaseSemantics::Exception &e) {
            return false;
        }

        // Look at the top of the stack
        const size_t ipWidth = dispatcher->REG_anyIP.get_nbits();
        SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, ops->readRegister(SP),
                                                        ops->protoval()->undefined_(ipWidth),
                                                        ops->protoval()->boolean_(true)));
        if (top->is_number() && top->get_number() == last->get_address()+last->get_size()) {
            if (target) {
                SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP));
                if (eip->is_number())
                    *target = eip->get_number();
            }
            if (return_va)
                *return_va = top->get_number();
            return true;
        }
    }

    return false;
}
Beispiel #3
0
    // The actual analysis, triggered when we reach the specified execution address...
    virtual bool operator()(bool enabled, const Args &args) try {
        using namespace rose::BinaryAnalysis::InstructionSemantics;

        static const char *name = "Analysis";
        using namespace rose::BinaryAnalysis::InsnSemanticsExpr;
        if (enabled && args.insn->get_address()==trigger_addr) {
            RTS_Message *trace = args.thread->tracing(TRACE_MISC);
            trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr);

            // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with
            // "--with-yices=/full/path/to/yices/installation".  If not, you'll get a failed assertion when ROSE tries to use
            // the solver.
            rose::BinaryAnalysis::YicesSolver smt_solver;
            smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE);
            //smt_solver.set_debug(stdout);

            // We deactive the simulator while we're doing this analysis.  If the simulator remains activated, then the SIGCHLD
            // that are generated from running the Yices executable will be sent to the specimen.  That probably wouldn't cause
            // problems for the specimen, but the messages are annoying.
            args.thread->get_process()->get_simulator()->deactivate();

            // Create the policy that holds the analysis state which is modified by each instruction.  Then plug the policy
            // into the X86InstructionSemantics to which we'll feed each instruction.
            SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver);
            X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>,
                                    SymbolicSemantics::ValueType> semantics(policy);

            // The top of the stack contains the (unknown) return address.  The value above that (in memory) is the address of
            // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a
            // concrete value).  The contents of the buffer are unknown.  Process memory is maintained by the policy we created
            // above, so none of these memory writes are actually affecting the specimen's state in the simulator.
            policy.writeRegister("esp", policy.number<32>(4000));
            SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4));
            SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4));
            policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_());   // ptr to buffer
            policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_());       // bytes in buffer
            policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr));            // branch to analysis address

#if 1
            {
                // This is a kludge.  If the first instruction is an indirect JMP then assume we're executing through a dynamic
                // linker thunk and execute the instruction concretely to advance the instruction pointer.
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(analysis_addr));
                if (x86_jmp==insn->get_kind()) {
                    PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> p;
                    X86InstructionSemantics<PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State,
                                                                             PartialSymbolicSemantics::ValueType>,
                                            PartialSymbolicSemantics::ValueType> sem(p);
                    MemoryMap p_map = args.thread->get_process()->get_memory();
                    BOOST_FOREACH (MemoryMap::Segment &segment, p_map.segments())
                        segment.buffer()->copyOnWrite(true);
                    p.set_map(&p_map); // won't be thread safe
                    sem.processInstruction(insn);
                    policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value()));
                    trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64,
                                name, analysis_addr, p.readRegister<32>("eip").known_value());
                }
            }
#endif

            // Run the analysis until we can't figure out what instruction is next.  If we set things up correctly, the
            // simulation will stop when we hit the RET instruction to return from this function.
            size_t nbranches = 0;
            std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver
            while (policy.readRegister<32>("eip").is_known()) {
                uint64_t va = policy.readRegister<32>("eip").known_value();
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va));
                assert(insn!=NULL);
                trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str());
                semantics.processInstruction(insn);
                if (policy.readRegister<32>("eip").is_known())
                    continue;
                
                bool complete;
                std::set<rose_addr_t> succs = insn->getSuccessors(&complete);
                if (complete && 2==succs.size()) {
                    if (nbranches>=take_branch.size()) {
                        std::ostringstream s; s<<policy.readRegister<32>("eip");
                        trace->mesg("%s: EIP = %s", name, s.str().c_str());
                        trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name);
                        throw this;
                    }

                    // Decide whether we should take the branch or not.
                    bool take = take_branch[nbranches++];
                    rose_addr_t target = 0;
                    for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) {
                        if ((take && *si!=insn->get_address()+insn->get_size()) ||
                            (!take && *si==insn->get_address()+insn->get_size()))
                            target = *si;
                    }
                    assert(target!=0);
                    trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target);

                    // Is this path feasible?  We don't really need to check it now; we could wait until the end.
                    TreeNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(),
                                                         LeafNode::create_integer(32, target));
                    constraints.push_back(c); // shouldn't really have to do this again if we could save some state
                    if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(constraints)) {
                        policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target));
                    } else {
                        trace->mesg("%s: chosen control flow path is not feasible (or unknown).", name);
                        break;
                    }
                }
            }

            // Show the value of the EAX register since this is where GCC puts the function's return value.  If we did things
            // right, the return value should depend only on the unknown bytes from the beginning of the buffer.
            SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax");
            std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables();
            {
                std::ostringstream s;
                s <<name <<": symbolic return value is " <<result <<"\n"
                  <<name <<": return value has " <<vars.size() <<" variables:";
                for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin();
                     vi!=vars.end(); ++vi)
                    s <<" " <<*vi;
                s <<"\n";
                if (!constraints.empty()) {
                    s <<name <<": path constraints:\n";
                    for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci)
                        s <<name <<":   " <<*ci <<"\n";
                }
                trace->mesg("%s", s.str().c_str());
            }

            // Now give values to those bytes and solve the equation for the result using an SMT solver.
            if (!result.is_known()) {
                trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                LeafNodePtr result_var = LeafNode::create_variable(32);
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var);
                exprs.push_back(expr);
                for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                    expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x'));
                    exprs.push_back(expr);
                }
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) {
                    LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode();
                    if (!result_value) {
                        trace->mesg("%s: evaluation result could not be determined. ERROR!", name);
                    } else if (!result_value->is_known()) {
                        trace->mesg("%s: evaluation result is not constant. ERROR!", name);
                    } else {
                        trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value());
                    }
                } else {
                    trace->mesg("%s: expression is not satisfiable. (or unknown)", name);
                }
            }

            // Now try going the other direction.  Set the return expression to a value and try to discover what two bytes
            // would satisfy the equation.
            if (!result.is_known()) {
                trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(),
                                                        LeafNode::create_integer(32, 0xff015e7c));
                exprs.push_back(expr);
                if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) {
                    for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                        LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode();
                        if (var_val && var_val->is_known())
                            trace->mesg("%s:   v%"PRIu64" = %"PRIu64" %c",
                                        name, (*vi)->get_name(), var_val->get_value(),
                                        isprint(var_val->get_value())?(char)var_val->get_value():' ');
                    }
                } else {
                    trace->mesg("%s:   expression is not satisfiable (or unknown).  No solutions.", name);
                }
            }

            // Reactivate the simulator in case we want to continue simulating.
            args.thread->get_process()->get_simulator()->activate();
            throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator
        }
        return enabled;
    } catch (const Analysis*) {
        args.thread->get_process()->get_simulator()->activate();
        throw;
    }