Ejemplo n.º 1
0
/* Assembles all instructions of an interpretation.  ROSE allows blocks to be non-contiguous (i.e., an unconditiona jump can
 * appear in the middle of a basic block).  However, we need to disassemble contiguous instructions. */
static void
assemble_all(SgAsmInterpretation *interp)
{
    size_t nassembled = 0;
    Assembler *assembler = Assembler::create(interp);
    ROSE_ASSERT(assembler!=NULL);
    InstructionCollector collector(interp);
    for (Disassembler::InstructionMap::iterator ii=collector.insns.begin(); ii!=collector.insns.end(); ++ii) {
        rose_addr_t original_va = ii->first;

        /* The new_va is the virtual address of the instruction now that we may have moved it to a new location in memory.
         * We're leaving this implementation for later. For now, just assume that instructions don't move in memory. */
        rose_addr_t new_va = original_va;

        SgAsmx86Instruction *insn = isSgAsmx86Instruction(ii->second);
        ROSE_ASSERT(insn!=NULL);
        SgUnsignedCharList machine_code;
        try {
            insn->set_address(new_va);
            machine_code = assembler->assembleOne(insn);
            ROSE_ASSERT(!machine_code.empty());
            ++nassembled;
        } catch (const Assembler::Exception &e) {
            std::cerr <<"assembly failed at " <<StringUtility::addrToString(e.insn->get_address())
                      <<": " <<e.what() <<std::endl;
            if (!assembler->get_debug()) {
                assembler->set_debug(stderr);
                try {
                    assembler->assembleOne(insn);
                } catch (...) {
                    /*void*/
                }
                assembler->set_debug(false);
            }
            //return;
        }

#if 0   /* Don't worry about writing the instruction back out to the section. [RPM 2011-08-23] */
        /* We don't handle the case where an instruction grows because that could cause us to require that the section
         * containing the instruction grows, which opens a whole can of worms. */
        ROSE_ASSERT(machine_code.size() <= insn->get_size());
        
        /* We're using the same memory map as what was used when we loaded the binary and disassembled it. Therefore, the
         * machine code that we're writing back needs to fall within those same areas of the virtual address space: we cannot
         * write past the end of mapped memory, nor can we write to the space (if any) between mapped memory chunks. */
        size_t nwritten = interp->get_map()->write(&(machine_code[0]), new_va, machine_code.size(), MemoryMap::MM_PROT_NONE);
        ROSE_ASSERT(nwritten==machine_code.size());
#endif
    }

    std::cout <<"Assembled " <<nassembled <<" instruction" <<(1==nassembled?"":"s") <<"\n";
    delete assembler;
}
Ejemplo n.º 2
0
    // The actual analysis, triggered when we reach the specified execution address...
    virtual bool operator()(bool enabled, const Args &args) try {
        using namespace BinaryAnalysis::InstructionSemantics;

        static const char *name = "Analysis";
        using namespace InsnSemanticsExpr;
        if (enabled && args.insn->get_address()==trigger_addr) {
            RTS_Message *trace = args.thread->tracing(TRACE_MISC);
            trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr);

            // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with
            // "--with-yices=/full/path/to/yices/installation".  If not, you'll get a failed assertion when ROSE tries to use
            // the solver.
            YicesSolver smt_solver;
            smt_solver.set_linkage(YicesSolver::LM_EXECUTABLE);
            //smt_solver.set_debug(stdout);

            // We deactive the simulator while we're doing this analysis.  If the simulator remains activated, then the SIGCHLD
            // that are generated from running the Yices executable will be sent to the specimen.  That probably wouldn't cause
            // problems for the specimen, but the messages are annoying.
            args.thread->get_process()->get_simulator()->deactivate();

            // Create the policy that holds the analysis state which is modified by each instruction.  Then plug the policy
            // into the X86InstructionSemantics to which we'll feed each instruction.
            SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver);
            X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>,
                                    SymbolicSemantics::ValueType> semantics(policy);

            // The top of the stack contains the (unknown) return address.  The value above that (in memory) is the address of
            // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a
            // concrete value).  The contents of the buffer are unknown.  Process memory is maintained by the policy we created
            // above, so none of these memory writes are actually affecting the specimen's state in the simulator.
            policy.writeRegister("esp", policy.number<32>(4000));
            SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4));
            SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4));
            policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_());   // ptr to buffer
            policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_());       // bytes in buffer
            policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr));            // branch to analysis address

#if 1
            {
                // This is a kludge.  If the first instruction is an indirect JMP then assume we're executing through a dynamic
                // linker thunk and execute the instruction concretely to advance the instruction pointer.
                SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(analysis_addr));
                if (x86_jmp==insn->get_kind()) {
                    VirtualMachineSemantics::Policy<VirtualMachineSemantics::State, VirtualMachineSemantics::ValueType> p;
                    X86InstructionSemantics<VirtualMachineSemantics::Policy<VirtualMachineSemantics::State,
                                                                            VirtualMachineSemantics::ValueType>,
                                            VirtualMachineSemantics::ValueType> sem(p);
                    p.set_map(args.thread->get_process()->get_memory()); // won't be thread safe
                    sem.processInstruction(insn);
                    policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value()));
                    trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64,
                                name, analysis_addr, p.readRegister<32>("eip").known_value());
                }
            }
#endif

            // Run the analysis until we can't figure out what instruction is next.  If we set things up correctly, the
            // simulation will stop when we hit the RET instruction to return from this function.
            size_t nbranches = 0;
            std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver
            while (policy.readRegister<32>("eip").is_known()) {
                uint64_t va = policy.readRegister<32>("eip").known_value();
                SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.thread->get_process()->get_instruction(va));
                assert(insn!=NULL);
                trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str());
                semantics.processInstruction(insn);
                if (policy.readRegister<32>("eip").is_known())
                    continue;
                
                bool complete;
                std::set<rose_addr_t> succs = insn->get_successors(&complete);
                if (complete && 2==succs.size()) {
                    if (nbranches>=take_branch.size()) {
                        std::ostringstream s; s<<policy.readRegister<32>("eip");
                        trace->mesg("%s: EIP = %s", name, s.str().c_str());
                        trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name);
                        throw this;
                    }

                    // Decide whether we should take the branch or not.
                    bool take = take_branch[nbranches++];
                    rose_addr_t target = 0;
                    for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) {
                        if ((take && *si!=insn->get_address()+insn->get_size()) ||
                            (!take && *si==insn->get_address()+insn->get_size()))
                            target = *si;
                    }
                    assert(target!=0);
                    trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target);

                    // Is this path feasible?  We don't really need to check it now; we could wait until the end.
                    InternalNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(),
                                                             LeafNode::create_integer(32, target));
                    constraints.push_back(c); // shouldn't really have to do this again if we could save some state
                    if (smt_solver.satisfiable(constraints)) {
                        policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target));
                    } else {
                        trace->mesg("%s: chosen control flow path is not feasible.", name);
                        break;
                    }
                }
            }

            // Show the value of the EAX register since this is where GCC puts the function's return value.  If we did things
            // right, the return value should depend only on the unknown bytes from the beginning of the buffer.
            SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax");
            std::set<InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables();
            {
                std::ostringstream s;
                s <<name <<": symbolic return value is " <<result <<"\n"
                  <<name <<": return value has " <<vars.size() <<" variables:";
                for (std::set<InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi)
                    s <<" " <<*vi;
                s <<"\n";
                if (!constraints.empty()) {
                    s <<name <<": path constraints:\n";
                    for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci)
                        s <<name <<":   " <<*ci <<"\n";
                }
                trace->mesg("%s", s.str().c_str());
            }

            // Now give values to those bytes and solve the equation for the result using an SMT solver.
            if (!result.is_known()) {
                trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                LeafNodePtr result_var = LeafNode::create_variable(32);
                InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var);
                exprs.push_back(expr);
                for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                    expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x'));
                    exprs.push_back(expr);
                }
                if (smt_solver.satisfiable(exprs)) {
                    LeafNodePtr result_value = smt_solver.get_definition(result_var)->isLeafNode();
                    if (!result_value) {
                        trace->mesg("%s: evaluation result could not be determined. ERROR!", name);
                    } else if (!result_value->is_known()) {
                        trace->mesg("%s: evaluation result is not constant. ERROR!", name);
                    } else {
                        trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value());
                    }
                } else {
                    trace->mesg("%s: expression is not satisfiable.", name);
                }
            }

            // Now try going the other direction.  Set the return expression to a value and try to discover what two bytes
            // would satisfy the equation.
            if (!result.is_known()) {
                trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name);
                std::vector<TreeNodePtr> exprs = constraints;
                InternalNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(),
                                                            LeafNode::create_integer(32, 0xff015e7c));
                exprs.push_back(expr);
                if (smt_solver.satisfiable(exprs)) {
                    for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) {
                        LeafNodePtr var_val = smt_solver.get_definition(*vi)->isLeafNode();
                        if (var_val && var_val->is_known())
                            trace->mesg("%s:   v%"PRIu64" = %"PRIu64" %c",
                                        name, (*vi)->get_name(), var_val->get_value(),
                                        isprint(var_val->get_value())?(char)var_val->get_value():' ');
                    }
                } else {
                    trace->mesg("%s:   expression is not satisfiable.  No solutions.", name);
                }
            }

            // Reactivate the simulator in case we want to continue simulating.
            args.thread->get_process()->get_simulator()->activate();
            throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator
        }
        return enabled;
    } catch (const Analysis*) {
        args.thread->get_process()->get_simulator()->activate();
        throw;
    }