Example #1
// Convenience overload: fetches the value currently held in the simulator's system-call return register, converts
// it to a signed 64-bit integer, and passes it to the eret() overload that accepts that value.
// NOTE(review): the return-type line and the closing brace of this definition are not visible in this excerpt.
Printer::eret() {
    // The register descriptor is obtained from the simulator that owns this thread's process.
    RegisterDescriptor reg = thread_->get_process()->get_simulator()->syscallReturnRegister();
    uint64_t unsignedRetval = thread_->operators()->readRegister(reg)->get_number();
    // Convert from the register's bit width to 64 bits via IntegerOps::signExtend2 so the result fits an int64_t.
    int64_t signedRetval = IntegerOps::signExtend2(unsignedRetval, reg.get_nbits(), 64);
    return eret(signedRetval);
Example #2
//! [basicReadTest]
// Demonstrates reads against a lazily evaluated initial state: memory at address 0 and the stack pointer register
// are read through symbolic RiscOperators three times, each time with a different default value for the memory
// read, and results are written to std::cout.
// NOTE(review): parts of this example (the ends of the multi-line std::cout statements, some state manipulation
// calls, and the closing brace) are not visible in this excerpt; verify against the full source.
static void
basicReadTest(const P2::Partitioner &partitioner) {
    // Banner: the test name framed by two rows of 40 '=' characters.
    std::cout <<"\n" <<std::string(40, '=') <<"\nbasicReadTest\n" <<std::string(40, '=') <<"\n";
    SymbolicSemantics::Formatter fmt;
    fmt.set_line_prefix("  ");

    // Create the RiscOperators and the initial state.
    const RegisterDictionary *regdict = partitioner.instructionProvider().registerDictionary();
    const RegisterDescriptor REG = partitioner.instructionProvider().stackPointerRegister();
    const std::string REG_NAME = RegisterNames(regdict)(REG);
    BaseSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(regdict);
    // The initial state starts out as a clone of whatever the freshly created operators hold as current state.
    BaseSemantics::StatePtr initialState = ops->currentState()->clone();
    ops->initialState(initialState);                    // lazily evaluated initial state
    std::cout <<"Initial state before reading:\n" <<(*initialState+fmt);

    // Read some memory and a register, which should cause them to spring into existence in both the current state and the
    // initial state.
    BaseSemantics::SValuePtr addr1 = ops->number_(32, 0);
    BaseSemantics::SValuePtr dflt1m = ops->number_(32, 0x11223344);
    // A default-constructed RegisterDescriptor is passed as the segment register argument of readMemory.
    BaseSemantics::SValuePtr read1m = ops->readMemory(RegisterDescriptor(), addr1, dflt1m, ops->boolean_(true));
    BaseSemantics::SValuePtr dflt1r = ops->undefined_(REG.get_nbits());
    BaseSemantics::SValuePtr read1r = ops->readRegister(REG, dflt1r);

    // NOTE(review): the statement below has no visible terminator in this excerpt.
    std::cout <<"Initial state after reading " <<*read1m <<" from address " <<*addr1 <<"\n"
              <<"and " <<*read1r <<" from " <<REG_NAME <<"\n"

    // Create a new current state and read again. We should get the same value even though the current state is empty.
    // NOTE(review): curState is created here, but no visible statement installs it on `ops` — confirm.
    BaseSemantics::StatePtr curState = ops->currentState()->clone();
    BaseSemantics::SValuePtr dflt2m = ops->number_(32, 0x55667788);
    BaseSemantics::SValuePtr read2m = ops->readMemory(RegisterDescriptor(), addr1, dflt2m, ops->boolean_(true));
    BaseSemantics::SValuePtr dflt2r = ops->undefined_(REG.get_nbits());
    BaseSemantics::SValuePtr read2r = ops->readRegister(REG, dflt2r);

    // NOTE(review): the statement below has no visible terminator in this excerpt.
    std::cout <<"Initial state after reading " <<*read2m <<" from address " <<*addr1 <<"\n"
              <<"and " <<*read2r <<" from " <<REG_NAME <<"\n"

    // Disable the initial state. If we re-read the same address we'll still get the same result because it's now present in
    // the current state also.
    // NOTE(review): the call that actually disables the initial state is not visible in this excerpt.
    BaseSemantics::SValuePtr dflt3m = ops->number_(32, 0x99aabbcc);
    BaseSemantics::SValuePtr read3m = ops->readMemory(RegisterDescriptor(), addr1, dflt3m, ops->boolean_(true));
    BaseSemantics::SValuePtr dflt3r = ops->undefined_(REG.get_nbits());
    BaseSemantics::SValuePtr read3r = ops->readRegister(REG, dflt3r);
// Computes the successor addresses for a sequence of x86 instructions. It starts from the result of
// SgAsmInstruction::getSuccessors() and, when that result is incomplete or contains more than one address, sets up
// PartialSymbolicSemantics operators and an x86 dispatcher for a more thorough analysis.
//   insns          - instructions to analyze; insns.front() is dereferenced when debug output is enabled
//   complete       - out-parameter; it is dereferenced unconditionally below, so it must be non-null
//   initial_memory - not referenced by any line visible in this excerpt
// NOTE(review): the return-type line, the function's opening brace, several closing braces, and possibly other
// statements are not visible in this excerpt; verify against the full source before relying on these notes.
SgAsmX86Instruction::getSuccessors(const std::vector<SgAsmInstruction*>& insns, bool *complete,
                                   const MemoryMap::Ptr &initial_memory)
    Stream debug(mlog[DEBUG]);
    using namespace Rose::BinaryAnalysis::InstructionSemantics2;

    if (debug) {
        // Header line: address of the first instruction and the instruction count (pluralized when not 1).
        debug <<"SgAsmX86Instruction::getSuccessors(" <<StringUtility::addrToString(insns.front()->get_address())
              <<" for " <<insns.size() <<" instruction" <<(1==insns.size()?"":"s") <<"):" <<"\n";

    // Cursory analysis supplied by the base class; it also sets *complete.
    BinaryAnalysis::Disassembler::AddressSet successors = SgAsmInstruction::getSuccessors(insns, complete);

    /* If we couldn't determine all the successors, or a cursory analysis couldn't narrow it down to a single successor then
     * we'll do a more thorough analysis now. In the case where the cursory analysis returned a complete set containing two
     * successors, a thorough analysis might be able to narrow it down to a single successor. We should not make special
     * assumptions about CALL and FARCALL instructions -- their only successor is the specified address operand. */
    if (!*complete || successors.size()>1) {
        // Choose a register dictionary: from the enclosing interpretation when one exists, otherwise from the
        // instruction's base size.
        const RegisterDictionary *regdict;
        if (SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(this)) {
            regdict = RegisterDictionary::dictionary_for_isa(interp);
        } else {
            // NOTE(review): no `break` or `default:` is visible between these cases in this excerpt; as written every
            // case would fall through to ASSERT_not_reachable — confirm against the full source.
            switch (get_baseSize()) {
                case x86_insnsize_16:
                    regdict = RegisterDictionary::dictionary_i286();
                case x86_insnsize_32:
                    regdict = RegisterDictionary::dictionary_pentium4();
                case x86_insnsize_64:
                    regdict = RegisterDictionary::dictionary_amd64();
                    ASSERT_not_reachable("invalid x86 instruction size");
        // The instruction pointer register's width is passed to the dispatcher as its second argument.
        const RegisterDescriptor IP = regdict->findLargestRegister(x86_regclass_ip, 0);
        PartialSymbolicSemantics::RiscOperatorsPtr ops = PartialSymbolicSemantics::RiscOperators::instance(regdict);
        BaseSemantics::DispatcherPtr cpu = DispatcherX86::instance(ops, IP.get_nbits(), regdict);

        try {
            // NOTE(review): only the debug output of this loop is visible; the statement that processes `insn`
            // (the message says "state after") is presumably among the missing lines.
            BOOST_FOREACH (SgAsmInstruction *insn, insns) {
                SAWYER_MESG(debug) <<"  state after " <<insn->toString() <<"\n" <<*ops;
            // When the instruction pointer holds a concrete number, the result is marked complete.
            BaseSemantics::SValuePtr ip = ops->readRegister(IP);
            if (ip->is_number()) {
                *complete = true;
        } catch(const BaseSemantics::Exception &e) {
Example #4
// Maps a register descriptor's bit offset and bit width to a RoseBin_support::x86_regpos_* value.
//   offset 0          : 8 -> low_byte, 16 -> word, 32 -> dword, 64 -> qword, any other width -> all
//   offset 8, width 8 : high_byte
//   anything else     : unknown
// NOTE(review): the return-type line and the closing braces (switch, else, function) are not visible in this excerpt.
get_position_in_register(const RegisterDescriptor &rdesc) {
    if (0==rdesc.get_offset()) {
        // The descriptor starts at bit zero, so the width alone selects the position.
        switch (rdesc.get_nbits()) {
            case 8: return RoseBin_support::x86_regpos_low_byte;
            case 16: return RoseBin_support::x86_regpos_word;
            case 32: return RoseBin_support::x86_regpos_dword;
            case 64: return RoseBin_support::x86_regpos_qword;
            default: return RoseBin_support::x86_regpos_all;
    } else if (8==rdesc.get_offset() && 8==rdesc.get_nbits()) {
        // Eight bits starting at bit eight.
        return RoseBin_support::x86_regpos_high_byte;
    } else {
        return RoseBin_support::x86_regpos_unknown;
// Tool entry point: parses the command line, parses the specimen container, obtains a disassembler, then single-steps
// the specimen under a BinaryDebugger and prints each instruction found at the instruction pointer.
// NOTE(review): the return-type line, the closing braces, and whatever follows each FATAL message (presumably an
// exit), as well as the call that advances the debugger inside the loop, are not visible in this excerpt.
main(int argc, char *argv[]) {
    Diagnostics::initAndRegister(&::mlog, "tool");

    // Parse command-line
    P2::Engine engine;
    Settings settings;
    std::vector<std::string> specimen = parseCommandLine(argc, argv, engine, settings);
    if (specimen.empty()) {
        ::mlog[FATAL] <<"no specimen supplied on command-line; see --help\n";

    // Load specimen into ROSE's simulated memory
    // Only the first specimen name is handed to parseContainers.
    if (!engine.parseContainers(specimen.front())) {
        ::mlog[FATAL] <<"cannot parse specimen binary container\n";
    Disassembler *disassembler = engine.obtainDisassembler();
    if (!disassembler) {
        ::mlog[FATAL] <<"no disassembler for this architecture\n";
    const RegisterDescriptor REG_IP = disassembler->instructionPointerRegister();
    ASSERT_require2(REG_IP.is_valid(), "simulation must know what register serves as the instruction pointer");

    // Single-step the specimen natively in a debugger and show each instruction.
    BinaryDebugger debugger(specimen);
    while (!debugger.isTerminated()) {
        // Read the bytes at the current instruction pointer and try to decode one instruction from them.
        uint64_t ip = debugger.readRegister(REG_IP).toInteger();
        uint8_t buf[16];                                // 16 should be large enough for any instruction
        size_t nBytes = debugger.readMemory(ip, sizeof buf, buf);
        if (0 == nBytes) {
            ::mlog[ERROR] <<"cannot read memory at " <<StringUtility::addrToString(ip) <<"\n";
        } else if (SgAsmInstruction *insn = disassembler->disassembleOne(buf, ip, nBytes, ip)) {
            std::cout <<unparseInstructionWithAddress(insn) <<"\n";
        } else {
            ::mlog[ERROR] <<"cannot disassemble instruction at " <<StringUtility::addrToString(ip) <<"\n";
    // After the loop, report how the specimen terminated.
    std::cout <<debugger.howTerminated();
// Finds index ranges within `insns` whose execution leaves the (string-normalized) machine state unchanged.
// The result holds one IndexInterval hull(i, j-1) for every pair i<j where states[i] equals states[j]. An empty
// result is returned when there is no dispatcher (cpu_) or when `insns` is empty.
// NOTE(review): the return-type line, closing braces, and the statements that append to `states` and process each
// instruction are not visible in this excerpt; verify against the full source.
NoOperation::findNoopSubsequences(const std::vector<SgAsmInstruction*> &insns) const {
    IndexIntervals retval;
    Sawyer::Message::Stream debug(mlog[DEBUG]);

    if (debug) {
        // List every instruction of the input when debug output is enabled.
        debug <<"findNoopSubsequences(\n";
        BOOST_FOREACH (SgAsmInstruction *insn, insns)
            debug <<"  " <<unparseInstructionWithAddress(insn) <<"\n";
        debug <<")\n";

    // If we have no instruction semantics then assume that all instructions have an effect.
    if (!cpu_ || insns.empty())
        return retval;

    // Process each instruction as if insns were a basic block. Store insns[i]'s initial state in states[i] and its final state
    // in states[i+1].  States don't generally have a way to compare them for equality, so use a simple string-based comparison
    // for now. FIXME[Robb P. Matzke 2015-05-11]
    std::vector<std::string> states;
    bool hadError = false;
    const RegisterDescriptor regIP = cpu_->instructionPointerRegister();
    try {
        BOOST_FOREACH (SgAsmInstruction *insn, insns) {
            // Set the instruction pointer register to this instruction's address.
            cpu_->get_operators()->writeRegister(regIP, cpu_->get_operators()->number_(regIP.get_nbits(), insn->get_address()));
            // NOTE(review): states.back() and states.size()-1 assume `states` is non-empty here, but no visible
            // statement appends to it — presumably a missing line does.
            if (debug) {
                debug <<"  normalized state #" <<states.size()-1 <<":\n" <<StringUtility::prefixLines(states.back(), "    ");
                debug <<"  instruction: " <<unparseInstructionWithAddress(insn) <<"\n";
    } catch (const BaseSemantics::Exception &e) {
        // A semantic exception is recorded and reported on the debug stream rather than propagated.
        hadError = true;
        SAWYER_MESG(debug) <<"  semantic exception: " <<e <<"\n";
    if (!hadError) {
        if (debug)
            debug <<"  normalized state #" <<states.size()-1 <<":\n" <<StringUtility::prefixLines(states.back(), "    ");

    // Look for pairs of states that are the same, and call that sequence of instructions a no-op
    // Every pair (i, j) with i < j is compared, so the work is quadratic in the number of stored states.
    for (size_t i=0; i+1<states.size(); ++i) {
        for (size_t j=i+1; j<states.size(); ++j) {
            if (states[i]==states[j]) {
                retval.push_back(IndexInterval::hull(i, j-1));
                SAWYER_MESG(debug) <<"  no-op: " <<i <<".." <<(j-1) <<"\n";
    return retval;
Example #7
/** Returns the name of an X86 register.
 *  We use the amd64 architecture because, since it's backward compatible with the 8086, it contains definitions for all the
 *  registers from older architectures.
 *
 *  If the descriptor is not found in the amd64 dictionary, a diagnostic is written to std::cerr and a placeholder of the
 *  form "BAD_REGISTER(major.minor.offset.nbits)" is returned instead of a name.
 *
 *  NOTE(review): the closing braces of the nested if-statements and of the function are not visible in this excerpt. */
std::string unparseX86Register(const RegisterDescriptor &reg) {
    using namespace StringUtility;
    const RegisterDictionary *dict = RegisterDictionary::dictionary_amd64();
    std::string name = dict->lookup(reg);
    if (name.empty()) {
        // The FIXME hint is printed only once per process; the per-call "not found" line is printed every time.
        static bool dumped_dict = false;
        std::cerr <<"unparseX86Register(" <<reg <<"): register descriptor not found in dictionary.\n";
        if (!dumped_dict) {
            std::cerr <<"  FIXME: we might be using the amd64 register dictionary. [RPM 2011-03-02]\n";
            //std::cerr <<*dict;
            dumped_dict = true;
        // Build the placeholder from the four descriptor fields.
        return (std::string("BAD_REGISTER(") +
                numberToString(reg.get_major()) + "." +
                numberToString(reg.get_minor()) + "." +
                numberToString(reg.get_offset()) + "." +
                numberToString(reg.get_nbits()) + ")");
    return name;
// Builds the state in which `insn` is to be evaluated. When a normalizer is configured it supplies the state;
// otherwise the dispatcher's current state is cloned and its instruction pointer register is set to the
// instruction's address. If initialSp_ holds a value, the stack pointer register is additionally set to it.
// NOTE(review): the return-type line and closing braces are not visible in this excerpt.
NoOperation::initialState(SgAsmInstruction *insn) const {
    BaseSemantics::StatePtr state;
    if (normalizer_) {
        state = normalizer_->initialState(cpu_, insn);
    } else {
        // Work on a clone of the dispatcher's current state and write the instruction's address into IP.
        state = cpu_->currentState()->clone();
        RegisterDescriptor IP = cpu_->instructionPointerRegister();
        state->writeRegister(IP, cpu_->number_(IP.get_nbits(), insn->get_address()), cpu_->get_operators().get());

    // Set the stack pointer to a concrete value
    if (initialSp_) {
        const RegisterDescriptor regSp = cpu_->stackPointerRegister();
        BaseSemantics::RiscOperatorsPtr ops = cpu_->get_operators();
        state->writeRegister(regSp, ops->number_(regSp.get_nbits(), *initialSp_), ops.get());

    return state;
 // Constructs the virtual machine: creates concrete-semantics operators over the partitioner's register dictionary,
 // asks the partitioner for a dispatcher (wrapped in tracing operators when settings.traceSemantics is set), and
 // caches the instruction pointer, stack pointer and stack segment register descriptors.
 // Throws std::runtime_error("no semantics for architecture") when the partitioner yields no dispatcher.
 // NOTE(review): closing braces are not visible in this excerpt.
 VirtualMachine(const P2::Partitioner &partitioner, const Settings &settings)
     : wordSize_(0), stackVa_(settings.stackVa), returnMarker_(0xbeef0967) {
     const RegisterDictionary *regs = partitioner.instructionProvider().registerDictionary();
     ops_ = ConcreteSemantics::RiscOperators::instance(regs);
     if (settings.traceSemantics) {
         // Tracing: the dispatcher is built on operators that wrap ops_.
         BaseSemantics::RiscOperatorsPtr traceOps = TraceSemantics::RiscOperators::instance(ops_);
         cpu_ = partitioner.newDispatcher(traceOps);
     } else {
         cpu_ = partitioner.newDispatcher(ops_);
     if (cpu_==NULL)
         throw std::runtime_error("no semantics for architecture");
     regIp_ = partitioner.instructionProvider().instructionPointerRegister();
     regSp_ = partitioner.instructionProvider().stackPointerRegister();
     regSs_ = partitioner.instructionProvider().stackSegmentRegister();
     // The word size is taken to be the width of the instruction pointer register.
     wordSize_ = regIp_.get_nbits();
// see base class
//
// Decides whether the instruction sequence `insns` is a function call. The fast check (isFunctionCallFast) is tried
// first; when it fails and the sequence has at most EXECUTION_LIMIT instructions, one of two symbolic analyses is
// set up, depending on whether an enclosing SgAsmInterpretation was found.
//   insns     - instructions to analyze; an empty vector, or a last element that is not an x86 instruction,
//               yields false
//   target    - optional out-parameter (checked for null before use); receives the concrete instruction pointer
//   return_va - optional out-parameter (checked for null before use); receives the concrete top-of-stack value
// NOTE(review): the return-type line, the opening brace, many closing braces, the bodies of the two `for` loops,
// the tail of one readMemory() call and the `#endif` matching `#if 1` are not visible in this excerpt; verify
// against the full source before relying on these notes.
SgAsmX86Instruction::isFunctionCallSlow(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va)
    if (isFunctionCallFast(insns, target, return_va))
        return true;

    // The following stuff works only if we have a relatively complete AST.
    static const size_t EXECUTION_LIMIT = 10; // max size of basic blocks for expensive analyses
    if (insns.empty())
        return false;
    SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back());
    if (!last)
        return false;
    // `func` is not checked for null before being passed on — NOTE(review): confirm getEnclosingNode accepts null.
    SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(last);
    SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func);

    // Slow method: Emulate the instructions and then look at the EIP and stack.  If the EIP points outside the current
    // function and the top of the stack holds an address of an instruction within the current function, then this must be a
    // function call.
    if (interp && insns.size()<=EXECUTION_LIMIT) {
        using namespace Rose::BinaryAnalysis;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics;
        const InstructionMap &imap = interp->get_instruction_map();
        const RegisterDictionary *regdict = RegisterDictionary::dictionary_for_isa(interp);
        SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver);
        BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver);
        const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp);
        DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits());
        // Remember the stack pointer before the instructions run so the stack delta can be computed afterwards.
        SValuePtr orig_esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP));
        try {
            // NOTE(review): the loop body (presumably processing insns[i]) is not visible in this excerpt.
            for (size_t i=0; i<insns.size(); ++i)
        } catch (const BaseSemantics::Exception &e) {
            return false;

        // If the next instruction address is concrete but does not point to a function entry point, then this is not a call.
        SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP));
        if (eip->is_number()) {
            rose_addr_t target_va = eip->get_number();
            SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(target_va, NULL));
            if (!target_func || target_va!=target_func->get_entry_va())
                return false;

        // If nothing was pushed onto the stack, then this isn't a function call.
        // The most significant bit of (esp - orig_esp) is extracted; a concrete 0 there leads to `return false`.
        const size_t spWidth = dispatcher->REG_anySP.get_nbits();
        SValuePtr esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP));
        SValuePtr stack_delta = SValue::promote(ops->add(esp, ops->negate(orig_esp)));
        SValuePtr stack_delta_sign = SValue::promote(ops->extract(stack_delta, spWidth-1, spWidth));
        if (stack_delta_sign->is_number() && 0==stack_delta_sign->get_number())
            return false;

        // If the top of the stack does not contain a concrete value or the top of the stack does not point to an instruction
        // in this basic block's function, then this is not a function call.
        const size_t ipWidth = dispatcher->REG_anyIP.get_nbits();
        SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, esp, esp->undefined_(ipWidth), esp->boolean_(true)));
        if (top->is_number()) {
            rose_addr_t va = top->get_number();
            SgAsmFunction *return_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(va, NULL));
            if (!return_func || return_func!=func) {
                return false;
        } else {
            return false;

        // Since EIP might point to a function entry address and since the top of the stack contains a pointer to an
        // instruction in this function, we assume that this is a function call.
        if (target && eip->is_number())
            *target = eip->get_number();
        if (return_va && top->is_number())
            *return_va = top->get_number();
        return true;

    // Similar to the above method, but works when all we have is the basic block (e.g., this case gets hit quite a bit from
    // the Partitioner).  Returns true if, after executing the basic block, the top of the stack contains the fall-through
    // address of the basic block. We depend on our caller to figure out if EIP is reasonably a function entry address.
    if (!interp && insns.size()<=EXECUTION_LIMIT) {
        using namespace Rose::BinaryAnalysis;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2;
        using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics;
        SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver);
        // NOTE(review): x86insn is dereferenced below without a visible null check — confirm against the full source.
        SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(insns.front());
#if 1 // [Robb P. Matzke 2015-03-03]: FIXME[Robb P. Matzke 2015-03-03]: not ready yet; x86-64 semantics still under construction
        // Only instructions with a 32-bit address size are analyzed by this path.
        if (x86insn->get_addressSize() != x86_insnsize_32)
            return false;
        const RegisterDictionary *regdict = registersForInstructionSize(x86insn->get_addressSize());
        const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp);
        BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver);
        DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits());
        try {
            // NOTE(review): the loop body (presumably processing insns[i]) is not visible in this excerpt.
            for (size_t i=0; i<insns.size(); ++i)
        } catch (const BaseSemantics::Exception &e) {
            return false;

        // Look at the top of the stack
        const size_t ipWidth = dispatcher->REG_anyIP.get_nbits();
        // NOTE(review): the remaining arguments of this readMemory() call are not visible in this excerpt.
        SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, ops->readRegister(SP),
        // A call is reported when the top of the stack is the concrete address just past the last instruction.
        if (top->is_number() && top->get_number() == last->get_address()+last->get_size()) {
            if (target) {
                SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP));
                if (eip->is_number())
                    *target = eip->get_number();
            if (return_va)
                *return_va = top->get_number();
            return true;

    return false;
Example #11
 // Custom version of the readRegister API that does not require a RiscOperators pointer.  It
 // simply uses a global variable to fill in the missing parameter.  This must be a global
 // variable because storing the smart pointer in the register state causes pointer reference
 // cycles that confuse the reference counter of the smart pointer and cause memory leaks.
 // A better solution should probably be identified.  This method does NOT alter the "create
 // on access" behaviors of the standard readRegister() method.
 //
 // The default value handed to RegisterStateGeneric::readRegister() is a fresh undefined value
 // of the same width as the requested register; the result is promoted to SymbolicValuePtr.
 // NOTE(review): the closing brace of this definition is not visible in this excerpt.
 SymbolicValuePtr read_register(RegisterDescriptor rd) {
   // Raw, non-owning pointer to the global operators object, cast to BaseRiscOperators*.
   BaseRiscOperators* ops = (BaseRiscOperators*)global_rops.get();
   return SymbolicValue::promote(RegisterStateGeneric::readRegister(
                                   rd, ops->undefined_(rd.get_nbits()), ops));