Esempio n. 1
0
//! [basicReadTest]
// Reads one memory word and the stack pointer register three times through symbolic RiscOperators -- with an initial
// state attached, after clearing the current state, and after detaching the initial state -- and asserts that every
// read yields the same value as the first one.  The initial state is printed along the way.
static void
basicReadTest(const P2::Partitioner &partitioner) {
    const std::string rule(40, '=');
    std::cout <<"\n" <<rule <<"\nbasicReadTest\n" <<rule <<"\n";

    SymbolicSemantics::Formatter indent;
    indent.set_line_prefix("  ");

    // Build the operators, then hand them a clone of their own (still empty) state to serve as the initial state.
    const RegisterDictionary *registers = partitioner.instructionProvider().registerDictionary();
    const RegisterDescriptor sp = partitioner.instructionProvider().stackPointerRegister();
    const std::string spName = RegisterNames(registers)(sp);
    BaseSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(registers);
    ops->currentState()->memoryState()->set_byteOrder(partitioner.instructionProvider().defaultByteOrder());
    BaseSemantics::StatePtr initial = ops->currentState()->clone();
    ops->initialState(initial);                         // lazily evaluated initial state
    std::cout <<"Initial state before reading:\n" <<(*initial+indent);

    // First read: neither the memory cell nor the register exists yet, so the supplied defaults are expected back and
    // the locations should now appear in both the current state and the initial state.
    BaseSemantics::SValuePtr address = ops->number_(32, 0);
    BaseSemantics::SValuePtr memDefault1 = ops->number_(32, 0x11223344);
    BaseSemantics::SValuePtr memValue1 = ops->readMemory(RegisterDescriptor(), address, memDefault1, ops->boolean_(true));
    BaseSemantics::SValuePtr regDefault1 = ops->undefined_(sp.get_nbits());
    BaseSemantics::SValuePtr regValue1 = ops->readRegister(sp, regDefault1);

    std::cout <<"Initial state after reading " <<*memValue1 <<" from address " <<*address <<"\n";
    std::cout <<"and " <<*regValue1 <<" from " <<spName <<"\n";
    std::cout <<(*initial+indent);
    ASSERT_always_require(memValue1->must_equal(memDefault1));
    ASSERT_always_require(regValue1->must_equal(regDefault1));

    // Second read: swap in an emptied copy of the current state.  The values must still match the first read even
    // though different defaults are offered this time.
    BaseSemantics::StatePtr emptied = ops->currentState()->clone();
    emptied->clear();
    ops->currentState(emptied);
    BaseSemantics::SValuePtr memDefault2 = ops->number_(32, 0x55667788);
    BaseSemantics::SValuePtr memValue2 = ops->readMemory(RegisterDescriptor(), address, memDefault2, ops->boolean_(true));
    BaseSemantics::SValuePtr regDefault2 = ops->undefined_(sp.get_nbits());
    BaseSemantics::SValuePtr regValue2 = ops->readRegister(sp, regDefault2);

    std::cout <<"Initial state after reading " <<*memValue2 <<" from address " <<*address <<"\n";
    std::cout <<"and " <<*regValue2 <<" from " <<spName <<"\n";
    std::cout <<(*initial+indent);
    ASSERT_always_require(memValue1->must_equal(memValue2));
    ASSERT_always_require(regValue1->must_equal(regValue2));

    // Third read: detach the initial state by installing a null pointer.  The previous read left the values in the
    // current state, so the results are still expected to match.
    ops->initialState(BaseSemantics::StatePtr());
    BaseSemantics::SValuePtr memDefault3 = ops->number_(32, 0x99aabbcc);
    BaseSemantics::SValuePtr memValue3 = ops->readMemory(RegisterDescriptor(), address, memDefault3, ops->boolean_(true));
    BaseSemantics::SValuePtr regDefault3 = ops->undefined_(sp.get_nbits());
    BaseSemantics::SValuePtr regValue3 = ops->readRegister(sp, regDefault3);
    ASSERT_always_require(memValue1->must_equal(memValue3));
    ASSERT_always_require(regValue1->must_equal(regValue3));
}
 // Sets up concrete-semantics operators and an instruction dispatcher for the partitioner's architecture, and caches
 // the instruction pointer, stack pointer, and stack segment registers.  Throws std::runtime_error when the
 // partitioner returns no dispatcher.
 VirtualMachine(const P2::Partitioner &partitioner, const Settings &settings)
     : wordSize_(0), stackVa_(settings.stackVa), returnMarker_(0xbeef0967) {
     const RegisterDictionary *registers = partitioner.instructionProvider().registerDictionary();
     ops_ = ConcreteSemantics::RiscOperators::instance(registers);

     // The dispatcher is driven either by ops_ directly or, when semantic tracing is requested, by trace operators
     // constructed from ops_.
     BaseSemantics::RiscOperatorsPtr dispatchOps = ops_;
     if (settings.traceSemantics)
         dispatchOps = TraceSemantics::RiscOperators::instance(ops_);
     cpu_ = partitioner.newDispatcher(dispatchOps);
     if (!cpu_)
         throw std::runtime_error("no semantics for architecture");

     regSs_ = partitioner.instructionProvider().stackSegmentRegister();
     regSp_ = partitioner.instructionProvider().stackPointerRegister();
     regIp_ = partitioner.instructionProvider().instructionPointerRegister();
     wordSize_ = regIp_.get_nbits();                    // word size is taken from the instruction pointer's width
 }
 // Processes instructions one at a time, starting at the address currently held in the instruction pointer register.
 // Returns the instruction pointer as soon as it equals the return marker or, once at least one instruction has been
 // processed, a member of `breakpoints`.  Throws std::runtime_error if no instruction exists at the current address
 // or if settings.insnLimit instructions are processed without stopping.
 rose_addr_t run(const P2::Partitioner &partitioner, const Settings &settings,
                 const std::set<rose_addr_t> &breakpoints = std::set<rose_addr_t>()) {
     size_t nExecuted = 0;
     while (nExecuted < settings.insnLimit) {
         const rose_addr_t va = ops_->readRegister(regIp_)->get_number();

         // A breakpoint at the very first address is ignored so that execution can resume from a breakpoint.
         const bool atReturnMarker = va == returnMarker_;
         const bool atBreakpoint = nExecuted > 0 && breakpoints.count(va) != 0;
         if (atReturnMarker || atBreakpoint)
             return va;

         SgAsmInstruction *insn = partitioner.instructionProvider()[va];
         if (!insn)
             throw std::runtime_error("no instruction at " + StringUtility::addrToString(va));
         if (settings.traceInsns) {
             if (::mlog[TRACE])
                 ::mlog[TRACE] <<unparseInstructionWithAddress(insn) <<"\n";
         }
         cpu_->processInstruction(insn);
         ++nExecuted;
     }
     throw std::runtime_error("execution limit exceeded ("+StringUtility::plural(settings.insnLimit, "instructions")+")");
 }
Esempio n. 4
0
// Run natively and return number of instructions executed and reason for termination.
//
// The specimen named `specimenName` is started under a BinaryDebugger.  If `initVa` holds an address, the child first
// runs until it reaches that address.  Execution is then redirected to `randomAddress` and the child is single-stepped
// until one of the following happens:
//   - system calls are disallowed and the instruction at the execution address is missing or unknown,
//   - system calls are disallowed and the instruction is x86 INT or SYSENTER,
//   - the child terminates,
//   - settings.maxInsns is non-zero and that many instructions have been stepped.
// The returned pair is (number of single steps completed, human-readable termination reason).
//
// The process exits with status 1 if the child terminates before control is gained or before reaching `initVa`.
static std::pair<size_t, std::string>
runNatively(const Settings &settings, const std::string &specimenName, Sawyer::Optional<rose_addr_t> initVa,
            const P2::Partitioner &partitioner, rose_addr_t randomAddress) {
    Stream debug(mlog[DEBUG]);

    BinaryDebugger debugger(specimenName);
    if (debugger.isTerminated()) {
        mlog[FATAL] <<"child " <<debugger.isAttached() <<" " <<debugger.howTerminated() <<" before we could gain control\n";
        exit(1);
    }

    // Allow child to run until we hit the desired address.
    if (initVa) {
        debugger.setBreakpoint(*initVa);
        debugger.runToBreakpoint();
        debugger.clearBreakpoint(*initVa);
        if (debugger.isTerminated()) {
            mlog[FATAL] <<"child " <<debugger.isAttached() <<" " <<debugger.howTerminated()
                        <<" without reaching " <<addrToString(*initVa) <<"\n";
            exit(1);
        }
    }

    // Show specimen address map so we can verify that the Linux loader used the same addresses we used.
    // We could have shown it earlier, but then we wouldn't have seen the results of dynamic linking.
    if (settings.showMaps) {
        // Flush before spawning "cat": the child writes straight to the underlying stdout, so anything still sitting
        // in std::cout's buffer would otherwise show up after the map instead of before it.
        std::cout <<"Linux loader specimen memory map:\n" <<std::flush;
        system(("cat /proc/" + numberToString(debugger.isAttached()) + "/maps").c_str());
    }

    // Branch to the starting address
    debug <<"branching to " <<addrToString(randomAddress) <<"\n";
    debugger.executionAddress(randomAddress);

    std::string terminationReason;
    size_t nExecuted = 0;                               // number of instructions executed
    while (1) {
        // Check for and avoid system calls if necessary
        if (!settings.allowSyscalls) {
            rose_addr_t eip = debugger.executionAddress();
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(partitioner.instructionProvider()[eip]);
            if (!insn || insn->isUnknown()) {
                // A null pointer means there is no instruction at this address; a non-null pointer that reaches this
                // branch is an instruction flagged as unknown.
                if (settings.showInsnTrace)
                    std::cout <<"at " <<addrToString(eip) <<": " <<(insn?"unknown":"no") <<" instruction\n";
                terminationReason = "executed at " + addrToString(eip) +" which we don't know about";
                break;
            }
            if (settings.showInsnTrace)
                std::cout <<"at " <<unparseInstructionWithAddress(insn) <<"\n";
            if (insn->get_kind() == x86_int || insn->get_kind() == x86_sysenter) {
                terminationReason = "tried to execute a system call";
                break;
            }
        }

        // Single-step
        if (debug)
            debug <<"single stepping at " <<addrToString(debugger.executionAddress()) <<"\n";
        debugger.singleStep();
        if (debugger.isTerminated()) {
            terminationReason = debugger.howTerminated();
            break;
        }
        ++nExecuted;

        // A limit of zero means "no limit".
        if (settings.maxInsns!=0 && nExecuted>=settings.maxInsns) {
            terminationReason = "reached instruction limit";
            break;
        }
    }
    debugger.terminate();
    return std::make_pair(nExecuted, terminationReason);
}
Esempio n. 5
0
int
main(int argc, char *argv[]) {

    // This paragraph initializes the ROSE library, generates the man page for this tool, does command-line parsing for quite a
    // few switches including "--help", loads various specimen resources (ELF/PE, running process, raw memory dumps, etc),
    // disassembles, and partitions.  We could have called Engine::frontend() and done it all in one function call, but then we
    // wouldn't have a Partitioner2::Partitioner object that we need below.
    std::string purpose = "demonstrate inter-function disassembly";
    std::string description =
        "Disassembles and partitions the specimen(s), then tries to disassemble things between the functions.";
    P2::Engine engine;
    std::vector<std::string> specimens = engine.parseCommandLine(argc, argv, purpose, description).unreachedArgs();
    P2::Partitioner partitioner = engine.partition(specimens);

    // The partitioner's address usage map (AUM) describes what part of memory has been disassembled as instructions or
    // data. We're interested in the unused parts between the lowest and highest disassembled addresses, so we loop over those
    // parts.  The hull() is the entire used interval -- lowest to highest addresses used regardless of the unused areas in the
    // middle.  An AddressInterval evaluated in boolean context returns false if it's empty.
    rose_addr_t va = partitioner.aum().hull().least();
    while (AddressInterval unused = partitioner.aum().nextUnused(va)) {

        // Is the unused area beyond the last thing compiled?  We're only interested in the stuff between functions.  This
        // check also means that unused.greatest()+1 will not overflow, which simplifies later code. Overflows are easy to
        // trigger when the specimen's word size is the same as ROSE's word size.
        if (unused.least() > partitioner.aum().hull().greatest())
            break;

        // The unused address might be in the middle of some very large unmapped area of memory, or perhaps in an area that
        // doesn't have execute permission (the partitioner will only disassemble at addresses that we've marked as
        // executable). A naive implementation would just increment to the next address and try again, but that could take a
        // very long time.  This "if" statement will give us the next executable address that falls within the unused interval
        // if possible. The address is assigned to "va" if possible.
        if (!engine.memoryMap().within(unused).require(MemoryMap::EXECUTABLE).next().assignTo(va)) {
            va = unused.greatest() + 1;                 // won't overflow because of check above
            continue;
        }

        // "va" now points to an executable address that the partitioner doesn't know about yet.
        ASSERT_require(engine.memoryMap().at(va).require(MemoryMap::EXECUTABLE).exists());
        ASSERT_forbid(partitioner.aum().instructionExists(va));
        std::cout <<"unused address " <<StringUtility::addrToString(va) <<"\n";

        // Cause the partitioner to discover (disassemble) one basic block. This doesn't add the basic block to the
        // partitioner or change the partitioner in any way.  If the BB isn't something we want to keep then just forget about
        // it and garbage collection will reclaim the memory.
        P2::BasicBlock::Ptr bb = partitioner.discoverBasicBlock(va);
        if (!isGoodBasicBlock(bb)) {
            ++va;
            continue;
        }
        std::cout <<"  disassembled " <<bb->printableName() <<"\n";

        // Inform the partitioner that we wish to keep this BB.
        partitioner.attachBasicBlock(bb);

        // This BB was not reachable by any previous CFG edge, therefore it doesn't belong to any function. In order for it to
        // show up in the eventual AST we need to add it to some function (the ROSE AST has a requirement that every basic
        // block belongs to a function, although the partitioner can easily cope with the other case). The easiest way in this
        // situation is to just create a new function whose entry block is this BB.  Creating a function doesn't modify the
        // partitioner in any way, so we need to also attach the function to the partitioner.
        P2::Function::Ptr function = P2::Function::instance(va, SgAsmFunction::FUNC_USERDEF);
        function->insertBasicBlock(va);                 // allowed only before attaching function to partitioner
        partitioner.attachOrMergeFunction(function);

        // This basic block might be the first block of a whole bunch that are connected by as yet undiscovered CFG edges. We
        // can recursively discover and attach all those blocks with one Engine method.  There are also Partitioner methods to
        // do similar things, but they're lower level.
        engine.runPartitionerRecursive(partitioner);
    }

    // We've probably added a bunch more functions and basic blocks to the partitioner, but we haven't yet assigned the basic
    // blocks discovered by Engine::runPartitionerRecursive to any functions.  We might also need to assign function labels
    // from ELF/PE information, re-run some analysis, etc., so do that now.
    engine.runPartitionerFinal(partitioner);

    // Most ROSE analysis is performed on an abstract syntax tree, so generate one.  If the specime is an ELF or PE container
    // then the returned global block will also be attached somewhere below a SgProject node, otherwise the returned global
    // block is the root of the AST and there is no project (e.g., like when the specimen is a raw memory dump).
    SgAsmBlock *gblock = P2::Modules::buildAst(partitioner, engine.interpretation());

    // Generate an assembly listing. These unparser properties are all optional, but they result in more informative assembly
    // listings.
    AsmUnparser unparser;
    unparser.set_registers(partitioner.instructionProvider().registerDictionary());
    unparser.add_control_flow_graph(ControlFlow().build_block_cfg_from_ast<ControlFlow::BlockGraph>(gblock));
    unparser.staticDataDisassembler.init(engine.disassembler());
    unparser.unparse(std::cout, gblock);
}