// Invokes the decoder once per string ID by building the call frame by hand, and prints every string it produces.
//
// For each ID in [1 .. 0x7a] the virtual machine is reset to the partitioner's memory map, the three decoder arguments and
// the return marker are pushed, and emulation starts at settings.decoderVa.  When emulation throws std::runtime_error the
// message is logged as a warning and that ID is skipped.  Otherwise the string found at the result buffer is written to
// std::cout as: string-<hex id> TAB "<C-escaped string>".
static void
processSynthesizedCalls(const P2::Partitioner &partitioner, const Settings &settings) {
    const size_t firstStringId = 1;
    const size_t lastStringId = 0x7a;

    // Scratch addresses, picked arbitrarily relative to the configured stack address.
    const rose_addr_t lengthVa = settings.stackVa + 0x1000; // needs at least 4 bytes of space
    const rose_addr_t resultVa = settings.stackVa + 0x1010; // needs at least 104 bytes of space

    // One virtual machine (concrete semantics) is reused for all calls; it is reset before each one.
    VirtualMachine vm(partitioner, settings);

    for (size_t strId = firstStringId; strId <= lastStringId; ++strId) {
        // Start from initial conditions and store the length limit where argument #2 will point.
        vm.reset(partitioner.memoryMap());
        vm.writeMemory(lengthVa, 0x68);

        // Build the call frame: arguments are pushed last-to-first, then the marker that tells the VM when to stop.
        vm.push(resultVa);                              // arg #3: address of the decoded string's buffer
        vm.push(lengthVa);                              // arg #2: address of the returned string length
        vm.push(strId);                                 // arg #1: string id number
        vm.push(vm.returnMarker());
        vm.setIp(settings.decoderVa);                   // starting position

        // Optionally show the call that is about to be emulated.
        if (settings.showCall) {
            std::cout <<"(*" <<StringUtility::addrToString(settings.decoderVa) <<")"
                      <<arguments(vm, settings.showCall) <<"\n";
        }

        // Emulate until the return marker is executed; a failure only costs us this one string.
        bool completed = true;
        try {
            vm.run(partitioner, settings);
        } catch (const std::runtime_error &e) {
            ::mlog[WARN] <<e.what() <<"\n";
            completed = false;
        }

        if (completed) {
            // The decoder left a NUL-terminated string in the result buffer.
            std::string decoded = vm.readString(resultVa);
            std::cout <<"string-" <<std::hex <<strId <<std::dec;
            std::cout <<"\t\"" <<StringUtility::cEscape(decoded) <<"\"\n";
        }
    }
}
// Emulates every statically discovered call to the decoder and prints the string each call decodes.
//
// For each function-call edge in the CFG that targets settings.decoderVa and originates from a basic block, the virtual
// machine is reset and started at the calling block's address.  Emulation then proceeds in two legs:
//   1. up to the decoder's entry, where the string id (argument 0) and result-buffer address (argument 2) are captured;
//   2. up to one of the call's return addresses, where the decoded string is read and written to std::cout as
//      <"string-" + id> TAB "<C-escaped string>".
// A std::runtime_error thrown by the emulator is logged as a warning, prefixed with the address from which the failing
// leg was started, and only that call site is abandoned.  A call site whose return address is reached without ever
// entering the decoder is likewise skipped with a warning instead of printing whatever lives at address zero.
//
// The placeholder vertex for settings.decoderVa must exist in the partitioner's CFG (asserted).
static void
processExistingCalls(const P2::Partitioner &partitioner, const Settings &settings) {
    P2::ControlFlowGraph::ConstVertexIterator decoderVertex = partitioner.findPlaceholder(settings.decoderVa);
    ASSERT_require(partitioner.cfg().isValidVertex(decoderVertex));

    VirtualMachine vm(partitioner, settings);

    // Find all calls to the decoder function
    BOOST_FOREACH (const P2::ControlFlowGraph::Edge &edge, decoderVertex->inEdges()) {
        if (edge.value().type() != P2::E_FUNCTION_CALL)
            continue;
        const P2::ControlFlowGraph::ConstVertexIterator caller = edge.source();
        if (caller->value().type() != P2::V_BASIC_BLOCK || caller->value().bblock()==NULL)
            continue;
        ::mlog[TRACE] <<"decoder called at " <<partitioner.edgeName(edge) <<"\n";

        // Reset the virtual machine and start at the beginning of the calling basic block
        vm.reset(partitioner.memoryMap());
        vm.setIp(caller->value().address());

        // Decoder return addresses: the basic blocks reached by this caller's call-return edges
        std::set<rose_addr_t> breakpoints;
        BOOST_FOREACH (const P2::ControlFlowGraph::ConstEdgeIterator &callret, P2::findCallReturnEdges(caller)) {
            const P2::ControlFlowGraph::ConstVertexIterator returnVertex = callret->target();
            if (returnVertex->value().type() == P2::V_BASIC_BLOCK)
                breakpoints.insert(returnVertex->value().address());
        }

        // Execute until the return address, stopping once on the way at the decoder's entry
        rose_addr_t resultVa = 0;
        std::string stringId;
        bool enteredDecoder = false;                        // set once the decoder's arguments have been captured
        rose_addr_t resumeVa = caller->value().address();   // where the current leg of emulation was started
        breakpoints.insert(settings.decoderVa);
        while (1) {
            rose_addr_t ip = 0;
            try {
                ip = vm.run(partitioner, settings, breakpoints);
            } catch (const std::runtime_error &e) {
                // "ip" was never assigned because run() threw, so report the address this leg started from instead of
                // a meaningless zero.
                ::mlog[WARN] <<StringUtility::addrToString(resumeVa) <<": " <<e.what() <<"\n";
                break;
            }
            resumeVa = ip;

            if (ip == settings.decoderVa) {
                // When entering the decoder, save [esp+0xc] since this is the address of the decoded string.  The entry
                // breakpoint is removed so that the next leg runs through the decoder to a return address.
                breakpoints.erase(settings.decoderVa);
                if (settings.showCall)
                    std::cout <<"(" <<partitioner.edgeName(edge) <<")" <<arguments(vm, settings.showCall) <<"\n";
                stringId = "string-" + StringUtility::numberToString(vm.argument(0)->get_number());
                resultVa = vm.argument(2)->get_number();
                enteredDecoder = true;
            } else if (!enteredDecoder) {
                // Emulation stopped somewhere else before the decoder was ever entered, so there is no result buffer
                // address or string id to report.
                ::mlog[WARN] <<StringUtility::addrToString(ip) <<": stopped before entering the decoder; call skipped\n";
                break;
            } else {
                // When leaving the decoder, print the decoded string
                std::string str = vm.readString(resultVa);
                std::cout <<stringId <<"\t\"" <<StringUtility::cEscape(str) <<"\"\n";
                break;
            }
        }
    }
}
// Example no. 3
int
main(int argc, char *argv[]) {
    ROSE_INITIALIZE;
    Diagnostics::initAndRegister(&mlog, "tool");

    // Parse the command-line to configure the partitioner engine, obtain the executable and its arguments, and generate a man
    // page, adjust global settings, etc. This demo tool has no switches of its own, which makes this even easier. For a
    // production tool, it's probably better to obtain the parser and register only those switches we need (e.g., no need for
    // AST generation switches since we skip that step), to set it up to use our own diagnostic stream instead of exceptions,
    // and to adjust this tool's synopsis in the documentation.  Examples of all of these can be found in other demos.
    P2::Engine engine;
    engine.doingPostAnalysis(false);                    // no need for any post-analysis phases (user can override on cmdline)
    std::vector<std::string> command;
    try {
        command = engine.parseCommandLine(argc, argv, purpose, description).unreachedArgs();
    } catch (const std::runtime_error &e) {
        mlog[FATAL] <<"invalid command-line: " <<e.what() <<"\n";
        exit(1);
    }
    if (command.empty()) {
        mlog[FATAL] <<"no executable specified\n";
        exit(1);
    }

    // Since we'll be tracing this program's execution, we might as well disassemble the process's memory directly. That way we
    // don't have to worry about ROSE mapping the specimen to the same virtual address as the kernel (which might be using
    // address randomization). We can stop short of generating the AST because we won't need it.
    BinaryAnalysis::BinaryDebugger debugger(command);
    std::string specimenResourceName = "proc:noattach:" + StringUtility::numberToString(debugger.isAttached());
    P2::Partitioner partitioner = engine.partition(specimenResourceName);
    partitioner.memoryMap()->dump(std::cerr);           // show the memory map as a debugging aid

    // Create a global control flow graph whose vertices are instructions from a global CFG whose verts are mostly basic
    // blocks.
    InsnCfg insnCfg;
    const P2::ControlFlowGraph &bbCfg = partitioner.cfg();
    BOOST_FOREACH (const P2::ControlFlowGraph::Vertex &bbVert, bbCfg.vertices()) {
        if (P2::BasicBlock::Ptr bb = isBasicBlock(bbVert)) {
            const std::vector<SgAsmInstruction*> &insns = bb->instructions();

            // Each basic block has one or more instructions that need to be inserted into our instruction control flow graph
            // with edges from each instruction to the next.  The insertEdgeWithVertices automatically inserts missing
            // vertices, and doesn't insert vertices that already exist, making it convenient for this type of construction.
            for (size_t i=1; i<insns.size(); ++i)
                insnCfg.insertEdgeWithVertices(insns[i-1], insns[i]);

            // The final instruction of this block needs to flow into each of the initial instructions of the successor basic
            // blocks. Be careful that the successors are actually existing basic blocks.  Note that in ROSE's global CFG, a
            // function call has at least two successors: the function being called (normal edges), and the address to which
            // the function returns ("callret" edges). There are other types of edges too, but we want only the normal edges.
            BOOST_FOREACH (const P2::ControlFlowGraph::Edge &bbEdge, bbVert.outEdges()) {
                if (bbEdge.value().type() == P2::E_NORMAL) {
                    if (P2::BasicBlock::Ptr target = isBasicBlock(*bbEdge.target()))
                        insnCfg.insertEdgeWithVertices(insns.back(), target->instructions()[0]);
                }
            }
        }
    }
    mlog[INFO] <<"block CFG: "
               <<StringUtility::plural(bbCfg.nVertices(), "vertices", "vertex") <<", "
               <<StringUtility::plural(bbCfg.nEdges(), "edges") <<"\n";
    mlog[INFO] <<"insn CFG:  "
               <<StringUtility::plural(insnCfg.nVertices(), "vertices", "vertex") <<", "
               <<StringUtility::plural(insnCfg.nEdges(), "edges") <<"\n";
    
    // Run the executable to obtain a trace.  We use the instruction pointer to look up a SgAsmInstruction in the insnCfg and
    // thus map the trace onto the instruction CFG.
    mlog[INFO] <<"running subordinate to obtain trace: " <<boost::join(command, " ") <<"\n";
    std::set<rose_addr_t> missingAddresses;
    Trace trace;
    while (!debugger.isTerminated()) {
        // Find the instruction CFG vertex corresponding to the current execution address. It could be that the execution
        // address doesn't exist in the CFG, and this can be caused by a number of things including failure of ROSE to
        // statically find the address, dynamic libraries that weren't loaded statically, etc.
        rose_addr_t va = debugger.executionAddress();
        InsnCfg::ConstVertexIterator vertex = insnCfg.findVertexKey(va);
        if (!insnCfg.isValidVertex(vertex)) {
            missingAddresses.insert(va);
        } else {
            trace.append(vertex->id());
        }
        debugger.singleStep();
    }
    mlog[INFO] <<"subordinate " <<debugger.howTerminated() <<"\n";
    mlog[INFO] <<"trace length: " <<StringUtility::plural(trace.size(), "instructions") <<"\n";
    Diagnostics::mfprintf(mlog[INFO])("overall burstiness: %6.2f%%\n", 100.0 * trace.burstiness());
    mlog[INFO] <<"distinct executed addresses missing from CFG: " <<missingAddresses.size() <<"\n";

    // Print a list of CFG vertices that were never reached.  We use std::cout rather than diagnostics because this is one of
    // the main outputs of this demo. The "if" condition is constant time.
    BOOST_FOREACH (const InsnCfg::Vertex &vertex, insnCfg.vertices()) {
        if (!trace.exists(vertex.id()))
            std::cout <<"not executed: " <<unparseInstructionWithAddress(vertex.value()) <<"\n";
    }

    // Print list of addresses that were executed but did not appear in the CFG
    BOOST_FOREACH (rose_addr_t va, missingAddresses)
        std::cout <<"missing address: " <<StringUtility::addrToString(va) <<"\n";

    // Print those branch instructions that were executed by the trace but always took the same branch.  Just to mix things up,
    // I'll iterate over the trace labels this time instead of the CFG vertices.  Remember, the labels are the integer IDs of
    // the CFG vertices. The "if" condition executes in constant time, as does the next line.
    for (size_t i = 0; i < trace.nLabels(); ++i) {
        if (insnCfg.findVertex(i)->nOutEdges() > 1 && trace.successors(i).size() == 1) {
            SgAsmInstruction *successor = insnCfg.findVertex(*trace.successorSet(i).begin())->value();
            std::cout <<"single flow: " <<unparseInstructionWithAddress(insnCfg.findVertex(i)->value())
                      <<" --> " <<unparseInstructionWithAddress(successor) <<"\n";
        }
    }

    // Get a list of executed instructions that are branch points and sort them by their burstiness.  The "if" condition is
    // constant time.
    std::vector<InsnTraceInfo> info;
    BOOST_FOREACH (const InsnCfg::Vertex &vertex, insnCfg.vertices()) {
        if (vertex.nOutEdges() > 1 && trace.exists(vertex.id()))
            info.push_back(InsnTraceInfo(vertex.value(), trace.burstiness(vertex.id()), trace.size(vertex.id())));
    }
    std::sort(info.begin(), info.end());
    std::reverse(info.begin(), info.end());
    BOOST_FOREACH (const InsnTraceInfo &record, info) {
        Diagnostics::mfprintf(std::cout)("burstiness %6.2f%% %5zu hits at %s\n",
                                         100.0*record.burstiness, record.nHits,
                                         unparseInstructionWithAddress(record.insn).c_str());
    }