int main(int argc, char *argv[]) { ROSE_INITIALIZE; Diagnostics::initAndRegister(&::mlog, "tool"); Settings settings; P2::Engine engine; engine.doingPostAnalysis(false); // not needed by this tool std::vector<std::string> specimens = parseCommandLine(argc, argv, engine, settings); P2::Partitioner partitioner = engine.partition(specimens); if (settings.traceInsns || settings.traceSemantics) ::mlog[TRACE].enable(); // Find the string decoder. if (!partitioner.functionExists(settings.decoderVa)) { ::mlog[FATAL] <<"cannot find decoder function at " <<StringUtility::addrToString(settings.decoderVa) <<"\n"; exit(1); } if (settings.synthesized) { processSynthesizedCalls(partitioner, settings); } else { processExistingCalls(partitioner, settings); } }
static std::vector<SgAsmFunction*> loadFunctions(const std::vector<std::string> &specimen, P2::Engine &engine) { engine.reset(); // clear all but config properties engine.doingPostAnalysis(false); // not needed for this tool SgAsmBlock *gblock = engine.buildAst(specimen); // parse, load, link, disassemble, partition, build AST return SageInterface::querySubTree<SgAsmFunction>(gblock); // return just the functions }
int main(int argc, char *argv[]) { ROSE_INITIALIZE; Diagnostics::initAndRegister(&mlog, "tool"); // Parse the command-line to configure the partitioner engine, obtain the executable and its arguments, and generate a man // page, adjust global settings, etc. This demo tool has no switches of its own, which makes this even easier. For a // production tool, it's probably better to obtain the parser and register only those switches we need (e.g., no need for // AST generation switches since we skip that step), to set it up to use our own diagnostic stream instead of exceptions, // and to adjust this tool's synopsis in the documentation. Examples of all of these can be found in other demos. P2::Engine engine; engine.doingPostAnalysis(false); // no need for any post-analysis phases (user can override on cmdline) std::vector<std::string> command; try { command = engine.parseCommandLine(argc, argv, purpose, description).unreachedArgs(); } catch (const std::runtime_error &e) { mlog[FATAL] <<"invalid command-line: " <<e.what() <<"\n"; exit(1); } if (command.empty()) { mlog[FATAL] <<"no executable specified\n"; exit(1); } // Since we'll be tracing this program's execution, we might as well disassemble the process's memory directly. That way we // don't have to worry about ROSE mapping the specimen to the same virtual address as the kernel (which might be using // address randomization). We can stop short of generating the AST because we won't need it. BinaryAnalysis::BinaryDebugger debugger(command); std::string specimenResourceName = "proc:noattach:" + StringUtility::numberToString(debugger.isAttached()); P2::Partitioner partitioner = engine.partition(specimenResourceName); partitioner.memoryMap()->dump(std::cerr); // show the memory map as a debugging aid // Create a global control flow graph whose vertices are instructions from a global CFG whose verts are mostly basic // blocks. InsnCfg insnCfg; const P2::ControlFlowGraph &bbCfg = partitioner.cfg(); BOOST_FOREACH (const P2::ControlFlowGraph::Vertex &bbVert, bbCfg.vertices()) { if (P2::BasicBlock::Ptr bb = isBasicBlock(bbVert)) { const std::vector<SgAsmInstruction*> &insns = bb->instructions(); // Each basic block has one or more instructions that need to be inserted into our instruction control flow graph // with edges from each instruction to the next. The insertEdgeWithVertices automatically inserts missing // vertices, and doesn't insert vertices that already exist, making it convenient for this type of construction. for (size_t i=1; i<insns.size(); ++i) insnCfg.insertEdgeWithVertices(insns[i-1], insns[i]); // The final instruction of this block needs to flow into each of the initial instructions of the successor basic // blocks. Be careful that the successors are actually existing basic blocks. Note that in ROSE's global CFG, a // function call has at least two successors: the function being called (normal edges), and the address to which // the function returns ("callret" edges). There are other types of edges too, but we want only the normal edges. BOOST_FOREACH (const P2::ControlFlowGraph::Edge &bbEdge, bbVert.outEdges()) { if (bbEdge.value().type() == P2::E_NORMAL) { if (P2::BasicBlock::Ptr target = isBasicBlock(*bbEdge.target())) insnCfg.insertEdgeWithVertices(insns.back(), target->instructions()[0]); } } } } mlog[INFO] <<"block CFG: " <<StringUtility::plural(bbCfg.nVertices(), "vertices", "vertex") <<", " <<StringUtility::plural(bbCfg.nEdges(), "edges") <<"\n"; mlog[INFO] <<"insn CFG: " <<StringUtility::plural(insnCfg.nVertices(), "vertices", "vertex") <<", " <<StringUtility::plural(insnCfg.nEdges(), "edges") <<"\n"; // Run the executable to obtain a trace. We use the instruction pointer to look up a SgAsmInstruction in the insnCfg and // thus map the trace onto the instruction CFG. mlog[INFO] <<"running subordinate to obtain trace: " <<boost::join(command, " ") <<"\n"; std::set<rose_addr_t> missingAddresses; Trace trace; while (!debugger.isTerminated()) { // Find the instruction CFG vertex corresponding to the current execution address. It could be that the execution // address doesn't exist in the CFG, and this can be caused by a number of things including failure of ROSE to // statically find the address, dynamic libraries that weren't loaded statically, etc. rose_addr_t va = debugger.executionAddress(); InsnCfg::ConstVertexIterator vertex = insnCfg.findVertexKey(va); if (!insnCfg.isValidVertex(vertex)) { missingAddresses.insert(va); } else { trace.append(vertex->id()); } debugger.singleStep(); } mlog[INFO] <<"subordinate " <<debugger.howTerminated() <<"\n"; mlog[INFO] <<"trace length: " <<StringUtility::plural(trace.size(), "instructions") <<"\n"; Diagnostics::mfprintf(mlog[INFO])("overall burstiness: %6.2f%%\n", 100.0 * trace.burstiness()); mlog[INFO] <<"distinct executed addresses missing from CFG: " <<missingAddresses.size() <<"\n"; // Print a list of CFG vertices that were never reached. We use std::cout rather than diagnostics because this is one of // the main outputs of this demo. The "if" condition is constant time. BOOST_FOREACH (const InsnCfg::Vertex &vertex, insnCfg.vertices()) { if (!trace.exists(vertex.id())) std::cout <<"not executed: " <<unparseInstructionWithAddress(vertex.value()) <<"\n"; } // Print list of addresses that were executed but did not appear in the CFG BOOST_FOREACH (rose_addr_t va, missingAddresses) std::cout <<"missing address: " <<StringUtility::addrToString(va) <<"\n"; // Print those branch instructions that were executed by the trace but always took the same branch. Just to mix things up, // I'll iterate over the trace labels this time instead of the CFG vertices. Remember, the labels are the integer IDs of // the CFG vertices. The "if" condition executes in constant time, as does the next line. for (size_t i = 0; i < trace.nLabels(); ++i) { if (insnCfg.findVertex(i)->nOutEdges() > 1 && trace.successors(i).size() == 1) { SgAsmInstruction *successor = insnCfg.findVertex(*trace.successorSet(i).begin())->value(); std::cout <<"single flow: " <<unparseInstructionWithAddress(insnCfg.findVertex(i)->value()) <<" --> " <<unparseInstructionWithAddress(successor) <<"\n"; } } // Get a list of executed instructions that are branch points and sort them by their burstiness. The "if" condition is // constant time. std::vector<InsnTraceInfo> info; BOOST_FOREACH (const InsnCfg::Vertex &vertex, insnCfg.vertices()) { if (vertex.nOutEdges() > 1 && trace.exists(vertex.id())) info.push_back(InsnTraceInfo(vertex.value(), trace.burstiness(vertex.id()), trace.size(vertex.id()))); } std::sort(info.begin(), info.end()); std::reverse(info.begin(), info.end()); BOOST_FOREACH (const InsnTraceInfo &record, info) { Diagnostics::mfprintf(std::cout)("burstiness %6.2f%% %5zu hits at %s\n", 100.0*record.burstiness, record.nHits, unparseInstructionWithAddress(record.insn).c_str()); }