/************************************************************************** * Main function. This function is run on each node that is being traversed * in the graph. For each node, we determine the successors and check * if those have been previously seen. If yes, a cycle may exist. **************************************************************************/ bool CompassAnalyses::CycleDetection::Traversal::run(string& name, SgGraphNode* node, SgGraphNode* previous){ // check known function calls and resolve variables ROSE_ASSERT(node); //cerr << " cycledetection->run " << node->get_name() << endl; SgAsmFunction* func = isSgAsmFunction(node->get_SgNode()); if (func) { // if the node is a function, we clear the visited nodes // this should speed up our search visited.clear(); return false; } successors.clear(); ROSE_ASSERT(vizzGraph); vizzGraph->getSuccessors(node, successors); vector<SgGraphNode*>::iterator succ = successors.begin(); for (;succ!=successors.end();++succ) { // for each successor do... 
SgGraphNode* next = *succ; // if the node is an instruction, we check if it was visited // if not, we add it to the visited set, otherwise a cycle is present std::set<SgGraphNode*>::iterator it =visited.find(next); if (it!=visited.end()) { // found this node in visited list SgAsmX86Instruction* nodeSg = isSgAsmX86Instruction(node->get_SgNode()); SgAsmX86Instruction* nextSg = isSgAsmX86Instruction(next->get_SgNode()); if (debug) { std::string outputText = "Found possible cycle between "; outputText+=stringifyX86InstructionKind(nodeSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nodeSg->get_address()) + ") and "; outputText+=stringifyX86InstructionKind(nextSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nextSg->get_address()) + ")"; std::cerr << outputText << std::endl; output->addOutput(new CheckerOutput(nodeSg, outputText)); } bool validCycle = checkIfValidCycle(node,next); if (validCycle) { std::string outputText = "Found cycle between "; outputText+=stringifyX86InstructionKind(nodeSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nodeSg->get_address()) + ") and "; outputText+=stringifyX86InstructionKind(nextSg->get_kind()) + " ("; outputText+=RoseBin_support::HexToString(nextSg->get_address()) + ")"; std::cerr << outputText << std::endl; output->addOutput(new CheckerOutput(nodeSg, outputText)); cycleFound[node]=next; } else { if (debug) std::cerr << "This is not a cyclic node " << std::endl; } } } visited.insert(node); return false; }
int main(int argc, char *argv[]) { // Parse command-line int argno=1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { if (!strcmp(argv[argno], "--")) { ++argno; break; } else { std::cerr <<argv[0] <<": unrecognized switch: " <<argv[argno] <<"\n"; exit(1); } } if (argno+1!=argc) { std::cerr <<"usage: " <<argv[0] <<" [SWITCHES] [--] SPECIMEN\n"; exit(1); } std::string specimen_name = argv[argno++]; // Open the file rose_addr_t start_va = 0; MemoryMap map; size_t file_size = map.insertFile(specimen_name, start_va); map.at(start_va).limit(file_size).changeAccess(MemoryMap::EXECUTABLE, 0); // Try to disassemble every byte, and print the CALL/FARCALL targets size_t ninsns=0, nerrors=0; Disassembler *disassembler = new DisassemblerX86(4); for (rose_addr_t offset=0; offset<file_size; ++offset) { try { rose_addr_t insn_va = start_va + offset; SgAsmX86Instruction *insn = isSgAsmX86Instruction(disassembler->disassembleOne(&map, insn_va)); if (insn && (x86_call==insn->get_kind() || x86_farcall==insn->get_kind())) { ++ninsns; rose_addr_t target_va; if (insn->getBranchTarget(&target_va)) std::cout <<StringUtility::addrToString(insn_va) <<": " <<StringUtility::addrToString(target_va) <<"\n"; } } catch (const Disassembler::Exception &e) { ++nerrors; } } std::cerr <<specimen_name <<": " <<ninsns <<" instructions; " <<nerrors <<" errors\n"; return 0; }
/** Returns a string containing the specified operand.
 *
 *  The x86 instruction that owns @p expr is located by walking up the parent chain starting at @p expr itself; it is
 *  an assertion failure if no such instruction exists. The work is then delegated to the four-argument overload,
 *  whose last argument tells it whether the owning instruction is LEA. */
std::string
unparseX86Expression(SgAsmExpression *expr, const AsmUnparser::LabelMap *labels,
                     const RegisterDictionary *registers) {
    /* Find the instruction with which this expression is associated. */
    SgAsmX86Instruction *owner = NULL;
    SgNode *cursor = expr;
    while (cursor != NULL && owner == NULL) {
        owner = isSgAsmX86Instruction(cursor);
        cursor = cursor->get_parent();
    }
    ASSERT_not_null(owner);

    const bool ownerIsLea = (x86_lea == owner->get_kind());
    return unparseX86Expression(expr, labels, registers, ownerIsLea);
}
// see base class bool SgAsmX86Instruction::isFunctionCallFast(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va) { if (insns.empty()) return false; SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back()); if (!last) return false; // Quick method based only on the kind of instruction if (x86_call==last->get_kind() || x86_farcall==last->get_kind()) { last->getBranchTarget(target); if (return_va) *return_va = last->get_address() + last->get_size(); return true; } return false; }
/**
 * Reports whether `end` is reached from `start` by repeatedly following
 * cfgBinFlowOutEdge(). The walk is abandoned (result false) when there is no
 * further x86 instruction, or when a CALL or RET other than `end` itself is
 * encountered.
 *
 * start, end - must not be NULL
 *
 * Returns true only if the walk actually arrives at `end` (trivially so when
 * both nodes resolve to the same instruction). Returns false when either
 * node does not resolve to an x86 instruction.
 */
bool
RoseBin_DataFlowAnalysis::existsPath(SgGraphNode* start, SgGraphNode* end) {
  // make sure its not a SgAsmCall and the next node is a DirectedControlFlowEdge
  ROSE_ASSERT(g_algo->info);
  ROSE_ASSERT(start);
  ROSE_ASSERT(end);

  // NOTE(review): the graph nodes themselves are cast here, whereas other
  // code working on SgGraphNode casts node->get_SgNode(). Confirm whether
  // these casts can ever succeed; if not, this function always returns false.
  SgAsmX86Instruction* next = isSgAsmX86Instruction(start);
  SgAsmX86Instruction* endAsm = isSgAsmX86Instruction(end);
  if (!next || !endAsm)
    return false;

  // NOTE(review): nothing stops this walk if the flow-out chain loops without
  // ever reaching `end` -- confirm cfgBinFlowOutEdge() cannot form a cycle.
  while (next != endAsm) {
    next = isSgAsmX86Instruction(next->cfgBinFlowOutEdge(g_algo->info));
    if (next == NULL)
      return false;                 // chain ended before reaching `end`

    const bool leavesFunction = next->get_kind() == x86_call || next->get_kind() == x86_ret;
    if (leavesFunction && next != endAsm)
      return false;                 // do not walk across calls/returns
  }

  // The loop exits normally only when next == endAsm.
  return true;
}
/** Add edges to graph from functions that call system calls to system calls.
 *
 *  The first 1000 vertexes (0 to 999) in the graph are reserved for system calls, which is many more than the actual
 *  system calls in linux.
 *
 *  For every "int 0x80" instruction found in a function, the enclosing basic block is executed semantically up to
 *  (but not including) that instruction. If the value of eax is known at that point and lies in the reserved range,
 *  an edge from the function's vertex to the system call's vertex is added. Values outside the reserved range are
 *  ignored so that a bogus eax value can neither create an edge to an unrelated vertex nor force the graph to grow.
 *
 *  @param G              graph that receives the edges
 *  @param all_functions  functions to scan; a function's index in this vector is used as its vertex number
 *
 *  NOTE(review): the caller's vertex number is its index in all_functions, which starts at zero and therefore
 *  overlaps the reserved system-call range -- confirm this is the intended vertex numbering. */
void
add_syscall_edges(DirectedGraph* G, std::vector<SgAsmFunction*>& all_functions)
{
    typedef PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State,
                                             PartialSymbolicSemantics::ValueType> Policy;
    typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;

    // Number of vertexes reserved for system calls (see above).
    static const int nReservedSyscallVertexes = 1000;

    // Detect all system calls and add an edge from the function to the system call
    for (unsigned int caller_id = 0; caller_id < all_functions.size(); ++caller_id) {
        SgAsmFunction *func = all_functions[caller_id];
        std::vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(func);

        for (std::vector<SgAsmInstruction*>::iterator inst_it = insns.begin(); inst_it != insns.end(); ++inst_it) {
            // On linux system calls are always interrupts and all interrupts are system calls
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(*inst_it);
            if (insn == NULL || insn->get_kind() != x86_int)
                continue;
            SgAsmBlock *block = SageInterface::getEnclosingNode<SgAsmBlock>(insn);
            if (block == NULL)
                continue;

            // Only "int 0x80" (exactly one operand, an integer constant equal to 0x80) is of interest.
            const SgAsmExpressionPtrList &opand_list = insn->get_operandList()->get_operands();
            SgAsmExpression *expr = opand_list.size()==1 ? opand_list[0] : NULL;
            if (!expr || expr->variantT() != V_SgAsmIntegerValueExpression ||
                0x80 != isSgAsmIntegerValueExpression(expr)->get_value())
                continue;

            // Position of the interrupt within its block; equals stmts.size() if it is not found there.
            const SgAsmStatementPtrList &stmts = block->get_statementList();
            size_t int_n = 0;
            while (int_n < stmts.size() && isSgAsmInstruction(stmts[int_n]) != insn)
                ++int_n;

            // Semantically execute the basic block to find out which system call was called
            Policy policy;
            Semantics semantics(policy);
            try {
                semantics.processBlock(stmts, 0, int_n);
                if (policy.readRegister<32>("eax").is_known()) {
                    const int nr = policy.readRegister<32>("eax").known_value();
                    if (nr >= 0 && nr < nReservedSyscallVertexes)
                        boost::add_edge(caller_id, nr, *G);
                }
            } catch (const Semantics::Exception&) {
                // best effort: blocks that cannot be executed semantically contribute no edge
            } catch (const Policy::Exception&) {
                // best effort: see above
            }
        }
    }
}
// Sets up the translation policy:
//  - initializes the original and new register maps and the (initially false) valid-IP condition;
//  - creates a 32-bit "stepCount" state variable that is incremented each step until it equals
//    maxNumStepsToFindError + 1, where it stays;
//  - resetState holds while stepCount is zero;
//  - errorsEnabled holds while stepCount >= minNumStepsToFindError and, unless maxNumStepsToFindError is zero,
//    stepCount <= maxNumStepsToFindError;
//  - collects from the project: function start addresses, start addresses of blocks whose first statement is an
//    x86 instruction, the addresses following each x86 CALL instruction, and the addresses of all x86 instructions.
BtorTranslationPolicy::BtorTranslationPolicy(BtorTranslationHooks* hooks, uint32_t minNumStepsToFindError,
                                             uint32_t maxNumStepsToFindError, SgProject* proj)
  : problem(), hooks(hooks), regdict(NULL)
{
  assert (minNumStepsToFindError >= 1); // Can't find an error on the first step
  assert (maxNumStepsToFindError < 0xFFFFFFFFU); // Prevent overflows
  assert (minNumStepsToFindError <= maxNumStepsToFindError || maxNumStepsToFindError == 0);

  makeRegMap(origRegisterMap, "");
  makeRegMapZero(newRegisterMap);
  isValidIp = false_();
  validIPs.clear();

  // The nested expressions below are intentionally left in one piece: splitting them into separate statements
  // would pin down an order in which the sub-expressions are built.
  Comp stepCount = problem.build_var(32, "stepCount_saturating_at_" +
                                     boost::lexical_cast<std::string>(maxNumStepsToFindError + 1));
  addNext(stepCount,
          ite(problem.build_op_eq(stepCount, number<32>(maxNumStepsToFindError + 1)),
              number<32>(maxNumStepsToFindError + 1),
              problem.build_op_inc(stepCount)));
  resetState = problem.build_op_eq(stepCount, zero(32));
  errorsEnabled =
    problem.build_op_and(
      problem.build_op_ugte(stepCount, number<32>(minNumStepsToFindError)),
      (maxNumStepsToFindError == 0 ?
       true_() :
       problem.build_op_ulte(stepCount, number<32>(maxNumStepsToFindError))));

  // Start address of every function
  {
    vector<SgNode*> funcNodes = NodeQuery::querySubTree(proj, V_SgAsmFunction);
    for (vector<SgNode*>::iterator fi = funcNodes.begin(); fi != funcNodes.end(); ++fi)
      functionStarts.push_back(isSgAsmFunction(*fi)->get_address());
  }

  // Start address of every block that begins with an x86 instruction
  {
    vector<SgNode*> blockNodes = NodeQuery::querySubTree(proj, V_SgAsmBlock);
    for (vector<SgNode*>::iterator bi = blockNodes.begin(); bi != blockNodes.end(); ++bi) {
      SgAsmBlock* block = isSgAsmBlock(*bi);
      if (block->get_statementList().empty())
        continue;
      if (isSgAsmX86Instruction(block->get_statementList().front()))
        blockStarts.push_back(block->get_address());
    }
  }

  // Address following every CALL instruction
  {
    vector<SgNode*> insnNodes = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (vector<SgNode*>::iterator ii = insnNodes.begin(); ii != insnNodes.end(); ++ii) {
      SgAsmX86Instruction* insn = isSgAsmX86Instruction(*ii);
      if (insn->get_kind() == x86_call)
        returnPoints.push_back(insn->get_address() + insn->get_raw_bytes().size());
    }
  }

  // Address of every x86 instruction
  {
    vector<SgNode*> insnNodes = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (vector<SgNode*>::iterator ii = insnNodes.begin(); ii != insnNodes.end(); ++ii)
      validIPs.push_back(isSgAsmX86Instruction(*ii)->get_address());
  }
}
// For an x86 CALL instruction with a known branch target: finds the nearest enclosing block or function of the
// instruction located at the target address and records the address following the call as one of that
// block's/function's return targets. All other nodes are ignored.
virtual void visit(SgNode* n) {
  SgAsmX86Instruction* callInsn = isSgAsmX86Instruction(n);
  if (!callInsn || callInsn->get_kind() != x86_call)
    return;

  uint64_t targetVa;
  if (!callInsn->getBranchTarget(&targetVa))
    return;                                     // target not known

  SgAsmInstruction* targetInsn = info->getInstructionAtAddress(targetVa);
  if (!targetInsn)
    return;                                     // no instruction at the target address

  // Climb from the target instruction to the first ancestor that is a block or a function.
  SgNode* owner = targetInsn;
  for (; owner != NULL; owner = owner->get_parent()) {
    if (isSgAsmBlock(owner) || isSgAsmFunction(owner))
      break;
  }
  if (!owner)
    return;

  const uint64_t returnVa = callInsn->get_address() + callInsn->get_raw_bytes().size();
  info->returnTargets[isSgAsmStatement(owner)].insert(returnVa);
}
// Run natively and return number of instructions executed and reason for termination. static std::pair<size_t, std::string> runNatively(const Settings &settings, const std::string &specimenName, Sawyer::Optional<rose_addr_t> initVa, const P2::Partitioner &partitioner, rose_addr_t randomAddress) { Stream debug(mlog[DEBUG]); BinaryDebugger debugger(specimenName); if (debugger.isTerminated()) { mlog[FATAL] <<"child " <<debugger.isAttached() <<" " <<debugger.howTerminated() <<" before we could gain control\n"; exit(1); } // Allow child to run until we hit the desired address. if (initVa) { debugger.setBreakpoint(*initVa); debugger.runToBreakpoint(); debugger.clearBreakpoint(*initVa); if (debugger.isTerminated()) { mlog[FATAL] <<"child " <<debugger.isAttached() <<" " <<debugger.howTerminated() <<" without reaching " <<addrToString(*initVa) <<"\n"; exit(1); } } // Show specimen address map so we can verify that the Linux loader used the same addresses we used. // We could have shown it earlier, but then we wouldn't have seen the results of dynamic linking. 
if (settings.showMaps) { std::cout <<"Linux loader specimen memory map:\n"; system(("cat /proc/" + numberToString(debugger.isAttached()) + "/maps").c_str()); } // Branch to the starting address debug <<"branching to " <<addrToString(randomAddress) <<"\n"; debugger.executionAddress(randomAddress); std::string terminationReason; size_t nExecuted = 0; // number of instructions executed while (1) { // Check for and avoid system calls if necessary if (!settings.allowSyscalls) { rose_addr_t eip = debugger.executionAddress(); SgAsmX86Instruction *insn = isSgAsmX86Instruction(partitioner.instructionProvider()[eip]); if (!insn || insn->isUnknown()) { if (settings.showInsnTrace) std::cout <<"at " <<addrToString(eip) <<": " <<(insn?"no":"unknown") <<" instruction\n"; terminationReason = "executed at " + addrToString(eip) +" which we don't know about"; break; } if (settings.showInsnTrace) std::cout <<"at " <<unparseInstructionWithAddress(insn) <<"\n"; if (insn->get_kind() == x86_int || insn->get_kind() == x86_sysenter) { terminationReason = "tried to execute a system call"; break; } } // Single-step if (debug) debug <<"single stepping at " <<addrToString(debugger.executionAddress()) <<"\n"; debugger.singleStep(); if (debugger.isTerminated()) { terminationReason = debugger.howTerminated(); break; } ++nExecuted; if (settings.maxInsns!=0 && nExecuted>=settings.maxInsns) { terminationReason = "reached instruction limit"; break; } } debugger.terminate(); return std::make_pair(nExecuted, terminationReason); }
/*********************************************************************** * (10/31/07) tps: Traverses the graph for each node in rootNodes * and applies to each node the evaluate function * which can be either def_use, variable detection or emulation * Each node in the controlflow of rootNode is traversed (forward) * and only if the hasChanged function returns false, the algorithm * comes to a fixpoint ***********************************************************************/ void RoseBin_DataFlowAnalysis::traverseGraph(vector <SgGraphNode*>& rootNodes, RoseBin_DataFlowAbstract* analysis, bool interprocedural){ if (RoseBin_support::DEBUG_MODE_MIN()) cerr << " traverseGraph : debug: " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE()) << " debug_min : " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE_MIN()) << endl; // Number of functions traversed int funcNr =0; // --------------------------------------------------------------------- // stores the nodes that still needs to be visited // vector<SgGraphNode*> worklist; deque<SgGraphNode*> worklist; nodeHashSetType worklist_hash; // a vector of successors of the current node vector<SgGraphNode*> successors; // --------------------------------------------------------------------- // iterate through all functions vector<SgGraphNode*>::iterator it = rootNodes.begin(); for (; it!=rootNodes.end();++it) { // current node SgGraphNode* node = *it; string func_name = vizzGraph->getProperty(SgGraph::name, node); RoseBin_support::checkText(func_name); funcNr++; if (RoseBin_support::DEBUG_MODE()) { cout << "\n\n ----------- dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+ RoseBin_support::ToString(rootNodes.size())+") : " << func_name << " visited size : " << visited.size() << " total visited nodes : " << nrOfNodesVisited << endl; // debug } if (RoseBin_support::DEBUG_MODE_MIN()) { cerr << " ----------- dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+ 
RoseBin_support::ToString(rootNodes.size())+") : " << func_name << " visited size : " << visited.size() << " total visited nodes : " << nrOfNodesVisited << " def size : " << analysis->getDefinitionSize() << endl; } // indicates whether the current value for this node has changed bool hasChanged=false; // pushback into worklist and visited list worklist.push_back(node); worklist_hash.insert(node); visited.insert(node); visitedCounter[node] = 1; vector <SgGraphNode*> pre; // while there are still graph nodes in the worklist do while (worklist.size()>0) { nrOfNodesVisited++; // the new node is taken from the back of the worklist //node = worklist.back(); //worklist.pop_back(); node = worklist.front(); worklist.pop_front(); worklist_hash.erase(node); // get the successors of the current node and store in successors vector string name = vizzGraph->getProperty(SgGraph::name, node); //if (RoseBin_support::DEBUG_MODE_MIN() && node) // if (node->get_SgNode()) // cerr << node->get_SgNode()->class_name() << " " << node << " " << node->get_name() << endl; if (RoseBin_support::DEBUG_MODE_MIN() && node) { SgAsmInstruction* instr = isSgAsmInstruction(node->get_SgNode()); if (instr) { SgAsmFunction* funcParent = isSgAsmFunction(instr->get_parent()); if (funcParent) { string parent = funcParent->get_name(); cout << " ---- analysis of node in function : " << parent << " defs " << analysis->getDefinitionSize() << " visited : " << RoseBin_support::ToString(visitedCounter[node]) << endl; } } } if (RoseBin_support::DEBUG_MODE()) cout << "\n evaluating: " << name << endl; // do something with the current node // e.g. 
checkVariables(name, node); SgGraphNode* nodeBefore= NULL; BeforeMapType::const_iterator it = nodeBeforeMap.find(node); if (it!=nodeBeforeMap.end()) nodeBefore = it->second; // successor vector is empty on each new node successors.clear(); ROSE_ASSERT(isSgIncidenceDirectedGraph(vizzGraph)); isSgIncidenceDirectedGraph(vizzGraph)->getSuccessors(node, successors); hasChanged = analysis->run(name, node, nodeBefore); // append the successors to the worklist if (RoseBin_support::DEBUG_MODE()) cout << ">> getting successors (" << successors.size() << ") for : " << name << endl; // if (successors.size()==0) // cout << "PROBLEM ..................................................... : " << endl; vector<SgGraphNode*>::iterator succ = successors.begin(); for (;succ!=successors.end();++succ) { // for each successor do... SgGraphNode* next = *succ; SgAsmX86Instruction* nodeN = isSgAsmX86Instruction(node->get_SgNode()); //if (!nodeN) continue; SgAsmX86Instruction* nextN = isSgAsmX86Instruction(next->get_SgNode()); //if (!nextN) continue; string name_n = vizzGraph->getProperty(SgGraph::name, next); bool call = false; bool exceptionCallNext = false; if (nextN) exceptionCallNext = exceptionCall(nextN->get_kind() == x86_call ? nextN : 0); bool exceptionCallNode = false; if (nodeN) exceptionCallNode = exceptionCall(nodeN->get_kind() == x86_call ? 
nodeN : 0); if (RoseBin_support::DEBUG_MODE()) std::cout << " exceptionCallNode : " << exceptionCallNode << " exceptionCallNext : " << exceptionCallNext << endl; // if function call is call to malloc we have an exception and follow the call path if ((exceptionCallNode && !exceptionCallNext)) { } else if ( //if ( (nodeN && nodeN->get_kind() == x86_call) || (nextN && nextN->get_kind() == x86_ret) ) call = true; //bool sameParent = analysis->sameParents(node, next); bool validNode=false; if (g_algo->isValidCFGEdge(next, node) || exceptionCallNode) validNode = true; // debug ------------------------ if (RoseBin_support::DEBUG_MODE()) { string nodeBeforeStr=""; if (nodeBefore) nodeBeforeStr= nodeBefore->get_name(); cout << " DEBUG : >>>>>>>> previous node " << nodeBeforeStr << " This node : " << name << " next node : " << name_n << " ** validNode : " << RoseBin_support::resBool(validNode) << endl; } // ---------------------------------- if (( interprocedural==false && !call) // || (interprocedural==true && validNode)) { if (visited.find(next)==visited.end()) { // if the successor is not yet visited // mark as visited and put into worklist if (RoseBin_support::DEBUG_MODE()) cout << " never visited next node before... 
" << name_n << " interprocedural : " << interprocedural << " call : " << call << endl; if (RoseBin_support::DEBUG_MODE()) cout << "adding to visited : " << name_n << endl; visited.insert(next); nodeBeforeMap[next]=node; visitedCounter[next]=1; vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(1)); if (!containsHash(worklist_hash,next)) { // add next node only if the next node if (RoseBin_support::DEBUG_MODE()) cout << "adding to worklist: " << name_n << endl; worklist.push_back(next); worklist_hash.insert(next); } } else { // if the successor has been visited, we need to check if it has changed // if it has not, we continue, else we need to push it back to the worklist int nr = visitedCounter[next]; if (RoseBin_support::DEBUG_MODE()) cout << " visited next node before... " << RoseBin_support::ToString(nr) << " Changed == " << RoseBin_support::resBool(hasChanged) << endl; if (hasChanged) { visitedCounter[next]=++nr; vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(nr)); if (RoseBin_support::DEBUG_MODE()) cout << " has changed : " << RoseBin_support::resBool(hasChanged) << " -- interprocedural : " << RoseBin_support::resBool(interprocedural) << " -- Call : " << RoseBin_support::resBool(call) << " ------> new number: " << RoseBin_support::ToString(nr) << " -- contained in hash? : " << RoseBin_support::resBool(containsHash(worklist_hash,next)) << " ---- nr of Defs: " << RoseBin_support::ToString(analysis->getDefinitionSize()) << " ---- nr of Use: " << RoseBin_support::ToString(analysis->getUsageSize()) << endl; if (interprocedural || (!interprocedural && !call)){ //sameParent)) { //!call && ) { if (!containsHash(worklist_hash,next)) { worklist_hash.insert(next); worklist.push_back(next); if (RoseBin_support::DEBUG_MODE()) cout << " adding to worklist: " << name_n << endl; } } } else if (RoseBin_support::DEBUG_MODE()) cout << " has NOT changed. 
" << endl; //else we continue with the next node } } } // for } // while worklist.size()>0 } // for rootNodes }
/* Analyze a single interpretation a block at a time.
 *
 * All x86 instructions of the interpretation that belong to a function not marked FUNC_LEFTOVERS are collected in an
 * address-ordered map. Repeatedly, the lowest-address remaining instruction starts a new "block": a fresh set of
 * RISC operators and an x86 dispatcher are created, and instructions are processed one after another, printing each
 * instruction and the resulting state, until the block ends (see the comment at the inner loop). Every processed
 * instruction is removed from the map, so each instruction is analyzed at most once. */
static void
analyze_interp(SgAsmInterpretation *interp)
{
    /* Get the set of all instructions except instructions that are part of left-over blocks. */
    struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmX86Instruction*> {
        void visit(SgNode *node) {
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(node);
            // NOTE(review): insn may be NULL here; this relies on getEnclosingNode() coping with that -- confirm.
            SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn);
            if (!func)
                return;
            if (0 != (func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
                return;
            insert(std::make_pair(insn->get_address(), insn));
        }
    } pending;
    pending.traverse(interp, postorder);

    while (!pending.empty()) {
        std::cout <<"=====================================================================================\n"
                  <<"=== Starting a new basic block ===\n"
                  <<"=====================================================================================\n";

        // The block starts at the lowest-address instruction that has not been processed yet.
        AllInstructions::iterator cursor = pending.begin();
        SgAsmX86Instruction *insn = cursor->second;
        pending.erase(cursor);

        BaseSemantics::RiscOperatorsPtr operators = make_ops();
        BaseSemantics::Formatter formatter;
        formatter.set_suppress_initial_values();
        formatter.set_show_latest_writers(do_usedef);

        BaseSemantics::DispatcherPtr dispatcher;
        if (!do_trace) {
            dispatcher = DispatcherX86::instance(operators, 32);
        } else {
            // Enable RiscOperators tracing, but turn off a bunch of info that makes comparisons with a known good
            // answer difficult.
            Sawyer::Message::PrefixPtr prefix = Sawyer::Message::Prefix::instance();
            prefix->showProgramName(false);
            prefix->showThreadId(false);
            prefix->showElapsedTime(false);
            prefix->showFacilityName(Sawyer::Message::Prefix::NEVER);
            prefix->showImportance(false);
            Sawyer::Message::UnformattedSinkPtr sink = Sawyer::Message::StreamSink::instance(std::cout);
            sink->prefix(prefix);
            sink->defaultPropertiesNS().useColor = false;
            TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators);
            trace->stream().destination(sink);
            trace->stream().enable();
            dispatcher = DispatcherX86::instance(trace, 32);
        }
        operators->set_solver(make_solver());

        // The fpstatus_top register must have a concrete value if we'll use the x86 floating-point stack
        // (e.g., st(0))
        const RegisterDescriptor *fpTopReg = regdict->lookup("fpstatus_top");
        if (fpTopReg) {
            BaseSemantics::SValuePtr st_top = operators->number_(fpTopReg->get_nbits(), 0);
            operators->writeRegister(*fpTopReg, st_top);
        }

#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        BaseSemantics::SValuePtr orig_esp;
        if (do_test_subst) {
            // Only request the orig_esp if we're going to use it later because it causes an esp value to be
            // instantiated in the state, which is printed in the output, and thus changes the answer.
            BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())
                ->initialize_large();
            orig_esp = operators->readRegister(*regdict->lookup("esp"));
            std::cout <<"Original state:\n" <<*operators;
        }
#endif

        /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the
         * value of the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or
         * which has already been processed. */
        for (;;) {
            /* Analyze current instruction */
            std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n";
            try {
                dispatcher->processInstruction(insn);
#               if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/
                show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output
#               else
                std::cout <<(*operators + formatter);
#               endif
            } catch (const BaseSemantics::Exception &e) {
                std::cout <<e <<"\n";
            }

            /* Never follow CALL instructions */
            const bool isCall = insn->get_kind()==x86_call || insn->get_kind()==x86_farcall;
            if (isCall)
                break;

            /* Get next instruction of this block */
            BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip"));
            if (!ip->is_number())
                break;
            const rose_addr_t next_addr = ip->get_number();
            cursor = pending.find(next_addr);
            if (cursor == pending.end())
                break;
            insn = cursor->second;
            pending.erase(cursor);
        }

        // Test substitution on the symbolic state.
#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        if (do_test_subst) {
            SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp);
            BaseSemantics::SValuePtr newvar = operators->undefined_(32);
            newvar->set_comment("frame_pointer");
            SymbolicSemantics::SValuePtr to =
                SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4)));
            std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n";
            SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to);
            std::cout <<"Substituted state:\n" <<(*operators+formatter);
        }
#endif
    }
}
int main(int argc, char** argv) { std::string binaryFilename = (argc >= 1 ? argv[argc-1] : "" ); std::vector<std::string> newArgv(argv,argv+argc); newArgv.push_back("-rose:output"); newArgv.push_back(binaryFilename+"-binarySemantics.C"); SgProject* proj = frontend(newArgv); ROSE_ASSERT (proj); SgSourceFile* newFile = isSgSourceFile(proj->get_fileList().front()); ROSE_ASSERT(newFile != NULL); SgGlobal* g = newFile->get_globalScope(); ROSE_ASSERT (g); //I am doing some experimental work to enable functions in the C representation //Set this flag to true in order to enable that work bool enable_functions = true; //Jeremiah did some work to enable a simplification and normalization of the //C representation. Enable this work by setting this flag to true. bool enable_normalizations = false; vector<SgNode*> asmFiles = NodeQuery::querySubTree(proj, V_SgAsmGenericFile); ROSE_ASSERT (asmFiles.size() == 1); if( enable_functions == false) { //Representation of C normalizations withotu functions SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration("run", SgTypeVoid::createType(), buildFunctionParameterList(), g); appendStatement(decl, g); SgBasicBlock* body = decl->get_definition()->get_body(); // ROSE_ASSERT(isSgAsmFile(asmFiles[0])); // X86CTranslationPolicy policy(newFile, isSgAsmFile(asmFiles[0])); X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0])); ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL); policy.switchBody = buildBasicBlock(); removeDeadStores(policy.switchBody,policy); SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody); ROSE_ASSERT(isSgBasicBlock(sw->get_body())); SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw); appendStatement(whileStmt, body); policy.whileBody = sw; X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy); //AS FIXME: This query gets noting in the form in the repository. 
Doing this hack since we only //have one binary file anyways. //vector<SgNode*> instructions = NodeQuery::querySubTree(asmFiles[0], V_SgAsmX86Instruction); vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction); std::cout << "Instruction\n"; for (size_t i = 0; i < instructions.size(); ++i) { SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]); ROSE_ASSERT (insn); try { t.processInstruction(insn); } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) { std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n"; } } if ( enable_normalizations == true ) { //Enable normalizations of C representation //This is done heuristically where some steps //are repeated. It is not clear which order is //the best { plugInAllConstVarDefs(policy.switchBody,policy) ; simplifyAllExpressions(policy.switchBody); removeIfConstants(policy.switchBody); removeDeadStores(policy.switchBody,policy); removeUnusedVariables(policy.switchBody); } { plugInAllConstVarDefs(policy.switchBody,policy) ; simplifyAllExpressions(policy.switchBody); removeIfConstants(policy.switchBody); removeDeadStores(policy.switchBody,policy); } removeUnusedVariables(policy.switchBody); } }else{ //Experimental changes to introduce functions into the C representation //When trying to add function I get that symbols are not defined //Iterate over the functions separately vector<SgNode*> asmFunctions = NodeQuery::querySubTree(proj, V_SgAsmFunction); for(size_t j = 0; j < asmFunctions.size(); j++ ) { SgAsmFunction* binFunc = isSgAsmFunction( asmFunctions[j] ); // Some functions (probably just one) are generated to hold basic blocks that could not // be assigned to a particular function. This happens when the Disassembler is overzealous // and the Partitioner cannot statically determine where the block belongs. The name of // one such function is "***uncategorized blocks***". 
[matzke 2010-06-29] if ((binFunc->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) continue; //Some functions may be unnamed so we need to generate a name for those std::string funcName; if (binFunc->get_name().size()==0) { char addr_str[64]; sprintf(addr_str, "0x%"PRIx64, binFunc->get_statementList()[0]->get_address()); funcName = std::string("my_") + addr_str;; } else { funcName = "my" + binFunc->get_name(); } //Functions can have illegal characters in their name. Need to replace those characters for ( int i = 0 ; i < funcName.size(); i++ ) { char& currentCharacter = funcName.at(i); if ( currentCharacter == '.' ) currentCharacter = '_'; } SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration(funcName, SgTypeVoid::createType(), buildFunctionParameterList(), g); appendStatement(decl, g); SgBasicBlock* body = decl->get_definition()->get_body(); X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0])); ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL); policy.switchBody = buildBasicBlock(); SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody); SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw); appendStatement(whileStmt, body); policy.whileBody = sw; X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy); vector<SgNode*> instructions = NodeQuery::querySubTree(binFunc, V_SgAsmX86Instruction); for (size_t i = 0; i < instructions.size(); ++i) { SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]); if( insn->get_kind() == x86_nop ) continue; ROSE_ASSERT (insn); try { t.processInstruction(insn); } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) { std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n"; } } } //addDirectJumpsToSwitchCases(policy); } proj->get_fileList().erase(proj->get_fileList().end() - 1); // Remove binary file before calling backend // AstTests::runAllTests(proj); //Compile 
the resulting project return backend(proj); }
std::string RoseBin_GMLGraph::getInternalNodes( SgGraphNode* node, bool forward_analysis, SgAsmNode* internal) { SgAsmInstruction* bin_inst = isSgAsmInstruction(internal); SgAsmX86Instruction* control = isSgAsmX86Instruction(internal); // get the unparser string! string eval = ""; string name="noname"; string regs = ""; // specifies that this node has no destination address nodest_jmp = false; // specifies that there is a node that has a call error (calling itself) error =false; // specifies a call to a unknown location nodest_call = false; // specifies where its an int instruction interrupt = false; // specifies whether a node has been visited (dfa) checked = false; dfa_standard = false; dfa_resolved_func =false; dfa_unresolved_func=false; string dfa_info=""; string dfa_variable=""; string visitedCounter=""; map < int , string> node_p = node->get_properties(); map < int , string>::iterator prop = node_p.begin(); string type = "removed";//node->get_type(); for (; prop!=node_p.end(); ++prop) { int addr = prop->first; // cerr << " dot : property for addr : " << addr << " and node " << hex_address << endl; if (addr==SgGraph::name) name = prop->second; else if (addr==SgGraph::eval) eval = prop->second; else if (addr==SgGraph::regs) regs = prop->second; else if (addr==SgGraph::nodest_jmp) nodest_jmp = true; else if (addr==SgGraph::itself_call) error = true; else if (addr==SgGraph::nodest_call) nodest_call = true; else if (addr==SgGraph::interrupt) interrupt = true; else if (addr==SgGraph::done) checked = true; else if (addr==SgGraph::dfa_standard) dfa_standard = true; else if (addr==SgGraph::dfa_resolved_func) { dfa_resolved_func = true; dfa_info = prop->second; } else if (addr==SgGraph::dfa_unresolved_func) { dfa_unresolved_func = true; dfa_info = prop->second; } else if (addr==SgGraph::dfa_variable) { dfa_variable = prop->second; } else if (addr==SgGraph::visitedCounter) { visitedCounter = prop->second; } else { cerr << " *************** dotgraph: unknown property 
found :: " << addr << endl; } } if (bin_inst) { type += " " + bin_inst->class_name(); } string add = ""; string typeNode = ""; if (control->get_kind() == x86_call || control->get_kind() == x86_ret) { typeNode += " Type_ \"[ 67108864 FUNCTION_NODE ]\" \n"; if (nodest_call) add = " FF9900 "; else if (error) add = " 3399FF "; else add = " FFCCFF "; } else if (control->get_kind() == x86_jmp) { typeNode += " Type_ \"[ 67108864 FILE_NODE ]\" \n"; if (nodest_jmp) add = " FF0000 "; else add = " 00FF00 "; } else if (x86InstructionIsControlTransfer(control)) { typeNode += " Type_ \"[ 67108864 CLASS_NODE ]\" \n"; if (control->get_kind() == x86_int) add = " 0000FF "; else add = " 008800 "; } else { add = " FFFF66 "; } if (checked) add = " 777777 "; if (dfa_standard) add = " FFFF00 "; if (dfa_resolved_func) add = " 00FF00 "; if (dfa_unresolved_func) add = " FF0000 "; string nodeStr = ""; regs+=eval; // cant get the extra register info printed in gml format // because multiline is not supported? (tps 10/18/07) name = name/*+" " +regs + " " +dfa_variable+" "+"vis:"+visitedCounter */; nodeStr= " label \"" + name+"\"\n "+typeNode; int length = name.length(); SgAsmX86Instruction* pre = NULL; // isSgAsmX86Instruction(bin_inst->cfgBinFlowInEdge()); if (pre==NULL) { // first node nodeStr +=" first_ 1 \n"; } else { if (pre->get_kind() == x86_ret || pre->get_kind() == x86_hlt) { // this instruction must be suspicious add =" 0000FF "; } } nodeStr += " Node_Color_ " + add + " \n"; nodeStr += " graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"rectangle\" fill \"#" + add + "\" ]\n"; return nodeStr; }
bool CompassAnalyses::BinaryInterruptAnalysis::Traversal::run(string& name, SgGraphNode* node, SgGraphNode* previous){ // check known function calls and resolve variables ROSE_ASSERT(node); vector<uint64_t> val_rax, val_rbx, val_rcx, val_rdx ; std::vector<uint64_t> pos_rax, pos_rbx, pos_rcx, pos_rdx; uint64_t fpos_rax, fpos_rbx, fpos_rcx, fpos_rdx=0xffffffff; SgAsmX86Instruction* asmNode = isSgAsmX86Instruction(node->get_SgNode()); if (asmNode) { // cerr << " Interrupt Analysis :: checking node " << RoseBin_support::HexToString(asmNode->get_address()) // << " - " << toString(asmNode->get_kind()) << endl; // ANALYSIS 1 : INTERRUPT DETECTION ------------------------------------------- // verify all interrupts and make sure they do what one expects them to do. if (asmNode->get_kind() == x86_int) { if (RoseBin_support::DEBUG_MODE()) cout << " " << name << " : found int call " << endl; // need to resolve rax, rbx, rcx, rdx // therefore get the definition for each getValueForDefinition(val_rax, pos_rax, fpos_rax, node, std::make_pair(x86_regclass_gpr, x86_gpr_ax)); getValueForDefinition(val_rbx, pos_rbx, fpos_rbx, node, std::make_pair(x86_regclass_gpr, x86_gpr_bx)); getValueForDefinition(val_rcx, pos_rcx, fpos_rcx, node, std::make_pair(x86_regclass_gpr, x86_gpr_cx)); getValueForDefinition(val_rdx, pos_rdx, fpos_rdx, node, std::make_pair(x86_regclass_gpr, x86_gpr_dx)); string int_name = "unknown "; DataTypes data_ebx = unknown; DataTypes data_ecx = unknown; DataTypes data_edx = unknown; bool ambigious_inst=false; if (val_rax.size()>1) ambigious_inst = true; else if (val_rax.size()==1) { uint64_t rax = *(val_rax.begin()); int_name = getIntCallName(rax, data_ebx, data_ecx, data_edx, val_rbx, val_rcx, val_rdx, pos_rbx, pos_rcx, pos_rdx, fpos_rbx, fpos_rcx, fpos_rdx); ambigious_inst = false; } if (ambigious_inst) { string value = ""; vector<uint64_t>::iterator it = val_rax.begin(); for (;it!=val_rax.end();++it) { string i_name = getIntCallName(*it, data_ebx, data_ecx, 
data_edx, val_rbx, val_rcx, val_rdx, pos_rbx, pos_rcx, pos_rdx, fpos_rbx, fpos_rcx, fpos_rdx); value +="rAX:"+RoseBin_support::HexToString(*it)+" "+i_name+" "; // createVariable(fpos_rax, pos_rax, "rax", data_ebx, "rax", 0, val_rax,false); } //cerr << " DataFlow::VariableAnalysis . Ambigious INT call: " << // vizzGraph->getProperty(SgGraph::name, node) << " - " << value << endl; value = "PROBLEM: " + value; node->append_properties(SgGraph::dfa_unresolved_func,value); } else { // we know what INT instruction it is string t_ebx = RoseBin_support::getTypeName(data_ebx); string t_ecx = RoseBin_support::getTypeName(data_ecx); string t_edx = RoseBin_support::getTypeName(data_edx); int_name += " ("+t_ebx+","+t_ecx+","+t_edx+")"; //if (RoseBin_support::DEBUG_MODE()) // cout << " found INT call : " << value << " .. " << int_name << endl; node->append_properties(SgGraph::dfa_variable,int_name); } } } return false; }
// Analyze the allocation type and location of this-pointers. void ThisPtrUsage::analyze_alloc() { // Set the allocation type to unknown by default. alloc_type = AllocUnknown; // Use the type returned from get_memory_type() to determine our allocation type. Perhaps in // the future thse can be fully combined, but this was what was required to support non-zero // ESP initialization. MemoryType type = this_ptr->get_memory_type(); if (type == StackMemLocalVariable) { alloc_type = AllocLocalStack; } else if (type == StackMemParameter) { alloc_type = AllocParameter; } else if (type == UnknownMem) { // This code is really a function of get_memory_type() still being broken. // If we're not a constant address (global), skip this function. if (!this_ptr->is_number() || this_ptr->get_width() > 64) return; // This is a bit hackish, but also reject obviously invalid constants. size_t num = this_ptr->get_number(); // Here's a place where we're having the age old debate about how to tell what is an // address with absolutely no context. Cory still likes consistency. Others have // suggested that we should be using the memory map despite all of it's flaws... // if (!global_descriptor_set->memory_in_image(num)) return; if (num < 0x10000 || num > 0x7FFFFFFF) return; // Otherwise, we look like a legit global address? alloc_type = AllocGlobal; } else { return; } // It's not actually clear why we're looking for the allocation instruction. Perhaps we // should quit doing this and just use the tests above. At least for now though, looking for // the common pattern allows us to detect some unusual situations. Wes' previous logic also // filtered by requiring LEA instructions, so we're preserving that limit. // This code previously relied on the first-creator-of-read feature of modifiers, which we're // retiring. 
Even though it's not clear that this code is required, I've updated it to use // latest definers in place of modifiers, only we haven't switch to just using the _latest_ // definers yet so it needed some additional filtering to determine which definer to use. PDG* pdg = fd->get_pdg(); // Shouldn't happen. if (!pdg) return; const DUAnalysis& du = pdg->get_usedef(); // This is hackish too. We need it for debugging so that we have some way of reporting which // instructions are involved in the confusion. SgAsmInstruction* first_insn = NULL; for (SgAsmInstruction *ginsn : this_ptr->get_defining_instructions()) { // For debugging so that we have an address. if (first_insn == NULL) first_insn = ginsn; SgAsmX86Instruction *insn = isSgAsmX86Instruction(ginsn); if (insn == NULL) continue; // Since definers isn't the "latest" definer just yet, filter our subsequent analysis to // the one that wrote the this-ptr value. This is hackish and wrong because we shouldn't // have to filter. But maybe once we can upgrade , this code can go away... auto writes = du.get_writes(insn); bool found_write = false; for (const AbstractAccess& aa : writes) { if (aa.value->get_expression()->isEquivalentTo(this_ptr->get_expression())) { found_write = true; break; } } // If this instruction didn't write the this-ptr, it's not the one that we're looking for. if (!found_write) continue; // If we're here, this should be the instruction that defined the this-pointer. // If we're a local variable and we've found an LEA instruction, that's probably the one // we're looking for. if (alloc_type == AllocLocalStack && insn->get_kind() == x86_lea) { alloc_insn = ginsn; GDEBUG << "Stack allocated object: " << *(this_ptr->get_expression()) << " at " << debug_instruction(alloc_insn) << LEND; return; } // For global variables, the typical cases are move or push instructions. 
else if (alloc_type == AllocGlobal && (insn->get_kind() == x86_mov || insn->get_kind() == x86_push)) { alloc_insn = ginsn; GDEBUG << "Global static object: " << *(this_ptr->get_expression()) << " at " << debug_instruction(alloc_insn) << LEND; return; } // For passed parameters, we should probably be looking for the instruction that reads the // undefined this-pointer value. This code was sufficient at the time... else if (alloc_type == AllocParameter) { GDEBUG << "Passed object: " << *(this_ptr->get_expression()) << LEND; return; } } if (first_insn == NULL) { GDEBUG << "No allocation instruction found for " << *(this_ptr->get_expression()) << " alloc_type=" << Enum2Str(alloc_type) << LEND; } else { GDEBUG << "No allocation instruction found for " << *(this_ptr->get_expression()) << " alloc_type=" << Enum2Str(alloc_type) << " at " << debug_instruction(first_insn) << LEND; } // Based on evaluation of the test suite, if we've reached this point, something's gone // wrong, and it's very unclear if we're really the allocation type we thought we were. // Perhaps it's better to be cautious and retract our allocation type claims. We could also // choose to return the best guess of our type here, by removing this line. //alloc_type = AllocUnknown; return; }
void RoseBin_GMLGraph::printEdges( VirtualBinCFG::AuxiliaryInformation* info, bool forward_analysis, std::ofstream& myfile, SgDirectedGraphEdge* edge) { // traverse edges and visualize results of graph SgGraphNode* source = isSgGraphNode(edge->get_from()); SgGraphNode* target = isSgGraphNode(edge->get_to()); ROSE_ASSERT(source); ROSE_ASSERT(target); string edgeLabel=""; map < int , string> edge_p = edge->get_properties(); map < int , string>::iterator prop = edge_p.begin(); //string type = node->get_type(); for (; prop!=edge_p.end(); ++prop) { int addr = prop->first; // cerr << " dot : property for addr : " << addr << " and node " << hex_address << endl; if (addr==SgGraph::edgeLabel) edgeLabel = prop->second; if (edgeLabel.length()>1) if (edgeLabel[0]!='U') edgeLabel=""; } SgAsmStatement* binStat_s = isSgAsmStatement(source->get_SgNode()); SgAsmStatement* binStat_t = isSgAsmStatement(target->get_SgNode()); if (binStat_s==NULL || binStat_t==NULL) { //cerr << "binStat_s==NULL || binStat_t==NULL" << endl; } else { map <SgAsmStatement*, int>::iterator it_s = nodesMap.find(binStat_s); map <SgAsmStatement*, int>::iterator it_t = nodesMap.find(binStat_t); int pos_s=0; int pos_t=0; if (it_s!=nodesMap.end()) pos_s = it_s->second; if (it_t!=nodesMap.end()) pos_t = it_t->second; if (pos_s==0 || pos_t==0) { //cerr << " GMLGraph edge, node == 0 " << endl; } string output = "edge [\n label \""+edgeLabel+"\"\n source " + RoseBin_support::ToString(pos_s) + "\n target " + RoseBin_support::ToString(pos_t) + "\n"; // ------------------ SgAsmX86Instruction* contrl = isSgAsmX86Instruction(source->get_SgNode()); string add = ""; if (contrl && x86InstructionIsControlTransfer(contrl)) { // the source is a control transfer function // we use either dest or dest_list // dest is used for single destinations during cfg run // dest_list is used for a static cfg image vector<VirtualBinCFG::CFGEdge> outEdges = contrl->cfgBinOutEdges(info); SgAsmX86Instruction* dest = 
isSgAsmX86Instruction(outEdges.empty() ? NULL : outEdges.back().target().getNode()); bool dest_list_empty = true; if (contrl->get_kind() == x86_ret) dest_list_empty = outEdges.empty(); SgAsmInstruction* nextNode = isSgAsmInstruction(target->get_SgNode()); ROSE_ASSERT(nextNode); if (dest) { //string type = "jmp_if"; if (dest==nextNode) { if (contrl->get_kind() == x86_call || contrl->get_kind() == x86_ret) { add += " graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#FF0000\" ] ]\n"; } else if (contrl->get_kind() == x86_jmp) { add += " graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#FF0000\" ] ]\n"; } else add += " graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#00FF00\" ] ]\n"; } else if (forward_analysis && (contrl->get_kind() == x86_call || contrl->get_kind() == x86_jmp)) { add += " graphics [ type \"line\" arrow \"last\" fill \"#FFFF00\" ] ]\n"; } } else if (contrl->get_kind() == x86_ret ) { //&& dest_list_empty) { // in case of a multiple return add += " graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#3399FF\" ] ]\n"; } } string type_n = getProperty(SgGraph::type, edge); if (type_n==RoseBin_support::ToString(SgGraph::usage)) { add = " graphics [ type \"line\" style \"dashed\" arrow \"last\" fill \"#000000\" ] ]\n"; } // skip the function declaration edges for now // bool blankOutput=false; //if (skipFunctions) //if (isSgAsmFunction(binStat_s)) // blankOutput=true; if (skipInternalEdges) { SgAsmX86Instruction* contrl = isSgAsmX86Instruction(source->get_SgNode()); if (contrl && x86InstructionIsControlTransfer(contrl) && contrl->get_kind() != x86_ret) { if (contrl->get_kind() == x86_call) output += " Edge_Color_ FF0000 \n Type_ \"[ 33554432 CALL_EDGE ]\" \n"; else if (contrl->get_kind() == x86_jmp) output += " Edge_Color_ 00FF00 \n Type_ \"[ 33554432 FILECALL_EDGE ]\" \n"; else output += " Edge_Color_ 0000FF \n "; } //else // blankOutput=true; } if (add=="") output += " graphics [ type \"line\" arrow 
\"last\" fill \"#000000\" ] ]\n"; else output +=add; myfile << output; } // } // ---------- // nodesMap.clear(); }
/**
 * Decides whether the edge sgNodeBefore -> sgNode is a valid control-flow edge.
 *
 * The edge is rejected (false) when either node is NULL, or when both nodes
 * are instructions located in functions, sgNodeBefore is an x86 CALL or RET,
 * and sgNodeBefore is the instruction found directly in front of sgNode in
 * the instruction set (searched up to 7 bytes back).  Every other edge is
 * accepted.
 *
 * @param sgNode       node the edge leads to
 * @param sgNodeBefore node the edge starts from
 * @return true if the edge is considered valid
 */
bool
GraphAlgorithms::isValidCFGEdge(SgGraphNode* sgNode,
                                SgGraphNode* sgNodeBefore) {
  if (!sgNode || !sgNodeBefore)
    return false;

  bool valid = true;
  bool isDirectedControlFlowEdge = false;
  // inst is NULL when the preceding instruction is not an x86 instruction, so
  // it has to be checked separately from the generic casts below.
  SgAsmX86Instruction* inst = isSgAsmX86Instruction(sgNodeBefore->get_SgNode());
  SgAsmInstruction* instSgNode = isSgAsmInstruction(sgNode->get_SgNode());
  SgAsmInstruction* instSgNodeBefore = isSgAsmInstruction(sgNodeBefore->get_SgNode());

  if (instSgNode && instSgNodeBefore) {
    if (RoseBin_support::DEBUG_MODE())
      cout << " *** instSgNode && instSgNodeBefore " << endl;

    // The enclosing function is either the parent or the grandparent of the
    // instruction.  Only step to the grandparent when there is a parent.
    SgAsmFunction* f1 = isSgAsmFunction(instSgNode->get_parent());
    SgAsmFunction* f2 = isSgAsmFunction(instSgNodeBefore->get_parent());
    if (f1==NULL && instSgNode->get_parent()!=NULL)
      f1 = isSgAsmFunction(instSgNode->get_parent()->get_parent());
    if (f2==NULL && instSgNodeBefore->get_parent()!=NULL)
      f2 = isSgAsmFunction(instSgNodeBefore->get_parent()->get_parent());

    if (f1 && f2) {
      // (tps - 05/23/08) : the semantics of the previous implementation is:
      // check the node before in the instruction set and check if it is the same as the previous node
      // todo: the following line must be changed... the size of the current node does not give you the last node!
      if (RoseBin_support::DEBUG_MODE())
        cout << " *** f1 && f2 " << endl;

      SgAsmInstruction* nodeBeforeInSet = NULL;
      int byte = 1;
      ROSE_ASSERT(info);
      while (nodeBeforeInSet==NULL && byte<8) {
        nodeBeforeInSet = info->getInstructionAtAddress(instSgNode->get_address() - byte);
        byte++;
      }
      if (RoseBin_support::DEBUG_MODE())
        cout << " *** nodeBeforeInSet = " << nodeBeforeInSet << " instSgNodeBefore : " << instSgNodeBefore
             << " byte : " << byte << endl;

      if (nodeBeforeInSet == instSgNodeBefore) {
        //if (!isAsmUnconditionalBranch(nodeBeforeInSet))
        if (RoseBin_support::DEBUG_MODE())
          cout << " isDirectedControlFlowEdge = true -- isAsmUnconditionalBranch(nodeBeforeInSet) : "
               << isAsmUnconditionalBranch(nodeBeforeInSet) << endl;
        isDirectedControlFlowEdge = true;
      }

      // A non-x86 predecessor is neither an x86 CALL nor an x86 RET.
      const bool isCall = inst != NULL && inst->get_kind() == x86_call;
      const bool isRet  = inst != NULL && inst->get_kind() == x86_ret;
      if (RoseBin_support::DEBUG_MODE()) {
        cout << " *** f1 && f2 -- isDirectionalControlFlowEdge: " << isDirectedControlFlowEdge << endl;
        cout << " inst->get_kind() == x86_call : " << isCall
             << " inst->get_kind() == x86_ret : " << isRet << endl;
      }

      // The instruction that follows a CALL or RET in memory is not reached by falling through.
      if ((isCall || isRet) && isDirectedControlFlowEdge)
        valid = false;
    }
  }

  /*
  if (RoseBin_support::DEBUG_MODE()) {
    cout << " ValidCFGEdge::: sgNode " << sgNode->get_name() <<
      " sgNodeBefore " << sgNodeBefore->get_name() <<
      " instSgNode << " << instSgNode <<
      " instSgNodeBefore << " << instSgNodeBefore <<
      " is Valid node ? " << RoseBin_support::resBool(valid) <<
      " isControlFlowEdge " << RoseBin_support::resBool(isDirectedControlFlowEdge) << endl;
  }
  */

  return valid;
}
// The actual analysis, triggered when we reach the specified execution address... virtual bool operator()(bool enabled, const Args &args) try { using namespace rose::BinaryAnalysis::InstructionSemantics; static const char *name = "Analysis"; using namespace rose::BinaryAnalysis::InsnSemanticsExpr; if (enabled && args.insn->get_address()==trigger_addr) { RTS_Message *trace = args.thread->tracing(TRACE_MISC); trace->mesg("%s triggered: analyzing function at 0x%08"PRIx64, name, analysis_addr); // An SMT solver is necessary for this example to work correctly. ROSE should have been configured with // "--with-yices=/full/path/to/yices/installation". If not, you'll get a failed assertion when ROSE tries to use // the solver. rose::BinaryAnalysis::YicesSolver smt_solver; smt_solver.set_linkage(rose::BinaryAnalysis::YicesSolver::LM_EXECUTABLE); //smt_solver.set_debug(stdout); // We deactive the simulator while we're doing this analysis. If the simulator remains activated, then the SIGCHLD // that are generated from running the Yices executable will be sent to the specimen. That probably wouldn't cause // problems for the specimen, but the messages are annoying. args.thread->get_process()->get_simulator()->deactivate(); // Create the policy that holds the analysis state which is modified by each instruction. Then plug the policy // into the X86InstructionSemantics to which we'll feed each instruction. SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> policy(&smt_solver); X86InstructionSemantics<SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType>, SymbolicSemantics::ValueType> semantics(policy); // The top of the stack contains the (unknown) return address. The value above that (in memory) is the address of // the buffer, to which we give a concrete value, and above that is the size of the buffer, which we also give a // concrete value). The contents of the buffer are unknown. 
Process memory is maintained by the policy we created // above, so none of these memory writes are actually affecting the specimen's state in the simulator. policy.writeRegister("esp", policy.number<32>(4000)); SymbolicSemantics::ValueType<32> arg1_va = policy.add(policy.readRegister<32>("esp"), policy.number<32>(4)); SymbolicSemantics::ValueType<32> arg2_va = policy.add(arg1_va, policy.number<32>(4)); policy.writeMemory<32>(x86_segreg_ss, arg1_va, policy.number<32>(12345), policy.true_()); // ptr to buffer policy.writeMemory<32>(x86_segreg_ss, arg2_va, policy.number<32>(2), policy.true_()); // bytes in buffer policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(analysis_addr)); // branch to analysis address #if 1 { // This is a kludge. If the first instruction is an indirect JMP then assume we're executing through a dynamic // linker thunk and execute the instruction concretely to advance the instruction pointer. SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(analysis_addr)); if (x86_jmp==insn->get_kind()) { PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> p; X86InstructionSemantics<PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType>, PartialSymbolicSemantics::ValueType> sem(p); MemoryMap p_map = args.thread->get_process()->get_memory(); BOOST_FOREACH (MemoryMap::Segment &segment, p_map.segments()) segment.buffer()->copyOnWrite(true); p.set_map(&p_map); // won't be thread safe sem.processInstruction(insn); policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(p.readRegister<32>("eip").known_value())); trace->mesg("%s: dynamic linker thunk kludge triggered: changed eip from 0x%08"PRIx64" to 0x%08"PRIx64, name, analysis_addr, p.readRegister<32>("eip").known_value()); } } #endif // Run the analysis until we can't figure out what instruction is next. 
If we set things up correctly, the // simulation will stop when we hit the RET instruction to return from this function. size_t nbranches = 0; std::vector<TreeNodePtr> constraints; // path constraints for the SMT solver while (policy.readRegister<32>("eip").is_known()) { uint64_t va = policy.readRegister<32>("eip").known_value(); SgAsmX86Instruction *insn = isSgAsmX86Instruction(args.thread->get_process()->get_instruction(va)); assert(insn!=NULL); trace->mesg("%s: analysing instruction %s", name, unparseInstructionWithAddress(insn).c_str()); semantics.processInstruction(insn); if (policy.readRegister<32>("eip").is_known()) continue; bool complete; std::set<rose_addr_t> succs = insn->getSuccessors(&complete); if (complete && 2==succs.size()) { if (nbranches>=take_branch.size()) { std::ostringstream s; s<<policy.readRegister<32>("eip"); trace->mesg("%s: EIP = %s", name, s.str().c_str()); trace->mesg("%s: analysis cannot continue; out of \"take_branch\" values", name); throw this; } // Decide whether we should take the branch or not. bool take = take_branch[nbranches++]; rose_addr_t target = 0; for (std::set<rose_addr_t>::iterator si=succs.begin(); si!=succs.end(); ++si) { if ((take && *si!=insn->get_address()+insn->get_size()) || (!take && *si==insn->get_address()+insn->get_size())) target = *si; } assert(target!=0); trace->mesg("%s: branch %staken; target=0x%08"PRIx64, name, take?"":"not ", target); // Is this path feasible? We don't really need to check it now; we could wait until the end. 
TreeNodePtr c = InternalNode::create(32, OP_EQ, policy.readRegister<32>("eip").get_expression(), LeafNode::create_integer(32, target)); constraints.push_back(c); // shouldn't really have to do this again if we could save some state if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(constraints)) { policy.writeRegister("eip", SymbolicSemantics::ValueType<32>(target)); } else { trace->mesg("%s: chosen control flow path is not feasible (or unknown).", name); break; } } } // Show the value of the EAX register since this is where GCC puts the function's return value. If we did things // right, the return value should depend only on the unknown bytes from the beginning of the buffer. SymbolicSemantics::ValueType<32> result = policy.readRegister<32>("eax"); std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr> vars = result.get_expression()->get_variables(); { std::ostringstream s; s <<name <<": symbolic return value is " <<result <<"\n" <<name <<": return value has " <<vars.size() <<" variables:"; for (std::set<rose::BinaryAnalysis::InsnSemanticsExpr::LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) s <<" " <<*vi; s <<"\n"; if (!constraints.empty()) { s <<name <<": path constraints:\n"; for (std::vector<TreeNodePtr>::iterator ci=constraints.begin(); ci!=constraints.end(); ++ci) s <<name <<": " <<*ci <<"\n"; } trace->mesg("%s", s.str().c_str()); } // Now give values to those bytes and solve the equation for the result using an SMT solver. 
if (!result.is_known()) { trace->mesg("%s: setting variables (buffer bytes) to 'x' and evaluating the function symbolically...", name); std::vector<TreeNodePtr> exprs = constraints; LeafNodePtr result_var = LeafNode::create_variable(32); TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), result_var); exprs.push_back(expr); for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { expr = InternalNode::create(32, OP_EQ, *vi, LeafNode::create_integer(32, (int)'x')); exprs.push_back(expr); } if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) { LeafNodePtr result_value = smt_solver.evidence_for_variable(result_var)->isLeafNode(); if (!result_value) { trace->mesg("%s: evaluation result could not be determined. ERROR!", name); } else if (!result_value->is_known()) { trace->mesg("%s: evaluation result is not constant. ERROR!", name); } else { trace->mesg("%s: evaluation result is 0x%08"PRIx64, name, result_value->get_value()); } } else { trace->mesg("%s: expression is not satisfiable. (or unknown)", name); } } // Now try going the other direction. Set the return expression to a value and try to discover what two bytes // would satisfy the equation. 
if (!result.is_known()) { trace->mesg("%s: setting result equal to 0xff015e7c and trying to find inputs...", name); std::vector<TreeNodePtr> exprs = constraints; TreeNodePtr expr = InternalNode::create(32, OP_EQ, result.get_expression(), LeafNode::create_integer(32, 0xff015e7c)); exprs.push_back(expr); if (rose::BinaryAnalysis::SMTSolver::SAT_YES == smt_solver.satisfiable(exprs)) { for (std::set<LeafNodePtr>::iterator vi=vars.begin(); vi!=vars.end(); ++vi) { LeafNodePtr var_val = smt_solver.evidence_for_variable(*vi)->isLeafNode(); if (var_val && var_val->is_known()) trace->mesg("%s: v%"PRIu64" = %"PRIu64" %c", name, (*vi)->get_name(), var_val->get_value(), isprint(var_val->get_value())?(char)var_val->get_value():' '); } } else { trace->mesg("%s: expression is not satisfiable (or unknown). No solutions.", name); } } // Reactivate the simulator in case we want to continue simulating. args.thread->get_process()->get_simulator()->activate(); throw this; // Optional: will exit simulator, caught in main(), which then deactivates the simulator } return enabled; } catch (const Analysis*) { args.thread->get_process()->get_simulator()->activate(); throw; }