/**
 * Traversal hook: runs an under-approximating taint analysis on one function.
 *
 * Nodes that are not an SgAsmFunction are ignored.  For a function node the
 * function name and a separator line are printed, a block-level control flow
 * graph is built from the AST, the taint analysis is run to a fixed point
 * starting at CFG vertex 0, and the final state of every CFG vertex is
 * printed to std::cout.
 *
 * @param node  AST node currently being visited.
 */
void visit(SgNode *node) {
    SgAsmFunction* f = isSgAsmFunction(node);
    if (f == NULL)
        return;

    std::cout << f->get_name() << std::endl;
    std::cout << "====================================" << std::endl;

    // create dataflow engine
    SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(node);
    if (interp == NULL) {
        // The register dictionary comes from the interpretation; without one the
        // semantics objects below cannot be built, so skip this function instead
        // of dereferencing a null pointer.
        std::cerr << "taint analysis skipped: no enclosing SgAsmInterpretation for function "
                  << f->get_name() << std::endl;
        return;
    }
    const RegisterDictionary *regdict = interp->get_registers();
    BaseSemantics::RiscOperatorsPtr symbolicOps = SymbolicSemantics::RiscOperators::instance(regdict);
    DispatcherX86Ptr cpu = DispatcherX86::instance(symbolicOps);

    // NOTE(review): esp_0 is never read below.  The call is kept because
    // readRegister() may have side effects on the state held by symbolicOps --
    // confirm before removing it.
    BaseSemantics::SValuePtr esp_0 = symbolicOps->readRegister(cpu->REG_ESP);

    TaintedFlow taintAnalysis(cpu);
    TaintedFlow::Approximation approximation = TaintedFlow::UNDER_APPROXIMATE;
    taintAnalysis.approximation(approximation);
    size_t cfgStartVertex = 0;

    // build control flow graph
    CFG cfg;
    ControlFlow().build_block_cfg_from_ast(node, cfg);

    ///////////////////////
    // Taint Analysis    //
    ///////////////////////
    taintAnalysis.computeFlowGraphs(cfg, cfgStartVertex);
    TaintedFlow::StatePtr initialState = taintAnalysis.stateInstance(TaintedFlow::BOTTOM);

    // at this point, we would need to taint different variables
    initialState->setIfExists(DataFlow::Variable(), TaintedFlow::NOT_TAINTED);

    // actually run the taint analysis
    taintAnalysis.runToFixedPoint(cfg, cfgStartVertex, initialState);

    BOOST_FOREACH(const typename CFG::VertexNode &vertex, cfg.vertices()){
        // rose_addr_t lastInsnAddr =
        //     SageInterface::querySubTree<SgAsmInstruction>(vertex.value()).back()->get_address();
        TaintedFlow::StatePtr state = taintAnalysis.getFinalState(vertex.id());
        std::cout << *state << std::endl;
    }

    // print 'variables' of function
    // BOOST_FOREACH(const DataFlow::Variable &variable, taintAnalysis.variables() )
    //     std :: cout << variable << std :: endl;
}
/**
 * Traversal hook that detects runs of consecutive NOP instructions.
 *
 * While NOPs are being visited the run length is accumulated in `count` and
 * the first instruction of the run is remembered in `nopSequenceStart`.  The
 * first non-NOP instruction after a run triggers the report: one summary line
 * for the run, an entry appended to `nopSequences`, and one line per NOP found
 * in the statement list of the block that contains the start of the run.
 *
 * A run is only reported when a non-NOP instruction follows it.
 *
 * @param n  AST node currently being visited; non-instruction nodes are ignored.
 */
void CountTraversal::visit ( SgNode* n )
   {
     SgAsmInstruction* asmInstruction = isSgAsmInstruction(n);
     if (asmInstruction == NULL)
          return;

  // Use the new interface support for this (this detects all multi-byte nop instructions).
     if (SageInterface::isNOP(asmInstruction) == true)
        {
          if (previousInstructionWasNop == true)
             {
            // Increment the length of the identified NOP sequence
               count++;
             }
            else
             {
               count = 1;
            // Record the starting address of the NOP sequence
               nopSequenceStart = asmInstruction;
             }

          previousInstructionWasNop = true;
          return;
        }

     if (count > 0)
        {
       // Report the sequence when we have detected the end of the sequence.
          SgAsmFunction* functionDeclaration = getAsmFunction(asmInstruction);
          ROSE_ASSERT(functionDeclaration != NULL);

       // The address is cast explicitly so that the argument always matches the
       // conversion specifier, whatever integer type get_address() returns.
          printf ("Reporting NOP sequence of length %3d at address %llu in function %s (reason for this being a function = %u = %s) \n",
               count,(unsigned long long)nopSequenceStart->get_address(),functionDeclaration->get_name().c_str(),
               functionDeclaration->get_reason(),
               stringifySgAsmFunctionFunctionReason(functionDeclaration->get_reason()).c_str());

          nopSequences.push_back(pair<SgAsmInstruction*,int>(nopSequenceStart,count));

          SgAsmBlock* block = isSgAsmBlock(nopSequenceStart->get_parent());
          ROSE_ASSERT(block != NULL);
          SgAsmStatementPtrList & l = block->get_statementList();

       // Now iterate over the nop instructions in the sequence and report the length of each
       // (can be multi-byte nop instructions).
          SgAsmStatementPtrList::iterator i = find(l.begin(),l.end(),nopSequenceStart);
          ROSE_ASSERT(i != l.end());

          int counter = 0;

       // The end-of-list test must come first: when the terminating instruction
       // is not in this block the iterator reaches l.end() and must not be dereferenced.
          for ( ; (i != l.end()) && (*i != asmInstruction); ++i)
             {
               SgAsmInstruction* nop = isSgAsmInstruction(*i);

            // Skip list entries that are not instructions instead of dereferencing a null cast result.
               if (nop == NULL)
                    continue;

               printf ("--- NOP #%2d is length = %2d \n",counter++,(int)nop->get_raw_bytes().size());
             }
        }

     count = 0;
     previousInstructionWasNop = false;
   }
/**
 * Traversal hook for the post-dominance test.
 *
 * Only the function named "simple06" is processed.  For it the visit counter
 * is bumped, a block-level CFG is built, vertex 0 is asserted to be the
 * function's entry block, and the post-dominator relation is computed twice:
 * once with the library's Dominance class and once with MyDominance.
 *
 * @param node  AST node currently being visited.
 */
void visit(SgNode *node) {
    SgAsmFunction *func = isSgAsmFunction(node);
    if (!func)
        return;
    if (func->get_name() != "simple06")
        return;

    ++nvisits;

    CFG cfg = rose::BinaryAnalysis::ControlFlow().build_block_cfg_from_ast<CFG>(func);

    // The analysis starts at vertex 0, which must be the function's entry block.
    CFG_Vertex start = 0;
    assert(get(boost::vertex_name, cfg, start)==func->get_entry_block());

    // Reference implementation first, then the implementation under test.
    DG_RelMap dgmap1 = rose::BinaryAnalysis::Dominance().build_postdom_relation_from_cfg(cfg, start);
    DG_RelMap dgmap2 = MyDominance().build_postdom_relation_from_cfg(cfg, start);
}
/**
 * Compares two AST nodes for "sameness".
 *
 * Two x86 instructions are equal when their fast unparsed text is equal; two
 * functions are equal when their names are equal.  Every other combination,
 * including a NULL argument, compares unequal.
 *
 * @param A  first node (may be NULL)
 * @param B  second node (may be NULL)
 * @return   true when both nodes are of the same supported kind and match
 */
static bool isEqual(SgNode* A, SgNode* B) {
    if (!A || !B)
        return false;

    SgAsmInstruction* insnA = isSgAsmX86Instruction(A);
    SgAsmInstruction* insnB = isSgAsmX86Instruction(B);
    SgAsmFunction* funcA = isSgAsmFunction(A);
    SgAsmFunction* funcB = isSgAsmFunction(B);

    bool same = false;

    // Instruction pair: compare the unparsed text.
    if (insnA && insnB)
        same = (unparseInstrFast(insnA) == unparseInstrFast(insnB));

    // Function pair: compare by name (evaluated last, so it has the final say).
    if (funcA && funcB)
        same = (funcA->get_name() == funcB->get_name());

    return same;
}
/**************************************************** * process all instructions in the DB * add the instructions to the blocks ****************************************************/ void RoseBin_DB_IDAPRO::process_instruction_query(MYSQL* conn, MYSQL_RES* res_set) { rememberInstructions.clear(); // get the functions // char* q = (char*)"SELECT * FROM instructions_1"; char *q = (char*)"select *, (select parent_function from basic_blocks_1 where id = i.basic_block_id and (i.address - parent_function) >= 0 and (i.address - parent_function) = (select min(i.address - parent_function) from basic_blocks_1 where id = i.basic_block_id and (i.address - parent_function) >= 0) ) as i_f from instructions_1 i order by i.address"; if (RoseBin_support::DEBUG_MODE()) cout << "\n>> QUERY:: " << q << "\n" << endl; res_set = process_query(conn,q); if (res_set == NULL) { print_problemWithResults(conn); } else { MYSQL_ROW row; string mnemonic=(char*)""; uint64_t address=0; int basic_block=-1; int sequence =-1; string data=(char*)""; int i_func; while ((row = mysql_fetch_row(res_set))!=NULL) { for (unsigned int i=0; i<mysql_num_fields(res_set);i++) { char* ret=(char*)""; if (row[i] ==NULL) { ret = (char*)"<NULL>"; if (i==0) address = -1; if (i==1) basic_block = -1; if (i==2) mnemonic = ret; if (i==3) sequence = -1; if (i==4) data=ret; if (i==5) i_func= -1; } else { ret= row[i]; if (i==0) address = atoi(ret); if (i==1) basic_block = atoi(ret); if (i==2) mnemonic = ret; if (i==3) sequence = atoi(ret); if (i==4) data=ret; if (i==5) i_func = atoi(ret); } } // patched to adjust to objdump , Apr 26 2007 if (mnemonic ==(char*)"retn") mnemonic = (char*)"ret"; if (RoseBin_support::DEBUG_MODE()) { ostringstream addrhex; addrhex << hex << setw(8) << address ; cout << ">> creating instruction : " << addrhex.str() << " " << address << " - " << basic_block << " - " << mnemonic << " - " << sequence << endl; } // check if it is an instruction or if it appears in the callgraph, // if it is in the 
callgraph, one wants to create a BinaryCall instead // append the instruction to its function rose_hash::unordered_map <int, SgAsmFunction* >::iterator func_it = rememberFunctions.find(i_func); SgAsmFunction* func = NULL; // for (func_it; func_it!=rememberFunctions.end(); ++func_it) { if (func_it != rememberFunctions.end()) { func = func_it->second; } else { if (i_func!=-1) cerr << " ERROR : cant find the function i_func : " << i_func << " in rememberFunctions for instruction : " << mnemonic << endl; } SgAsmInstruction* instruction = NULL; instruction = createInstruction(address, func, mnemonic); // instruction = new SgAsmInstruction(address,bb,mnemonic,""); // Sep 29, tps : commented the following line out, since the function was removed. //instruction->set_raw_bytes(data); ROSE_ASSERT(instruction); SgAsmOperandList* operandList = new SgAsmOperandList(); instruction->set_operandList(operandList); operandList->set_parent(instruction); ostringstream hexaddr; hexaddr << hex << setw(8) << address ; if (RoseBin_support::DEBUG_MODE()) cout << " .rememberInstruction " << instruction->class_name() << " at : " << address << " hex: " << hexaddr.str() << endl; rememberInstructions[address]= instruction ; if (func) { // get the block in the func and append to it to conform to jeremiah func->append_statement(instruction); instruction->set_parent(func); //vector <SgNode*> blockVec =func->get_traversalSuccessorContainer(); //SgAsmBlock* block = isSgAsmBlock(blockVec[0]); //ROSE_ASSERT(block); //block->append_statement(instruction); //instruction->set_parent(block); ROSE_ASSERT(instruction->get_parent()); //SgAsmNode* nInst = (SgAsmNode*) instruction; //nInst->set_parent(func); ostringstream addrhex; addrhex << hex << setw(8) << i_func ; if (RoseBin_support::DEBUG_MODE()) cout << ">> appended instruction to function: " << func->get_name() << " addr " << addrhex.str() << " " << address << endl; } else { if (i_func!=-1) { cerr << " ERROR :: could not append instruction to function 
: " << endl; //exit(0); } } } // while } // if (res_set==NULL) checkError(conn,res_set); }
int main(int argc, char** argv) { if (!containsArgument(argc, argv, "-checkAST") && !containsArgument(argc, argv, "-checkGraph") && !containsArgument(argc, argv, "-printTree") && !containsArgument(argc, argv, "-callgraph") && !containsArgument(argc, argv, "-cfa") && !containsArgument(argc, argv, "-dfa") ) {argc = 1;} if (argc < 2) { fprintf(stderr, "Usage: %s executableName [OPTIONS]\n", argv[0]); cout << "\nOPTIONS: " <<endl; cout << "-checkAST - run all checkers on binary AST. " << endl; cout << "-checkGraph - run all checkers on dataflow graph. " << endl; cout << "-printTree - create dot file of AST. " << endl; cout << "-callgraph - perform callgraph analysis and print callgraph.dot file. " << endl; cout << "-cfa - perform control flow analysis and print cfg.dot file. " << endl; cout << "-dfa - perform dataflow flow analysis and print dfg.dot file. " << endl; cout << "-inter - perform dataflow analysis interprocedurally (default intraprocedural). " << endl; cout << "-backward - perform backward analysis (default forward). " << endl; cout << "-gml - all graphs (except AST) are saved as gml files (default dot). " << endl; cout << "-mergeedges - aggregate edges between same nodes. " << endl; cout << "-noedges - do not print edges into dot or gml file (only nodes). " << endl; return 1; } string execName = argv[1]; lt_dlinit(); // this is our test case input, we will assert on the data from this file test = false; if (execName=="buffer2.bin") { //cerr << "running test case on buffer2.bin !! 
" << endl << endl; test = true; } // create out folder string filenameDir="out"; mode_t mode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH; mkdir(filenameDir.c_str(), mode); std::ofstream myfile; bool interprocedural = false; if (containsArgument(argc, argv, "-inter")) { interprocedural = true; } bool forward = true; if (containsArgument(argc, argv, "-backward")) { forward = false; } bool dot = true; if (containsArgument(argc, argv, "-gml")) { dot = false; } bool mergedEdges = false; if (containsArgument(argc, argv, "-mergeedges")) { mergedEdges = true; } bool edges = true; if (containsArgument(argc, argv, "-noedges")) { edges = false; } RoseBin_Def::RoseAssemblyLanguage = RoseBin_Def::x86; //fprintf(stderr, "Starting binCompass frontend...\n"); SgProject* project = frontend(argc,argv); ROSE_ASSERT (project != NULL); SgBinaryComposite* binary = isSgBinaryComposite(project->get_fileList()[0]); SgAsmGenericFile* file = binary != NULL ? binary->get_binaryFile() : NULL; // const SgAsmInterpretationPtrList& interps = file->get_interpretations(); //ROSE_ASSERT (interps.size() == 1); //SgAsmInterpretation* interp = interps[0]; SgAsmInterpretation* interp = SageInterface::getMainInterpretation(file); if (containsArgument(argc, argv, "-printTree")) { //fprintf(stderr, "Printing AST... _binary_tree.dot\n"); string filename="_binary_tree.dot"; AST_BIN_Traversal* trav = new AST_BIN_Traversal(); trav->run(interp->get_global_block(), filename); if (test) { int instrnr = trav->getNrOfInstructions(); //cerr << " Instructions written to file: " << instrnr << endl; ROSE_ASSERT(instrnr==861); } } RoseBin_Graph* graph; VirtualBinCFG::AuxiliaryInformation* info = new VirtualBinCFG::AuxiliaryInformation(file); std::map<int,std::set<SgAsmFunction*> > components; GraphAlgorithms* algo = new GraphAlgorithms(info); // call graph analysis ******************************************************* if (containsArgument(argc, argv, "-callgraph")) { //cerr << " creating call graph ... 
" << endl; graph= new RoseBin_DotGraph(); string callFileName = "callgraph.dot"; if (dot==false) { callFileName = "callgraph.gml"; graph= new RoseBin_GMLGraph(); } RoseBin_CallGraphAnalysis* callanalysis = new RoseBin_CallGraphAnalysis(interp->get_global_block(), new RoseObj(), algo); callanalysis->run(graph, callFileName, !mergedEdges); callanalysis->getConnectedComponents(components); if (test) { //cerr << " nr of nodes visited in callanalysis : " << callanalysis->nodesVisited() << endl; ROSE_ASSERT(callanalysis->nodesVisited()==10); //cerr << " nr of edges visited in callanalysis : " << callanalysis->edgesVisited() << endl; ROSE_ASSERT(callanalysis->edgesVisited()==9); } } if (containsArgument(argc, argv, "-printTree")) { //fprintf(stderr, "Printing AST... _binary_tree2.dot\n"); string filename="_binary_tree2.dot"; AST_BIN_Traversal* trav = new AST_BIN_Traversal(); trav->run(interp->get_global_block(), filename); if (test) { int instrnr = trav->getNrOfInstructions(); //cerr << " Instructions written to file: " << instrnr << endl; ROSE_ASSERT(instrnr==861); } } // control flow analysis ******************************************************* if (containsArgument(argc, argv, "-cfa")) { string cfgFileName = "cfg.dot"; graph= new RoseBin_DotGraph(); if (dot==false) { cfgFileName = "cfg.gml"; graph= new RoseBin_GMLGraph(); } RoseBin_ControlFlowAnalysis* cfganalysis = new RoseBin_ControlFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), edges, algo); cfganalysis->run(graph, cfgFileName, mergedEdges); #if 1 std::map<int,std::set<SgAsmFunction*> >::const_iterator comps = components.begin(); //set<std::string> partialCFG; for (;comps!=components.end();++comps) { set<std::string> partialCFG; int nr = comps->first; //cerr << " found the following component " << nr << endl; std::set<SgAsmFunction*> funcs = comps->second; std::set<SgAsmFunction*>::const_iterator it = funcs.begin(); for (;it!=funcs.end();++it) { SgAsmFunction* function = *it; string name = 
function->get_name(); name.append("_f"); //cerr << " binCompass CALLGRAPH ANALYSIS : found function : " << name << endl; partialCFG.insert(name); } string filename = "thomas"; filename.append(RoseBin_support::ToString(nr)); filename.append(".dot"); //cerr << " binCompass writing to file " << filename << endl; cfganalysis->printGraph(filename,partialCFG); } //cfganalysis->printGraph(filename,partialCFG); #endif #if 0 set<std::string> partialCFG; partialCFG.insert(" 80483c0_f"); partialCFG.insert(" 8048491_f"); partialCFG.insert(" 8048363_f"); partialCFG.insert(" 804828f_f"); cfganalysis->printGraph("thomas.dot",partialCFG); #endif if (test) { //cout << " cfa -- Number of nodes == " << cfganalysis->nodesVisited() << endl; //cout << " cfa -- Number of edges == " << cfganalysis->edgesVisited() << endl; //ROSE_ASSERT(cfganalysis->nodesVisited()==210); //ROSE_ASSERT(cfganalysis->edgesVisited()==234); ROSE_ASSERT(cfganalysis->nodesVisited()==237); ROSE_ASSERT(cfganalysis->edgesVisited()==261); } } if (containsArgument(argc, argv, "-printTree")) { //fprintf(stderr, "Printing AST... _binary_tree3.dot\n"); string filename="_binary_tree3.dot"; AST_BIN_Traversal* trav = new AST_BIN_Traversal(); trav->run(interp->get_global_block(), filename); if (test) { int instrnr = trav->getNrOfInstructions(); //cerr << " Instructions written to file: " << instrnr << endl; ROSE_ASSERT(instrnr==861); } } if (containsArgument(argc, argv, "-dfa")) { //cerr << " creating dataflow graph ... 
" << endl; string dfgFileName = "dfg.dot"; graph= new RoseBin_DotGraph(); if (dot==false) { dfgFileName = "dfg.gml"; graph= new RoseBin_GMLGraph(); } RoseBin_DataFlowAnalysis* dfanalysis = new RoseBin_DataFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), algo); dfanalysis->init(interprocedural, edges); dfanalysis->run(graph, dfgFileName, mergedEdges); if (test) { #if 0 cout << " dfa -- Number of nodes == " << dfanalysis->nodesVisited() << endl; cout << " dfa -- Number of edges == " << dfanalysis->edgesVisited() << endl; cout << " dfa -- Number of memWrites == " << dfanalysis->nrOfMemoryWrites() << endl; cout << " dfa -- Number of regWrites == " << dfanalysis->nrOfRegisterWrites() << endl; cout << " dfa -- Number of definitions == " << dfanalysis->nrOfDefinitions() << endl; cout << " dfa -- Number of uses == " << dfanalysis->nrOfUses() << endl; #endif if (interprocedural) { ROSE_ASSERT(dfanalysis->nodesVisited()==237); ROSE_ASSERT(dfanalysis->edgesVisited()==284); ROSE_ASSERT(dfanalysis->nrOfMemoryWrites()==12); ROSE_ASSERT(dfanalysis->nrOfRegisterWrites()==36); ROSE_ASSERT(dfanalysis->nrOfDefinitions()==183); ROSE_ASSERT(dfanalysis->nrOfUses()==25); } else { ROSE_ASSERT(dfanalysis->nodesVisited()==237); ROSE_ASSERT(dfanalysis->edgesVisited()==287); ROSE_ASSERT(dfanalysis->nrOfMemoryWrites()==18); ROSE_ASSERT(dfanalysis->nrOfRegisterWrites()==77); ROSE_ASSERT(dfanalysis->nrOfDefinitions()==216); ROSE_ASSERT(dfanalysis->nrOfUses()==31); } } } if (containsArgument(argc, argv, "-checkAST") || containsArgument(argc, argv, "-checkGraph")) { // get a list of all checkers and traverse vector <BC_AnalysisInterface*> checkers; vector <BC_GraphAnalysisInterface*> graph_checkers; loadAnalysisFiles(checkers); vector <BC_AnalysisInterface*>::const_iterator it = checkers.begin(); for (;it!=checkers.end();it++) { BC_AnalysisInterface* asmf = *it; //cout << "\nRunning Binary Checker --- " << asmf->get_name() << endl; string filename = execName+"."+asmf->get_name(); 
unsigned int pos = filename.find_last_of("/"); if (filename.find_last_of("/")!=string::npos && (pos+1)<filename.length()) filename = filename.substr(pos+1, filename.length()); filename = "out/"+filename+".out"; //cerr << "Writing file : " << filename << endl; myfile.open(filename.c_str()); asmf->init(interp->get_global_block()); asmf->traverse(interp->get_global_block(), preorder); asmf->finish(interp->get_global_block()); string output = asmf->get_output(); myfile << output << " \n"; myfile.close(); } if (containsArgument(argc, argv, "-checkGraph")) { loadGraphAnalysisFiles(graph_checkers); //cerr << "\n ---------------- preparing to run DataFlowAnalysis (-checkGraph)" << endl; string dfgFileName = "dfg.dot"; graph= new RoseBin_DotGraph(); if (dot==false) { dfgFileName = "dfg.gml"; graph= new RoseBin_GMLGraph(); } RoseBin_ControlFlowAnalysis* cfganalysis = new RoseBin_ControlFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), edges, algo); cfganalysis->run(graph, dfgFileName, mergedEdges); if (test) { //cerr << " cfa -- Number of nodes == " << cfganalysis->nodesVisited() << endl; //cerr << " cfa -- Number of edges == " << cfganalysis->edgesVisited() << endl; ROSE_ASSERT(cfganalysis->nodesVisited()==237); ROSE_ASSERT(cfganalysis->edgesVisited()==261); } rose_graph_integer_node_hash_map nodes = graph->get_node_index_to_node_map(); //cerr << "CFG (-checkGraph) finished ----- Graph nr of nodes : " << nodes.size() << endl; ROSE_ASSERT(nodes.size()>0); RoseBin_DataFlowAnalysis* dfanalysis = new RoseBin_DataFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), algo); //dfanalysis->init(interprocedural, edges,graph); dfanalysis->init(interprocedural, edges); dfanalysis->run(graph, dfgFileName, mergedEdges); //cerr << "DFG (-checkGraph) finished ----- Graph nr of nodes : " << nodes.size() << endl; vector<SgGraphNode*> rootNodes; dfanalysis->getRootNodes(rootNodes); //SgGraphNode* root1 = rootNodes[0]; //rootNodes.clear(); 
//rootNodes.push_back(root1); vector <BC_GraphAnalysisInterface*>::const_iterator it2 = graph_checkers.begin(); #if 0 cerr << "\n ---------------- running graph checkers : " << graph_checkers.size() << " rootNodes size : " << rootNodes.size() << " interprocedural : " << RoseBin_support::resBool(interprocedural) << endl; cout << "\n ---------------- running graph checkers : " << graph_checkers.size() << " rootNodes size : " << rootNodes.size() << " interprocedural : " << RoseBin_support::resBool(interprocedural) << endl; cerr << "Graph : " << nodes.size() << endl; #endif for (;it2!=graph_checkers.end();it2++) { BC_GraphAnalysisInterface* asmf = *it2; ROSE_ASSERT(asmf); //cerr << "\nRunning Binary Graph Checker --- " << asmf->get_name() << " " << " roots : " << // rootNodes.size() << endl; // tps 04/23/08 -- fixme: this code was broken when I added the testcase -- needs to be fixed dfanalysis->init(); asmf->init(graph); dfanalysis->traverseGraph(rootNodes, asmf, interprocedural); } } } unparseAsmStatementToFile("unparsed.s", interp->get_global_block()); lt_dlexit(); return 0; }
/***********************************************************************
 * (10/31/07) tps: Traverses the graph for each node in rootNodes
 * and applies to each node the evaluate function
 * which can be either def_use, variable detection or emulation
 * Each node in the controlflow of rootNode is traversed (forward)
 * and only if the hasChanged function returns false, the algorithm
 * comes to a fixpoint
 *
 * Implementation notes:
 *  - The worklist is a FIFO queue (push_back / pop_front) with a companion
 *    hash set (worklist_hash) so that a node is never queued twice.
 *  - analysis->run() is called for every node taken from the worklist; its
 *    return value decides whether already visited successors are re-queued.
 *  - visited, visitedCounter, nodeBeforeMap and nrOfNodesVisited are not
 *    declared or reset in this function, so they carry over from one root
 *    node to the next.
 *
 * rootNodes       : start nodes, one traversal per entry
 * analysis        : analysis applied to each node via analysis->run()
 * interprocedural : when false, edges classified as call/return are not
 *                   followed; when true, only edges accepted by
 *                   g_algo->isValidCFGEdge() (or exception calls) are followed
 ***********************************************************************/
void RoseBin_DataFlowAnalysis::traverseGraph(vector <SgGraphNode*>& rootNodes, RoseBin_DataFlowAbstract* analysis, bool interprocedural){
  if (RoseBin_support::DEBUG_MODE_MIN())
    cerr << " traverseGraph : debug: " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE()) << " debug_min : " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE_MIN()) << endl;
  // Number of functions traversed
  int funcNr =0;
  // ---------------------------------------------------------------------
  // stores the nodes that still needs to be visited
  //  vector<SgGraphNode*> worklist;
  deque<SgGraphNode*> worklist;
  // mirrors the content of worklist for fast membership tests
  nodeHashSetType worklist_hash;
  // a vector of successors of the current node
  vector<SgGraphNode*> successors;
  // ---------------------------------------------------------------------

  // iterate through all functions
  vector<SgGraphNode*>::iterator it = rootNodes.begin();
  for (; it!=rootNodes.end();++it) {
    // current node
    SgGraphNode* node = *it;
    string func_name = vizzGraph->getProperty(SgGraph::name, node);
    RoseBin_support::checkText(func_name);
    funcNr++;
    if (RoseBin_support::DEBUG_MODE()) {
      cout << "\n\n ----------- dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+ RoseBin_support::ToString(rootNodes.size())+") : " << func_name << " visited size : " << visited.size() << " total visited nodes : " << nrOfNodesVisited << endl;
      // debug
    }
    if (RoseBin_support::DEBUG_MODE_MIN()) {
      cerr << " ----------- dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+ RoseBin_support::ToString(rootNodes.size())+") : " << func_name << " visited size : " << visited.size() << " total visited nodes : " << nrOfNodesVisited << " def size : " << analysis->getDefinitionSize() << endl;
    }

    // indicates whether the current value for this node has changed
    bool hasChanged=false;
    // pushback into worklist and visited list
    worklist.push_back(node);
    worklist_hash.insert(node);
    visited.insert(node);
    visitedCounter[node] = 1;
    // NOTE(review): pre is never used in this function.
    vector <SgGraphNode*> pre;
    // while there are still graph nodes in the worklist do
    while (worklist.size()>0) {
      nrOfNodesVisited++;
      // the new node is taken from the front of the worklist (FIFO order);
      // the older back()/pop_back() variant is kept below for reference
      //node = worklist.back();
      //worklist.pop_back();
      node = worklist.front();
      worklist.pop_front();
      worklist_hash.erase(node);
      // look up the display name of the current node
      string name = vizzGraph->getProperty(SgGraph::name, node);

      //if (RoseBin_support::DEBUG_MODE_MIN() && node)
      //  if (node->get_SgNode())
      //    cerr << node->get_SgNode()->class_name() << " " << node << " " << node->get_name() << endl;

      if (RoseBin_support::DEBUG_MODE_MIN() && node) {
        SgAsmInstruction* instr = isSgAsmInstruction(node->get_SgNode());
        if (instr) {
          SgAsmFunction* funcParent = isSgAsmFunction(instr->get_parent());
          if (funcParent) {
            string parent = funcParent->get_name();
            cout << " ---- analysis of node in function : " << parent << " defs " << analysis->getDefinitionSize() << " visited : " << RoseBin_support::ToString(visitedCounter[node]) << endl;
          }
        }
      }

      if (RoseBin_support::DEBUG_MODE())
        cout << "\n evaluating: " << name << endl;
      // do something with the current node
      // e.g. checkVariables(name, node);
      // predecessor recorded when this node was first discovered (NULL for a root);
      // note that this iterator shadows the outer rootNodes iterator `it`
      SgGraphNode* nodeBefore= NULL;
      BeforeMapType::const_iterator it = nodeBeforeMap.find(node);
      if (it!=nodeBeforeMap.end())
        nodeBefore = it->second;
      // successor vector is empty on each new node
      successors.clear();
      ROSE_ASSERT(isSgIncidenceDirectedGraph(vizzGraph));
      isSgIncidenceDirectedGraph(vizzGraph)->getSuccessors(node, successors);

      // evaluate the node; hasChanged drives the re-queueing of visited successors below
      hasChanged = analysis->run(name, node, nodeBefore);

      // append the successors to the worklist
      if (RoseBin_support::DEBUG_MODE())
        cout << ">> getting successors (" << successors.size() << ") for : " << name << endl;
      //      if (successors.size()==0)
      //        cout << "PROBLEM ..................................................... : " << endl;

      vector<SgGraphNode*>::iterator succ = successors.begin();
      for (;succ!=successors.end();++succ) {
        // for each successor do...
        SgGraphNode* next = *succ;
        // both casts yield NULL when the graph node does not wrap an x86 instruction
        SgAsmX86Instruction* nodeN = isSgAsmX86Instruction(node->get_SgNode());
        //if (!nodeN) continue;
        SgAsmX86Instruction* nextN = isSgAsmX86Instruction(next->get_SgNode());
        //if (!nextN) continue;

        string name_n = vizzGraph->getProperty(SgGraph::name, next);

        // call becomes true when the edge node -> next is classified as a call or return edge
        bool call = false;
        // exceptionCall() receives the instruction only when it is an x86 call, otherwise 0
        bool exceptionCallNext = false;
        if (nextN)
          exceptionCallNext = exceptionCall(nextN->get_kind() == x86_call ? nextN : 0);
        bool exceptionCallNode = false;
        if (nodeN)
          exceptionCallNode = exceptionCall(nodeN->get_kind() == x86_call ? nodeN : 0);
        if (RoseBin_support::DEBUG_MODE())
          std::cout << " exceptionCallNode : " << exceptionCallNode << " exceptionCallNext : " << exceptionCallNext << endl;

        // if function call is call to malloc we have an exception and follow the call path
        if ((exceptionCallNode && !exceptionCallNext)) {
          // deliberately empty: the edge is not marked as a call
        } else if (
            //if (
            (nodeN && nodeN->get_kind() == x86_call) || (nextN && nextN->get_kind() == x86_ret) )
          call = true;

        //bool sameParent = analysis->sameParents(node, next);
        bool validNode=false;
        if (g_algo->isValidCFGEdge(next, node) || exceptionCallNode)
          validNode = true;

        // debug ------------------------
        if (RoseBin_support::DEBUG_MODE()) {
          string nodeBeforeStr="";
          if (nodeBefore) nodeBeforeStr= nodeBefore->get_name();
          cout << " DEBUG : >>>>>>>> previous node " << nodeBeforeStr << " This node : " << name << " next node : " << name_n << " ** validNode : " << RoseBin_support::resBool(validNode) << endl;
        }
        // ----------------------------------

        // intraprocedural: follow everything except call/return edges;
        // interprocedural: follow only edges accepted as valid above
        if (( interprocedural==false && !call) //
            || (interprocedural==true && validNode)) {
          if (visited.find(next)==visited.end()) {
            // if the successor is not yet visited
            // mark as visited and put into worklist
            if (RoseBin_support::DEBUG_MODE())
              cout << " never visited next node before... " << name_n << " interprocedural : " << interprocedural << " call : " << call << endl;
            if (RoseBin_support::DEBUG_MODE())
              cout << "adding to visited : " << name_n << endl;
            visited.insert(next);
            nodeBeforeMap[next]=node;
            visitedCounter[next]=1;
            vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(1));
            if (!containsHash(worklist_hash,next)) {
              // add next node only if the next node is not already queued
              if (RoseBin_support::DEBUG_MODE())
                cout << "adding to worklist: " << name_n << endl;
              worklist.push_back(next);
              worklist_hash.insert(next);
            }
          } else {
            // if the successor has been visited, we need to check if it has changed
            // if it has not, we continue, else we need to push it back to the worklist
            int nr = visitedCounter[next];
            if (RoseBin_support::DEBUG_MODE())
              cout << " visited next node before... " << RoseBin_support::ToString(nr) << " Changed == " << RoseBin_support::resBool(hasChanged) << endl;
            if (hasChanged) {
              visitedCounter[next]=++nr;
              vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(nr));
              if (RoseBin_support::DEBUG_MODE())
                cout << " has changed : " << RoseBin_support::resBool(hasChanged) << " -- interprocedural : " << RoseBin_support::resBool(interprocedural) << " -- Call : " << RoseBin_support::resBool(call) << " ------> new number: " << RoseBin_support::ToString(nr) << " -- contained in hash? : " << RoseBin_support::resBool(containsHash(worklist_hash,next)) << " ---- nr of Defs: " << RoseBin_support::ToString(analysis->getDefinitionSize()) << " ---- nr of Use: " << RoseBin_support::ToString(analysis->getUsageSize()) << endl;
              if (interprocedural || (!interprocedural && !call)){ //sameParent)) { //!call && ) {
                if (!containsHash(worklist_hash,next)) {
                  worklist_hash.insert(next);
                  worklist.push_back(next);
                  if (RoseBin_support::DEBUG_MODE())
                    cout << " adding to worklist: " << name_n << endl;
                }
              }
            } else if (RoseBin_support::DEBUG_MODE())
              cout << " has NOT changed. " << endl;
            //else we continue with the next node
          }
        }
      } // for
    } // while worklist.size()>0
  } // for rootNodes
}
/**
 * Checker hook: flags reads of memory locations that were not written before
 * within the same function.
 *
 * - On a function node the per-function sets memoryWrites and memoryRead are
 *   cleared.
 * - On an x86 `mov` instruction the first operand is treated as the left hand
 *   side and the following operand as the right hand side:
 *     mov mem, (reg | value | mem)  records the evaluated address as written;
 *     mov reg, mem                  is a read; when the memory expression
 *                                   involves the BP register and the address
 *                                   was neither written nor reported before,
 *                                   a message is stored in result[inst].
 *
 * The enclosing function is found through the parent block when `project` is
 * set; otherwise (IDA input) the instruction's parent is expected to be the
 * function itself.
 *
 * @param node  AST node currently being visited.
 */
void InitPointerToNull::visit(SgNode* node) {
  if (isSgAsmFunction(node)) {
    // new function: forget what the previous one wrote and read
    memoryWrites.clear();
    memoryRead.clear();
    return;
  }

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
  if (inst == NULL || inst->get_kind() != x86_mov)
    return;

  // this is the address of the mov instruction prior to the call
  SgNode* instBlock = NULL;
  if (project)
    instBlock= isSgAsmBlock(inst->get_parent());
  else //we run IDA, this is different
    instBlock=inst;

  if (instBlock==NULL)
    return;
  SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
  if (instFunc==NULL)
    return;

  // we have found a mov instruction
  // we need to check if it is a mov mem, (value or reg) // assignment of variable
  // forgot mov mem, mem
  // or we find a mov reg, mem // usage of variable
  // make sure a variable is assigned before used
  SgAsmOperandList * ops = inst->get_operandList();
  if (ops == NULL)
    return; // nothing to classify without operands
  SgAsmExpressionPtrList& opsList = ops->get_operands();
  SgAsmMemoryReferenceExpression* memL=NULL;
  SgAsmMemoryReferenceExpression* memR=NULL;
  SgAsmRegisterReferenceExpression* regL=NULL;
  SgAsmRegisterReferenceExpression* regR=NULL;
  SgAsmValueExpression* Val = NULL;
  // iteration only advances from 0 to 1, so every operand after the first is
  // treated as the right hand side
  int iteration=0;
  for (SgAsmExpressionPtrList::iterator itOP = opsList.begin(); itOP!=opsList.end(); ++itOP) {
    SgAsmExpression* exp = *itOP;
    ROSE_ASSERT(exp);
    if (iteration==1) {
      // right hand side
      memR = isSgAsmMemoryReferenceExpression(exp);
      regR = isSgAsmRegisterReferenceExpression(exp);
      Val = isSgAsmValueExpression(exp);
    }
    if (iteration==0) {
      // left hand side
      memL = isSgAsmMemoryReferenceExpression(exp);
      regL = isSgAsmRegisterReferenceExpression(exp);
      iteration++;
    }
  } //for

  if (memL && (regR || Val || memR)) {
    // could be assignment to address
    rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memL);
    // apparently the reference to memory does not always have to be BP but
    // can also be IP if it is a static variable. How will we handle global variables?
    //bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memL);
    // remember this memory location as a write
    if (debug)
      cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    memoryWrites.insert(addr);
    return;
  }

  if (!(regL && memR))
    return;

  // could be usage of address
  rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memR);
  bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memR);
  if (!containsBP)
    return;

  // this is memory read with offset to BP
  // did we see a write for this? If not, it is not initialized!
  if (memoryWrites.find(addr)!=memoryWrites.end()) {
    // found write, everything is good
    if (debug)
      cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    return;
  }

  if (memoryRead.find(addr)!=memoryRead.end()) {
    // found this case before
    return;
  }

  if (debug)
    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;

  string res = "Possibly uninitialized variable: ";
  // instFunc was resolved above for both the block layout and the IDA layout,
  // so the report names the function in either mode.
  string funcname = instFunc->get_name();
  res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
    " <"+inst->get_comment()+"> in function: "+funcname;
  result[inst]= res;
  // report each unwritten address only once per function
  memoryRead.insert(addr);
}
void RoseBin_GMLGraph::printNodes( bool dfg, RoseBin_FlowAnalysis* flow,bool forward_analysis, std::ofstream& myfile, string& recursiveFunctionName) { //bool firstFunc = true; // traverse nodes and visualize results of graph funcMap.clear(); nodesMap.clear(); //cerr << " Preparing graph - Nr of Nodes : " << nodes.size() << " edges : " << edges.size() << endl; //SgGraphNodeList* gnodes = get_nodes(); // rose_graph_hash_multimap& nodes = get_nodes()->get_nodes(); rose_graph_integer_node_hash_map nodes = get_node_index_to_node_map(); int counter=nodes.size(); int count=0; rose_graph_integer_node_hash_map::iterator itn2 = nodes.begin(); for (; itn2!=nodes.end();++itn2) { counter++; count++; pair<int, SgGraphNode*> nt = *itn2; // string hex_address = itn2->first; SgGraphNode* node = isSgGraphNode(itn2->second); string hex_address =node->get_name(); SgNode* internal = node->get_SgNode(); SgAsmFunction* func = isSgAsmFunction(internal); if (func) { vector<SgNode*> list; FindInstructionsVisitorx86 vis; #ifdef _MSC_VER //#pragma message ("WARNING: Removed reference to AstQueryNamespace::querySubTree()") // ROSE_ASSERT(false); // CH (4/7/2010): Workaround for MSVC vector<SgAsmX86Instruction*> temp_list; AstQueryNamespace::querySubTree(func, std::bind2nd( vis, &temp_list )); list.resize(temp_list.size()); std::copy(temp_list.begin(), temp_list.end(), list.begin()); #else #if defined(__APPLE__) && defined(__MACH__) //Pei-Hung (7/28/2016): OSX El Capitan has issue with bind2nd. 
vector<SgAsmX86Instruction*> temp_list; AstQueryNamespace::querySubTree(func, std::bind2nd( vis, &temp_list )); list.resize(temp_list.size()); std::copy(temp_list.begin(), temp_list.end(), list.begin()); #else AstQueryNamespace::querySubTree(func, std::bind2nd( vis, &list )); #endif #endif int validInstructions = func->nrOfValidInstructions(list); funcMap[func]=counter; nodesMap[func]=count; string name = func->get_name(); string text = "node [\n id " + RoseBin_support::ToString(counter) + "\n id_ " + RoseBin_support::ToString(counter) + "\n label \"" + name + "\"\n "; text +=" nrinstr_ "+RoseBin_support::ToString(validInstructions)+" \n"; text+= " isGroup 1\n isGroup_ 1\n ]\n"; if (name=="frame_dummy") { //cerr << text << endl; vector<SgNode*> succs = func->get_traversalSuccessorContainer(); vector<SgNode*>::iterator j = succs.begin(); //cerr << " ------------- free_dummy"<<endl; int ii=0; for (;j!=succs.end();j++) { //SgNode* n = *j; //cerr << " Node contained at pos:"<<ii<<" - " << n->class_name() << endl; ii++; } //cerr << " number of validInstructions: " << validInstructions << endl; } if (grouping) myfile << text; } SgAsmInstruction* bin_inst = isSgAsmInstruction(internal); if (bin_inst) nodesMap[bin_inst]=count; } //cerr << " Writing graph to GML - Nr of Nodes : " << nodes.size() << endl; int pos=0; rose_graph_integer_node_hash_map::iterator itn = nodes.begin(); for (; itn!=nodes.end();++itn) { pos++; // string hex_address = itn->first; SgGraphNode* node = isSgGraphNode(itn->second); string hex_address = node->get_name(); SgNode* internal = node->get_SgNode(); SgAsmFunction* func = isSgAsmFunction(internal); string text=""; // specifies that this node has no destination address nodest_jmp = false; // specifies that there is a node that has a call error (calling itself) error =false; // specifies a call to a unknown location nodest_call = false; // specifies where its an int instruction interrupt = false; if (func) { string name = func->get_name(); //cerr << 
" if part name : " << name << endl; ROSE_ASSERT(node); if (grouping==false) { map < int , string> node_p = node->get_properties(); map < int , string>::iterator prop = node_p.begin(); string name = "noname"; string type = "removed";//node->get_type(); for (; prop!=node_p.end(); ++prop) { int addr = prop->first; //cerr << " gml : property for addr : " << addr << endl; if (addr==SgGraph::nodest_jmp) nodest_jmp = true; else if (addr==SgGraph::itself_call) error = true; else if (addr==SgGraph::nodest_call) nodest_call = true; else if (addr==SgGraph::interrupt) interrupt = true; // else // name = prop->second; } } int parent = funcMap[func]; RoseBin_support::checkText(name); int length = name.length(); text = "node [\n id " + RoseBin_support::ToString(pos) + "\n label \"" + name + "\"\n"; if (nodest_jmp) { text += " graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#FF0000\" ]\n"; text +=" Node_Color_ \"FF0000\" \n"; } else if (nodest_call) { text += " graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#FF9900\" ]\n"; text +=" Node_Color_ \"FF9900\" \n"; } else if (interrupt) { text += " graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#0000FF\" ]\n"; text +=" Node_Color_ \"0000FF\" \n"; } else if (error) { text += " graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#66FFFF\" ]\n"; text +=" Node_Color_ \"66FFFF\" \n"; }else { text += " graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#9933FF\" ]\n"; text +=" Node_Color_ \"9933FF\" \n"; } text +=" gid "+RoseBin_support::ToString(parent)+" \n"; text +=" skip_ 1 \n"; text +=" gid_ "+RoseBin_support::ToString(parent)+" ]\n"; // skip functions for now // if (skipFunctions) // text =""; } /*not a func*/ else { SgAsmX86Instruction* bin_inst = isSgAsmX86Instruction(internal); //cerr << " else part " << endl; SgAsmFunction* funcDecl_parent = 
NULL; if (bin_inst) { funcDecl_parent = isSgAsmFunction(bin_inst->get_parent()); if (funcDecl_parent==NULL) funcDecl_parent = isSgAsmFunction(bin_inst->get_parent()->get_parent()); } if (funcDecl_parent==NULL) { cerr << " ERROR : printNodes preparation . No parent found for node : " << bin_inst->class_name() << " " << hex_address << endl; continue; } if ((pos % 10000)==0) cout << " GMLGraph:: printing GML Nodes : " << pos << endl; string name = getInternalNodes(node, forward_analysis,bin_inst); int parent=0; map <SgAsmFunction*, int>::iterator its = funcMap.find(funcDecl_parent); if (its!=funcMap.end()) parent = funcMap[funcDecl_parent]; if (parent==0) cerr << " GMLGraph parent == 0 " << endl; if (onlyControlStructure && x86InstructionIsControlTransfer(bin_inst)) { text = "node [\n id " + RoseBin_support::ToString(pos) + "\n" + name ; int instrnr = funcDecl_parent->get_childIndex(bin_inst); text +=" instrnr_ "+RoseBin_support::ToString(instrnr)+" \n"; text +=" gid_ "+RoseBin_support::ToString(parent)+" \n"; text +=" gid "+RoseBin_support::ToString(parent)+" ]\n"; } else { text = "node [\n id " + RoseBin_support::ToString(pos) + "\n" + name ; int instrnr = funcDecl_parent->get_childIndex(bin_inst); text +=" instrnr_ "+RoseBin_support::ToString(instrnr)+" \n"; text +=" gid_ "+RoseBin_support::ToString(parent)+" \n"; text +=" gid "+RoseBin_support::ToString(parent)+" ]\n"; } } myfile << text; // cerr << " this node : " << text << endl; } funcMap.clear(); }
/**
 * Resolves the instruction targeted by a control-transfer instruction.
 *
 * Each operand of `inst` is examined in order.  A value operand is rendered
 * with RoseBin_support::resolveValue(), parsed as a hexadecimal number and
 * looked up in rememberInstructions; the first hit is returned.  Memory
 * operands (jumps through data) are skipped.
 *
 * Side effect: when the value operand carries a replacement name and the
 * function containing the target currently has a name that parses as a
 * hexadecimal number, that function is renamed to the replacement name.
 *
 * @param inst  instruction to inspect; may be NULL
 * @return the target instruction, or NULL when `inst` is NULL, is not a
 *         control transfer, or no operand resolves to a known instruction
 */
SgAsmInstruction*
RoseBin_FlowAnalysis::process_jumps_get_target(SgAsmx86Instruction* inst) {
  if (inst == NULL || !x86InstructionIsControlTransfer(inst))
    return NULL;

  // get the operands and the destination address
  SgAsmOperandList* opList = inst->get_operandList();
  ROSE_ASSERT(opList);
  const SgAsmExpressionPtrList& ptrList = opList->get_operands();

  string funcName = "";
  for (SgAsmExpressionPtrList::const_iterator itList = ptrList.begin(); itList != ptrList.end(); ++itList) {
    SgAsmExpression* exp = *itList;
    ROSE_ASSERT(exp);
    SgAsmValueExpression* valExpr = isSgAsmValueExpression(exp);
    SgAsmMemoryReferenceExpression* memExpr = isSgAsmMemoryReferenceExpression(exp);

    string valStr = "";
    if (valExpr) {
      uint8_t byte_val=0xF;
      uint16_t word_val=0xFF;
      uint32_t double_word_val=0xFFFF;
      uint64_t quad_word_val=0xFFFFFFFFU;
      valStr = RoseBin_support::resolveValue(valExpr, true,
                                             byte_val,
                                             word_val,
                                             double_word_val,
                                             quad_word_val);
      funcName = valExpr->get_replacement();
    }
    if (memExpr)
      continue; // this is a jump to data ... do not handle right now!!

    // convert val string to long
    uint64_t val=0;
    if (!from_string<uint64_t>(val, valStr, std::hex)) {
      if (valStr!="" && RoseBin_support::DEBUG_MODE()) {
        // Format the full-width address only when the warning is printed.
        ostringstream addrhex;
        const uint64_t addrsource = inst->get_address();
        addrhex << hex << setw(8) << addrsource ;
        // Report the class of the operand itself: a register-reference cast
        // of it is NULL whenever valStr is non-empty.
        cerr << " WARNING: Cant convert string to long - in process_jump  :: " << exp->class_name() <<
          " inst :: " << inst->get_mnemonic() << " addr : " << addrhex.str() << " target : " << valStr << endl;
      }
      continue;
    }

    rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::const_iterator itc =
      rememberInstructions.find(val);
    if (itc == rememberInstructions.end())
      continue; // target not found for this operand, try the next one

    SgAsmInstruction* target = itc->second;
    // we set the target (jump to for each control instruction)
    ROSE_ASSERT(target);

    if (funcName!="") {
      SgAsmNode* block = target;
      if (!db)
        block = isSgAsmNode(target->get_parent());
      ROSE_ASSERT(block);
      SgAsmFunction* func = isSgAsmFunction(block->get_parent());
      if (func) {
        string fname = func->get_name();
        uint64_t val_f=0;
        if (from_string<uint64_t>(val_f, fname, std::hex)) {
          // func name is a hex number: replace it by the symbolic name
          func->set_name(funcName);
        }
        // otherwise it already is a name; leave it alone
      }
    }
    return target;
  }
  return NULL;
}
int main(int argc, char** argv) { std::string binaryFilename = (argc >= 1 ? argv[argc-1] : "" ); std::vector<std::string> newArgv(argv,argv+argc); newArgv.push_back("-rose:output"); newArgv.push_back(binaryFilename+"-binarySemantics.C"); SgProject* proj = frontend(newArgv); ROSE_ASSERT (proj); SgSourceFile* newFile = isSgSourceFile(proj->get_fileList().front()); ROSE_ASSERT(newFile != NULL); SgGlobal* g = newFile->get_globalScope(); ROSE_ASSERT (g); //I am doing some experimental work to enable functions in the C representation //Set this flag to true in order to enable that work bool enable_functions = true; //Jeremiah did some work to enable a simplification and normalization of the //C representation. Enable this work by setting this flag to true. bool enable_normalizations = false; vector<SgNode*> asmFiles = NodeQuery::querySubTree(proj, V_SgAsmGenericFile); ROSE_ASSERT (asmFiles.size() == 1); if( enable_functions == false) { //Representation of C normalizations withotu functions SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration("run", SgTypeVoid::createType(), buildFunctionParameterList(), g); appendStatement(decl, g); SgBasicBlock* body = decl->get_definition()->get_body(); // ROSE_ASSERT(isSgAsmFile(asmFiles[0])); // X86CTranslationPolicy policy(newFile, isSgAsmFile(asmFiles[0])); X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0])); ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL); policy.switchBody = buildBasicBlock(); removeDeadStores(policy.switchBody,policy); SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody); ROSE_ASSERT(isSgBasicBlock(sw->get_body())); SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw); appendStatement(whileStmt, body); policy.whileBody = sw; X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy); //AS FIXME: This query gets noting in the form in the repository. 
Doing this hack since we only //have one binary file anyways. //vector<SgNode*> instructions = NodeQuery::querySubTree(asmFiles[0], V_SgAsmX86Instruction); vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction); std::cout << "Instruction\n"; for (size_t i = 0; i < instructions.size(); ++i) { SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]); ROSE_ASSERT (insn); try { t.processInstruction(insn); } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) { std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n"; } } if ( enable_normalizations == true ) { //Enable normalizations of C representation //This is done heuristically where some steps //are repeated. It is not clear which order is //the best { plugInAllConstVarDefs(policy.switchBody,policy) ; simplifyAllExpressions(policy.switchBody); removeIfConstants(policy.switchBody); removeDeadStores(policy.switchBody,policy); removeUnusedVariables(policy.switchBody); } { plugInAllConstVarDefs(policy.switchBody,policy) ; simplifyAllExpressions(policy.switchBody); removeIfConstants(policy.switchBody); removeDeadStores(policy.switchBody,policy); } removeUnusedVariables(policy.switchBody); } }else{ //Experimental changes to introduce functions into the C representation //When trying to add function I get that symbols are not defined //Iterate over the functions separately vector<SgNode*> asmFunctions = NodeQuery::querySubTree(proj, V_SgAsmFunction); for(size_t j = 0; j < asmFunctions.size(); j++ ) { SgAsmFunction* binFunc = isSgAsmFunction( asmFunctions[j] ); // Some functions (probably just one) are generated to hold basic blocks that could not // be assigned to a particular function. This happens when the Disassembler is overzealous // and the Partitioner cannot statically determine where the block belongs. The name of // one such function is "***uncategorized blocks***". 
[matzke 2010-06-29] if ((binFunc->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) continue; //Some functions may be unnamed so we need to generate a name for those std::string funcName; if (binFunc->get_name().size()==0) { char addr_str[64]; sprintf(addr_str, "0x%"PRIx64, binFunc->get_statementList()[0]->get_address()); funcName = std::string("my_") + addr_str;; } else { funcName = "my" + binFunc->get_name(); } //Functions can have illegal characters in their name. Need to replace those characters for ( int i = 0 ; i < funcName.size(); i++ ) { char& currentCharacter = funcName.at(i); if ( currentCharacter == '.' ) currentCharacter = '_'; } SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration(funcName, SgTypeVoid::createType(), buildFunctionParameterList(), g); appendStatement(decl, g); SgBasicBlock* body = decl->get_definition()->get_body(); X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0])); ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL); policy.switchBody = buildBasicBlock(); SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody); SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw); appendStatement(whileStmt, body); policy.whileBody = sw; X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy); vector<SgNode*> instructions = NodeQuery::querySubTree(binFunc, V_SgAsmX86Instruction); for (size_t i = 0; i < instructions.size(); ++i) { SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]); if( insn->get_kind() == x86_nop ) continue; ROSE_ASSERT (insn); try { t.processInstruction(insn); } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) { std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n"; } } } //addDirectJumpsToSwitchCases(policy); } proj->get_fileList().erase(proj->get_fileList().end() - 1); // Remove binary file before calling backend // AstTests::runAllTests(proj); //Compile 
the resulting project return backend(proj); }
/** Print the entire forest for debugging output. */ void print(std::ostream &o) const { for (size_t i=0; i<levels.size(); ++i) { if (levels[i].vertices.empty()) { o <<"partition forest level " <<i <<" is empty.\n"; } else { size_t nsets = levels[i].vertices.size(); size_t nfuncs = 0; for (Vertices::const_iterator vi=levels[i].vertices.begin(); vi!=levels[i].vertices.end(); ++vi) nfuncs += (*vi)->functions.size(); o <<"partition forest level " <<i <<" contains " <<nfuncs <<" function" <<(1==nfuncs?"":"s") <<" in " <<nsets <<" set" <<(1==nsets?"":"s") <<"\n"; o <<" the following input was used to generate " <<(1==nsets?"this set":"these sets") <<":\n"; o <<StringUtility::prefixLines(levels[i].inputs.toString(), " "); int setno = 1; for (Vertices::const_iterator vi=levels[i].vertices.begin(); vi!=levels[i].vertices.end(); ++vi, ++setno) { Vertex *vertex = *vi; const Functions &functions = vertex->functions; o <<" set #" <<setno <<" contains " <<vertex->functions.size() <<" function" <<(1==vertex->functions.size()?"":"s") <<":\n"; for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi) { SgAsmFunction *func = *fi; o <<" " <<StringUtility::addrToString(func->get_entry_va()) <<" <" <<func->get_name() <<">\n"; } o <<" whose output was: {"; for (OutputValues::const_iterator oi=vertex->outputs.begin(); oi!=vertex->outputs.end(); ++oi) o <<" " <<*oi; o <<" }\n"; } } } }
void RoseBin_FlowAnalysis::checkControlFlow( SgAsmInstruction* binInst, int functionSize, int countDown, string& currentFunctionName, int func_nr) { //cerr << "check control flow" << endl; while (!worklist_forthisfunction.empty()) { SgAsmInstruction* binInst = worklist_forthisfunction.top(); worklist_forthisfunction.pop(); ROSE_ASSERT(binInst); countDown--; int address = binInst->get_address(); ostringstream addrhex; addrhex << hex << setw(8) << address ; ROSE_ASSERT(g_algo->info); vector <VirtualBinCFG::CFGEdge> vec; if (forward_analysis) { vec = binInst->cfgBinOutEdges(g_algo->info); if (isSgAsmx86Instruction(binInst) && isSgAsmx86Instruction(binInst)->get_kind() == x86_call) { // vec.push_back(VirtualBinCFG::CFGEdge(VirtualBinCFG::CFGNode(binInst), VirtualBinCFG::CFGNode(g_algo->info->getInstructionAtAddress(binInst->get_address() + binInst->get_raw_bytes().size())), g_algo->info)); } } else vec = binInst->cfgBinInEdges(g_algo->info); string name = binInst->get_mnemonic(); // if (RoseBin_support::DEBUG_MODE()) // cout << " " << addrhex.str() << " " << func_nr << " :: " << functionSize << // "/" << countDown << " ---------- next CFG instruction : " << name << " vecSize : " << vec.size() << endl; for (int i=0; i < (int)vec.size(); i++) { VirtualBinCFG::CFGEdge edge = vec[i]; VirtualBinCFG::CFGNode cfg_target = edge.target(); VirtualBinCFG::CFGNode cfg_source = edge.source(); if (!forward_analysis) { cfg_target = edge.source(); cfg_source = edge.target(); } SgAsmInstruction* bin_target = isSgAsmInstruction(cfg_target.getNode()); SgAsmInstruction* thisbin = isSgAsmInstruction(cfg_source.getNode()); ROSE_ASSERT(thisbin); SgAsmx86Instruction* thisbinX86 = isSgAsmx86Instruction(thisbin); ROSE_ASSERT (thisbinX86); string src_mnemonic = thisbin->get_mnemonic(); int src_address = thisbin->get_address(); if (analysisName=="callgraph") src_address = funcDecl->get_address(); ostringstream addrhex_s; addrhex_s << hex << setw(8) << src_address ; SgGraphNode* src =NULL; string 
hexStr = addrhex_s.str(); if (analysisName!="callgraph") { vector<SgGraphNode*> sources; vizzGraph->checkIfGraphNodeExists(hexStr, sources); vector<SgGraphNode*>::const_iterator src_it = sources.begin(); for (;src_it!=sources.end();++src_it) { // should only be one node! adapted to new interface src = *src_it; } if (src==NULL) { // src= vizzGraph->createNode (src_mnemonic, typeNode, src_address, vizzGraph->graph->get_graph_id(), false, thisbin); src= addCFNode (src_mnemonic, typeNode, src_address, false, thisbin); string unp_name = unparseInstructionWithAddress(thisbin); src->append_properties(SgGraph::name,unp_name); if (analysisName=="dfa") src->append_properties(SgGraph::dfa_standard,unp_name); } ROSE_ASSERT(src); if (thisbinX86->get_kind() == x86_call) { uint64_t returnAddr = thisbinX86->get_address() + thisbinX86->get_raw_bytes().size(); ROSE_ASSERT(g_algo->info); SgAsmInstruction* retInsn = g_algo->info->getInstructionAtAddress(returnAddr); if (retInsn) { //worklist_forthisfunction.push(retInsn); //ostringstream tgthex_s; //tgthex_s << hex << setw(8) << returnAddr ; //string tgtStr = tgthex_s.str(); //SgGraphNode* tgt = vizzGraph->checkIfGraphNodeExists(tgtStr); // tps (25 Aug 2008) : this line seems broken! 
//string mne = retInsn->get_mnemonic(); //if (!tgt) {tgt = vizzGraph->createNode(mne, typeNode, returnAddr, vizzGraph->graph->get_graph_id(), false, retInsn);} // cerr << " ------> Creating return edge : " << thisbinX86->get_address() << " " << returnAddr << endl; // vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), src, thisbinX86->get_address(), tgt, returnAddr); } } } else if (analysisName=="callgraph") { // These are special cases that annotate the call graph (nodes) // so that the visualization can pick up the properties and color correctly ROSE_ASSERT(g_algo->info); if (thisbinX86->get_kind() == x86_jmp) { if (thisbinX86->cfgBinOutEdges(g_algo->info).empty()) { funcDeclNode->append_properties(SgGraph::nodest_jmp,RoseBin_support::ToString("nodest_jmp")); } } else if (thisbinX86->get_kind() == x86_call) { //cerr << "CallGRAPH: Found call : " << // RoseBin_support::HexToString(VirtualBinCFG::CFGNode(thisbinX86).getNode()->get_address()) << " to " << // RoseBin_support::HexToString(VirtualBinCFG::CFGNode(g_algo->info->getInstructionAtAddress(thisbinX86->get_address() + thisbinX86->get_raw_bytes().size())).getNode()->get_address()) << endl; vector<VirtualBinCFG::CFGEdge> dests = thisbinX86->cfgBinOutEdges(g_algo->info); dests.push_back(VirtualBinCFG::CFGEdge(VirtualBinCFG::CFGNode(thisbinX86), VirtualBinCFG::CFGNode(g_algo->info->getInstructionAtAddress(thisbinX86->get_address() + thisbinX86->get_raw_bytes().size())), g_algo->info)); if (!dests.empty()) { SgAsmNode* parent = isSgAsmNode(dests[0].target().getNode()->get_parent()); if (!db) parent = isSgAsmNode(parent->get_parent()); if (parent) { SgAsmFunction* funcdestparent = isSgAsmFunction(parent); string trg_func_name = funcdestparent->get_name(); if (trg_func_name==currentFunctionName) { funcDeclNode->append_properties(SgGraph::itself_call,RoseBin_support::ToString("itself_call")); } } } else { funcDeclNode->append_properties(SgGraph::nodest_call,RoseBin_support::ToString("nodest_call")); 
//cerr << " no destination found for call " << addrhex.str() << endl; } } else if (thisbinX86->get_kind() == x86_int) { funcDeclNode->append_properties(SgGraph::interrupt,RoseBin_support::ToString("interrupt")); } } if (bin_target!=NULL) { string trg_func_name = ""; int trg_func_address =1; string hexStrf = ""; SgAsmFunction* funcDeclparent=NULL; if (analysisName=="callgraph") { SgAsmNode* parent = dynamic_cast<SgAsmNode*>(bin_target->get_parent()); if (parent==NULL) continue; if (!db) parent = isSgAsmNode(parent->get_parent()); ROSE_ASSERT(parent); funcDeclparent = isSgAsmFunction(parent); ROSE_ASSERT(funcDeclparent); trg_func_name = funcDeclparent->get_name(); trg_func_address = funcDeclparent->get_address(); ostringstream addrhex_tf; addrhex_tf << hex << setw(8) << trg_func_address ; hexStrf = addrhex_tf.str(); //cerr << " CALLGRAPH TARGET PARENT : " << hexStrf << endl; } string trg_mnemonic = bin_target->get_mnemonic(); int trg_address = bin_target->get_address(); ostringstream addrhex_t; addrhex_t << hex << setw(8) << trg_address ; if (RoseBin_support::DEBUG_MODE()) cout << " OUTEDGES TO: vec[" << i << "/" << vec.size() << "] :" << addrhex_t.str() << " " << trg_mnemonic << endl; string hexStr = addrhex_t.str(); SgGraphNode* trg=NULL; vector<SgGraphNode*> targets; if (analysisName=="callgraph") vizzGraph->checkIfGraphNodeExists(hexStrf, targets); else vizzGraph->checkIfGraphNodeExists(hexStr, targets); vector<SgGraphNode*>::const_iterator src_it = targets.begin(); for (;src_it!=targets.end();++src_it) { // should only be one node! adapted to new interface trg = *src_it; } //ROSE_ASSERT(trg); bool target_visited = false; // DQ (4/23/2009): We want the type defined in the base class. 
// rose_hash::unordered_map <string, SgAsmInstruction*>::iterator vis = local_visited.find(hexStr); // CH (4/9/2010): Use boost::unordered instead //#ifndef _MSCx_VER #if 1 rose_hash::unordered_map <string, SgAsmInstruction*>::iterator vis = local_visited.find(hexStr); #else rose_hash::unordered_map <string, SgAsmInstruction*,rose_hash::hash_string>::iterator vis = local_visited.find(hexStr); #endif if (vis!=local_visited.end()) target_visited=true; if (trg==NULL) { if (analysisName=="callgraph") { // cerr << " >>> TARGET FUNC NAME " << trg_func_name << endl; //trg = vizzGraph->createNode (trg_func_name, typeNode, trg_func_address, vizzGraph->graph->get_graph_id(),false, funcDeclparent); trg = addCFNode (trg_func_name, typeNode, trg_func_address,false, funcDeclparent); } else { //trg = vizzGraph->createNode (trg_mnemonic, typeNode, trg_address, vizzGraph->graph->get_graph_id(),false, bin_target); trg = addCFNode (trg_mnemonic, typeNode, trg_address, false, bin_target); } string unp_name = unparseInstructionWithAddress(bin_target); //cout << " (target==NULL) unparse name : " << unp_name << endl; trg->append_properties(SgGraph::name,unp_name); if (analysisName=="dfa") trg->append_properties(SgGraph::dfa_standard,unp_name); } else { string unp_name = unparseInstructionWithAddress(bin_target); //cout << " unparse name : " << unp_name << endl; trg->append_properties(SgGraph::name,unp_name); if (analysisName=="dfa") trg->append_properties(SgGraph::dfa_standard,unp_name); } ROSE_ASSERT(trg); local_visited[hexStr] = bin_target; string name=""; if (analysisName=="callgraph") name = RoseBin_support::ToString(src_address)+RoseBin_support::ToString(trg_func_address); else name = RoseBin_support::ToString(src_address)+RoseBin_support::ToString(trg_address); bool exists = vizzGraph->checkIfDirectedGraphEdgeExists(src, trg); if (!exists) { if (analysisName=="callgraph") { if (currentFunctionName!=trg_func_name && thisbinX86->get_kind() != x86_ret) { // SgDirectedGraphEdge* edge = 
vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), funcDeclNode, src_address, trg, trg_func_address); SgDirectedGraphEdge* edge = vizzGraph->addDirectedEdge( funcDeclNode, trg, typeEdge); //cerr << "CallGraph : create edge : " << RoseBin_support::HexToString(src_address) << " to func : " << RoseBin_support::HexToString(trg_func_address) << endl; vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg)); } } else { //string addr = RoseBin_support::HexToString(binInst->get_address()); //if (addr==" 8048392" || addr==" 80482fd") // cerr << " >>>>>>>>>> found " << addr << " -- target_address : " << RoseBin_support::HexToString(trg_address) << endl; // SgDirectedGraphEdge* edge =vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), src, src_address, trg, trg_address); SgDirectedGraphEdge* edge = vizzGraph->addDirectedEdge( src, trg, typeEdge); vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg)); } } if (analysisName!="callgraph") { // handle return edges SgAsmStatementPtrList sources = thisbin->get_sources(); SgAsmStatementPtrList::iterator it = sources.begin(); for (;it!=sources.end();++it) { SgAsmInstruction* instT = isSgAsmInstruction(*it); //cerr << " This node is called from : " << instT->get_address() << endl; ostringstream addr_t; addr_t << hex << setw(8) << instT->get_address() ; SgGraphNode* trg =NULL; string hexStr = addr_t.str(); vector<SgGraphNode*> targets; vizzGraph->checkIfGraphNodeExists(hexStr, targets); vector<SgGraphNode*>::const_iterator src_it = targets.begin(); for (;src_it!=targets.end();++src_it) { // should only be one node! 
adapted to new interface trg = *src_it; } //trg= vizzGraph->checkIfGraphNodeExists(hexStr); if (trg==NULL) { string hexa = RoseBin_support::HexToString(instT->get_address()); hexa = hexa.substr(1,hexa.size()); string name = "0x"+hexa+":"+instT->get_mnemonic(); //trg= vizzGraph->createNode (name, typeNode, instT->get_address(), vizzGraph->graph->get_graph_id(), false, instT); trg= addCFNode(name, typeNode, instT->get_address(), false, instT); } bool exists = vizzGraph->checkIfDirectedGraphEdgeExists( trg,src); if (!exists) { bool same = sameParents(trg,src); if (!same) { SgDirectedGraphEdge* edge =vizzGraph->addDirectedEdge( trg, src, typeEdge); //SgDirectedGraphEdge* edge =vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), trg, instT->get_address(), src, src_address); vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg)); } } } } if (!target_visited) { // check if target is in the same function!!! SgAsmNode* block = bin_target; if (!db) block = isSgAsmNode(bin_target->get_parent()); ROSE_ASSERT(block); SgAsmFunction* funcPar = isSgAsmFunction(block->get_parent()); if (funcPar) { string nameFunc = funcPar->get_name(); if (nameFunc==currentFunctionName) { //checkControlFlow(bin_target, functionSize, countDown, currentFunctionName); worklist_forthisfunction.push(bin_target); } } else { if (RoseBin_support::DEBUG_MODE()) cerr << " ERROR:: Target Instruction has no parent! " << bin_target->class_name() << endl; } } // if visited } else { nr_target_missed++; if (binInst) if (RoseBin_support::DEBUG_MODE()) cerr << " WARNING:: no target found for " << RoseBin_support::HexToString(binInst->get_address()) << " " << binInst->class_name() << endl; } } } // if (RoseBin_support::DEBUG_MODE()) // cout << " ------------------------ done with instr: " << name << " " << addrhex.str() << endl; }
/**************************************************** * traverse the binary AST ****************************************************/ void RoseBin_FlowAnalysis::visit(SgNode* node) { // cerr << " traversing node " << node->class_name() << endl; if (isSgAsmFunction(node) ) { SgAsmFunction* binDecl = isSgAsmFunction(node); string name = binDecl->get_name(); ostringstream addrhex; addrhex << hex << setw(8) << binDecl->get_address() ; if (name=="") { name=addrhex.str(); binDecl->set_name(name); } SgAsmStatement* stat = NULL; // SgAsmStatementPtrList& list = binDecl->get_statementList(); vector<SgAsmInstruction*> list; FindInstructionsVisitor vis; AstQueryNamespace::querySubTree(binDecl, std::bind2nd( vis, &list )); int sizeList = list.size(); if (sizeList==0) { //cerr << " this function is empty!! " << endl; return; } //if ((func_nr % 1)==0) // if (RoseBin_support::DEBUG_MODE()) // cout << analysisName << " Func Nr: " << (++func_nr) << " blocks:" << // sizeList << " ***************** checking function : " << name << endl; if (forward_analysis) { stat = list.front(); } else { // get the last instruction in a function (backward control flow) stat = list.back(); } ROSE_ASSERT(stat); // if (RoseBin_support::DEBUG_MODE()) //cout << ">>>>>>>>>>>>>. checking statement in function : " << name << " .. 
" << stat->class_name() << endl; if (isSgAsmInstruction(stat)) { SgAsmInstruction* inst = isSgAsmInstruction(stat); ROSE_ASSERT(inst); // check the control flow of the first instruction in a function string typeFunction ="function"; SgGraphNode* src=NULL; if (analysisName=="callgraph") { // src = vizzGraph->createNode (name, typeFunction, binDecl->get_address(), vizzGraph->graph->get_graph_id(), false, binDecl); src = addCFNode (name, typeFunction, binDecl->get_address(), false, binDecl); } else { //src = vizzGraph->createNode (name, typeFunction, binDecl->get_address(), vizzGraph->graph->get_graph_id(), true, binDecl); //cerr << ">> adding node (f) src: " << RoseBin_support::HexToString(binDecl->get_address()) << endl; src = addCFNode (name, typeFunction, binDecl->get_address(), true, binDecl); string mnemonic=inst->get_mnemonic(); //SgGraphNode* trg = vizzGraph->createNode (mnemonic, typeNode, inst->get_address(), vizzGraph->graph->get_graph_id(),false, inst); //cerr << ">> adding node (first) trg: " << RoseBin_support::HexToString(inst->get_address()) << endl; SgGraphNode* trg = addCFNode (mnemonic, typeNode, inst->get_address(), false, inst); string unp_name = unparseInstructionWithAddress(inst); trg->append_properties(SgGraph::name,unp_name); if (analysisName=="dfa") trg->append_properties(SgGraph::dfa_standard,unp_name); //cerr << "Create edge " << endl; // SgDirectedGraphEdge* edge = vizzGraph->createEdge ( typeFunction, vizzGraph->graph->get_graph_id(), src, binDecl->get_address(), trg, inst->get_address()); SgDirectedGraphEdge* edge = vizzGraph->addDirectedEdge ( src, trg, typeFunction); vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg)); } local_visited.clear(); worklist_forthisfunction.push(inst); funcDecl = binDecl; funcDeclNode = src; checkControlFlow(inst, sizeList, sizeList, name, func_nr); } else { if (RoseBin_support::DEBUG_MODE()) cerr << "This is not an Instruction " << endl; } } }
int main(int argc, char *argv[]) { std::ios::sync_with_stdio(); argv0 = argv[0]; { size_t slash = argv0.rfind('/'); argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1); if (0==argv0.substr(0, 3).compare("lt-")) argv0 = argv0.substr(3); } int argno = 1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { std::cout << argv[argno] << std::endl; if (!strcmp(argv[argno], "--")) { ++argno; break; } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) { ::usage(0); } else { std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n" <<"see \"" <<argv0 <<" --help\" for usage info.\n"; exit(1); } } if (argno+1!=argc) ::usage(1); std::string specimen_name = StringUtility::getAbsolutePathFromRelativePath(argv[argno++], true); std::string specimen_path = StringUtility::getPathFromFileName(specimen_name); std::cout << "Specimen name is: " << specimen_name << std::endl; SgAsmInterpretation *interp = CloneDetection::open_specimen(specimen_name, argv0, false); SgBinaryComposite *binfile = SageInterface::getEnclosingNode<SgBinaryComposite>(interp); assert(interp!=NULL && binfile!=NULL); // Figure out what functions we need to generate files from. 
std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(interp); std::cerr <<argv0 <<": " <<all_functions.size() <<" function" <<(1==all_functions.size()?"":"s") <<" found\n"; for (std::vector<SgAsmFunction*>::iterator fi=all_functions.begin(); fi!=all_functions.end(); ++fi) { // Save function SgAsmFunction *func = *fi; std::vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(func); std::string function_name = func->get_name(); if( function_name.size() == 0 || insns.size() < 100 || function_name.find("@plt") != std::string::npos ) { continue; } std::cout << "function name is: " << function_name << std::endl; { //std::string file_name = specimen_name+"_"+func->get_name()+"_"+boost::lexical_cast<std::string>(func->get_entry_va()); std::string file_name = specimen_path + "/" + function_name; std::cout << "generating " << file_name << " from " << specimen_name << std::endl; std::ofstream func_file; func_file.open(file_name.c_str()); // Save instructions for (std::vector<SgAsmInstruction*>::iterator it = insns.begin(); it != insns.end(); ++it) { SgUnsignedCharList array = (*it)->get_raw_bytes(); std::string str = ""; for(size_t i=0; i < array.size(); ++i) { unsigned char c = array[i]; str+= c; } func_file << str; } func_file.close(); } } return 0; }
void add_calls_to_syscalls_to_db(SqlDatabase::TransactionPtr tx, DirectedGraph* G, std::vector<SgAsmFunction*> all_functions) { // load the functions in db into memory std::map<std::string, std::set<int> > symbolToId; SqlDatabase::StatementPtr cmd3 = tx->statement("select id, name from semantic_functions"); for (SqlDatabase::Statement::iterator r=cmd3->begin(); r!=cmd3->end(); ++r) { int func_id = r.get<int>(0); std::string func_name = r.get<std::string>(1); if (func_name.size() == 0) continue; std::map<std::string, std::set<int> >::iterator fit = symbolToId.find(func_name); if (fit == symbolToId.end()) { std::set<int> function_ids; function_ids.insert(func_id); symbolToId[func_name] = function_ids; } else { fit->second.insert(func_id); } } DirectedGraph& graph = *G; SqlDatabase::StatementPtr stmt = tx->statement("insert into syscalls_made(caller, syscall_id, syscall_name) values(?,?,?)"); // Iterate over all components of the reachability graph typedef graph_traits<DirectedGraph>::vertex_descriptor Vertex; graph_traits<DirectedGraph>::vertex_iterator i, end; for (tie(i, end) = vertices(graph); i != end; ++i) { if (*i < ids_reserved_for_syscalls) continue; std::set<int> syscalls; // Iterate through the child vertex indices for [current_index] std::vector<Vertex> reachable; boost::breadth_first_search(graph, *i, boost::visitor(boost::make_bfs_visitor(boost::write_property(boost::identity_property_map(), std::back_inserter(reachable), boost::on_discover_vertex())))); for (std::vector<Vertex>::iterator it = reachable.begin(); it != reachable.end(); ++it) { if (*it < ids_reserved_for_syscalls) syscalls.insert(*it); } int caller_id = *i - ids_reserved_for_syscalls; ROSE_ASSERT(caller_id >= 0); SgAsmFunction* caller = all_functions[caller_id]; ROSE_ASSERT(isSgAsmFunction(caller) != NULL); std::string func_name = caller->get_name(); if (func_name.length() == 0) continue; std::map<std::string, std::set<int> >::iterator equivalent_ids = symbolToId.find(func_name); if 
(equivalent_ids == symbolToId.end()) equivalent_ids = symbolToId.find(func_name+"@plt"); if (syscalls.size() > 0 && equivalent_ids != symbolToId.end()) { for (std::set<int>::iterator sit = syscalls.begin(); sit != syscalls.end(); ++sit) { int syscall_callee_id = *sit; extern std::map<int, std::string> linux32_syscalls; // defined in linux_syscalls.C const std::string &syscall_name = linux32_syscalls[syscall_callee_id]; for (std::set<int>::iterator equivalent_id = equivalent_ids->second.begin(); equivalent_id != equivalent_ids->second.end(); ++ equivalent_id) { stmt->bind(0, *equivalent_id); stmt->bind(1, syscall_callee_id); stmt->bind(2, syscall_name); stmt->execute(); } } } } }
/*
 * Detect functions (blocks) that can be merged together.
 *
 * Every SgAsmFunction below globalNode is examined, last one first. For a
 * non-empty function, resolveFunction() is called with the function's last
 * instruction and a flag saying whether the function contains a ret or hlt
 * instruction. If the result is an x86 instruction whose parent is a
 * function, that function is merged into the current one: its return
 * targets are added to the current function's, its instructions are
 * re-parented and appended, and the emptied function is removed from
 * globalNode.
 *
 * globalNode must be an SgAsmBlock whenever a merge takes place.
 */
void
RoseBin_FlowAnalysis::resolveFunctions(SgAsmNode* globalNode) {
  vector<SgNode*> tree = NodeQuery::querySubTree(globalNode, V_SgAsmFunction);
  int nr = 0;
  while (!tree.empty()) {
    SgAsmFunction* funcD = isSgAsmFunction(tree.back());
    tree.pop_back();
    // Validate before the debug output below dereferences the pointer.
    ROSE_ASSERT(funcD);

    nr++;
    if ((nr % 100)==0)
      if (RoseBin_support::DEBUG_MODE())
        cerr << " funcListSize : " << tree.size() << " -- iteration : " << nr << " func " << funcD->get_name() << endl;

    vector<SgNode*> funcVec = funcD->get_traversalSuccessorContainer();
    if (funcVec.empty())
      continue;

    // The function has a stop condition if any instruction is ret or hlt.
    // Every child is checked (no early exit) so that each one is asserted
    // to be an x86 instruction.
    bool hasStopCondition = false;
    for (unsigned int itf = 0; itf < funcVec.size(); itf++) {
      SgAsmx86Instruction* finst = isSgAsmx86Instruction(funcVec[itf]);
      ROSE_ASSERT(finst);
      if (finst->get_kind() == x86_ret || finst->get_kind() == x86_hlt)
        hasStopCondition = true;
    }

    SgAsmx86Instruction* lastInst = isSgAsmx86Instruction(funcVec.back());
    ROSE_ASSERT(lastInst);
    SgAsmx86Instruction* nextInst = isSgAsmx86Instruction(resolveFunction(lastInst, hasStopCondition));
    if (!nextInst)
      continue;
    SgAsmFunction* nextFunc = isSgAsmFunction(nextInst->get_parent());
    if (!nextFunc)
      continue;

    // The merged function inherits the return targets of the absorbed one.
    ROSE_ASSERT(g_algo->info);
    g_algo->info->returnTargets[funcD].insert(g_algo->info->returnTargets[nextFunc].begin(),
                                               g_algo->info->returnTargets[nextFunc].end());

    // Move all instructions of the next function into the current one. The
    // successor list is held in a local vector while statements are moved.
    vector<SgNode*> funcNextVec = nextFunc->get_traversalSuccessorContainer();
    for (unsigned int i = 0; i < funcNextVec.size(); ++i) {
      SgAsmInstruction* inst = isSgAsmInstruction(funcNextVec[i]);
      ROSE_ASSERT(inst);
      inst->set_parent(funcD);
      funcD->append_statement(inst);
    }

    // Detach the now-empty function from the tree. It is not deleted here.
    nextFunc->remove_children();
    nextFunc->set_parent(NULL);
    SgAsmBlock* globalBlock = isSgAsmBlock(globalNode);
    ROSE_ASSERT(globalBlock);
    globalBlock->remove_statement(nextFunc);
  }
}