Пример #1
0
	void visit(SgNode *node)
	{
		SgAsmFunction* f = isSgAsmFunction(node);
		if (f == NULL) return;

		std :: cout << f->get_name() << std::endl;
		std :: cout << "====================================" << std :: endl;

		// create dataflow engine
		SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(node);
		const RegisterDictionary *regdict = interp->get_registers();
		BaseSemantics ::RiscOperatorsPtr symbolicOps = SymbolicSemantics::RiscOperators::instance(regdict);
		DispatcherX86Ptr cpu = DispatcherX86::instance(symbolicOps);

		BaseSemantics :: SValuePtr esp_0 = symbolicOps->readRegister(cpu->REG_ESP);
		TaintedFlow taintAnalysis(cpu);

		TaintedFlow::Approximation approximation = TaintedFlow :: UNDER_APPROXIMATE;
		taintAnalysis.approximation(approximation);
		size_t cfgStartVertex = 0;


		// build control flow graph

		CFG cfg;
		ControlFlow().build_block_cfg_from_ast(node, cfg);

		  ///////////////////////
		 //  Taint Analysis   //
		///////////////////////

		taintAnalysis.computeFlowGraphs(cfg, cfgStartVertex);

		TaintedFlow::StatePtr initialState = taintAnalysis.stateInstance(TaintedFlow::BOTTOM);

		// at this point, we would need to taint different variables

		initialState->setIfExists(DataFlow::Variable(), TaintedFlow::NOT_TAINTED);


		// actually run the taint analysis

		taintAnalysis.runToFixedPoint(cfg, cfgStartVertex, initialState);

		BOOST_FOREACH(const typename CFG::VertexNode &vertex, cfg.vertices()){
			// rose_addr_t lastInsnAddr =
			//	SageInterface::querySubTree<SgAsmInstruction>(vertex.value()).back()->get_address();
			TaintedFlow::StatePtr state = taintAnalysis.getFinalState(vertex.id());
			std :: cout << *state << std :: endl;
		}

		// print 'variables' of function

		// BOOST_FOREACH(const DataFlow::Variable &variable, taintAnalysis.variables() )
		//	std :: cout << variable << std :: endl;



	}
void CountTraversal::visit ( SgNode* n )
   {
     SgAsmInstruction* asmInstruction = isSgAsmInstruction(n);
     if (asmInstruction != NULL)
        {
       // Use the new interface support for this (this detects all multi-byte nop instructions). 
          if (SageInterface::isNOP(asmInstruction) == true)
             {
               if (previousInstructionWasNop == true)
                  {
                 // Increment the length of the identified NOP sequence
                    count++;
                  }
                 else
                  {
                    count = 1;
                 // Record the starting address of the NOP sequence
                    nopSequenceStart = asmInstruction;
                  }

               previousInstructionWasNop = true;
             }
            else
             {
               if (count > 0)
                   {
                  // Report the sequence when we have detected the end of the sequence.
                     SgAsmFunction* functionDeclaration = getAsmFunction(asmInstruction);
                     printf ("Reporting NOP sequence of length %3d at address %zu in function %s (reason for this being a function = %u = %s) \n",
                          count,nopSequenceStart->get_address(),functionDeclaration->get_name().c_str(),
                             functionDeclaration->get_reason(),
                             stringifySgAsmFunctionFunctionReason(functionDeclaration->get_reason()).c_str());

                     nopSequences.push_back(pair<SgAsmInstruction*,int>(nopSequenceStart,count));

                     SgAsmBlock* block = isSgAsmBlock(nopSequenceStart->get_parent());
                     ROSE_ASSERT(block != NULL);
                     SgAsmStatementPtrList & l = block->get_statementList();

                  // Now iterate over the nop instructions in the sequence and report the lenght of each (can be multi-byte nop instructions).
                     SgAsmStatementPtrList::iterator i = find(l.begin(),l.end(),nopSequenceStart);
                     ROSE_ASSERT(i != l.end());
                     int counter = 0;
                     while ( (*i != asmInstruction) && (i != l.end()) )
                        {
                          printf ("--- NOP #%2d is length = %2d \n",counter++,(int)isSgAsmInstruction(*i)->get_raw_bytes().size());
                          i++;
                        }
                   }

               count = 0;
               previousInstructionWasNop = false;
             }
        }
   }
Пример #3
0
 void visit(SgNode *node) {
     SgAsmFunction *func = isSgAsmFunction(node);
     if (func && 0==func->get_name().compare("simple06")) {
         ++nvisits;
         CFG cfg = rose::BinaryAnalysis::ControlFlow().build_block_cfg_from_ast<CFG>(func);
         CFG_Vertex start = 0;
         assert(get(boost::vertex_name, cfg, start)==func->get_entry_block());
         DG_RelMap dgmap1 = rose::BinaryAnalysis::Dominance().build_postdom_relation_from_cfg(cfg, start);
         DG_RelMap dgmap2 = MyDominance().build_postdom_relation_from_cfg(cfg, start);
     }
 }
Пример #4
0
static bool
isEqual(SgNode* A, SgNode* B)
{

    
  if(A==NULL || B == NULL) return false;


  SgAsmInstruction* iA = isSgAsmX86Instruction(A);
  SgAsmInstruction* iB = isSgAsmX86Instruction(B);
  SgAsmFunction* fA = isSgAsmFunction(A);
  SgAsmFunction* fB = isSgAsmFunction(B);
  
  bool isTheSame = false;
  if(iA != NULL && iB != NULL)
    isTheSame = unparseInstrFast(iA) == unparseInstrFast(iB) ? true : false;
  if(fA != NULL && fB != NULL)
    isTheSame = fA->get_name() == fB->get_name() ? true : false;

  return isTheSame;
}
Пример #5
0
/****************************************************
 * process all instructions in the DB
 * add the instructions to the blocks
 ****************************************************/
void RoseBin_DB_IDAPRO::process_instruction_query(MYSQL* conn, MYSQL_RES* res_set) {
  rememberInstructions.clear();
  // get the functions
  //  char* q = (char*)"SELECT * FROM instructions_1";
  char *q = (char*)"select *,     (select parent_function from basic_blocks_1 where id = i.basic_block_id      and (i.address - parent_function) >= 0     and (i.address - parent_function) =     (select min(i.address - parent_function) from basic_blocks_1 where id = i.basic_block_id       and (i.address - parent_function) >= 0)     ) as i_f from instructions_1 i order by i.address"; 

  if (RoseBin_support::DEBUG_MODE())
    cout << "\n>> QUERY:: " << q << "\n" << endl;
  res_set = process_query(conn,q);
  if (res_set == NULL) {
    print_problemWithResults(conn);
  } else {
    
    MYSQL_ROW row;
    string mnemonic=(char*)"";
    uint64_t address=0;
    int basic_block=-1;
    int sequence =-1;
    string data=(char*)"";
    int i_func;

    while ((row = mysql_fetch_row(res_set))!=NULL) {
      for (unsigned int i=0; i<mysql_num_fields(res_set);i++) {
        char* ret=(char*)"";
        if (row[i] ==NULL) { 
          ret = (char*)"<NULL>";
          if (i==0) address = -1;
          if (i==1) basic_block = -1;
          if (i==2) mnemonic = ret;
          if (i==3) sequence = -1;
          if (i==4) data=ret;
          if (i==5) i_func= -1;
        } else {
          ret= row[i];
          if (i==0) address = atoi(ret);
          if (i==1) basic_block = atoi(ret);
          if (i==2) mnemonic = ret;
          if (i==3) sequence = atoi(ret);
          if (i==4) data=ret;
          if (i==5) i_func = atoi(ret);
        }
      }
      // patched to adjust to objdump , Apr 26 2007
      if (mnemonic ==(char*)"retn")
        mnemonic = (char*)"ret";
      
      if (RoseBin_support::DEBUG_MODE()) {
        ostringstream addrhex;
        addrhex << hex << setw(8) << address ;
        cout << ">> creating instruction : " << addrhex.str() << " " << address << 
          " - " << basic_block << " - " << mnemonic << " - " << sequence << endl;
      }
      // check if it is an instruction or if it appears in the callgraph,
      // if it is in the callgraph, one wants to create a BinaryCall instead

      // append the instruction to its function
      rose_hash::unordered_map <int, SgAsmFunction* >::iterator func_it = rememberFunctions.find(i_func);
      SgAsmFunction* func = NULL;
      // for (func_it; func_it!=rememberFunctions.end(); ++func_it) {
      if (func_it != rememberFunctions.end()) {
        func = func_it->second;
      } else {
        if (i_func!=-1)
        cerr << " ERROR : cant find the function i_func : " << i_func << " in rememberFunctions for instruction : " << mnemonic << endl;
      }

      
      SgAsmInstruction* instruction = NULL;
      instruction = createInstruction(address, func, mnemonic);
      //        instruction = new SgAsmInstruction(address,bb,mnemonic,"");
      // Sep 29, tps : commented the following line out, since the function was removed.
      //instruction->set_raw_bytes(data);

      ROSE_ASSERT(instruction);

      SgAsmOperandList* operandList = new SgAsmOperandList();
      instruction->set_operandList(operandList);
      operandList->set_parent(instruction);

      ostringstream hexaddr;
      hexaddr << hex << setw(8) << address ;
      if (RoseBin_support::DEBUG_MODE())
        cout << " .rememberInstruction " << instruction->class_name() 
             << "  at : " << address << " hex: " << hexaddr.str() << endl;
      rememberInstructions[address]= instruction ;


      if (func) {
        // get the block in the func and append to it to conform to jeremiah
        func->append_statement(instruction);
        instruction->set_parent(func);
        //vector <SgNode*> blockVec =func->get_traversalSuccessorContainer();
        //SgAsmBlock* block = isSgAsmBlock(blockVec[0]);
        //ROSE_ASSERT(block);
        //block->append_statement(instruction);
        //instruction->set_parent(block);

        ROSE_ASSERT(instruction->get_parent());

        //SgAsmNode* nInst = (SgAsmNode*) instruction;
        //nInst->set_parent(func);

        ostringstream addrhex;
        addrhex << hex << setw(8) << i_func ;
        if (RoseBin_support::DEBUG_MODE())
          cout << ">> appended instruction to function: " << func->get_name() << " addr " << addrhex.str() << " " << address << endl;
      } else {
        if (i_func!=-1) {
          cerr << " ERROR :: could not append instruction to function : " << endl;
          //exit(0);
        }
      }
      
    } // while

  } // if (res_set==NULL)
  checkError(conn,res_set);
}
Пример #6
0
int main(int argc, char** argv) {

  if (!containsArgument(argc, argv, "-checkAST") && 
      !containsArgument(argc, argv, "-checkGraph") &&
      !containsArgument(argc, argv, "-printTree") &&
      !containsArgument(argc, argv, "-callgraph") &&
      !containsArgument(argc, argv, "-cfa") &&
      !containsArgument(argc, argv, "-dfa") 
      ) {argc = 1;}

  if (argc < 2) {
    fprintf(stderr, "Usage: %s executableName [OPTIONS]\n", argv[0]);
    cout << "\nOPTIONS: " <<endl;
    cout << "-checkAST             - run all checkers on binary AST. " << endl; 
    cout << "-checkGraph           - run all checkers on dataflow graph. " << endl; 
    cout << "-printTree            - create dot file of AST. " << endl; 
    cout << "-callgraph            - perform callgraph analysis and print callgraph.dot file. " << endl; 
    cout << "-cfa                  - perform control flow analysis and print cfg.dot file. " << endl; 
    cout << "-dfa                  - perform dataflow flow analysis and print dfg.dot file. " << endl; 
    cout << "-inter                - perform dataflow analysis interprocedurally (default intraprocedural). " << endl; 
    cout << "-backward             - perform backward analysis (default forward). " << endl; 
    cout << "-gml                  - all graphs (except AST) are saved as gml files (default dot). " << endl; 
    cout << "-mergeedges           - aggregate edges between same nodes. " << endl; 
    cout << "-noedges              - do not print edges into dot or gml file (only nodes). " << endl; 
    return 1;
  }
  string execName = argv[1];

  lt_dlinit();
  
  // this is our test case input, we will assert on the data from this file
  test = false;
  if (execName=="buffer2.bin") {
    //cerr << "running test case on buffer2.bin !! " << endl << endl; 
    test = true;
  }
  // create out folder
  string filenameDir="out";
  mode_t mode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
  mkdir(filenameDir.c_str(), mode);

  std::ofstream myfile;

  bool interprocedural = false;
  if (containsArgument(argc, argv, "-inter")) {
    interprocedural = true;
  }
  bool forward = true;
  if (containsArgument(argc, argv, "-backward")) {
    forward = false;
  }
  bool dot = true;
  if (containsArgument(argc, argv, "-gml")) {
    dot = false;
  }
  bool mergedEdges = false;
  if (containsArgument(argc, argv, "-mergeedges")) {
    mergedEdges = true;
  }
  bool edges = true;
  if (containsArgument(argc, argv, "-noedges")) {
    edges = false;
  }

  RoseBin_Def::RoseAssemblyLanguage = RoseBin_Def::x86;
  //fprintf(stderr, "Starting binCompass frontend...\n");
  SgProject* project = frontend(argc,argv);
  ROSE_ASSERT (project != NULL);

  SgBinaryComposite* binary = isSgBinaryComposite(project->get_fileList()[0]);
  SgAsmGenericFile* file = binary != NULL ? binary->get_binaryFile() : NULL;

  //  const SgAsmInterpretationPtrList& interps = file->get_interpretations();
  //ROSE_ASSERT (interps.size() == 1);
  //SgAsmInterpretation* interp = interps[0];
  SgAsmInterpretation* interp = SageInterface::getMainInterpretation(file);
                                
  if (containsArgument(argc, argv, "-printTree")) {
    //fprintf(stderr, "Printing AST... _binary_tree.dot\n");
    string filename="_binary_tree.dot";
    AST_BIN_Traversal* trav = new AST_BIN_Traversal();
    trav->run(interp->get_global_block(), filename);
    if (test) {
      int instrnr = trav->getNrOfInstructions();
      //cerr << " Instructions written to file: " << instrnr << endl;
      ROSE_ASSERT(instrnr==861);
    }
  }

  RoseBin_Graph* graph;

  VirtualBinCFG::AuxiliaryInformation* info = new VirtualBinCFG::AuxiliaryInformation(file);
  std::map<int,std::set<SgAsmFunction*> > components;
  GraphAlgorithms* algo = new GraphAlgorithms(info);
  // call graph analysis  *******************************************************
  if (containsArgument(argc, argv, "-callgraph")) {
    //cerr << " creating call graph ... " << endl;
    graph= new RoseBin_DotGraph();
    string callFileName = "callgraph.dot";
    if (dot==false) {
      callFileName = "callgraph.gml";
      graph= new RoseBin_GMLGraph();
    }
    RoseBin_CallGraphAnalysis* callanalysis = new RoseBin_CallGraphAnalysis(interp->get_global_block(), new RoseObj(), algo);
    callanalysis->run(graph, callFileName, !mergedEdges);
    callanalysis->getConnectedComponents(components);
    if (test) {
      //cerr << " nr of nodes visited in callanalysis : " << callanalysis->nodesVisited() << endl;
      ROSE_ASSERT(callanalysis->nodesVisited()==10);
      //cerr << " nr of edges visited in callanalysis : " << callanalysis->edgesVisited() << endl;
      ROSE_ASSERT(callanalysis->edgesVisited()==9);
    }
  }

  if (containsArgument(argc, argv, "-printTree")) {
    //fprintf(stderr, "Printing AST... _binary_tree2.dot\n");
    string filename="_binary_tree2.dot";
    AST_BIN_Traversal* trav = new AST_BIN_Traversal();
    trav->run(interp->get_global_block(), filename);
    if (test) {
      int instrnr = trav->getNrOfInstructions();
      //cerr << " Instructions written to file: " << instrnr << endl;
      ROSE_ASSERT(instrnr==861);
    }
  }

  // control flow analysis  *******************************************************
  if (containsArgument(argc, argv, "-cfa")) {
    string cfgFileName = "cfg.dot";
    graph= new RoseBin_DotGraph();
    if (dot==false) {
      cfgFileName = "cfg.gml";
      graph= new RoseBin_GMLGraph();
    }
    RoseBin_ControlFlowAnalysis* cfganalysis = new RoseBin_ControlFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), edges, algo);
    cfganalysis->run(graph, cfgFileName, mergedEdges);

#if 1
    std::map<int,std::set<SgAsmFunction*> >::const_iterator comps = components.begin();
    //set<std::string> partialCFG;
    
    for (;comps!=components.end();++comps) {
      set<std::string> partialCFG;
      int nr = comps->first;
      //cerr << " found the following component " << nr << endl;
      std::set<SgAsmFunction*>  funcs = comps->second;
      std::set<SgAsmFunction*>::const_iterator it = funcs.begin();
      for (;it!=funcs.end();++it) {
	SgAsmFunction* function = *it;
	string name = function->get_name();

	name.append("_f");
	//cerr << "   binCompass CALLGRAPH ANALYSIS : found function : " << name << endl; 
	partialCFG.insert(name);
      }
      string filename = "thomas";	
      filename.append(RoseBin_support::ToString(nr));
      filename.append(".dot");
      //cerr << " binCompass writing to file " << filename << endl;
      cfganalysis->printGraph(filename,partialCFG);
    }
    //cfganalysis->printGraph(filename,partialCFG);
#endif 	
#if 0
    set<std::string> partialCFG;
    partialCFG.insert(" 80483c0_f");
    partialCFG.insert(" 8048491_f");
    partialCFG.insert(" 8048363_f");
    partialCFG.insert(" 804828f_f");
    cfganalysis->printGraph("thomas.dot",partialCFG);
#endif

    if (test) {
      //cout << " cfa -- Number of nodes == " << cfganalysis->nodesVisited() << endl;
      //cout << " cfa -- Number of edges == " << cfganalysis->edgesVisited() << endl;
      //ROSE_ASSERT(cfganalysis->nodesVisited()==210);
      //ROSE_ASSERT(cfganalysis->edgesVisited()==234);
      ROSE_ASSERT(cfganalysis->nodesVisited()==237);
      ROSE_ASSERT(cfganalysis->edgesVisited()==261);
    }
  }

  if (containsArgument(argc, argv, "-printTree")) {
    //fprintf(stderr, "Printing AST... _binary_tree3.dot\n");
    string filename="_binary_tree3.dot";
    AST_BIN_Traversal* trav = new AST_BIN_Traversal();
    trav->run(interp->get_global_block(), filename);
    if (test) {
      int instrnr = trav->getNrOfInstructions();
      //cerr << " Instructions written to file: " << instrnr << endl;
      ROSE_ASSERT(instrnr==861);
    }
  }

  if (containsArgument(argc, argv, "-dfa")) {
    //cerr << " creating dataflow graph ... " << endl;
    string dfgFileName = "dfg.dot";
    graph= new RoseBin_DotGraph();
    if (dot==false) {
      dfgFileName = "dfg.gml";
      graph= new RoseBin_GMLGraph();
    }
    RoseBin_DataFlowAnalysis* dfanalysis = new RoseBin_DataFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), algo);
    dfanalysis->init(interprocedural, edges);
    dfanalysis->run(graph, dfgFileName, mergedEdges);
    if (test) {
#if 0
      cout << " dfa -- Number of nodes == " << dfanalysis->nodesVisited() << endl;
      cout << " dfa -- Number of edges == " << dfanalysis->edgesVisited() << endl;
      cout << " dfa -- Number of memWrites == " << dfanalysis->nrOfMemoryWrites() << endl;
      cout << " dfa -- Number of regWrites == " << dfanalysis->nrOfRegisterWrites() << endl;
      cout << " dfa -- Number of definitions == " << dfanalysis->nrOfDefinitions() << endl;
      cout << " dfa -- Number of uses == " << dfanalysis->nrOfUses() << endl;
#endif
      if (interprocedural) {
	ROSE_ASSERT(dfanalysis->nodesVisited()==237);
	ROSE_ASSERT(dfanalysis->edgesVisited()==284);
	ROSE_ASSERT(dfanalysis->nrOfMemoryWrites()==12);
	ROSE_ASSERT(dfanalysis->nrOfRegisterWrites()==36);
	ROSE_ASSERT(dfanalysis->nrOfDefinitions()==183);
	ROSE_ASSERT(dfanalysis->nrOfUses()==25);
      } else {
	ROSE_ASSERT(dfanalysis->nodesVisited()==237);
	ROSE_ASSERT(dfanalysis->edgesVisited()==287);
	ROSE_ASSERT(dfanalysis->nrOfMemoryWrites()==18);
	ROSE_ASSERT(dfanalysis->nrOfRegisterWrites()==77);
	ROSE_ASSERT(dfanalysis->nrOfDefinitions()==216);
	ROSE_ASSERT(dfanalysis->nrOfUses()==31);

      }
    }
  }

  if (containsArgument(argc, argv, "-checkAST") || 
      containsArgument(argc, argv, "-checkGraph")) {
    // get a list of all checkers and traverse
    vector <BC_AnalysisInterface*> checkers;
    vector <BC_GraphAnalysisInterface*> graph_checkers;

    loadAnalysisFiles(checkers);

    vector <BC_AnalysisInterface*>::const_iterator it = checkers.begin();
    for (;it!=checkers.end();it++) {
      BC_AnalysisInterface* asmf = *it;
      //cout << "\nRunning Binary Checker --- " << asmf->get_name() << endl;
      string filename = execName+"."+asmf->get_name();
      unsigned int pos = filename.find_last_of("/");
      if (filename.find_last_of("/")!=string::npos && (pos+1)<filename.length()) 
	filename = filename.substr(pos+1, filename.length());
      filename = "out/"+filename+".out";
      //cerr << "Writing file : " << filename << endl;
      myfile.open(filename.c_str());
      asmf->init(interp->get_global_block());
      asmf->traverse(interp->get_global_block(), preorder);
      asmf->finish(interp->get_global_block());
      string output = asmf->get_output();
      myfile << output << " \n";
      myfile.close();
    }  

    if (containsArgument(argc, argv, "-checkGraph")) {
      loadGraphAnalysisFiles(graph_checkers);

      //cerr << "\n ---------------- preparing to run DataFlowAnalysis (-checkGraph)" << endl;
      string dfgFileName = "dfg.dot";
      graph= new RoseBin_DotGraph();
      if (dot==false) {
	dfgFileName = "dfg.gml";
	graph= new RoseBin_GMLGraph();
      }
      RoseBin_ControlFlowAnalysis* cfganalysis = new RoseBin_ControlFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), edges, algo);
      cfganalysis->run(graph, dfgFileName, mergedEdges);
      if (test) {
	//cerr << " cfa -- Number of nodes == " << cfganalysis->nodesVisited() << endl;
	//cerr << " cfa -- Number of edges == " << cfganalysis->edgesVisited() << endl;
	ROSE_ASSERT(cfganalysis->nodesVisited()==237);
	ROSE_ASSERT(cfganalysis->edgesVisited()==261);
      }

      rose_graph_integer_node_hash_map nodes = graph->get_node_index_to_node_map();
      //cerr << "CFG (-checkGraph) finished ----- Graph nr of nodes : " << nodes.size() << endl;
      ROSE_ASSERT(nodes.size()>0);

      RoseBin_DataFlowAnalysis* dfanalysis = new RoseBin_DataFlowAnalysis(interp->get_global_block(), forward, new RoseObj(), algo);
      //dfanalysis->init(interprocedural, edges,graph);
      dfanalysis->init(interprocedural, edges);
      dfanalysis->run(graph, dfgFileName, mergedEdges);

      //cerr << "DFG (-checkGraph) finished ----- Graph nr of nodes : " << nodes.size() << endl;
      vector<SgGraphNode*> rootNodes;
      dfanalysis->getRootNodes(rootNodes);

      //SgGraphNode* root1 = rootNodes[0];
      //rootNodes.clear();
      //rootNodes.push_back(root1);

      vector <BC_GraphAnalysisInterface*>::const_iterator it2 = graph_checkers.begin();
#if 0
      cerr << "\n ---------------- running graph checkers : " << graph_checkers.size() << 
	"   rootNodes size : " << rootNodes.size() << "  interprocedural : " << 
	RoseBin_support::resBool(interprocedural) << endl;
      cout << "\n ---------------- running graph checkers : " << graph_checkers.size() << 
	"   rootNodes size : " << rootNodes.size() << "  interprocedural : " << 
	RoseBin_support::resBool(interprocedural) << endl;
      cerr << "Graph : " << nodes.size() << endl;
#endif
      for (;it2!=graph_checkers.end();it2++) {
	BC_GraphAnalysisInterface* asmf = *it2;
	ROSE_ASSERT(asmf);
	//cerr << "\nRunning Binary Graph Checker --- " << asmf->get_name() << "    " <<  "  roots : " <<
	// rootNodes.size() << endl;
	// tps 04/23/08 -- fixme: this code was broken when I added the testcase -- needs to be fixed
	dfanalysis->init();
	asmf->init(graph);
	dfanalysis->traverseGraph(rootNodes, asmf, interprocedural);
      }  
    }
  }  

  unparseAsmStatementToFile("unparsed.s", interp->get_global_block());

  lt_dlexit();

  return 0;
}
/***********************************************************************
 * (10/31/07) tps: Traverses the graph for each node in rootNodes
 * and applies to each node the evaluate function
 * which can be either def_use, variable detection or emulation
 * Each node in the controlflow of rootNode is traversed (forward)
 * and only if the hasChanged function returns false, the algorithm
 * comes to a fixpoint
 ***********************************************************************/
void
RoseBin_DataFlowAnalysis::traverseGraph(vector <SgGraphNode*>& rootNodes,
                                        RoseBin_DataFlowAbstract* analysis,
                                        bool interprocedural){
  if (RoseBin_support::DEBUG_MODE_MIN())
    cerr << " traverseGraph : debug: " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE()) <<
      "  debug_min : " <<  RoseBin_support::resBool(RoseBin_support::DEBUG_MODE_MIN()) << endl;
  // Number of functions traversed
  int funcNr =0;
  // ---------------------------------------------------------------------
  // stores the nodes that still needs to be visited
  //  vector<SgGraphNode*> worklist;
  deque<SgGraphNode*> worklist;
  nodeHashSetType worklist_hash;
  // a vector of successors of the current node
  vector<SgGraphNode*> successors;
  // ---------------------------------------------------------------------


  // iterate through all functions
  vector<SgGraphNode*>::iterator it = rootNodes.begin();
  for (; it!=rootNodes.end();++it) {
    // current node
    SgGraphNode* node = *it;

    string func_name = vizzGraph->getProperty(SgGraph::name, node);
    RoseBin_support::checkText(func_name);
    funcNr++;
    if (RoseBin_support::DEBUG_MODE()) {
      cout << "\n\n -----------  dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+
        RoseBin_support::ToString(rootNodes.size())+") : " << func_name <<
        "  visited size : " << visited.size() <<
        "  total visited nodes : " << nrOfNodesVisited << endl;
      // debug
    }
    if (RoseBin_support::DEBUG_MODE_MIN()) {
      cerr << " -----------  dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+
        RoseBin_support::ToString(rootNodes.size())+") : " << func_name <<
        "  visited size : " << visited.size() <<
        "  total visited nodes : " << nrOfNodesVisited <<
        "  def size  : " << analysis->getDefinitionSize() << endl;
    }

    // indicates whether the current value for this node has changed
    bool hasChanged=false;
    // pushback into worklist and visited list
    worklist.push_back(node);
    worklist_hash.insert(node);
    visited.insert(node);
    visitedCounter[node] = 1;
    vector <SgGraphNode*> pre;
    // while there are still graph nodes in the worklist do

    while (worklist.size()>0) {
      nrOfNodesVisited++;
      // the new node is taken from the back of the worklist
      //node = worklist.back();
      //worklist.pop_back();
      node = worklist.front();
      worklist.pop_front();

      worklist_hash.erase(node);
      // get the successors of the current node and store in successors vector
      string name = vizzGraph->getProperty(SgGraph::name, node);

      //if (RoseBin_support::DEBUG_MODE_MIN() && node)
      //        if (node->get_SgNode())
      //  cerr << node->get_SgNode()->class_name() << "  " << node << "  " << node->get_name() << endl;

      if (RoseBin_support::DEBUG_MODE_MIN() && node) {
        SgAsmInstruction* instr = isSgAsmInstruction(node->get_SgNode());
        if (instr) {
          SgAsmFunction* funcParent = isSgAsmFunction(instr->get_parent());
          if (funcParent) {
            string parent = funcParent->get_name();
            cout << " ---- analysis of node in function : " << parent <<
              "  defs " << analysis->getDefinitionSize() <<
              " visited : " << RoseBin_support::ToString(visitedCounter[node]) << endl;
          }
        }
      }


      if (RoseBin_support::DEBUG_MODE())
        cout << "\n evaluating: " << name << endl;
      // do something with the current node
      // e.g. checkVariables(name, node);
      SgGraphNode* nodeBefore= NULL;
      BeforeMapType::const_iterator it =
        nodeBeforeMap.find(node);
      if (it!=nodeBeforeMap.end())
        nodeBefore = it->second;
      // successor vector is empty on each new node
      successors.clear();
      ROSE_ASSERT(isSgIncidenceDirectedGraph(vizzGraph));
      isSgIncidenceDirectedGraph(vizzGraph)->getSuccessors(node, successors);

      hasChanged = analysis->run(name, node, nodeBefore);

      // append the successors to the worklist
      if (RoseBin_support::DEBUG_MODE())
        cout << ">> getting successors  (" << successors.size() << ") for : " << name << endl;
      //        if (successors.size()==0)
      //          cout << "PROBLEM ..................................................... : " << endl;
      vector<SgGraphNode*>::iterator succ = successors.begin();
      for (;succ!=successors.end();++succ) {
        // for each successor do...
        SgGraphNode* next = *succ;
        SgAsmX86Instruction* nodeN = isSgAsmX86Instruction(node->get_SgNode());
        //if (!nodeN) continue;
        SgAsmX86Instruction* nextN = isSgAsmX86Instruction(next->get_SgNode());
        //if (!nextN) continue;

        string name_n = vizzGraph->getProperty(SgGraph::name, next);



        bool call = false;
        bool exceptionCallNext = false;
        if (nextN)
          exceptionCallNext = exceptionCall(nextN->get_kind() == x86_call ? nextN : 0);
        bool exceptionCallNode = false;
        if (nodeN)
          exceptionCallNode = exceptionCall(nodeN->get_kind() == x86_call ? nodeN : 0);
        if (RoseBin_support::DEBUG_MODE())
          std::cout << " exceptionCallNode : " << exceptionCallNode << " exceptionCallNext : " << exceptionCallNext << endl;
        // if function call is call to malloc we have an exception and follow the call path
        if ((exceptionCallNode && !exceptionCallNext)) {
        } else if (
                   //if (
                   (nodeN && nodeN->get_kind() == x86_call) ||
                   (nextN && nextN->get_kind() == x86_ret) )
          call = true;
        //bool sameParent = analysis->sameParents(node, next);

        bool validNode=false;
        if (g_algo->isValidCFGEdge(next, node) || exceptionCallNode)
          validNode = true;

        // debug ------------------------
        if (RoseBin_support::DEBUG_MODE()) {
          string nodeBeforeStr="";
          if (nodeBefore) nodeBeforeStr= nodeBefore->get_name();
          cout << "  DEBUG : >>>>>>>> previous node " << nodeBeforeStr
               << "      This node : " << name << "  next node : " << name_n
               << "  ** validNode : " << RoseBin_support::resBool(validNode) << endl;
        }


        // ----------------------------------
        if (( interprocedural==false && !call) //
            ||  (interprocedural==true && validNode)) {
          if (visited.find(next)==visited.end()) {
            // if the successor is not yet visited
            // mark as visited and put into worklist
            if (RoseBin_support::DEBUG_MODE())
              cout << " never visited next node before... " << name_n <<
                " interprocedural : " << interprocedural << "  call : " << call << endl;
            if (RoseBin_support::DEBUG_MODE())
              cout << "adding to visited : " << name_n << endl;

            visited.insert(next);
            nodeBeforeMap[next]=node;
            visitedCounter[next]=1;
            vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(1));
            if (!containsHash(worklist_hash,next)) {
              // add next node only if the next node
              if (RoseBin_support::DEBUG_MODE())
                cout << "adding to worklist: " << name_n << endl;
              worklist.push_back(next);
              worklist_hash.insert(next);
            }
          } else {
            // if the successor has been visited, we need to check if it has changed
            // if it has not, we continue, else we need to push it back to the worklist
            int nr = visitedCounter[next];
            if (RoseBin_support::DEBUG_MODE())
              cout << " visited next node before... " << RoseBin_support::ToString(nr) <<
                "  Changed == " << RoseBin_support::resBool(hasChanged) << endl;

            if (hasChanged) {
              visitedCounter[next]=++nr;
              vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(nr));
              if (RoseBin_support::DEBUG_MODE())
                cout << " has changed : " << RoseBin_support::resBool(hasChanged) <<
                  "  -- interprocedural : " << RoseBin_support::resBool(interprocedural) <<
                  "  -- Call : " << RoseBin_support::resBool(call) <<
                  "  ------> new number: " << RoseBin_support::ToString(nr) <<
                  "  -- contained in hash? : " << RoseBin_support::resBool(containsHash(worklist_hash,next)) <<
                  "  ---- nr of Defs: " << RoseBin_support::ToString(analysis->getDefinitionSize()) <<
                  "  ---- nr of Use: " << RoseBin_support::ToString(analysis->getUsageSize())
                     << endl;

              if (interprocedural || (!interprocedural && !call)){ //sameParent)) { //!call && ) {
                if (!containsHash(worklist_hash,next)) {
                  worklist_hash.insert(next);
                  worklist.push_back(next);
                  if (RoseBin_support::DEBUG_MODE())
                    cout << " adding to worklist: " << name_n << endl;
                }
              }
            } else
              if (RoseBin_support::DEBUG_MODE())
                cout << " has NOT changed. " << endl;
            //else we continue with the next node
          }
        }
      } // for
    } // while worklist.size()>0

  } // for rootNodes
}
Пример #8
0
void
InitPointerToNull::visit(SgNode* node) {
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
  } else

  if (isSgAsmx86Instruction(node) && isSgAsmx86Instruction(node)->get_kind() == x86_mov) {
    // this is the address of the mov instruction prior to the call
    //rose_addr_t resolveAddr=0;
    SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
    SgNode* instBlock = NULL;
    if (project) 
      instBlock= isSgAsmBlock(inst->get_parent());
    else //we run IDA, this is different
      instBlock=inst;

    if (instBlock==NULL)
      return;
    SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
    if (instFunc==NULL)
      return;

    // we have found a mov instruction
    // we need to check if it is a   mov mem, (value or reg) // assignment of variable // forgot mov mem, mem
    // or we find a                  mov reg, mem // usage of variable
    // make sure a variable is assigned before used
    SgAsmOperandList * ops = inst->get_operandList();
    SgAsmExpressionPtrList& opsList = ops->get_operands();
    SgAsmExpressionPtrList::iterator itOP = opsList.begin();
    SgAsmMemoryReferenceExpression* memL=NULL;
    SgAsmMemoryReferenceExpression* memR=NULL;
    SgAsmRegisterReferenceExpression* regL=NULL;
    SgAsmRegisterReferenceExpression* regR=NULL;
    SgAsmValueExpression* Val = NULL;
    int iteration=0;
    for (;itOP!=opsList.end();++itOP) {
      SgAsmExpression* exp = *itOP;
      ROSE_ASSERT(exp);
      if (iteration==1) {
	// right hand side
	memR = isSgAsmMemoryReferenceExpression(exp);
	regR = isSgAsmRegisterReferenceExpression(exp);
	Val = isSgAsmValueExpression(exp);
      }
      if (iteration==0) {
	// left hand side
	memL = isSgAsmMemoryReferenceExpression(exp);
	regL = isSgAsmRegisterReferenceExpression(exp);
	iteration++;
      }
    } //for
    if ((memL && regR) || (memL && Val) || (memL && memR)) {
      // could be assignment to address
      rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memL);      
      // apparently the reference to memory does not always have to be BP but
      // can also be IP if it is a static variable. How will we handle global variables?
      //bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memL);
      //if (containsBP) {
	// this is memory write with offset to BP
	// remember this memory location as a write
	if (debug)
	cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
	memoryWrites.insert(addr);
	//}
    } else if (regL && memR) {
      // could be usage of address
      rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memR);      
      bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memR);
      if (containsBP) {
	// this is memory read with offset to BP
	// did we see a write for this? If not, it is not initialized!
	std::set<rose_addr_t>::const_iterator it = memoryWrites.find(addr);
	if (it!=memoryWrites.end()) {
	  // found write, everything is good
	if (debug)
	  cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
	} else {
	  std::set<rose_addr_t>::const_iterator it2 = memoryRead.find(addr);
	  if (it2!=memoryRead.end()) {
	    // found this case before
	  } else {
	if (debug)
	    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;
	    string res = "Possibly uninitialized variable: ";
	    string funcname="";
	    SgAsmBlock* b = isSgAsmBlock(inst->get_parent());
	    SgAsmFunction* func = NULL;
	    if (b)
	      func=isSgAsmFunction(b->get_parent()); 
	    if (func)
	      funcname = func->get_name();
	    res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
	      " <"+inst->get_comment()+">  in function: "+funcname;
	    result[inst]= res;
	    memoryRead.insert(addr);
	  }
	}
      }
    }
  }
}
Пример #9
0
void
RoseBin_GMLGraph::printNodes(    bool dfg, RoseBin_FlowAnalysis* flow,bool forward_analysis,
                                 std::ofstream& myfile, string& recursiveFunctionName) {
  //bool firstFunc = true;
  // traverse nodes and visualize results of graph

  funcMap.clear();
  nodesMap.clear();
  //cerr << " Preparing graph - Nr of Nodes : " << nodes.size() << "  edges : " << edges.size() << endl;
  //SgGraphNodeList* gnodes = get_nodes();

  //  rose_graph_hash_multimap& nodes = get_nodes()->get_nodes();
  rose_graph_integer_node_hash_map nodes = get_node_index_to_node_map();
  int counter=nodes.size();
  int count=0;
  rose_graph_integer_node_hash_map::iterator itn2 = nodes.begin();
  for (; itn2!=nodes.end();++itn2) {
    counter++;
    count++;
    pair<int, SgGraphNode*> nt = *itn2;
    //    string hex_address = itn2->first;
    SgGraphNode* node = isSgGraphNode(itn2->second);
    string hex_address =node->get_name();
    SgNode* internal = node->get_SgNode();
    SgAsmFunction* func = isSgAsmFunction(internal);
    if (func) {
      vector<SgNode*> list;
      FindInstructionsVisitorx86 vis;
#ifdef _MSC_VER
//#pragma message ("WARNING: Removed reference to AstQueryNamespace::querySubTree()")
//        ROSE_ASSERT(false);

          // CH (4/7/2010): Workaround for MSVC
          vector<SgAsmX86Instruction*> temp_list;
          AstQueryNamespace::querySubTree(func, std::bind2nd( vis, &temp_list ));
          list.resize(temp_list.size());
          std::copy(temp_list.begin(), temp_list.end(), list.begin());
#else
#if defined(__APPLE__) && defined(__MACH__)
          //Pei-Hung (7/28/2016): OSX El Capitan has issue with bind2nd.  
          vector<SgAsmX86Instruction*> temp_list;
          AstQueryNamespace::querySubTree(func, std::bind2nd( vis, &temp_list ));
          list.resize(temp_list.size());
          std::copy(temp_list.begin(), temp_list.end(), list.begin());
#else
          AstQueryNamespace::querySubTree(func, std::bind2nd( vis, &list ));
#endif
#endif
      int validInstructions = func->nrOfValidInstructions(list);
      funcMap[func]=counter;
      nodesMap[func]=count;
      string name = func->get_name();
      string text = "node [\n   id " + RoseBin_support::ToString(counter) + "\n  id_ " +
        RoseBin_support::ToString(counter) + "\n  label \"" + name + "\"\n  ";
      text +="   nrinstr_ "+RoseBin_support::ToString(validInstructions)+" \n";
      text+= " isGroup 1\n isGroup_ 1\n ]\n";

      if (name=="frame_dummy") {
        //cerr << text << endl;
        vector<SgNode*> succs = func->get_traversalSuccessorContainer();
        vector<SgNode*>::iterator j = succs.begin();
        //cerr << " ------------- free_dummy"<<endl;
        int ii=0;
        for (;j!=succs.end();j++) {
          //SgNode* n = *j;
          //cerr << " Node contained at pos:"<<ii<<"  - " << n->class_name() << endl;
          ii++;
        }
        //cerr << " number of validInstructions: " << validInstructions << endl;
      }



      if (grouping)
        myfile << text;
    }
    SgAsmInstruction* bin_inst = isSgAsmInstruction(internal);
    if (bin_inst)
      nodesMap[bin_inst]=count;

  }

  //cerr << " Writing graph to GML - Nr of Nodes : " << nodes.size() << endl;
  int pos=0;
  rose_graph_integer_node_hash_map::iterator itn = nodes.begin();
  for (; itn!=nodes.end();++itn) {
    pos++;
    //    string hex_address = itn->first;
    SgGraphNode* node = isSgGraphNode(itn->second);
    string hex_address = node->get_name();
    SgNode* internal = node->get_SgNode();
    SgAsmFunction* func = isSgAsmFunction(internal);
    string text="";
    // specifies that this node has no destination address
    nodest_jmp = false;
    // specifies that there is a node that has a call error (calling itself)
    error =false;
    // specifies a call to a unknown location
    nodest_call = false;
    // specifies where its an int instruction
    interrupt = false;
    if (func) {
      string name = func->get_name();
      //cerr << " if part name : " << name << endl;
      ROSE_ASSERT(node);
      if (grouping==false) {
        map < int , string> node_p = node->get_properties();
        map < int , string>::iterator prop = node_p.begin();
        string name = "noname";
        string type = "removed";//node->get_type();
        for (; prop!=node_p.end(); ++prop) {
          int addr = prop->first;
          //cerr << " gml : property for addr : " << addr << endl;
          if (addr==SgGraph::nodest_jmp)
            nodest_jmp = true;
          else if (addr==SgGraph::itself_call)
            error = true;
          else if (addr==SgGraph::nodest_call)
            nodest_call = true;
          else if (addr==SgGraph::interrupt)
            interrupt = true;
          //      else
          //  name = prop->second;
        }
      }

      int parent = funcMap[func];
      RoseBin_support::checkText(name);
      int length = name.length();
      text = "node [\n   id " + RoseBin_support::ToString(pos) + "\n   label \"" + name + "\"\n";
      if (nodest_jmp) {
        text += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#FF0000\"  ]\n";
        text +="   Node_Color_ \"FF0000\" \n";
      }      else if (nodest_call) {
        text += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#FF9900\"  ]\n";
        text +="   Node_Color_ \"FF9900\" \n";
      }      else if (interrupt) {
        text += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#0000FF\"  ]\n";
        text +="   Node_Color_ \"0000FF\" \n";
      }      else if (error) {
        text += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#66FFFF\"  ]\n";
        text +="   Node_Color_ \"66FFFF\" \n";
      }else {
        text += "  graphics [ h 30.0 w " + RoseBin_support::ToString(length*7) + " type \"circle\" fill \"#9933FF\"  ]\n";
        text +="   Node_Color_ \"9933FF\" \n";
      }
      text +="   gid "+RoseBin_support::ToString(parent)+" \n";
      text +="   skip_ 1 \n";
      text +="   gid_ "+RoseBin_support::ToString(parent)+" ]\n";
      // skip functions for now
      //      if (skipFunctions)
      //        text ="";
    } /*not a func*/ else {
      SgAsmX86Instruction* bin_inst = isSgAsmX86Instruction(internal);
      //cerr << " else part " << endl;
      SgAsmFunction* funcDecl_parent = NULL;
      if (bin_inst) {
        funcDecl_parent = isSgAsmFunction(bin_inst->get_parent());
        if (funcDecl_parent==NULL)
          funcDecl_parent = isSgAsmFunction(bin_inst->get_parent()->get_parent());
      }
      if (funcDecl_parent==NULL) {
        cerr << " ERROR : printNodes preparation . No parent found for node : " << bin_inst->class_name() <<
          "  " << hex_address << endl;
        continue;
      }
      if ((pos % 10000)==0)
        cout << " GMLGraph:: printing GML Nodes : " << pos << endl;
      string name = getInternalNodes(node, forward_analysis,bin_inst);
      int parent=0;
      map <SgAsmFunction*, int>::iterator its = funcMap.find(funcDecl_parent);
      if (its!=funcMap.end())
        parent = funcMap[funcDecl_parent];
      if (parent==0)
        cerr << " GMLGraph parent == 0 " << endl;

      if (onlyControlStructure && x86InstructionIsControlTransfer(bin_inst)) {
        text = "node [\n   id " + RoseBin_support::ToString(pos) + "\n" + name ;
        int instrnr = funcDecl_parent->get_childIndex(bin_inst);
        text +="   instrnr_ "+RoseBin_support::ToString(instrnr)+" \n";
        text +="   gid_ "+RoseBin_support::ToString(parent)+" \n";
        text +="   gid "+RoseBin_support::ToString(parent)+" ]\n";
      } else {
        text = "node [\n   id " + RoseBin_support::ToString(pos) + "\n" + name ;
        int instrnr = funcDecl_parent->get_childIndex(bin_inst);
        text +="   instrnr_ "+RoseBin_support::ToString(instrnr)+" \n";
        text +="   gid_ "+RoseBin_support::ToString(parent)+" \n";
        text +="   gid "+RoseBin_support::ToString(parent)+" ]\n";
      }
    }

    myfile << text;
    //    cerr << " this node : " << text << endl;
  }
  funcMap.clear();
}
Пример #10
0
SgAsmInstruction*
RoseBin_FlowAnalysis::process_jumps_get_target(SgAsmx86Instruction* inst) {
  if (inst && x86InstructionIsControlTransfer(inst)) {
    //cerr << " ..................... processing jmp " << endl;
    ostringstream addrhex3;
    int addrsource = inst->get_address();
    addrhex3 << hex << setw(8) << addrsource ;
    string funcName ="";

    // get the operand and the destination address
    SgAsmOperandList* opList = inst->get_operandList();
    ROSE_ASSERT(opList);
    SgAsmExpressionPtrList ptrList = opList->get_operands();

    std::vector<SgAsmExpression*>::iterator itList= ptrList.begin();
    for (;itList!=ptrList.end();++itList) {
      SgAsmExpression* exp = *itList;
      ROSE_ASSERT(exp);
      SgAsmRegisterReferenceExpression* regRef = isSgAsmRegisterReferenceExpression(exp);

      //if (RoseBin_support::DEBUG_MODE())
      //        cout << " inst (jmp):: " << inst->get_mnemonic() << "  addr : " << addrhex3.str() << endl;
      SgAsmValueExpression* valExpr = isSgAsmValueExpression(exp);
      SgAsmMemoryReferenceExpression* memExpr = isSgAsmMemoryReferenceExpression(exp);
      string valStr = "";
      if (valExpr) {
        uint8_t byte_val=0xF;
        uint16_t word_val=0xFF;
        uint32_t double_word_val=0xFFFF;
        uint64_t quad_word_val=0xFFFFFFFFU;

        valStr =
          RoseBin_support::resolveValue(valExpr, true,
                                        byte_val,
                                        word_val,
                                        double_word_val,
                                        quad_word_val);

        //if (RoseBin_support::DEBUG_MODE())
        //cout << "   found value ....... :: " << valStr << endl;
        funcName = valExpr->get_replacement();
        //if (funcName=="")
        //  funcName="noName";
      }
      if (memExpr) {
        continue;
        // this is a jump to data ... do not handle right now!!
      }

      // convert val string to long
      uint64_t val=0;
      if(from_string<uint64_t>(val, valStr, std::hex)) {
        ostringstream addrhex2;
        addrhex2 << hex << setw(8) << val ;
        //if (RoseBin_support::DEBUG_MODE())
        //cerr << "    looking for value ("<<valStr << " ) in InstrSet: "
        //     << val << "  " << addrhex2.str() << endl;
        rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::const_iterator itc =
          rememberInstructions.find(val);
        if (itc!=rememberInstructions.end()) {
          SgAsmInstruction* target = itc->second;

          // we set the target (jump to for each control instruction)
          ROSE_ASSERT(target);


          //if (RoseBin_support::DEBUG_MODE())
          //cout << "    >>> target found! " << target << "     funcName " << funcName << endl;
          if (funcName!="") {
            SgAsmNode* block = target;
            if (!db)
              block = isSgAsmNode(target->get_parent());
            ROSE_ASSERT(block);
            SgAsmFunction* func = isSgAsmFunction(block->get_parent());

            if (func) {
              string fname = func->get_name();
              uint64_t val_f=0;
              if(from_string<uint64_t>(val_f, fname, std::hex)) {
                // func name is a hex number
                func->set_name(funcName);
                //              inst->set_comment(funcName);
              } else {
                // its a name
              }
            }
          }
          return target;
        } else {
          //if (RoseBin_support::DEBUG_MODE())
          //  cerr << "    >>>>>>>>>>>>>>> !!! OPS :: Target not found ...  \n" << endl;
        }
      }
      else{
        //      std::cerr << "FlowAnalysis ::  from_string failed .. " << std::endl;
        if (valStr!="")
          if (RoseBin_support::DEBUG_MODE())
          cerr << " WARNING: Cant convert string to long - in process_jump  :: " << regRef->class_name() <<
            " inst :: " << inst->get_mnemonic() << "  addr : " << addrhex3.str() << " target : " << valStr << endl;
      }
    }

  }
  return NULL;
}
Пример #11
0
int main(int argc, char** argv) {

  std::string binaryFilename = (argc >= 1 ? argv[argc-1]   : "" );
  std::vector<std::string> newArgv(argv,argv+argc);
  newArgv.push_back("-rose:output");
  newArgv.push_back(binaryFilename+"-binarySemantics.C");

  SgProject* proj = frontend(newArgv);
  
  ROSE_ASSERT (proj);
  SgSourceFile* newFile = isSgSourceFile(proj->get_fileList().front());
  ROSE_ASSERT(newFile != NULL);
  SgGlobal* g = newFile->get_globalScope();
  ROSE_ASSERT (g);

  //I am doing some experimental work to enable functions in the C representation
  //Set this flag to true in order to enable that work
  bool enable_functions = true;
  //Jeremiah did some work to enable a simplification and normalization of the 
  //C representation. Enable this work by setting this flag to true.
  bool enable_normalizations = false;

  vector<SgNode*> asmFiles = NodeQuery::querySubTree(proj, V_SgAsmGenericFile);
  ROSE_ASSERT (asmFiles.size() == 1);



  if( enable_functions == false)
  {
    //Representation of C normalizations withotu functions
    SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration("run", SgTypeVoid::createType(), buildFunctionParameterList(), g);
    appendStatement(decl, g);
    SgBasicBlock* body = decl->get_definition()->get_body();
    //  ROSE_ASSERT(isSgAsmFile(asmFiles[0]));
    //  X86CTranslationPolicy policy(newFile, isSgAsmFile(asmFiles[0]));
    X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0]));
    ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL);

    policy.switchBody = buildBasicBlock();
    removeDeadStores(policy.switchBody,policy);

    SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody);
    ROSE_ASSERT(isSgBasicBlock(sw->get_body()));

    SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw);

    appendStatement(whileStmt, body);
    policy.whileBody = sw;

    X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy);
    //AS FIXME: This query gets noting in the form in the repository. Doing this hack since we only 
    //have one binary file anyways.
    //vector<SgNode*> instructions = NodeQuery::querySubTree(asmFiles[0], V_SgAsmX86Instruction);
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);

    std::cout << "Instruction\n";
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]);
      ROSE_ASSERT (insn);
      try {
          t.processInstruction(insn);
      } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) {
          std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n";
      }
    }


    if ( enable_normalizations == true )
    {
      //Enable normalizations of C representation
      //This is done heuristically where some steps
      //are repeated. It is not clear which order is 
      //the best
      {
        plugInAllConstVarDefs(policy.switchBody,policy) ;
        simplifyAllExpressions(policy.switchBody);
        removeIfConstants(policy.switchBody);
        removeDeadStores(policy.switchBody,policy);
        removeUnusedVariables(policy.switchBody);
      }
      {
        plugInAllConstVarDefs(policy.switchBody,policy) ;
        simplifyAllExpressions(policy.switchBody);
        removeIfConstants(policy.switchBody);
        removeDeadStores(policy.switchBody,policy);
      }
      removeUnusedVariables(policy.switchBody);
    }

  
  }else{ //Experimental changes to introduce functions into the C representation


    //When trying to add function I get that symbols are not defined

    //Iterate over the functions separately
    vector<SgNode*> asmFunctions = NodeQuery::querySubTree(proj, V_SgAsmFunction);

    for(size_t j = 0; j < asmFunctions.size(); j++ )
    {
      SgAsmFunction* binFunc = isSgAsmFunction( asmFunctions[j] );

      // Some functions (probably just one) are generated to hold basic blocks that could not
      // be assigned to a particular function. This happens when the Disassembler is overzealous
      // and the Partitioner cannot statically determine where the block belongs.  The name of
      // one such function is "***uncategorized blocks***".  [matzke 2010-06-29]
      if ((binFunc->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
        continue;

      //Some functions may be unnamed so we need to generate a name for those
      std::string funcName;
      if (binFunc->get_name().size()==0) {
	char addr_str[64];
	sprintf(addr_str, "0x%"PRIx64, binFunc->get_statementList()[0]->get_address());
	funcName = std::string("my_") + addr_str;;
      } else {
	funcName = "my" + binFunc->get_name();
      }

      //Functions can have illegal characters in their name. Need to replace those characters
      for ( int i = 0 ; i < funcName.size(); i++ )
      {
	char& currentCharacter = funcName.at(i);
	if ( currentCharacter == '.' )
	  currentCharacter = '_';
      }


      SgFunctionDeclaration* decl = buildDefiningFunctionDeclaration(funcName, SgTypeVoid::createType(), buildFunctionParameterList(), g);

      appendStatement(decl, g);
      SgBasicBlock* body = decl->get_definition()->get_body();
      X86CTranslationPolicy policy(newFile, isSgAsmGenericFile(asmFiles[0]));
      ROSE_ASSERT( isSgAsmGenericFile(asmFiles[0]) != NULL);
      policy.switchBody = buildBasicBlock();
      SgSwitchStatement* sw = buildSwitchStatement(buildVarRefExp(policy.ipSym), policy.switchBody);
      SgWhileStmt* whileStmt = buildWhileStmt(buildBoolValExp(true), sw);
      appendStatement(whileStmt, body);
      policy.whileBody = sw;
      X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression> t(policy);
      vector<SgNode*> instructions = NodeQuery::querySubTree(binFunc, V_SgAsmX86Instruction);

      for (size_t i = 0; i < instructions.size(); ++i) {
        SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]);
	if( insn->get_kind() == x86_nop )
	  continue;
        ROSE_ASSERT (insn);
        try {
            t.processInstruction(insn);
        } catch (const X86InstructionSemantics<X86CTranslationPolicy, WordWithExpression>::Exception &e) {
            std::cout <<e.mesg <<": " <<unparseInstructionWithAddress(e.insn) <<"\n";
        }
      }

    }

    //addDirectJumpsToSwitchCases(policy);


  }

  proj->get_fileList().erase(proj->get_fileList().end() - 1); // Remove binary file before calling backend

//  AstTests::runAllTests(proj);

  //Compile the resulting project

  return backend(proj);
}
Пример #12
0
 /** Print the entire forest for debugging output. */
 void print(std::ostream &o) const {
     for (size_t i=0; i<levels.size(); ++i) {
         if (levels[i].vertices.empty()) {
             o <<"partition forest level " <<i <<" is empty.\n";
         } else {
             size_t nsets = levels[i].vertices.size();
             size_t nfuncs = 0;
             for (Vertices::const_iterator vi=levels[i].vertices.begin(); vi!=levels[i].vertices.end(); ++vi)
                 nfuncs += (*vi)->functions.size();
             o <<"partition forest level " <<i
               <<" contains " <<nfuncs <<" function" <<(1==nfuncs?"":"s")
               <<" in " <<nsets <<" set" <<(1==nsets?"":"s") <<"\n";
             o <<"  the following input was used to generate " <<(1==nsets?"this set":"these sets") <<":\n";
             o <<StringUtility::prefixLines(levels[i].inputs.toString(), "    ");
             int setno = 1;
             for (Vertices::const_iterator vi=levels[i].vertices.begin(); vi!=levels[i].vertices.end(); ++vi, ++setno) {
                 Vertex *vertex = *vi;
                 const Functions &functions = vertex->functions;
                 o <<"  set #" <<setno
                   <<" contains " <<vertex->functions.size() <<" function" <<(1==vertex->functions.size()?"":"s") <<":\n";
                 for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
                     SgAsmFunction *func = *fi;
                     o <<"    " <<StringUtility::addrToString(func->get_entry_va()) <<" <" <<func->get_name() <<">\n";
                 }
                 o <<"    whose output was: {";
                 for (OutputValues::const_iterator oi=vertex->outputs.begin(); oi!=vertex->outputs.end(); ++oi)
                     o <<" " <<*oi;
                 o <<" }\n";
             }
         }
     }
 }
Пример #13
0
void
RoseBin_FlowAnalysis::checkControlFlow( SgAsmInstruction* binInst,
                                        int functionSize, int countDown,
                                        string& currentFunctionName, int func_nr) {
  //cerr << "check control flow" << endl;
  while (!worklist_forthisfunction.empty()) {
    SgAsmInstruction* binInst = worklist_forthisfunction.top();
    worklist_forthisfunction.pop();
    ROSE_ASSERT(binInst);

    countDown--;

    int address = binInst->get_address();
    ostringstream addrhex;
    addrhex << hex << setw(8) << address ;

    ROSE_ASSERT(g_algo->info);
    vector <VirtualBinCFG::CFGEdge> vec;
    if (forward_analysis) {
      vec = binInst->cfgBinOutEdges(g_algo->info);
      if (isSgAsmx86Instruction(binInst) && isSgAsmx86Instruction(binInst)->get_kind() == x86_call) {
        // vec.push_back(VirtualBinCFG::CFGEdge(VirtualBinCFG::CFGNode(binInst), VirtualBinCFG::CFGNode(g_algo->info->getInstructionAtAddress(binInst->get_address() + binInst->get_raw_bytes().size())), g_algo->info));
      }
    }
    else
      vec = binInst->cfgBinInEdges(g_algo->info);


    string name = binInst->get_mnemonic();
    //    if (RoseBin_support::DEBUG_MODE())
    //         cout << " " << addrhex.str() << "  " << func_nr << " :: " << functionSize <<
    //  "/" << countDown << "  ---------- next CFG instruction : " << name <<   "  vecSize : " << vec.size() << endl;



    for (int i=0; i < (int)vec.size(); i++) {
      VirtualBinCFG::CFGEdge edge = vec[i];
      VirtualBinCFG::CFGNode cfg_target = edge.target();
      VirtualBinCFG::CFGNode cfg_source = edge.source();
      if (!forward_analysis) {
        cfg_target = edge.source();
        cfg_source = edge.target();
      }
      SgAsmInstruction* bin_target = isSgAsmInstruction(cfg_target.getNode());
      SgAsmInstruction* thisbin = isSgAsmInstruction(cfg_source.getNode());
      ROSE_ASSERT(thisbin);
      SgAsmx86Instruction* thisbinX86 = isSgAsmx86Instruction(thisbin);
      ROSE_ASSERT (thisbinX86);

      string src_mnemonic = thisbin->get_mnemonic();
      int src_address = thisbin->get_address();
      if (analysisName=="callgraph")
        src_address = funcDecl->get_address();
      ostringstream addrhex_s;
      addrhex_s << hex << setw(8) << src_address ;

      SgGraphNode* src =NULL;
      string hexStr = addrhex_s.str();
      if (analysisName!="callgraph") {
        vector<SgGraphNode*> sources;
        vizzGraph->checkIfGraphNodeExists(hexStr, sources);
        vector<SgGraphNode*>::const_iterator src_it = 
          sources.begin();
        for (;src_it!=sources.end();++src_it) {
          // should only be one node! adapted to new interface
          src = *src_it;
        }
        if (src==NULL) {
          //  src= vizzGraph->createNode (src_mnemonic, typeNode, src_address, vizzGraph->graph->get_graph_id(), false, thisbin);
          src= addCFNode (src_mnemonic, typeNode, src_address,  false, thisbin);

          string unp_name = unparseInstructionWithAddress(thisbin);
          src->append_properties(SgGraph::name,unp_name);
          if (analysisName=="dfa")
            src->append_properties(SgGraph::dfa_standard,unp_name);
        }
        ROSE_ASSERT(src);
        if (thisbinX86->get_kind() == x86_call) {
          uint64_t returnAddr = thisbinX86->get_address() + thisbinX86->get_raw_bytes().size();
          ROSE_ASSERT(g_algo->info);
          SgAsmInstruction* retInsn = g_algo->info->getInstructionAtAddress(returnAddr);
          if (retInsn) {
            //worklist_forthisfunction.push(retInsn);
            //ostringstream tgthex_s;
            //tgthex_s << hex << setw(8) << returnAddr ;
            //string tgtStr = tgthex_s.str();
            //SgGraphNode* tgt = vizzGraph->checkIfGraphNodeExists(tgtStr);

            // tps (25 Aug 2008) : this line seems broken!
            //string mne = retInsn->get_mnemonic();

            //if (!tgt) {tgt = vizzGraph->createNode(mne, typeNode, returnAddr, vizzGraph->graph->get_graph_id(), false, retInsn);}
            // cerr << " ------> Creating return edge : " << thisbinX86->get_address() << " " << returnAddr << endl;
            // vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), src, thisbinX86->get_address(), tgt, returnAddr);


          }
        }
      }

      else if (analysisName=="callgraph") {
        // These are special cases that annotate the call graph (nodes)
        // so that the visualization can pick up the properties and color correctly
        ROSE_ASSERT(g_algo->info);
        if (thisbinX86->get_kind() == x86_jmp) {

          if (thisbinX86->cfgBinOutEdges(g_algo->info).empty()) {
            funcDeclNode->append_properties(SgGraph::nodest_jmp,RoseBin_support::ToString("nodest_jmp"));
          }
        }
        else if (thisbinX86->get_kind() == x86_call) {
          //cerr << "CallGRAPH: Found call : " <<
          //  RoseBin_support::HexToString(VirtualBinCFG::CFGNode(thisbinX86).getNode()->get_address()) << " to " <<
          //  RoseBin_support::HexToString(VirtualBinCFG::CFGNode(g_algo->info->getInstructionAtAddress(thisbinX86->get_address() + thisbinX86->get_raw_bytes().size())).getNode()->get_address()) <<  endl;

          vector<VirtualBinCFG::CFGEdge> dests = thisbinX86->cfgBinOutEdges(g_algo->info);
          dests.push_back(VirtualBinCFG::CFGEdge(VirtualBinCFG::CFGNode(thisbinX86), VirtualBinCFG::CFGNode(g_algo->info->getInstructionAtAddress(thisbinX86->get_address() + thisbinX86->get_raw_bytes().size())), g_algo->info));
          if (!dests.empty()) {
            SgAsmNode* parent = isSgAsmNode(dests[0].target().getNode()->get_parent());
            if (!db)
              parent = isSgAsmNode(parent->get_parent());
            if (parent) {
              SgAsmFunction* funcdestparent = isSgAsmFunction(parent);
              string trg_func_name = funcdestparent->get_name();
              if (trg_func_name==currentFunctionName) {
                funcDeclNode->append_properties(SgGraph::itself_call,RoseBin_support::ToString("itself_call"));
              }
            }
          } else {
            funcDeclNode->append_properties(SgGraph::nodest_call,RoseBin_support::ToString("nodest_call"));
            //cerr << " no destination found for call " << addrhex.str() << endl;
          }
        }
        else if (thisbinX86->get_kind() == x86_int) {
          funcDeclNode->append_properties(SgGraph::interrupt,RoseBin_support::ToString("interrupt"));
        }
      }


      if (bin_target!=NULL) {
        string trg_func_name = "";
        int trg_func_address =1;
        string hexStrf = "";

        SgAsmFunction* funcDeclparent=NULL;
        if (analysisName=="callgraph") {
          SgAsmNode* parent = dynamic_cast<SgAsmNode*>(bin_target->get_parent());
          if (parent==NULL)
            continue;
          if (!db)
            parent = isSgAsmNode(parent->get_parent());
          ROSE_ASSERT(parent);

          funcDeclparent = isSgAsmFunction(parent);
          ROSE_ASSERT(funcDeclparent);

          trg_func_name = funcDeclparent->get_name();
          trg_func_address = funcDeclparent->get_address();
          ostringstream addrhex_tf;
          addrhex_tf << hex << setw(8) << trg_func_address ;
          hexStrf = addrhex_tf.str();
          //cerr << " CALLGRAPH TARGET PARENT : " << hexStrf << endl;
        }

        string trg_mnemonic = bin_target->get_mnemonic();
        int trg_address = bin_target->get_address();
        ostringstream addrhex_t;
        addrhex_t << hex << setw(8) << trg_address ;

        if (RoseBin_support::DEBUG_MODE())
          cout << "     OUTEDGES TO: vec[" << i << "/" << vec.size() << "]  :" <<
            addrhex_t.str() << "  " << trg_mnemonic << endl;

        string hexStr = addrhex_t.str();
        SgGraphNode* trg=NULL;
        vector<SgGraphNode*> targets;
        if (analysisName=="callgraph")
          vizzGraph->checkIfGraphNodeExists(hexStrf, targets);
        else
          vizzGraph->checkIfGraphNodeExists(hexStr, targets);
        vector<SgGraphNode*>::const_iterator src_it = 
          targets.begin();
        for (;src_it!=targets.end();++src_it) {
          // should only be one node! adapted to new interface
          trg = *src_it;
        }
        //ROSE_ASSERT(trg);

        bool target_visited = false;

        // DQ (4/23/2009): We want the type defined in the base class.
        // rose_hash::unordered_map <string, SgAsmInstruction*>::iterator vis = local_visited.find(hexStr);
// CH (4/9/2010): Use boost::unordered instead   
//#ifndef _MSCx_VER
#if 1
        rose_hash::unordered_map <string, SgAsmInstruction*>::iterator vis = local_visited.find(hexStr);
#else
        rose_hash::unordered_map <string, SgAsmInstruction*,rose_hash::hash_string>::iterator vis = local_visited.find(hexStr);
#endif
        if (vis!=local_visited.end())
          target_visited=true;

        if (trg==NULL) {
          if (analysisName=="callgraph") {
            //      cerr << " >>> TARGET FUNC NAME " << trg_func_name << endl;
            //trg = vizzGraph->createNode (trg_func_name, typeNode, trg_func_address, vizzGraph->graph->get_graph_id(),false, funcDeclparent);
            trg = addCFNode (trg_func_name, typeNode, trg_func_address,false, funcDeclparent);
          }       else {
            //trg = vizzGraph->createNode (trg_mnemonic, typeNode, trg_address, vizzGraph->graph->get_graph_id(),false, bin_target);
            trg = addCFNode (trg_mnemonic, typeNode, trg_address, false, bin_target);
          }
          string unp_name = unparseInstructionWithAddress(bin_target);
          //cout << " (target==NULL) unparse name : " << unp_name << endl;
          trg->append_properties(SgGraph::name,unp_name);
          if (analysisName=="dfa")
            trg->append_properties(SgGraph::dfa_standard,unp_name);

        } else {
          string unp_name = unparseInstructionWithAddress(bin_target);
          //cout << "    unparse name : " << unp_name << endl;
          trg->append_properties(SgGraph::name,unp_name);
          if (analysisName=="dfa")
            trg->append_properties(SgGraph::dfa_standard,unp_name);
        }


        ROSE_ASSERT(trg);
        local_visited[hexStr] = bin_target;

        string name="";
        if (analysisName=="callgraph")
          name = RoseBin_support::ToString(src_address)+RoseBin_support::ToString(trg_func_address);
        else
          name = RoseBin_support::ToString(src_address)+RoseBin_support::ToString(trg_address);

        bool exists = vizzGraph->checkIfDirectedGraphEdgeExists(src, trg);
        if (!exists) {
          if (analysisName=="callgraph") {
            if (currentFunctionName!=trg_func_name && thisbinX86->get_kind() != x86_ret) {
              //              SgDirectedGraphEdge* edge = vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), funcDeclNode, src_address, trg, trg_func_address);
              SgDirectedGraphEdge* edge = vizzGraph->addDirectedEdge( funcDeclNode, trg, typeEdge);
              //cerr << "CallGraph : create edge : " << RoseBin_support::HexToString(src_address) << " to func : " << RoseBin_support::HexToString(trg_func_address) << endl;
              vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg));
            }
          } else {
            //string addr = RoseBin_support::HexToString(binInst->get_address());
            //if (addr==" 8048392" || addr==" 80482fd")
            //  cerr << " >>>>>>>>>> found " << addr << "  -- target_address : " << RoseBin_support::HexToString(trg_address) << endl;
            //      SgDirectedGraphEdge* edge =vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(), src, src_address, trg, trg_address);
            SgDirectedGraphEdge* edge = vizzGraph->addDirectedEdge( src, trg, typeEdge);
            vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg));
          }
        }



        if (analysisName!="callgraph") {
          // handle return edges
          SgAsmStatementPtrList sources = thisbin->get_sources();
          SgAsmStatementPtrList::iterator it = sources.begin();
          for (;it!=sources.end();++it) {
            SgAsmInstruction* instT = isSgAsmInstruction(*it);
            //cerr << " This node is called from : " << instT->get_address() << endl;
            ostringstream addr_t;
            addr_t << hex << setw(8) << instT->get_address() ;
            SgGraphNode* trg =NULL;
            string hexStr = addr_t.str();
            vector<SgGraphNode*> targets;
            vizzGraph->checkIfGraphNodeExists(hexStr, targets);
            vector<SgGraphNode*>::const_iterator src_it = 
              targets.begin();
            for (;src_it!=targets.end();++src_it) {
          // should only be one node! adapted to new interface
              trg = *src_it;
            }
            //trg= vizzGraph->checkIfGraphNodeExists(hexStr);
            if (trg==NULL) {
              string hexa = RoseBin_support::HexToString(instT->get_address());
              hexa = hexa.substr(1,hexa.size());
              string name = "0x"+hexa+":"+instT->get_mnemonic();
              //trg= vizzGraph->createNode (name, typeNode, instT->get_address(), vizzGraph->graph->get_graph_id(), false, instT);
              trg= addCFNode(name, typeNode, instT->get_address(),  false, instT);

            }

            bool exists = vizzGraph->checkIfDirectedGraphEdgeExists( trg,src);
            if (!exists) {
              bool same = sameParents(trg,src);
              if (!same) {
                SgDirectedGraphEdge* edge =vizzGraph->addDirectedEdge( trg, src, typeEdge);
                //SgDirectedGraphEdge* edge =vizzGraph->createEdge( typeEdge, vizzGraph->graph->get_graph_id(),  trg, instT->get_address(), src, src_address);
                vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg));
              }
            }
          }
        }

        if (!target_visited) {
          // check if target is in the same function!!!
          SgAsmNode* block = bin_target;
          if (!db)
            block = isSgAsmNode(bin_target->get_parent());
          ROSE_ASSERT(block);
          SgAsmFunction* funcPar = isSgAsmFunction(block->get_parent());
          if (funcPar) {
            string nameFunc = funcPar->get_name();
            if (nameFunc==currentFunctionName) {
              //checkControlFlow(bin_target, functionSize, countDown, currentFunctionName);
              worklist_forthisfunction.push(bin_target);
            }
          } else {
            if (RoseBin_support::DEBUG_MODE())
            cerr << " ERROR:: Target Instruction has no parent! " << bin_target->class_name() << endl;
          }
        } // if visited
      } else {
        nr_target_missed++;
        if (binInst)
          if (RoseBin_support::DEBUG_MODE())
          cerr << " WARNING:: no target found for " << RoseBin_support::HexToString(binInst->get_address()) << " " << binInst->class_name() << endl;
      }
    }
  }

  //  if (RoseBin_support::DEBUG_MODE())
  //  cout << " ------------------------ done with instr: " << name << " " << addrhex.str() << endl;
}
Пример #14
0
/****************************************************
 * traverse the binary AST
 ****************************************************/
void
RoseBin_FlowAnalysis::visit(SgNode* node) {

  //  cerr << " traversing node " << node->class_name() << endl;

  if (isSgAsmFunction(node) ) {
    SgAsmFunction* binDecl = isSgAsmFunction(node);
    string name = binDecl->get_name();
    ostringstream addrhex;
    addrhex << hex << setw(8) << binDecl->get_address() ;
    if (name=="") {
      name=addrhex.str();
      binDecl->set_name(name);
    }
    SgAsmStatement* stat = NULL;
    //    SgAsmStatementPtrList& list = binDecl->get_statementList();

    vector<SgAsmInstruction*> list;
    FindInstructionsVisitor vis;
    AstQueryNamespace::querySubTree(binDecl, std::bind2nd( vis, &list ));

    int sizeList = list.size();
    if (sizeList==0) {
      //cerr << " this function is empty!! " << endl;
      return;
    }

    //if ((func_nr % 1)==0)
    //  if (RoseBin_support::DEBUG_MODE())
    //    cout  << analysisName << " Func Nr: " << (++func_nr) << "  blocks:" <<
    //     sizeList << "  ***************** checking function : " << name << endl;

    if (forward_analysis) {
      stat = list.front();
    } else {
      // get the last instruction in a function (backward control flow)
      stat = list.back();
    }
    ROSE_ASSERT(stat);

    //   if (RoseBin_support::DEBUG_MODE())
    //cout << ">>>>>>>>>>>>>. checking statement in function : " << name << " .. " << stat->class_name() << endl;
    if (isSgAsmInstruction(stat)) {
      SgAsmInstruction* inst = isSgAsmInstruction(stat);
      ROSE_ASSERT(inst);
      // check the control flow of the first instruction in a function
      string typeFunction ="function";
      SgGraphNode* src=NULL;
      if (analysisName=="callgraph") {
        //      src = vizzGraph->createNode (name, typeFunction, binDecl->get_address(), vizzGraph->graph->get_graph_id(), false, binDecl);
        src = addCFNode (name, typeFunction, binDecl->get_address(), false, binDecl);
      } else {
        //src = vizzGraph->createNode (name, typeFunction, binDecl->get_address(), vizzGraph->graph->get_graph_id(), true, binDecl);
        //cerr << ">> adding node (f) src: " << RoseBin_support::HexToString(binDecl->get_address()) << endl;
        src = addCFNode (name, typeFunction, binDecl->get_address(), true, binDecl);
        string mnemonic=inst->get_mnemonic();
        //SgGraphNode* trg = vizzGraph->createNode (mnemonic, typeNode, inst->get_address(), vizzGraph->graph->get_graph_id(),false, inst);
        //cerr << ">> adding node (first) trg: " << RoseBin_support::HexToString(inst->get_address()) << endl;
        SgGraphNode* trg = addCFNode (mnemonic, typeNode, inst->get_address(), false, inst);
        string unp_name = unparseInstructionWithAddress(inst);
        trg->append_properties(SgGraph::name,unp_name);
        if (analysisName=="dfa")
          trg->append_properties(SgGraph::dfa_standard,unp_name);
        //cerr << "Create edge " << endl;
        //      SgDirectedGraphEdge* edge = vizzGraph->createEdge ( typeFunction, vizzGraph->graph->get_graph_id(), src, binDecl->get_address(), trg, inst->get_address());
        SgDirectedGraphEdge* edge = vizzGraph->addDirectedEdge ( src, trg, typeFunction);
        vizzGraph->setProperty(SgGraph::type, edge, RoseBin_support::ToString(SgGraph::cfg));
      }

      local_visited.clear();
      worklist_forthisfunction.push(inst);

      funcDecl = binDecl;
      funcDeclNode = src;
      checkControlFlow(inst, sizeList, sizeList, name, func_nr);
    } else {
    if (RoseBin_support::DEBUG_MODE())
      cerr << "This is not an Instruction " << endl;
    }
  }
}
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    int argno = 1;

    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        std::cout << argv[argno] << std::endl;
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else {
            std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n"
                      <<"see \"" <<argv0 <<" --help\" for usage info.\n";
            exit(1);
        }
    }
    if (argno+1!=argc)
        ::usage(1);


    std::string specimen_name = StringUtility::getAbsolutePathFromRelativePath(argv[argno++], true);

    std::string specimen_path = StringUtility::getPathFromFileName(specimen_name);

    std::cout << "Specimen name is: " << specimen_name << std::endl;

    SgAsmInterpretation *interp = CloneDetection::open_specimen(specimen_name, argv0, false);
    SgBinaryComposite *binfile = SageInterface::getEnclosingNode<SgBinaryComposite>(interp);
    assert(interp!=NULL && binfile!=NULL);

    // Figure out what functions we need to generate files from.
    std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(interp);
    std::cerr <<argv0 <<": " <<all_functions.size() <<" function" <<(1==all_functions.size()?"":"s") <<" found\n";


    for (std::vector<SgAsmFunction*>::iterator fi=all_functions.begin(); fi!=all_functions.end(); ++fi) {
        // Save function
        SgAsmFunction *func = *fi;

        std::vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(func);


        std::string function_name = func->get_name();

        if( function_name.size() == 0 || insns.size() < 100 ||  function_name.find("@plt") != std::string::npos  )
        {
            continue;
        }
        std::cout << "function name is: " << function_name << std::endl;


        {

            //std::string file_name = specimen_name+"_"+func->get_name()+"_"+boost::lexical_cast<std::string>(func->get_entry_va());
            std::string file_name = specimen_path + "/" + function_name;

            std::cout << "generating " << file_name << " from " << specimen_name << std::endl;

            std::ofstream func_file;
            func_file.open(file_name.c_str());

            // Save instructions
            for (std::vector<SgAsmInstruction*>::iterator it = insns.begin(); it != insns.end(); ++it) {

                SgUnsignedCharList array = (*it)->get_raw_bytes();

                std::string str = "";
                for(size_t i=0; i < array.size(); ++i) {
                    unsigned char c = array[i];

                    str+= c;
                }
                func_file << str;
            }


            func_file.close();
        }

    }

    return 0;
}
Пример #16
0
void
add_calls_to_syscalls_to_db(SqlDatabase::TransactionPtr tx, DirectedGraph* G, std::vector<SgAsmFunction*> all_functions)
{
    // load the functions in db into memory
    std::map<std::string, std::set<int> > symbolToId;
    SqlDatabase::StatementPtr cmd3 = tx->statement("select id, name  from semantic_functions");
    for (SqlDatabase::Statement::iterator r=cmd3->begin(); r!=cmd3->end(); ++r) {
        int func_id           = r.get<int>(0);
        std::string func_name = r.get<std::string>(1);

        if (func_name.size() == 0)
            continue;

        std::map<std::string, std::set<int> >::iterator fit = symbolToId.find(func_name);
        if (fit == symbolToId.end()) {
            std::set<int> function_ids;
            function_ids.insert(func_id);
            symbolToId[func_name] = function_ids;
        } else {
            fit->second.insert(func_id);
        }
    }

    DirectedGraph& graph = *G;
    SqlDatabase::StatementPtr stmt = tx->statement("insert into syscalls_made(caller, syscall_id, syscall_name) values(?,?,?)");

    // Iterate over all components of the reachability graph
    typedef graph_traits<DirectedGraph>::vertex_descriptor Vertex;
    graph_traits<DirectedGraph>::vertex_iterator i, end;
    for (tie(i, end) = vertices(graph); i != end; ++i) {
        if (*i < ids_reserved_for_syscalls)
            continue;

        std::set<int> syscalls;

        // Iterate through the child vertex indices for [current_index]
        std::vector<Vertex> reachable;
        boost::breadth_first_search(graph, *i,
                                    boost::visitor(boost::make_bfs_visitor(boost::write_property(boost::identity_property_map(),
                                                                                                 std::back_inserter(reachable),
                                                                                                 boost::on_discover_vertex()))));
        for (std::vector<Vertex>::iterator it = reachable.begin(); it != reachable.end(); ++it) {
            if (*it < ids_reserved_for_syscalls)
                syscalls.insert(*it);
        }

        int caller_id = *i - ids_reserved_for_syscalls;
        ROSE_ASSERT(caller_id >= 0);
        SgAsmFunction* caller = all_functions[caller_id];
        ROSE_ASSERT(isSgAsmFunction(caller) != NULL);

        std::string func_name = caller->get_name();
        if (func_name.length() == 0)
            continue;

        std::map<std::string, std::set<int> >::iterator equivalent_ids = symbolToId.find(func_name);
        if (equivalent_ids == symbolToId.end())
            equivalent_ids = symbolToId.find(func_name+"@plt");

        if (syscalls.size() > 0 && equivalent_ids != symbolToId.end()) {
            for (std::set<int>::iterator sit = syscalls.begin(); sit != syscalls.end(); ++sit) {
                int syscall_callee_id = *sit;
                extern std::map<int, std::string> linux32_syscalls; // defined in linux_syscalls.C
                const std::string &syscall_name = linux32_syscalls[syscall_callee_id];
                for (std::set<int>::iterator equivalent_id = equivalent_ids->second.begin();
                     equivalent_id != equivalent_ids->second.end(); ++ equivalent_id) {
                    stmt->bind(0, *equivalent_id);
                    stmt->bind(1, syscall_callee_id);
                    stmt->bind(2, syscall_name);
                    stmt->execute();
                }
            }
        }
    }
}
Пример #17
0
/*
 * Detect functions (blocks) that can be merged together.
 */
void
RoseBin_FlowAnalysis::resolveFunctions(SgAsmNode* globalNode) {
  //cerr << " ObjDump-BinRose:: Detecting and merging Functions" << endl;
  vector<SgAsmFunction*> visitedFunctions;
  vector<SgNode*> tree =NodeQuery::querySubTree(globalNode, V_SgAsmFunction);
  //  vector<SgNode*>::iterator itV = tree.begin();
  int nr=0;
  while (!tree.empty()) {
    //  for (;itV!=tree.end();itV++) {
    SgAsmFunction* funcD = isSgAsmFunction(tree.back());
    tree.pop_back();
    nr++;
    if ((nr % 100)==0)
      if (RoseBin_support::DEBUG_MODE())
        cerr << " funcListSize : " << tree.size() << "  -- iteration : " << nr << "   func " << funcD->get_name() << endl;

    //SgAsmFunction* funcD = isSgAsmFunction(*itV);
    //itV++;
    ROSE_ASSERT(funcD);
    // make sure we dont visit a function twice


    vector <SgNode*> funcVec =funcD->get_traversalSuccessorContainer();
    int last = funcVec.size()-1;
    if (last<0)
      continue;
    bool hasStopCondition=false;
    for (unsigned int itf = 0; itf < funcVec.size() ; itf++) {
      SgAsmx86Instruction* finst = isSgAsmx86Instruction(funcVec[itf]);
      ROSE_ASSERT(finst);
      if (finst->get_kind() == x86_ret || finst->get_kind() == x86_hlt) {
        hasStopCondition=true;
      }
    }
    //cerr << " last : " << last << endl;
    SgAsmx86Instruction* lastInst = isSgAsmx86Instruction(funcVec[last]);
    ROSE_ASSERT(lastInst);
    SgAsmx86Instruction* nextInst = isSgAsmx86Instruction(resolveFunction(lastInst, hasStopCondition));
    if (nextInst) {
      SgAsmFunction* nextFunc = isSgAsmFunction(nextInst->get_parent());
      if (nextFunc) {
        ROSE_ASSERT(g_algo->info);
        g_algo->info->returnTargets[funcD].insert(g_algo->info->returnTargets[nextFunc].begin(), g_algo->info->returnTargets[nextFunc].end());
        // make sure that this function is being changed and should not be covered again
        //visitedFunctions.push_back(nextFunc);
        // visit current function after alternation again
        //tree.push_back(funcD);
        // now we remove this next function and iterate thrgouh all instructions and
        // attach them to the old function
        vector <SgNode*> funcNextVec =nextFunc->get_traversalSuccessorContainer();
        for (unsigned int i=0; i < funcNextVec.size(); ++i) {
          SgAsmInstruction* inst = isSgAsmInstruction(funcNextVec[i]);
          ROSE_ASSERT(inst);
          inst->set_parent(funcD);
          funcD->append_statement(inst);
          //nextFunc->remove_statement(inst);
          // delete nextFunc; // should delete this later when iterator is done
        }
        nextFunc->remove_children();
        nextFunc->set_parent(NULL);
        isSgAsmBlock(globalNode)->remove_statement(nextFunc);
      }
    }
  } // for

}