예제 #1
0
bool
GraphAlgorithms::isDirectCFGEdge(SgGraphNode* sgNode,
                                 SgGraphNode* sgNodeBefore) {
  bool isDirectedControlFlowEdge = false;
  SgAsmInstruction* instSgNode = isSgAsmInstruction(sgNode->get_SgNode());
  SgAsmInstruction* instSgNodeBefore = isSgAsmInstruction(sgNodeBefore->get_SgNode());
  ROSE_ASSERT(instSgNode);
  ROSE_ASSERT(instSgNodeBefore);
  if (instSgNode && instSgNodeBefore) {
    SgAsmFunction* f1 = isSgAsmFunction(instSgNode->get_parent());
    SgAsmFunction* f2 = isSgAsmFunction(instSgNodeBefore->get_parent());
    if (f1==NULL)
      f1 = isSgAsmFunction(instSgNode->get_parent()->get_parent());
    if (f2==NULL)
      f2 = isSgAsmFunction(instSgNodeBefore->get_parent()->get_parent());
    //cerr << "           -- Checking DirectCFG : f1 == f2?  : " << f1 << " " << f2 << 
    //  "  node: " << sgNode->get_name() << "  and before: " << sgNodeBefore->get_name() << endl;
    if (f1 && f2 && f1==f2) {
      ROSE_ASSERT(info);
      vector<VirtualBinCFG::CFGEdge> outEdges = instSgNodeBefore->cfgBinOutEdges(info);
      //cerr << "           -- Checking DirectCFG between: " << sgNode->get_name() << "  and before: " << 
      //        sgNodeBefore->get_name() << "    nr outedges: " << outEdges.size() << endl;
      for (size_t i = 0; i < outEdges.size(); ++i) {
        if (outEdges[i].target().getNode() == instSgNode) {
          isDirectedControlFlowEdge = true;
          break;
        }
      }
    }
  }
  //  cerr << "     ... checking if isDirectedCFGEdge " << isDirectedControlFlowEdge << endl;
  return isDirectedControlFlowEdge;
}
예제 #2
0
bool
GraphAlgorithms::isValidCFGEdge(SgGraphNode* sgNode,
                                SgGraphNode* sgNodeBefore) {
  if (!sgNode || !sgNodeBefore)
    return false;
  //  bool isAUnconditionalControlTransfer = false;
  bool valid = true;
  bool isDirectedControlFlowEdge = false;
  SgAsmX86Instruction* inst = isSgAsmX86Instruction(sgNodeBefore->get_SgNode());

  SgAsmInstruction* instSgNode = isSgAsmInstruction(sgNode->get_SgNode());
  SgAsmInstruction* instSgNodeBefore = isSgAsmInstruction(sgNodeBefore->get_SgNode());
  if (instSgNode && instSgNodeBefore) {
  if (RoseBin_support::DEBUG_MODE())
    cout << " *** instSgNode && instSgNodeBefore " << endl;
    SgAsmFunction* f1 = isSgAsmFunction(instSgNode->get_parent());
    SgAsmFunction* f2 = isSgAsmFunction(instSgNodeBefore->get_parent());
    if (f1==NULL)
      f1 = isSgAsmFunction(instSgNode->get_parent()->get_parent());
    if (f2==NULL)
      f2 = isSgAsmFunction(instSgNodeBefore->get_parent()->get_parent());
    if (f1 && f2) {
      // (tps - 05/23/08) : the semantics of the previous implementation is:
      // check the node before in the instruction set and check if it is the same as the previous node
      // todo: the following line must be changed... the size of the current node does not give you the last node!
      if (RoseBin_support::DEBUG_MODE())
      cout << " *** f1 && f2 " << endl;
      SgAsmInstruction* nodeBeforeInSet = NULL;
      int byte = 1;
      ROSE_ASSERT(info);
      while (nodeBeforeInSet==NULL && byte<8) {
        nodeBeforeInSet = info->getInstructionAtAddress(instSgNode->get_address() - byte);
        byte++;
      }
      if (RoseBin_support::DEBUG_MODE())
      cout << " *** nodeBeforeInSet = " << nodeBeforeInSet << "  instSgNodeBefore : " << instSgNodeBefore << "   byte : " << byte << endl;
      if (nodeBeforeInSet == instSgNodeBefore) {
        //if (!isAsmUnconditionalBranch(nodeBeforeInSet))
        if (RoseBin_support::DEBUG_MODE())
        cout << " isDirectedControlFlowEdge = true  --  isAsmUnconditionalBranch(nodeBeforeInSet) : " << isAsmUnconditionalBranch(nodeBeforeInSet) << endl;
        isDirectedControlFlowEdge = true;
      }
      if (RoseBin_support::DEBUG_MODE()) {
      cout << " *** f1 && f2 -- isDirectionalControlFlowEdge: " << isDirectedControlFlowEdge << endl;
      cout << " inst->get_kind() == x86_call : " << (inst->get_kind() == x86_call) << "     inst->get_kind() == x86_ret : " << (inst->get_kind() == x86_ret) << endl;
      }
      if ((inst->get_kind() == x86_call || inst->get_kind() == x86_ret) && isDirectedControlFlowEdge)
        valid=false;
    }
  }
  /*
  if (RoseBin_support::DEBUG_MODE()) {
    cout << " ValidCFGEdge::: sgNode " << sgNode->get_name() <<
      "   sgNodeBefore " << sgNodeBefore->get_name() <<
      "   instSgNode << " << instSgNode <<
      "   instSgNodeBefore << " << instSgNodeBefore <<
      "   is Valid node ? " << RoseBin_support::resBool(valid) <<
      "   isControlFlowEdge " << RoseBin_support::resBool(isDirectedControlFlowEdge) << endl;
  }
  */

  return valid;
}
/***********************************************************************
 * (10/31/07) tps: Traverses the graph for each node in rootNodes
 * and applies to each node the evaluate function
 * which can be either def_use, variable detection or emulation
 * Each node in the controlflow of rootNode is traversed (forward)
 * and only if the hasChanged function returns false, the algorithm
 * comes to a fixpoint
 ***********************************************************************/
void
RoseBin_DataFlowAnalysis::traverseGraph(vector <SgGraphNode*>& rootNodes,
                                        RoseBin_DataFlowAbstract* analysis,
                                        bool interprocedural){
  if (RoseBin_support::DEBUG_MODE_MIN())
    cerr << " traverseGraph : debug: " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE()) <<
      "  debug_min : " <<  RoseBin_support::resBool(RoseBin_support::DEBUG_MODE_MIN()) << endl;
  // Number of functions traversed
  int funcNr =0;
  // ---------------------------------------------------------------------
  // stores the nodes that still needs to be visited
  //  vector<SgGraphNode*> worklist;
  deque<SgGraphNode*> worklist;
  nodeHashSetType worklist_hash;
  // a vector of successors of the current node
  vector<SgGraphNode*> successors;
  // ---------------------------------------------------------------------


  // iterate through all functions
  vector<SgGraphNode*>::iterator it = rootNodes.begin();
  for (; it!=rootNodes.end();++it) {
    // current node
    SgGraphNode* node = *it;

    string func_name = vizzGraph->getProperty(SgGraph::name, node);
    RoseBin_support::checkText(func_name);
    funcNr++;
    if (RoseBin_support::DEBUG_MODE()) {
      cout << "\n\n -----------  dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+
        RoseBin_support::ToString(rootNodes.size())+") : " << func_name <<
        "  visited size : " << visited.size() <<
        "  total visited nodes : " << nrOfNodesVisited << endl;
      // debug
    }
    if (RoseBin_support::DEBUG_MODE_MIN()) {
      cerr << " -----------  dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+
        RoseBin_support::ToString(rootNodes.size())+") : " << func_name <<
        "  visited size : " << visited.size() <<
        "  total visited nodes : " << nrOfNodesVisited <<
        "  def size  : " << analysis->getDefinitionSize() << endl;
    }

    // indicates whether the current value for this node has changed
    bool hasChanged=false;
    // pushback into worklist and visited list
    worklist.push_back(node);
    worklist_hash.insert(node);
    visited.insert(node);
    visitedCounter[node] = 1;
    vector <SgGraphNode*> pre;
    // while there are still graph nodes in the worklist do

    while (worklist.size()>0) {
      nrOfNodesVisited++;
      // the new node is taken from the back of the worklist
      //node = worklist.back();
      //worklist.pop_back();
      node = worklist.front();
      worklist.pop_front();

      worklist_hash.erase(node);
      // get the successors of the current node and store in successors vector
      string name = vizzGraph->getProperty(SgGraph::name, node);

      //if (RoseBin_support::DEBUG_MODE_MIN() && node)
      //        if (node->get_SgNode())
      //  cerr << node->get_SgNode()->class_name() << "  " << node << "  " << node->get_name() << endl;

      if (RoseBin_support::DEBUG_MODE_MIN() && node) {
        SgAsmInstruction* instr = isSgAsmInstruction(node->get_SgNode());
        if (instr) {
          SgAsmFunction* funcParent = isSgAsmFunction(instr->get_parent());
          if (funcParent) {
            string parent = funcParent->get_name();
            cout << " ---- analysis of node in function : " << parent <<
              "  defs " << analysis->getDefinitionSize() <<
              " visited : " << RoseBin_support::ToString(visitedCounter[node]) << endl;
          }
        }
      }


      if (RoseBin_support::DEBUG_MODE())
        cout << "\n evaluating: " << name << endl;
      // do something with the current node
      // e.g. checkVariables(name, node);
      SgGraphNode* nodeBefore= NULL;
      BeforeMapType::const_iterator it =
        nodeBeforeMap.find(node);
      if (it!=nodeBeforeMap.end())
        nodeBefore = it->second;
      // successor vector is empty on each new node
      successors.clear();
      ROSE_ASSERT(isSgIncidenceDirectedGraph(vizzGraph));
      isSgIncidenceDirectedGraph(vizzGraph)->getSuccessors(node, successors);

      hasChanged = analysis->run(name, node, nodeBefore);

      // append the successors to the worklist
      if (RoseBin_support::DEBUG_MODE())
        cout << ">> getting successors  (" << successors.size() << ") for : " << name << endl;
      //        if (successors.size()==0)
      //          cout << "PROBLEM ..................................................... : " << endl;
      vector<SgGraphNode*>::iterator succ = successors.begin();
      for (;succ!=successors.end();++succ) {
        // for each successor do...
        SgGraphNode* next = *succ;
        SgAsmX86Instruction* nodeN = isSgAsmX86Instruction(node->get_SgNode());
        //if (!nodeN) continue;
        SgAsmX86Instruction* nextN = isSgAsmX86Instruction(next->get_SgNode());
        //if (!nextN) continue;

        string name_n = vizzGraph->getProperty(SgGraph::name, next);



        bool call = false;
        bool exceptionCallNext = false;
        if (nextN)
          exceptionCallNext = exceptionCall(nextN->get_kind() == x86_call ? nextN : 0);
        bool exceptionCallNode = false;
        if (nodeN)
          exceptionCallNode = exceptionCall(nodeN->get_kind() == x86_call ? nodeN : 0);
        if (RoseBin_support::DEBUG_MODE())
          std::cout << " exceptionCallNode : " << exceptionCallNode << " exceptionCallNext : " << exceptionCallNext << endl;
        // if function call is call to malloc we have an exception and follow the call path
        if ((exceptionCallNode && !exceptionCallNext)) {
        } else if (
                   //if (
                   (nodeN && nodeN->get_kind() == x86_call) ||
                   (nextN && nextN->get_kind() == x86_ret) )
          call = true;
        //bool sameParent = analysis->sameParents(node, next);

        bool validNode=false;
        if (g_algo->isValidCFGEdge(next, node) || exceptionCallNode)
          validNode = true;

        // debug ------------------------
        if (RoseBin_support::DEBUG_MODE()) {
          string nodeBeforeStr="";
          if (nodeBefore) nodeBeforeStr= nodeBefore->get_name();
          cout << "  DEBUG : >>>>>>>> previous node " << nodeBeforeStr
               << "      This node : " << name << "  next node : " << name_n
               << "  ** validNode : " << RoseBin_support::resBool(validNode) << endl;
        }


        // ----------------------------------
        if (( interprocedural==false && !call) //
            ||  (interprocedural==true && validNode)) {
          if (visited.find(next)==visited.end()) {
            // if the successor is not yet visited
            // mark as visited and put into worklist
            if (RoseBin_support::DEBUG_MODE())
              cout << " never visited next node before... " << name_n <<
                " interprocedural : " << interprocedural << "  call : " << call << endl;
            if (RoseBin_support::DEBUG_MODE())
              cout << "adding to visited : " << name_n << endl;

            visited.insert(next);
            nodeBeforeMap[next]=node;
            visitedCounter[next]=1;
            vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(1));
            if (!containsHash(worklist_hash,next)) {
              // add next node only if the next node
              if (RoseBin_support::DEBUG_MODE())
                cout << "adding to worklist: " << name_n << endl;
              worklist.push_back(next);
              worklist_hash.insert(next);
            }
          } else {
            // if the successor has been visited, we need to check if it has changed
            // if it has not, we continue, else we need to push it back to the worklist
            int nr = visitedCounter[next];
            if (RoseBin_support::DEBUG_MODE())
              cout << " visited next node before... " << RoseBin_support::ToString(nr) <<
                "  Changed == " << RoseBin_support::resBool(hasChanged) << endl;

            if (hasChanged) {
              visitedCounter[next]=++nr;
              vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(nr));
              if (RoseBin_support::DEBUG_MODE())
                cout << " has changed : " << RoseBin_support::resBool(hasChanged) <<
                  "  -- interprocedural : " << RoseBin_support::resBool(interprocedural) <<
                  "  -- Call : " << RoseBin_support::resBool(call) <<
                  "  ------> new number: " << RoseBin_support::ToString(nr) <<
                  "  -- contained in hash? : " << RoseBin_support::resBool(containsHash(worklist_hash,next)) <<
                  "  ---- nr of Defs: " << RoseBin_support::ToString(analysis->getDefinitionSize()) <<
                  "  ---- nr of Use: " << RoseBin_support::ToString(analysis->getUsageSize())
                     << endl;

              if (interprocedural || (!interprocedural && !call)){ //sameParent)) { //!call && ) {
                if (!containsHash(worklist_hash,next)) {
                  worklist_hash.insert(next);
                  worklist.push_back(next);
                  if (RoseBin_support::DEBUG_MODE())
                    cout << " adding to worklist: " << name_n << endl;
                }
              }
            } else
              if (RoseBin_support::DEBUG_MODE())
                cout << " has NOT changed. " << endl;
            //else we continue with the next node
          }
        }
      } // for
    } // while worklist.size()>0

  } // for rootNodes
}
예제 #4
0
/****************************************************
 * process all instructions in the DB
 * add the instructions to the blocks
 ****************************************************/
void RoseBin_DB_IDAPRO::process_instruction_query(MYSQL* conn, MYSQL_RES* res_set) {
  rememberInstructions.clear();
  // get the functions
  //  char* q = (char*)"SELECT * FROM instructions_1";
  char *q = (char*)"select *,     (select parent_function from basic_blocks_1 where id = i.basic_block_id      and (i.address - parent_function) >= 0     and (i.address - parent_function) =     (select min(i.address - parent_function) from basic_blocks_1 where id = i.basic_block_id       and (i.address - parent_function) >= 0)     ) as i_f from instructions_1 i order by i.address"; 

  if (RoseBin_support::DEBUG_MODE())
    cout << "\n>> QUERY:: " << q << "\n" << endl;
  res_set = process_query(conn,q);
  if (res_set == NULL) {
    print_problemWithResults(conn);
  } else {
    
    MYSQL_ROW row;
    string mnemonic=(char*)"";
    uint64_t address=0;
    int basic_block=-1;
    int sequence =-1;
    string data=(char*)"";
    int i_func;

    while ((row = mysql_fetch_row(res_set))!=NULL) {
      for (unsigned int i=0; i<mysql_num_fields(res_set);i++) {
        char* ret=(char*)"";
        if (row[i] ==NULL) { 
          ret = (char*)"<NULL>";
          if (i==0) address = -1;
          if (i==1) basic_block = -1;
          if (i==2) mnemonic = ret;
          if (i==3) sequence = -1;
          if (i==4) data=ret;
          if (i==5) i_func= -1;
        } else {
          ret= row[i];
          if (i==0) address = atoi(ret);
          if (i==1) basic_block = atoi(ret);
          if (i==2) mnemonic = ret;
          if (i==3) sequence = atoi(ret);
          if (i==4) data=ret;
          if (i==5) i_func = atoi(ret);
        }
      }
      // patched to adjust to objdump , Apr 26 2007
      if (mnemonic ==(char*)"retn")
        mnemonic = (char*)"ret";
      
      if (RoseBin_support::DEBUG_MODE()) {
        ostringstream addrhex;
        addrhex << hex << setw(8) << address ;
        cout << ">> creating instruction : " << addrhex.str() << " " << address << 
          " - " << basic_block << " - " << mnemonic << " - " << sequence << endl;
      }
      // check if it is an instruction or if it appears in the callgraph,
      // if it is in the callgraph, one wants to create a BinaryCall instead

      // append the instruction to its function
      rose_hash::unordered_map <int, SgAsmFunction* >::iterator func_it = rememberFunctions.find(i_func);
      SgAsmFunction* func = NULL;
      // for (func_it; func_it!=rememberFunctions.end(); ++func_it) {
      if (func_it != rememberFunctions.end()) {
        func = func_it->second;
      } else {
        if (i_func!=-1)
        cerr << " ERROR : cant find the function i_func : " << i_func << " in rememberFunctions for instruction : " << mnemonic << endl;
      }

      
      SgAsmInstruction* instruction = NULL;
      instruction = createInstruction(address, func, mnemonic);
      //        instruction = new SgAsmInstruction(address,bb,mnemonic,"");
      // Sep 29, tps : commented the following line out, since the function was removed.
      //instruction->set_raw_bytes(data);

      ROSE_ASSERT(instruction);

      SgAsmOperandList* operandList = new SgAsmOperandList();
      instruction->set_operandList(operandList);
      operandList->set_parent(instruction);

      ostringstream hexaddr;
      hexaddr << hex << setw(8) << address ;
      if (RoseBin_support::DEBUG_MODE())
        cout << " .rememberInstruction " << instruction->class_name() 
             << "  at : " << address << " hex: " << hexaddr.str() << endl;
      rememberInstructions[address]= instruction ;


      if (func) {
        // get the block in the func and append to it to conform to jeremiah
        func->append_statement(instruction);
        instruction->set_parent(func);
        //vector <SgNode*> blockVec =func->get_traversalSuccessorContainer();
        //SgAsmBlock* block = isSgAsmBlock(blockVec[0]);
        //ROSE_ASSERT(block);
        //block->append_statement(instruction);
        //instruction->set_parent(block);

        ROSE_ASSERT(instruction->get_parent());

        //SgAsmNode* nInst = (SgAsmNode*) instruction;
        //nInst->set_parent(func);

        ostringstream addrhex;
        addrhex << hex << setw(8) << i_func ;
        if (RoseBin_support::DEBUG_MODE())
          cout << ">> appended instruction to function: " << func->get_name() << " addr " << addrhex.str() << " " << address << endl;
      } else {
        if (i_func!=-1) {
          cerr << " ERROR :: could not append instruction to function : " << endl;
          //exit(0);
        }
      }
      
    } // while

  } // if (res_set==NULL)
  checkError(conn,res_set);
}
예제 #5
0
SgAsmInstruction*
RoseBin_FlowAnalysis::process_jumps_get_target(SgAsmx86Instruction* inst) {
  if (inst && x86InstructionIsControlTransfer(inst)) {
    //cerr << " ..................... processing jmp " << endl;
    ostringstream addrhex3;
    int addrsource = inst->get_address();
    addrhex3 << hex << setw(8) << addrsource ;
    string funcName ="";

    // get the operand and the destination address
    SgAsmOperandList* opList = inst->get_operandList();
    ROSE_ASSERT(opList);
    SgAsmExpressionPtrList ptrList = opList->get_operands();

    std::vector<SgAsmExpression*>::iterator itList= ptrList.begin();
    for (;itList!=ptrList.end();++itList) {
      SgAsmExpression* exp = *itList;
      ROSE_ASSERT(exp);
      SgAsmRegisterReferenceExpression* regRef = isSgAsmRegisterReferenceExpression(exp);

      //if (RoseBin_support::DEBUG_MODE())
      //        cout << " inst (jmp):: " << inst->get_mnemonic() << "  addr : " << addrhex3.str() << endl;
      SgAsmValueExpression* valExpr = isSgAsmValueExpression(exp);
      SgAsmMemoryReferenceExpression* memExpr = isSgAsmMemoryReferenceExpression(exp);
      string valStr = "";
      if (valExpr) {
        uint8_t byte_val=0xF;
        uint16_t word_val=0xFF;
        uint32_t double_word_val=0xFFFF;
        uint64_t quad_word_val=0xFFFFFFFFU;

        valStr =
          RoseBin_support::resolveValue(valExpr, true,
                                        byte_val,
                                        word_val,
                                        double_word_val,
                                        quad_word_val);

        //if (RoseBin_support::DEBUG_MODE())
        //cout << "   found value ....... :: " << valStr << endl;
        funcName = valExpr->get_replacement();
        //if (funcName=="")
        //  funcName="noName";
      }
      if (memExpr) {
        continue;
        // this is a jump to data ... do not handle right now!!
      }

      // convert val string to long
      uint64_t val=0;
      if(from_string<uint64_t>(val, valStr, std::hex)) {
        ostringstream addrhex2;
        addrhex2 << hex << setw(8) << val ;
        //if (RoseBin_support::DEBUG_MODE())
        //cerr << "    looking for value ("<<valStr << " ) in InstrSet: "
        //     << val << "  " << addrhex2.str() << endl;
        rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::const_iterator itc =
          rememberInstructions.find(val);
        if (itc!=rememberInstructions.end()) {
          SgAsmInstruction* target = itc->second;

          // we set the target (jump to for each control instruction)
          ROSE_ASSERT(target);


          //if (RoseBin_support::DEBUG_MODE())
          //cout << "    >>> target found! " << target << "     funcName " << funcName << endl;
          if (funcName!="") {
            SgAsmNode* block = target;
            if (!db)
              block = isSgAsmNode(target->get_parent());
            ROSE_ASSERT(block);
            SgAsmFunction* func = isSgAsmFunction(block->get_parent());

            if (func) {
              string fname = func->get_name();
              uint64_t val_f=0;
              if(from_string<uint64_t>(val_f, fname, std::hex)) {
                // func name is a hex number
                func->set_name(funcName);
                //              inst->set_comment(funcName);
              } else {
                // its a name
              }
            }
          }
          return target;
        } else {
          //if (RoseBin_support::DEBUG_MODE())
          //  cerr << "    >>>>>>>>>>>>>>> !!! OPS :: Target not found ...  \n" << endl;
        }
      }
      else{
        //      std::cerr << "FlowAnalysis ::  from_string failed .. " << std::endl;
        if (valStr!="")
          if (RoseBin_support::DEBUG_MODE())
          cerr << " WARNING: Cant convert string to long - in process_jump  :: " << regRef->class_name() <<
            " inst :: " << inst->get_mnemonic() << "  addr : " << addrhex3.str() << " target : " << valStr << endl;
      }
    }

  }
  return NULL;
}