/**
 * Reports whether two graph nodes are considered to share a parent.
 *
 * Returns true immediately when `node` wraps an SgAsmFunction (`next` is not
 * inspected in that case). Otherwise both nodes must wrap x86 instructions,
 * and the result is true when isSgAsmFunction() applied to each instruction's
 * parent yields the same pointer -- which includes the case where neither
 * parent is a function and both casts yield NULL. Every other combination
 * yields false.
 *
 * @param node first graph node; must not be NULL
 * @param next second graph node; must not be NULL unless `node` is a function
 * @return true if the nodes are treated as having the same parent
 */
bool
RoseBin_DataFlowAbstract::sameParents(SgGraphNode* node, SgGraphNode* next) {
  // A function node matches unconditionally.
  if (isSgAsmFunction(node->get_SgNode()))
    return true;

  SgAsmx86Instruction* const lhsInst = isSgAsmx86Instruction(node->get_SgNode());
  SgAsmx86Instruction* const rhsInst = isSgAsmx86Instruction(next->get_SgNode());
  if (lhsInst == NULL || rhsInst == NULL)
    return false;

  // Compare the (possibly NULL) function parents of both instructions.
  SgAsmFunction* const lhsFunc = isSgAsmFunction(lhsInst->get_parent());
  SgAsmFunction* const rhsFunc = isSgAsmFunction(rhsInst->get_parent());
  return lhsFunc == rhsFunc;
}
Example #2
0
/**
 * Traversal callback that records memory writes performed by `mov`
 * instructions and reports `mov reg, mem` reads whose address has not been
 * seen as a write target since the last function node was visited.
 *
 * Behaviour by node type:
 *  - SgAsmFunction: clears `memoryWrites` and `memoryRead`.
 *  - x86 `mov` instruction whose enclosing function can be located:
 *      * `mov mem, (reg | value | mem)`: the evaluated destination address is
 *        added to `memoryWrites`.
 *      * `mov reg, mem` where the memory expression contains the BP register:
 *        if the address is in neither `memoryWrites` nor `memoryRead`, a
 *        "Possibly uninitialized variable" message is stored in
 *        `result[inst]` and the address is added to `memoryRead` so that it
 *        is reported only once.
 *  - anything else: ignored.
 *
 * When `project` is set the instruction is expected to sit in an SgAsmBlock
 * whose parent is the function; otherwise (the original comment says "we run
 * IDA") the instruction's own parent is expected to be the function.
 *
 * @param node AST node handed in by the traversal
 */
void
InitPointerToNull::visit(SgNode* node) {
  // Entering a new function: forget what was recorded for the previous one.
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
    return;
  }

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
  if (inst == NULL || inst->get_kind() != x86_mov)
    return;

  // Locate the node whose parent is the enclosing function.
  SgNode* instBlock = NULL;
  if (project)
    instBlock = isSgAsmBlock(inst->get_parent());
  else // we run IDA, this is different
    instBlock = inst;
  if (instBlock == NULL)
    return;

  SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
  if (instFunc == NULL)
    return;

  // We have found a mov instruction. It is either
  //   mov mem, (value | reg | mem)   -- assignment of a variable
  //   mov reg, mem                   -- usage of a variable
  // and we want to make sure a variable is assigned before it is used.
  SgAsmOperandList* ops = inst->get_operandList();
  if (ops == NULL)
    return;
  SgAsmExpressionPtrList& opsList = ops->get_operands();

  SgAsmMemoryReferenceExpression* memL = NULL;
  SgAsmMemoryReferenceExpression* memR = NULL;
  SgAsmRegisterReferenceExpression* regL = NULL;
  SgAsmRegisterReferenceExpression* regR = NULL;
  SgAsmValueExpression* Val = NULL;

  // Operand 0 is the destination, operand 1 the source. The position counter
  // advances on every operand so that nothing past the second operand can
  // overwrite the source classification.
  int position = 0;
  for (SgAsmExpressionPtrList::iterator itOP = opsList.begin();
       itOP != opsList.end(); ++itOP, ++position) {
    SgAsmExpression* exp = *itOP;
    ROSE_ASSERT(exp);
    if (position == 0) {
      // left hand side
      memL = isSgAsmMemoryReferenceExpression(exp);
      regL = isSgAsmRegisterReferenceExpression(exp);
    } else if (position == 1) {
      // right hand side
      memR = isSgAsmMemoryReferenceExpression(exp);
      regR = isSgAsmRegisterReferenceExpression(exp);
      Val = isSgAsmValueExpression(exp);
    }
  }

  if (memL && (regR || Val || memR)) {
    // Could be an assignment to an address.
    rose_addr_t addr = BinQSupport::evaluateMemoryExpression(inst, memL);
    // The reference to memory does not always have to be relative to BP; it
    // can also be relative to IP for a static variable, so every memory
    // destination is remembered as a write. Global variables are an open
    // question (see the original author's note).
    if (debug)
      cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    memoryWrites.insert(addr);
    return;
  }

  if (!(regL && memR))
    return;

  // Could be a usage of an address.
  rose_addr_t addr = BinQSupport::evaluateMemoryExpression(inst, memR);
  bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr, x86_gpr_bp, memR);
  if (!containsBP)
    return;

  // This is a memory read with an offset to BP. Did we see a write for it?
  if (memoryWrites.find(addr) != memoryWrites.end()) {
    // found write, everything is good
    if (debug)
      cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    return;
  }

  // Each unmatched address is reported only once.
  if (memoryRead.find(addr) != memoryRead.end())
    return;

  if (debug)
    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;

  // Use the function located above rather than re-deriving it through an
  // SgAsmBlock parent: in the non-project case the instruction has no block
  // parent and the function name would otherwise be left empty.
  string funcname = instFunc->get_name();
  string res = "Possibly uninitialized variable: ";
  res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
    " <"+inst->get_comment()+">  in function: "+funcname;
  result[inst] = res;
  memoryRead.insert(addr);
}
void
RoseBin_FlowAnalysis::process_jumps() {
    if (RoseBin_support::DEBUG_MODE())
      cerr << "\n >>>>>>>>> processing jumps ... " << endl;
  rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it;
  for (it=rememberInstructions.begin();it!=rememberInstructions.end();++it) {
    SgAsmx86Instruction* inst = isSgAsmx86Instruction(it->second);
    if (inst->get_kind() == x86_call) {
      //cerr << "Found call at " << std::hex << inst->get_address() << endl;
      SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst));
      if (target) {
        //cerr << "Target is " << std::hex << target->get_address() << endl;
        // inst->get_targets().push_back(target);
        // we set the sources (for each node)
        ROSE_ASSERT(g_algo->info);
        g_algo->info->incomingEdges[target].insert(inst->get_address());
        // tps: changed this algorithm so that it runs in
        // linear time!
        ROSE_ASSERT (target->get_parent());
        if (target->get_parent()) {
          // ROSE_ASSERT(target->get_parent());
          SgAsmNode* b_b = target;
          if (!db)
            b_b = isSgAsmNode(target->get_parent());
          ROSE_ASSERT(b_b);
          SgAsmFunction* b_func = isSgAsmFunction(b_b->get_parent());

          if (b_func) {
            // (16/Oct/07) tps: this is tricky, it appears that sometimes the target can
            // be just a jmp to a new location, so we should forward this information to the correct
            // function.
            // Therefore we need to check if the current function has a return statement.
            // If not, we want to forward this information.
            if (target->get_kind() == x86_jmp) {
              //cerr << " >>>>>>>> found a jmp target - number of children: " << b_func->get_traversalSuccessorContainer().size() << endl;
              if (b_func->get_numberOfTraversalSuccessors()==1) {
                SgAsmx86Instruction* target2 = isSgAsmx86Instruction(process_jumps_get_target(inst));
                if (target2) {
                  b_b = target2;
                  if (!db)
                    b_b = isSgAsmNode(target2->get_parent());
                  b_func = isSgAsmFunction(b_b->get_parent());
                }
              }
            }


            if (inst->get_parent()) {
              //cerr << "Inst has a parent" << endl;
              if (inst->get_comment()=="")
                inst->set_comment(""+b_func->get_name());
              ROSE_ASSERT(g_algo->info);
              SgAsmInstruction* inst_after = g_algo->info->getInstructionAtAddress(inst->get_address() + inst->get_raw_bytes().size()); // inst->cfgBinFlowOutEdge(info);
              if (inst_after) {
                //cerr << "Added dest " << std::hex << isSgAsmStatement(inst_after)->get_address() << " for function" << endl;
                b_func->append_dest(isSgAsmStatement(inst_after));
              }
            }
          } else {
            if (RoseBin_support::DEBUG_MODE())
            cerr << " NO FUNCTION DETECTED ABOVE BLOCK . " << endl;
          }

        } else {
          if (RoseBin_support::DEBUG_MODE())
            cerr << "   WARNING :: process_jumps: target has no parent ... i.e. no FunctionDeclaration to it " <<
            target->class_name() << endl;
        }
      } else {
        if (inst)
          if (RoseBin_support::DEBUG_MODE())
            cerr << "    WARNING :: process_jumps: No target found for node " << RoseBin_support::HexToString(inst->get_address())
                 << "   " << inst->get_mnemonic() << endl;
      }
    } else {

      // might be a jmp
      SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst));
      if (target) {
        // inst->get_targets().push_back(target);
        // we set the sources (for each node)
        ROSE_ASSERT(g_algo->info);
        g_algo->info->incomingEdges[target].insert(inst->get_address());
      }
    }
  }
  //cerr << "\n >>>>>>>>> processing jumps ... done. " << endl;

  //  cerr << "\n >>>>>>>>> resolving RET jumps ... " << endl;
  rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it2;
  for (it2=rememberInstructions.begin();it2!=rememberInstructions.end();++it2) {
    //int id = it2->first;
    SgAsmx86Instruction* target = isSgAsmx86Instruction(it2->second);
    ROSE_ASSERT (target);
#if 1
    if (target->get_kind() == x86_ret) {
      SgAsmNode* b_b = target;
      if (!db)
        b_b = isSgAsmNode(target->get_parent());
      SgAsmFunction* parent = isSgAsmFunction(b_b->get_parent());
      if (parent) {
        //ROSE_ASSERT(parent);
        std::vector <SgAsmStatement*> dest_list = parent->get_dest();
        for (size_t i = 0; i < dest_list.size(); ++i) {
          ROSE_ASSERT (isSgAsmInstruction(dest_list[i]));
          //cerr << "Adding ret target " << std::hex << dest_list[i]->get_address() << " to " << std::hex << target->get_address() << endl;
          //info->indirectJumpAndReturnTargets[target].insert(dest_list[i]->get_address());
          ROSE_ASSERT(g_algo->info);
          g_algo->info->incomingEdges[isSgAsmInstruction(dest_list[i])].insert(target->get_address());
        }

        std::vector <SgAsmStatement*>::iterator it3 = dest_list.begin();
        for (; it3!=dest_list.end();++it3) {
          SgAsmInstruction* dest = isSgAsmInstruction(*it3);
          if (dest) {
            dest->append_sources(target);
            //cerr << " appending source to " << dest->get_address() << "   target: " << target->get_address() << endl;
          }
        } // for
      } else { // if parent
        if (RoseBin_support::DEBUG_MODE())
          cerr << "   ERROR :: RET jumps :: no parent found for ret : " << target->class_name() << endl;
        //exit (0);
      }
    } // if ret
#endif
  }
  if (RoseBin_support::DEBUG_MODE())
    cerr << " >>>>>>>>> resolving RET jumps ... done." << endl;
}
/**
 * Detect functions (blocks) that can be merged together.
 *
 * Every SgAsmFunction found below `globalNode` is examined, starting from the
 * end of the query result. For each function with at least one traversal
 * successor, the last instruction is passed to resolveFunction() together
 * with a flag telling whether the function contains a `ret` or `hlt`. If that
 * call yields an instruction whose parent is a different function, that
 * function is merged into the current one:
 *  - its return targets are added to the current function's return targets,
 *  - its instructions are re-parented and appended to the current function,
 *  - it is emptied, detached from its parent and removed from `globalNode`.
 *
 * All traversal successors of a function are expected to be x86 instructions
 * (asserted).
 *
 * @param globalNode root of the subtree to process; must be an SgAsmBlock
 *                   whenever a merge takes place
 */
void
RoseBin_FlowAnalysis::resolveFunctions(SgAsmNode* globalNode) {
  vector<SgNode*> tree = NodeQuery::querySubTree(globalNode, V_SgAsmFunction);
  SgAsmBlock* globalBlock = isSgAsmBlock(globalNode);
  int nr=0;
  while (!tree.empty()) {
    SgAsmFunction* funcD = isSgAsmFunction(tree.back());
    tree.pop_back();
    // Check before the first dereference (the debug output below uses it).
    ROSE_ASSERT(funcD);
    nr++;
    if ((nr % 100)==0 && RoseBin_support::DEBUG_MODE())
      cerr << " funcListSize : " << tree.size() << "  -- iteration : " << nr << "   func " << funcD->get_name() << endl;

    vector <SgNode*> funcVec = funcD->get_traversalSuccessorContainer();
    // Functions without children (including ones emptied by an earlier
    // merge) have nothing to resolve.
    if (funcVec.empty())
      continue;

    // Does the function contain an instruction that ends execution?
    bool hasStopCondition=false;
    for (size_t itf = 0; itf < funcVec.size(); ++itf) {
      SgAsmx86Instruction* finst = isSgAsmx86Instruction(funcVec[itf]);
      ROSE_ASSERT(finst);
      if (finst->get_kind() == x86_ret || finst->get_kind() == x86_hlt)
        hasStopCondition=true;
    }

    SgAsmx86Instruction* lastInst = isSgAsmx86Instruction(funcVec.back());
    ROSE_ASSERT(lastInst);
    SgAsmx86Instruction* nextInst = isSgAsmx86Instruction(resolveFunction(lastInst, hasStopCondition));
    if (!nextInst)
      continue;

    SgAsmFunction* nextFunc = isSgAsmFunction(nextInst->get_parent());
    // Merging a function into itself would duplicate its statements and then
    // empty and unlink it, so that case is skipped.
    if (!nextFunc || nextFunc == funcD)
      continue;

    ROSE_ASSERT(g_algo->info);
    g_algo->info->returnTargets[funcD].insert(g_algo->info->returnTargets[nextFunc].begin(), g_algo->info->returnTargets[nextFunc].end());

    // Now we remove this next function: iterate through all of its
    // instructions and attach them to the current function.
    vector <SgNode*> funcNextVec = nextFunc->get_traversalSuccessorContainer();
    for (size_t i = 0; i < funcNextVec.size(); ++i) {
      SgAsmInstruction* inst = isSgAsmInstruction(funcNextVec[i]);
      ROSE_ASSERT(inst);
      inst->set_parent(funcD);
      funcD->append_statement(inst);
    }
    nextFunc->remove_children();
    nextFunc->set_parent(NULL);
    // The merged function is unlinked from the global block, which therefore
    // has to exist at this point.
    ROSE_ASSERT(globalBlock);
    globalBlock->remove_statement(nextFunc);
  }
}