/***********************************************************************
 * Checks whether the binary control flow graph has an edge that leads
 * directly from the instruction of sgNodeBefore to the instruction of
 * sgNode.
 *
 * Both graph nodes must wrap an SgAsmInstruction (enforced with
 * ROSE_ASSERT). An edge is only reported when both instructions belong
 * to the same SgAsmFunction, where the function is looked up as the
 * parent or, failing that, the grandparent of the instruction.
 *
 * Returns true if one of the out-edges of the preceding instruction
 * (as reported by cfgBinOutEdges) targets the instruction of sgNode;
 * false otherwise, including when either argument is NULL.
 ***********************************************************************/
bool GraphAlgorithms::isDirectCFGEdge(SgGraphNode* sgNode, SgGraphNode* sgNodeBefore) {
  // same guard as isValidCFGEdge: a missing node can never form an edge
  if (!sgNode || !sgNodeBefore)
    return false;

  SgAsmInstruction* instSgNode = isSgAsmInstruction(sgNode->get_SgNode());
  SgAsmInstruction* instSgNodeBefore = isSgAsmInstruction(sgNodeBefore->get_SgNode());
  ROSE_ASSERT(instSgNode);
  ROSE_ASSERT(instSgNodeBefore);
  // kept from the original in case ROSE_ASSERT does not abort in this build
  if (!instSgNode || !instSgNodeBefore)
    return false;

  // the enclosing function is either the parent or the grandparent;
  // only climb to the grandparent when a parent actually exists
  SgAsmFunction* f1 = isSgAsmFunction(instSgNode->get_parent());
  SgAsmFunction* f2 = isSgAsmFunction(instSgNodeBefore->get_parent());
  if (f1 == NULL && instSgNode->get_parent() != NULL)
    f1 = isSgAsmFunction(instSgNode->get_parent()->get_parent());
  if (f2 == NULL && instSgNodeBefore->get_parent() != NULL)
    f2 = isSgAsmFunction(instSgNodeBefore->get_parent()->get_parent());

  // both instructions must live in the very same function
  if (f1 == NULL || f1 != f2)
    return false;

  ROSE_ASSERT(info);
  vector<VirtualBinCFG::CFGEdge> outEdges = instSgNodeBefore->cfgBinOutEdges(info);
  for (size_t i = 0; i < outEdges.size(); ++i) {
    if (outEdges[i].target().getNode() == instSgNode)
      return true;
  }
  return false;
}
/***********************************************************************
 * Decides whether the edge sgNodeBefore -> sgNode may be followed.
 *
 * Returns false when either argument is NULL. Otherwise the edge is
 * valid unless all of the following hold:
 *  - both nodes wrap instructions that have an enclosing SgAsmFunction
 *    (parent or grandparent),
 *  - the instruction found at one of the 1..7 bytes directly in front
 *    of sgNode's address is the instruction of sgNodeBefore, and
 *  - the instruction of sgNodeBefore is an x86 call or ret.
 *
 * A preceding instruction that is not an x86 instruction is never
 * treated as call/ret, so such an edge stays valid.
 ***********************************************************************/
bool GraphAlgorithms::isValidCFGEdge(SgGraphNode* sgNode, SgGraphNode* sgNodeBefore) {
  if (!sgNode || !sgNodeBefore)
    return false;

  bool valid = true;
  bool isDirectedControlFlowEdge = false;

  // may be NULL even when instSgNodeBefore is not (non-x86 instruction)
  SgAsmX86Instruction* inst = isSgAsmX86Instruction(sgNodeBefore->get_SgNode());
  SgAsmInstruction* instSgNode = isSgAsmInstruction(sgNode->get_SgNode());
  SgAsmInstruction* instSgNodeBefore = isSgAsmInstruction(sgNodeBefore->get_SgNode());

  if (instSgNode && instSgNodeBefore) {
    if (RoseBin_support::DEBUG_MODE())
      cout << " *** instSgNode && instSgNodeBefore " << endl;

    // the enclosing function is either the parent or the grandparent;
    // only climb to the grandparent when a parent actually exists
    SgAsmFunction* f1 = isSgAsmFunction(instSgNode->get_parent());
    SgAsmFunction* f2 = isSgAsmFunction(instSgNodeBefore->get_parent());
    if (f1 == NULL && instSgNode->get_parent() != NULL)
      f1 = isSgAsmFunction(instSgNode->get_parent()->get_parent());
    if (f2 == NULL && instSgNodeBefore->get_parent() != NULL)
      f2 = isSgAsmFunction(instSgNodeBefore->get_parent()->get_parent());

    if (f1 && f2) {
      // (tps - 05/23/08) : the semantics of the previous implementation is:
      // check the node before in the instruction set and check if it is the same as the previous node
      // todo: the following line must be changed... the size of the current node does not give you the last node!
      if (RoseBin_support::DEBUG_MODE())
        cout << " *** f1 && f2 " << endl;

      // probe the addresses directly in front of sgNode, one byte at a
      // time, until an instruction is found or 7 bytes have been tried
      SgAsmInstruction* nodeBeforeInSet = NULL;
      int byte = 1;
      ROSE_ASSERT(info);
      while (nodeBeforeInSet == NULL && byte < 8) {
        nodeBeforeInSet = info->getInstructionAtAddress(instSgNode->get_address() - byte);
        byte++;
      }
      if (RoseBin_support::DEBUG_MODE())
        cout << " *** nodeBeforeInSet = " << nodeBeforeInSet << " instSgNodeBefore : " << instSgNodeBefore
             << " byte : " << byte << endl;

      if (nodeBeforeInSet == instSgNodeBefore) {
        if (RoseBin_support::DEBUG_MODE())
          cout << " isDirectedControlFlowEdge = true -- isAsmUnconditionalBranch(nodeBeforeInSet) : "
               << isAsmUnconditionalBranch(nodeBeforeInSet) << endl;
        isDirectedControlFlowEdge = true;
      }

      // guard the x86 cast: the original dereferenced inst unconditionally
      const bool isCall = (inst != NULL) && (inst->get_kind() == x86_call);
      const bool isRet = (inst != NULL) && (inst->get_kind() == x86_ret);

      if (RoseBin_support::DEBUG_MODE()) {
        cout << " *** f1 && f2 -- isDirectionalControlFlowEdge: " << isDirectedControlFlowEdge << endl;
        cout << " inst->get_kind() == x86_call : " << isCall
             << " inst->get_kind() == x86_ret : " << isRet << endl;
      }

      // falling through from a call/ret into the next instruction in
      // memory is not a control flow edge we want to follow
      if ((isCall || isRet) && isDirectedControlFlowEdge)
        valid = false;
    }
  }

  return valid;
}
/*********************************************************************** * (10/31/07) tps: Traverses the graph for each node in rootNodes * and applies to each node the evaluate function * which can be either def_use, variable detection or emulation * Each node in the controlflow of rootNode is traversed (forward) * and only if the hasChanged function returns false, the algorithm * comes to a fixpoint ***********************************************************************/ void RoseBin_DataFlowAnalysis::traverseGraph(vector <SgGraphNode*>& rootNodes, RoseBin_DataFlowAbstract* analysis, bool interprocedural){ if (RoseBin_support::DEBUG_MODE_MIN()) cerr << " traverseGraph : debug: " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE()) << " debug_min : " << RoseBin_support::resBool(RoseBin_support::DEBUG_MODE_MIN()) << endl; // Number of functions traversed int funcNr =0; // --------------------------------------------------------------------- // stores the nodes that still needs to be visited // vector<SgGraphNode*> worklist; deque<SgGraphNode*> worklist; nodeHashSetType worklist_hash; // a vector of successors of the current node vector<SgGraphNode*> successors; // --------------------------------------------------------------------- // iterate through all functions vector<SgGraphNode*>::iterator it = rootNodes.begin(); for (; it!=rootNodes.end();++it) { // current node SgGraphNode* node = *it; string func_name = vizzGraph->getProperty(SgGraph::name, node); RoseBin_support::checkText(func_name); funcNr++; if (RoseBin_support::DEBUG_MODE()) { cout << "\n\n ----------- dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+ RoseBin_support::ToString(rootNodes.size())+") : " << func_name << " visited size : " << visited.size() << " total visited nodes : " << nrOfNodesVisited << endl; // debug } if (RoseBin_support::DEBUG_MODE_MIN()) { cerr << " ----------- dataflow analysis of function ("+RoseBin_support::ToString(funcNr)+"/"+ 
RoseBin_support::ToString(rootNodes.size())+") : " << func_name << " visited size : " << visited.size() << " total visited nodes : " << nrOfNodesVisited << " def size : " << analysis->getDefinitionSize() << endl; } // indicates whether the current value for this node has changed bool hasChanged=false; // pushback into worklist and visited list worklist.push_back(node); worklist_hash.insert(node); visited.insert(node); visitedCounter[node] = 1; vector <SgGraphNode*> pre; // while there are still graph nodes in the worklist do while (worklist.size()>0) { nrOfNodesVisited++; // the new node is taken from the back of the worklist //node = worklist.back(); //worklist.pop_back(); node = worklist.front(); worklist.pop_front(); worklist_hash.erase(node); // get the successors of the current node and store in successors vector string name = vizzGraph->getProperty(SgGraph::name, node); //if (RoseBin_support::DEBUG_MODE_MIN() && node) // if (node->get_SgNode()) // cerr << node->get_SgNode()->class_name() << " " << node << " " << node->get_name() << endl; if (RoseBin_support::DEBUG_MODE_MIN() && node) { SgAsmInstruction* instr = isSgAsmInstruction(node->get_SgNode()); if (instr) { SgAsmFunction* funcParent = isSgAsmFunction(instr->get_parent()); if (funcParent) { string parent = funcParent->get_name(); cout << " ---- analysis of node in function : " << parent << " defs " << analysis->getDefinitionSize() << " visited : " << RoseBin_support::ToString(visitedCounter[node]) << endl; } } } if (RoseBin_support::DEBUG_MODE()) cout << "\n evaluating: " << name << endl; // do something with the current node // e.g. 
checkVariables(name, node); SgGraphNode* nodeBefore= NULL; BeforeMapType::const_iterator it = nodeBeforeMap.find(node); if (it!=nodeBeforeMap.end()) nodeBefore = it->second; // successor vector is empty on each new node successors.clear(); ROSE_ASSERT(isSgIncidenceDirectedGraph(vizzGraph)); isSgIncidenceDirectedGraph(vizzGraph)->getSuccessors(node, successors); hasChanged = analysis->run(name, node, nodeBefore); // append the successors to the worklist if (RoseBin_support::DEBUG_MODE()) cout << ">> getting successors (" << successors.size() << ") for : " << name << endl; // if (successors.size()==0) // cout << "PROBLEM ..................................................... : " << endl; vector<SgGraphNode*>::iterator succ = successors.begin(); for (;succ!=successors.end();++succ) { // for each successor do... SgGraphNode* next = *succ; SgAsmX86Instruction* nodeN = isSgAsmX86Instruction(node->get_SgNode()); //if (!nodeN) continue; SgAsmX86Instruction* nextN = isSgAsmX86Instruction(next->get_SgNode()); //if (!nextN) continue; string name_n = vizzGraph->getProperty(SgGraph::name, next); bool call = false; bool exceptionCallNext = false; if (nextN) exceptionCallNext = exceptionCall(nextN->get_kind() == x86_call ? nextN : 0); bool exceptionCallNode = false; if (nodeN) exceptionCallNode = exceptionCall(nodeN->get_kind() == x86_call ? 
nodeN : 0); if (RoseBin_support::DEBUG_MODE()) std::cout << " exceptionCallNode : " << exceptionCallNode << " exceptionCallNext : " << exceptionCallNext << endl; // if function call is call to malloc we have an exception and follow the call path if ((exceptionCallNode && !exceptionCallNext)) { } else if ( //if ( (nodeN && nodeN->get_kind() == x86_call) || (nextN && nextN->get_kind() == x86_ret) ) call = true; //bool sameParent = analysis->sameParents(node, next); bool validNode=false; if (g_algo->isValidCFGEdge(next, node) || exceptionCallNode) validNode = true; // debug ------------------------ if (RoseBin_support::DEBUG_MODE()) { string nodeBeforeStr=""; if (nodeBefore) nodeBeforeStr= nodeBefore->get_name(); cout << " DEBUG : >>>>>>>> previous node " << nodeBeforeStr << " This node : " << name << " next node : " << name_n << " ** validNode : " << RoseBin_support::resBool(validNode) << endl; } // ---------------------------------- if (( interprocedural==false && !call) // || (interprocedural==true && validNode)) { if (visited.find(next)==visited.end()) { // if the successor is not yet visited // mark as visited and put into worklist if (RoseBin_support::DEBUG_MODE()) cout << " never visited next node before... 
" << name_n << " interprocedural : " << interprocedural << " call : " << call << endl; if (RoseBin_support::DEBUG_MODE()) cout << "adding to visited : " << name_n << endl; visited.insert(next); nodeBeforeMap[next]=node; visitedCounter[next]=1; vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(1)); if (!containsHash(worklist_hash,next)) { // add next node only if the next node if (RoseBin_support::DEBUG_MODE()) cout << "adding to worklist: " << name_n << endl; worklist.push_back(next); worklist_hash.insert(next); } } else { // if the successor has been visited, we need to check if it has changed // if it has not, we continue, else we need to push it back to the worklist int nr = visitedCounter[next]; if (RoseBin_support::DEBUG_MODE()) cout << " visited next node before... " << RoseBin_support::ToString(nr) << " Changed == " << RoseBin_support::resBool(hasChanged) << endl; if (hasChanged) { visitedCounter[next]=++nr; vizzGraph->setProperty(SgGraph::visitedCounter, next, RoseBin_support::ToString(nr)); if (RoseBin_support::DEBUG_MODE()) cout << " has changed : " << RoseBin_support::resBool(hasChanged) << " -- interprocedural : " << RoseBin_support::resBool(interprocedural) << " -- Call : " << RoseBin_support::resBool(call) << " ------> new number: " << RoseBin_support::ToString(nr) << " -- contained in hash? : " << RoseBin_support::resBool(containsHash(worklist_hash,next)) << " ---- nr of Defs: " << RoseBin_support::ToString(analysis->getDefinitionSize()) << " ---- nr of Use: " << RoseBin_support::ToString(analysis->getUsageSize()) << endl; if (interprocedural || (!interprocedural && !call)){ //sameParent)) { //!call && ) { if (!containsHash(worklist_hash,next)) { worklist_hash.insert(next); worklist.push_back(next); if (RoseBin_support::DEBUG_MODE()) cout << " adding to worklist: " << name_n << endl; } } } else if (RoseBin_support::DEBUG_MODE()) cout << " has NOT changed. 
" << endl; //else we continue with the next node } } } // for } // while worklist.size()>0 } // for rootNodes }
/**************************************************** * process all instructions in the DB * add the instructions to the blocks ****************************************************/ void RoseBin_DB_IDAPRO::process_instruction_query(MYSQL* conn, MYSQL_RES* res_set) { rememberInstructions.clear(); // get the functions // char* q = (char*)"SELECT * FROM instructions_1"; char *q = (char*)"select *, (select parent_function from basic_blocks_1 where id = i.basic_block_id and (i.address - parent_function) >= 0 and (i.address - parent_function) = (select min(i.address - parent_function) from basic_blocks_1 where id = i.basic_block_id and (i.address - parent_function) >= 0) ) as i_f from instructions_1 i order by i.address"; if (RoseBin_support::DEBUG_MODE()) cout << "\n>> QUERY:: " << q << "\n" << endl; res_set = process_query(conn,q); if (res_set == NULL) { print_problemWithResults(conn); } else { MYSQL_ROW row; string mnemonic=(char*)""; uint64_t address=0; int basic_block=-1; int sequence =-1; string data=(char*)""; int i_func; while ((row = mysql_fetch_row(res_set))!=NULL) { for (unsigned int i=0; i<mysql_num_fields(res_set);i++) { char* ret=(char*)""; if (row[i] ==NULL) { ret = (char*)"<NULL>"; if (i==0) address = -1; if (i==1) basic_block = -1; if (i==2) mnemonic = ret; if (i==3) sequence = -1; if (i==4) data=ret; if (i==5) i_func= -1; } else { ret= row[i]; if (i==0) address = atoi(ret); if (i==1) basic_block = atoi(ret); if (i==2) mnemonic = ret; if (i==3) sequence = atoi(ret); if (i==4) data=ret; if (i==5) i_func = atoi(ret); } } // patched to adjust to objdump , Apr 26 2007 if (mnemonic ==(char*)"retn") mnemonic = (char*)"ret"; if (RoseBin_support::DEBUG_MODE()) { ostringstream addrhex; addrhex << hex << setw(8) << address ; cout << ">> creating instruction : " << addrhex.str() << " " << address << " - " << basic_block << " - " << mnemonic << " - " << sequence << endl; } // check if it is an instruction or if it appears in the callgraph, // if it is in the 
callgraph, one wants to create a BinaryCall instead // append the instruction to its function rose_hash::unordered_map <int, SgAsmFunction* >::iterator func_it = rememberFunctions.find(i_func); SgAsmFunction* func = NULL; // for (func_it; func_it!=rememberFunctions.end(); ++func_it) { if (func_it != rememberFunctions.end()) { func = func_it->second; } else { if (i_func!=-1) cerr << " ERROR : cant find the function i_func : " << i_func << " in rememberFunctions for instruction : " << mnemonic << endl; } SgAsmInstruction* instruction = NULL; instruction = createInstruction(address, func, mnemonic); // instruction = new SgAsmInstruction(address,bb,mnemonic,""); // Sep 29, tps : commented the following line out, since the function was removed. //instruction->set_raw_bytes(data); ROSE_ASSERT(instruction); SgAsmOperandList* operandList = new SgAsmOperandList(); instruction->set_operandList(operandList); operandList->set_parent(instruction); ostringstream hexaddr; hexaddr << hex << setw(8) << address ; if (RoseBin_support::DEBUG_MODE()) cout << " .rememberInstruction " << instruction->class_name() << " at : " << address << " hex: " << hexaddr.str() << endl; rememberInstructions[address]= instruction ; if (func) { // get the block in the func and append to it to conform to jeremiah func->append_statement(instruction); instruction->set_parent(func); //vector <SgNode*> blockVec =func->get_traversalSuccessorContainer(); //SgAsmBlock* block = isSgAsmBlock(blockVec[0]); //ROSE_ASSERT(block); //block->append_statement(instruction); //instruction->set_parent(block); ROSE_ASSERT(instruction->get_parent()); //SgAsmNode* nInst = (SgAsmNode*) instruction; //nInst->set_parent(func); ostringstream addrhex; addrhex << hex << setw(8) << i_func ; if (RoseBin_support::DEBUG_MODE()) cout << ">> appended instruction to function: " << func->get_name() << " addr " << addrhex.str() << " " << address << endl; } else { if (i_func!=-1) { cerr << " ERROR :: could not append instruction to function 
: " << endl; //exit(0); } } } // while } // if (res_set==NULL) checkError(conn,res_set); }
/***********************************************************************
 * Resolves the target instruction of a control transfer instruction.
 *
 * The operands of inst are inspected in order. Memory reference
 * operands are skipped (jump to data, not handled). For any other
 * operand the string produced by RoseBin_support::resolveValue (empty
 * if the operand is not a value expression) is parsed as a hex number
 * and looked up in rememberInstructions. The first hit is returned.
 *
 * Side effect: if a value operand carried a non-empty replacement
 * string and the name of the function enclosing the target parses as a
 * hex number, that function is renamed to the replacement string. The
 * function is looked up as the parent of the target if db is set,
 * otherwise as its grandparent.
 *
 * Returns NULL if inst is NULL, is not a control transfer, or no
 * operand resolves to a remembered instruction.
 ***********************************************************************/
SgAsmInstruction* RoseBin_FlowAnalysis::process_jumps_get_target(SgAsmx86Instruction* inst) {
  if (!inst || !x86InstructionIsControlTransfer(inst))
    return NULL;

  // replacement name of the most recent value operand; deliberately
  // declared outside the loop so it carries over to later operands
  string funcName = "";

  // get the operand and the destination address
  SgAsmOperandList* opList = inst->get_operandList();
  ROSE_ASSERT(opList);
  SgAsmExpressionPtrList ptrList = opList->get_operands();

  for (std::vector<SgAsmExpression*>::iterator itList = ptrList.begin(); itList != ptrList.end(); ++itList) {
    SgAsmExpression* exp = *itList;
    ROSE_ASSERT(exp);

    SgAsmValueExpression* valExpr = isSgAsmValueExpression(exp);
    SgAsmMemoryReferenceExpression* memExpr = isSgAsmMemoryReferenceExpression(exp);

    string valStr = "";
    if (valExpr) {
      uint8_t byte_val = 0xF;
      uint16_t word_val = 0xFF;
      uint32_t double_word_val = 0xFFFF;
      uint64_t quad_word_val = 0xFFFFFFFFU;
      valStr = RoseBin_support::resolveValue(valExpr, true, byte_val, word_val,
                                             double_word_val, quad_word_val);
      funcName = valExpr->get_replacement();
    }

    if (memExpr)
      continue; // this is a jump to data ... do not handle right now!!

    // convert val string to long
    uint64_t val = 0;
    if (!from_string<uint64_t>(val, valStr, std::hex)) {
      if (valStr != "" && RoseBin_support::DEBUG_MODE()) {
        // print the full source address (it was truncated to int before)
        ostringstream sourceAddr;
        sourceAddr << hex << setw(8) << inst->get_address();
        // valStr is only non-empty for value expressions, so the register
        // reference cast used here before was always NULL; report the
        // class of the operand itself instead
        cerr << " WARNING: Cant convert string to long - in process_jump :: "
             << exp->class_name()
             << " inst :: " << inst->get_mnemonic()
             << " addr : " << sourceAddr.str()
             << " target : " << valStr << endl;
      }
      continue;
    }

    rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::const_iterator itc =
      rememberInstructions.find(val);
    if (itc == rememberInstructions.end())
      continue; // target not found, try the next operand

    // we set the target (jump to for each control instruction)
    SgAsmInstruction* target = itc->second;
    ROSE_ASSERT(target);

    if (funcName != "") {
      SgAsmNode* block = target;
      if (!db)
        block = isSgAsmNode(target->get_parent());
      ROSE_ASSERT(block);
      SgAsmFunction* func = isSgAsmFunction(block->get_parent());
      if (func) {
        string fname = func->get_name();
        uint64_t val_f = 0;
        // func name is a hex number: replace it with the symbolic name
        if (from_string<uint64_t>(val_f, fname, std::hex))
          func->set_name(funcName);
      }
    }
    return target;
  }

  return NULL;
}