/**
 * Tells whether two graph nodes are considered to share a parent function.
 *
 * - If `node` wraps an SgAsmFunction the answer is always true.
 * - If both nodes wrap x86 instructions, their direct parents are cast to
 *   SgAsmFunction and compared by pointer. Two failed casts (both NULL)
 *   therefore also compare equal.
 * - In every other case the answer is false.
 *
 * @param node first graph node; must not be NULL.
 * @param next second graph node; must not be NULL.
 * @return true when the nodes are treated as having the same parent.
 */
bool RoseBin_DataFlowAbstract::sameParents(SgGraphNode* node, SgGraphNode* next) {
  // A function node short-circuits the comparison.
  if (isSgAsmFunction(node->get_SgNode()))
    return true;

  SgAsmx86Instruction* firstInst = isSgAsmx86Instruction(node->get_SgNode());
  SgAsmx86Instruction* secondInst = isSgAsmx86Instruction(next->get_SgNode());
  if (firstInst == NULL || secondInst == NULL)
    return false;

  SgAsmFunction* firstOwner = isSgAsmFunction(firstInst->get_parent());
  SgAsmFunction* secondOwner = isSgAsmFunction(secondInst->get_parent());
  return firstOwner == secondOwner;
}
/**
 * Reports BP-relative memory reads that have no previously recorded write.
 *
 * Visiting an SgAsmFunction clears the memoryWrites and memoryRead sets.
 * Visiting an x86 `mov` whose enclosing function can be located classifies
 * the operands (first operand = destination, following operand = source):
 *
 *   - mov mem, (reg | value | mem): the evaluated destination address is
 *     added to memoryWrites.
 *   - mov reg, mem, where the memory expression contains BP: if the
 *     evaluated address is in neither memoryWrites nor memoryRead, a message
 *     is stored in result[inst] and the address is added to memoryRead so the
 *     same address is not reported again.
 *
 * Every other node is ignored.
 *
 * @param node AST node currently being visited.
 */
void InitPointerToNull::visit(SgNode* node) {
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
    return;
  }

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
  if (inst == NULL || inst->get_kind() != x86_mov)
    return;

  // Locate the enclosing function. With a project the instruction sits in a
  // block below the function; otherwise (we run IDA, this is different) the
  // instruction's parent is cast to the function directly.
  SgNode* instBlock = NULL;
  if (project)
    instBlock = isSgAsmBlock(inst->get_parent());
  else
    instBlock = inst;
  if (instBlock == NULL)
    return;
  SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
  if (instFunc == NULL)
    return;

  // We have found a mov instruction. We need to check whether it is
  //   mov mem, (value or reg or mem)  -> assignment of a variable
  //   mov reg, mem                    -> usage of a variable
  // and make sure a variable is assigned before it is used.
  SgAsmOperandList* ops = inst->get_operandList();
  SgAsmExpressionPtrList& opsList = ops->get_operands();

  SgAsmMemoryReferenceExpression* memL = NULL;
  SgAsmMemoryReferenceExpression* memR = NULL;
  SgAsmRegisterReferenceExpression* regL = NULL;
  SgAsmRegisterReferenceExpression* regR = NULL;
  SgAsmValueExpression* Val = NULL;

  bool seenDestination = false;
  for (SgAsmExpressionPtrList::iterator itOP = opsList.begin(); itOP != opsList.end(); ++itOP) {
    SgAsmExpression* exp = *itOP;
    ROSE_ASSERT(exp);
    if (!seenDestination) {
      // left hand side
      memL = isSgAsmMemoryReferenceExpression(exp);
      regL = isSgAsmRegisterReferenceExpression(exp);
      seenDestination = true;
    } else {
      // right hand side
      memR = isSgAsmMemoryReferenceExpression(exp);
      regR = isSgAsmRegisterReferenceExpression(exp);
      Val = isSgAsmValueExpression(exp);
    }
  }

  if (memL && (regR || Val || memR)) {
    // Could be an assignment to an address.
    rose_addr_t addr = BinQSupport::evaluateMemoryExpression(inst, memL);
    // Apparently the reference to memory does not always have to be BP but
    // can also be IP if it is a static variable, so writes are remembered
    // regardless of the base register. How will we handle global variables?
    if (debug)
      cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    memoryWrites.insert(addr);
    return;
  }

  if (!(regL && memR))
    return;

  // Could be a usage of an address.
  rose_addr_t addr = BinQSupport::evaluateMemoryExpression(inst, memR);
  bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr, x86_gpr_bp, memR);
  if (!containsBP)
    return;

  // This is a memory read with an offset to BP. Did we see a write for this?
  // If not, it is not initialized!
  if (memoryWrites.find(addr) != memoryWrites.end()) {
    // found write, everything is good
    if (debug)
      cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
    return;
  }
  if (memoryRead.find(addr) != memoryRead.end())
    return; // this address was already reported

  if (debug)
    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;

  // Use the function located above. Re-deriving it through
  // isSgAsmBlock(inst->get_parent()) yields no name in the non-project
  // layout, where the instruction's parent is the function itself.
  string funcname = instFunc->get_name();
  string res = "Possibly uninitialized variable: ";
  res += " ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
    " <"+inst->get_comment()+"> in function: "+funcname;
  result[inst] = res;
  memoryRead.insert(addr);
}
void RoseBin_FlowAnalysis::process_jumps() { if (RoseBin_support::DEBUG_MODE()) cerr << "\n >>>>>>>>> processing jumps ... " << endl; rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it; for (it=rememberInstructions.begin();it!=rememberInstructions.end();++it) { SgAsmx86Instruction* inst = isSgAsmx86Instruction(it->second); if (inst->get_kind() == x86_call) { //cerr << "Found call at " << std::hex << inst->get_address() << endl; SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst)); if (target) { //cerr << "Target is " << std::hex << target->get_address() << endl; // inst->get_targets().push_back(target); // we set the sources (for each node) ROSE_ASSERT(g_algo->info); g_algo->info->incomingEdges[target].insert(inst->get_address()); // tps: changed this algorithm so that it runs in // linear time! ROSE_ASSERT (target->get_parent()); if (target->get_parent()) { // ROSE_ASSERT(target->get_parent()); SgAsmNode* b_b = target; if (!db) b_b = isSgAsmNode(target->get_parent()); ROSE_ASSERT(b_b); SgAsmFunction* b_func = isSgAsmFunction(b_b->get_parent()); if (b_func) { // (16/Oct/07) tps: this is tricky, it appears that sometimes the target can // be just a jmp to a new location, so we should forward this information to the correct // function. // Therefore we need to check if the current function has a return statement. // If not, we want to forward this information. 
if (target->get_kind() == x86_jmp) { //cerr << " >>>>>>>> found a jmp target - number of children: " << b_func->get_traversalSuccessorContainer().size() << endl; if (b_func->get_numberOfTraversalSuccessors()==1) { SgAsmx86Instruction* target2 = isSgAsmx86Instruction(process_jumps_get_target(inst)); if (target2) { b_b = target2; if (!db) b_b = isSgAsmNode(target2->get_parent()); b_func = isSgAsmFunction(b_b->get_parent()); } } } if (inst->get_parent()) { //cerr << "Inst has a parent" << endl; if (inst->get_comment()=="") inst->set_comment(""+b_func->get_name()); ROSE_ASSERT(g_algo->info); SgAsmInstruction* inst_after = g_algo->info->getInstructionAtAddress(inst->get_address() + inst->get_raw_bytes().size()); // inst->cfgBinFlowOutEdge(info); if (inst_after) { //cerr << "Added dest " << std::hex << isSgAsmStatement(inst_after)->get_address() << " for function" << endl; b_func->append_dest(isSgAsmStatement(inst_after)); } } } else { if (RoseBin_support::DEBUG_MODE()) cerr << " NO FUNCTION DETECTED ABOVE BLOCK . " << endl; } } else { if (RoseBin_support::DEBUG_MODE()) cerr << " WARNING :: process_jumps: target has no parent ... i.e. no FunctionDeclaration to it " << target->class_name() << endl; } } else { if (inst) if (RoseBin_support::DEBUG_MODE()) cerr << " WARNING :: process_jumps: No target found for node " << RoseBin_support::HexToString(inst->get_address()) << " " << inst->get_mnemonic() << endl; } } else { // might be a jmp SgAsmx86Instruction* target = isSgAsmx86Instruction(process_jumps_get_target(inst)); if (target) { // inst->get_targets().push_back(target); // we set the sources (for each node) ROSE_ASSERT(g_algo->info); g_algo->info->incomingEdges[target].insert(inst->get_address()); } } } //cerr << "\n >>>>>>>>> processing jumps ... done. " << endl; // cerr << "\n >>>>>>>>> resolving RET jumps ... 
" << endl; rose_hash::unordered_map <uint64_t, SgAsmInstruction* >::iterator it2; for (it2=rememberInstructions.begin();it2!=rememberInstructions.end();++it2) { //int id = it2->first; SgAsmx86Instruction* target = isSgAsmx86Instruction(it2->second); ROSE_ASSERT (target); #if 1 if (target->get_kind() == x86_ret) { SgAsmNode* b_b = target; if (!db) b_b = isSgAsmNode(target->get_parent()); SgAsmFunction* parent = isSgAsmFunction(b_b->get_parent()); if (parent) { //ROSE_ASSERT(parent); std::vector <SgAsmStatement*> dest_list = parent->get_dest(); for (size_t i = 0; i < dest_list.size(); ++i) { ROSE_ASSERT (isSgAsmInstruction(dest_list[i])); //cerr << "Adding ret target " << std::hex << dest_list[i]->get_address() << " to " << std::hex << target->get_address() << endl; //info->indirectJumpAndReturnTargets[target].insert(dest_list[i]->get_address()); ROSE_ASSERT(g_algo->info); g_algo->info->incomingEdges[isSgAsmInstruction(dest_list[i])].insert(target->get_address()); } std::vector <SgAsmStatement*>::iterator it3 = dest_list.begin(); for (; it3!=dest_list.end();++it3) { SgAsmInstruction* dest = isSgAsmInstruction(*it3); if (dest) { dest->append_sources(target); //cerr << " appending source to " << dest->get_address() << " target: " << target->get_address() << endl; } } // for } else { // if parent if (RoseBin_support::DEBUG_MODE()) cerr << " ERROR :: RET jumps :: no parent found for ret : " << target->class_name() << endl; //exit (0); } } // if ret #endif } if (RoseBin_support::DEBUG_MODE()) cerr << " >>>>>>>>> resolving RET jumps ... done." << endl; }
/*
 * Detect functions (blocks) that can be merged together.
 *
 * Collects every SgAsmFunction below globalNode and processes them from the
 * back of that list. For each function with at least one child:
 *   - all children must be x86 instructions; the function "has a stop
 *     condition" if any of them is a ret or hlt;
 *   - resolveFunction() is called with the last instruction and that flag;
 *   - if it returns an x86 instruction whose parent is a different
 *     SgAsmFunction, that function is merged into the current one: its
 *     return targets are added to the current function's, its children are
 *     re-parented and appended to the current function, and it is emptied,
 *     detached and removed from the global block.
 *
 * globalNode must be an SgAsmBlock whenever a merge takes place.
 */
void RoseBin_FlowAnalysis::resolveFunctions(SgAsmNode* globalNode) {
  vector<SgNode*> tree = NodeQuery::querySubTree(globalNode, V_SgAsmFunction);
  int nr = 0;
  while (!tree.empty()) {
    SgAsmFunction* funcD = isSgAsmFunction(tree.back());
    tree.pop_back();
    // Check before the debug output below dereferences funcD.
    ROSE_ASSERT(funcD);

    nr++;
    if ((nr % 100) == 0 && RoseBin_support::DEBUG_MODE())
      cerr << " funcListSize : " << tree.size() << " -- iteration : " << nr << " func " << funcD->get_name() << endl;

    vector <SgNode*> funcVec = funcD->get_traversalSuccessorContainer();
    if (funcVec.empty())
      continue;

    // Scan every child: each one is asserted to be an x86 instruction, so
    // the loop must not stop at the first ret/hlt.
    bool hasStopCondition = false;
    for (size_t itf = 0; itf < funcVec.size(); ++itf) {
      SgAsmx86Instruction* finst = isSgAsmx86Instruction(funcVec[itf]);
      ROSE_ASSERT(finst);
      if (finst->get_kind() == x86_ret || finst->get_kind() == x86_hlt)
        hasStopCondition = true;
    }

    SgAsmx86Instruction* lastInst = isSgAsmx86Instruction(funcVec.back());
    ROSE_ASSERT(lastInst);
    SgAsmx86Instruction* nextInst = isSgAsmx86Instruction(resolveFunction(lastInst, hasStopCondition));
    if (nextInst == NULL)
      continue;

    SgAsmFunction* nextFunc = isSgAsmFunction(nextInst->get_parent());
    // Merging a function into itself would duplicate its statements and then
    // remove the function from the global block, so skip that case.
    if (nextFunc == NULL || nextFunc == funcD)
      continue;

    SgAsmBlock* globalBlock = isSgAsmBlock(globalNode);
    ROSE_ASSERT(globalBlock);

    ROSE_ASSERT(g_algo->info);
    g_algo->info->returnTargets[funcD].insert(g_algo->info->returnTargets[nextFunc].begin(),
                                              g_algo->info->returnTargets[nextFunc].end());

    // Now we remove this next function: iterate through all of its
    // instructions and attach them to the current function.
    vector <SgNode*> funcNextVec = nextFunc->get_traversalSuccessorContainer();
    for (size_t i = 0; i < funcNextVec.size(); ++i) {
      SgAsmInstruction* inst = isSgAsmInstruction(funcNextVec[i]);
      ROSE_ASSERT(inst);
      inst->set_parent(funcD);
      funcD->append_statement(inst);
    }

    // nextFunc itself is not deleted here; it is only emptied and detached.
    nextFunc->remove_children();
    nextFunc->set_parent(NULL);
    globalBlock->remove_statement(nextFunc);
  }
}