/** Traversal callback that resets the successor information of a basic block.
 *
 *  Nodes that are not SgAsmBlock, and blocks for which analyzer->is_vertex_filtered() returns true, are left
 *  untouched.  For every other block the successor list is emptied and the "successors complete" flag is cleared. */
void visit(SgNode *node) {
    SgAsmBlock *bblock = isSgAsmBlock(node);
    if (bblock == NULL)
        return;
    if (analyzer->is_vertex_filtered(bblock))
        return;

    bblock->get_successors().clear();
    bblock->set_successors_complete(false);
}
/*
 * Converts blocks to functions (is not part of Jeremiah's disassembler - and also IDA)
 * deprecated!
 *
 * Every SgAsmBlock found in the subtree of globalNode (other than globalNode itself) is removed from the global
 * block and replaced by a newly allocated SgAsmFunction that has the block's address and a hexadecimal name.  The
 * return targets recorded for the block are copied to the function, the block's instruction children are
 * re-parented to the function, and finally the block's children are removed.
 *
 * globalNode must be an SgAsmBlock whenever its subtree contains at least one other block.
 */
void
RoseBin_FlowAnalysis::convertBlocksToFunctions(SgAsmNode* globalNode) {
  // Resolve the global block once instead of casting (unchecked) at every use.
  SgAsmBlock* globalBlock = isSgAsmBlock(globalNode);

  vector<SgNode*> tree = NodeQuery::querySubTree(globalNode, V_SgAsmBlock);
  //cerr << " ObjDump-BinRose:: Converting Blocks To Functions" << endl;
  for (vector<SgNode*>::iterator itV = tree.begin(); itV != tree.end(); ++itV) {
    SgAsmBlock* block = isSgAsmBlock(*itV);
    if (block && block != globalNode) {
      // The statements below modify the global block, so it has to exist.
      ROSE_ASSERT(globalBlock != NULL);

      uint64_t addr = block->get_address();
      globalBlock->remove_statement(block);
      block->set_parent(NULL);

      SgAsmFunction* func = new SgAsmFunction(addr, RoseBin_support::HexToString(addr));
      ROSE_ASSERT(g_algo->info);
      g_algo->info->returnTargets[func].insert(g_algo->info->returnTargets[block].begin(),
                                               g_algo->info->returnTargets[block].end());
      globalBlock->append_statement(func);
      func->set_parent(globalNode);

      vector<SgNode*> vec = block->get_traversalSuccessorContainer();
      for (size_t itf = 0; itf < vec.size(); ++itf) {
        SgAsmInstruction* finst = isSgAsmInstruction(vec[itf]);
        // A child that is not an instruction makes the cast return NULL; skip it rather than dereference it.
        if (finst == NULL)
          continue;
        finst->set_parent(func);
        func->append_statement(finst);
      }
      block->remove_children();
    }
  }
  // string filename="_binary_tree_func.dot";
  //AST_BIN_Traversal* trav = new AST_BIN_Traversal();
  //trav->run(globalNode, filename);
}
/*
 * This function removes blocks, so functions contain only instructions
 * deprecated!
 *
 * For every SgAsmBlock in the subtree of globalNode (other than globalNode itself) whose parent is an
 * SgAsmFunction: the block's return targets are merged into the function's return targets, each instruction child
 * of the block is re-parented to the function and appended to it, and the block is then removed from the function.
 * Blocks whose parent is not a function are left alone.
 */
void
RoseBin_FlowAnalysis::flattenBlocks(SgAsmNode* globalNode) {
  vector<SgNode*> tree = NodeQuery::querySubTree(globalNode, V_SgAsmBlock);
  //cerr << " ObjDump-BinRose:: Removing Blocks " << endl;
  for (vector<SgNode*>::iterator itV = tree.begin(); itV != tree.end(); ++itV) {
    SgAsmBlock* block = isSgAsmBlock(*itV);
    if (block == NULL || block == globalNode)
      continue;

    SgAsmFunction* func = isSgAsmFunction(block->get_parent());
    if (func == NULL)
      continue;

    ROSE_ASSERT(g_algo->info);
    g_algo->info->returnTargets[func].insert(g_algo->info->returnTargets[block].begin(),
                                             g_algo->info->returnTargets[block].end());

    vector<SgNode*> vec = block->get_traversalSuccessorContainer();
    for (size_t itf = 0; itf < vec.size(); ++itf) {
      SgAsmInstruction* finst = isSgAsmInstruction(vec[itf]);
      // Only instructions are hoisted; a non-instruction child yields NULL and must not be dereferenced.
      if (finst == NULL)
        continue;
      finst->set_parent(func);
      func->append_statement(finst);
    }
    func->remove_statement(block);
  }
}
SgAsmBlock* buildBasicBlock(const std::vector<SgAsmInstruction*> &insns) { SgAsmBlock *bb = new SgAsmBlock; if (!insns.empty()) { bb->set_id(insns.front()->get_address()); bb->set_address(insns.front()->get_address()); BOOST_FOREACH (SgAsmInstruction *insn, insns) { bb->get_statementList().push_back(insn); insn->set_parent(bb); }
/** Detects and reports runs of consecutive NOP instructions.
 *
 *  Called for every node of the traversal; only SgAsmInstruction nodes are examined.  While NOPs are being visited
 *  the run length is accumulated in @c count and the first NOP of the run is remembered in @c nopSequenceStart.
 *  When the first non-NOP instruction after a run is visited, the run is printed, recorded in @c nopSequences, and
 *  the size in bytes of each NOP of the run is printed by walking the statement list of the block that contains
 *  the first NOP. */
void
CountTraversal::visit ( SgNode* n )
   {
     SgAsmInstruction* asmInstruction = isSgAsmInstruction(n);
     if (asmInstruction != NULL)
        {
       // Use the new interface support for this (this detects all multi-byte nop instructions).
          if (SageInterface::isNOP(asmInstruction) == true)
             {
               if (previousInstructionWasNop == true)
                  {
                 // Increment the length of the identified NOP sequence
                    count++;
                  }
                 else
                  {
                    count = 1;
                 // Record the starting address of the NOP sequence
                    nopSequenceStart = asmInstruction;
                  }

               previousInstructionWasNop = true;
             }
            else
             {
               if (count > 0)
                  {
                 // Report the sequence when we have detected the end of the sequence.
                    SgAsmFunction* functionDeclaration = getAsmFunction(asmInstruction);
                    printf ("Reporting NOP sequence of length %3d at address %zu in function %s (reason for this being a function = %u = %s) \n",
                         count,nopSequenceStart->get_address(),functionDeclaration->get_name().c_str(),
                         functionDeclaration->get_reason(),
                         stringifySgAsmFunctionFunctionReason(functionDeclaration->get_reason()).c_str());
                    nopSequences.push_back(pair<SgAsmInstruction*,int>(nopSequenceStart,count));

                    SgAsmBlock* block = isSgAsmBlock(nopSequenceStart->get_parent());
                    ROSE_ASSERT(block != NULL);
                    SgAsmStatementPtrList & l = block->get_statementList();

                 // Now iterate over the nop instructions in the sequence and report the length of each (can be multi-byte nop instructions).
                    SgAsmStatementPtrList::iterator i = find(l.begin(),l.end(),nopSequenceStart);
                    ROSE_ASSERT(i != l.end());
                    int counter = 0;

                 // The end-of-list test has to come first: the terminating instruction is not necessarily in the
                 // same block as the start of the sequence, and dereferencing the end iterator is undefined.
                    while ( (i != l.end()) && (*i != asmInstruction) )
                       {
                         printf ("--- NOP #%2d is length = %2d \n",counter++,(int)isSgAsmInstruction(*i)->get_raw_bytes().size());
                         ++i;
                       }
                  }

               count = 0;
               previousInstructionWasNop = false;
             }
        }
   }
/** Add edges to graph from functions that call system calls to system calls.
 *
 *  The first 1000 vertexes (0 to 999) in the graph is reserved for system calls, which is many more than the actual system
 *  calls in linux.
 *
 *  For each function in @p all_functions, every x86 "int 0x80" instruction that has an enclosing basic block is
 *  located.  The statements of that block which precede the interrupt are executed with the partial symbolic
 *  semantics; if the value of EAX is known afterwards, an edge is added to @p G from the function's index in
 *  @p all_functions to the vertex numbered by that value.  Exceptions thrown by the semantics or the policy are
 *  ignored for that instruction. */
void
add_syscall_edges(DirectedGraph* G, std::vector<SgAsmFunction*>& all_functions)
{
    typedef PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State, PartialSymbolicSemantics::ValueType> Policy;
    typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;

    // Detect all system calls and add an edge from the function to the system call
    for (unsigned int caller_id = 0; caller_id < all_functions.size(); ++caller_id) {
        std::vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(all_functions[caller_id]);

        for (size_t idx = 0; idx < insns.size(); ++idx) {
            SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(insns[idx]);
            if (x86insn == NULL)
                continue;

            // On linux system calls are always interrupts and all interrupts are system calls
            SgAsmBlock *bblock = SageInterface::getEnclosingNode<SgAsmBlock>(x86insn);
            if (bblock == NULL || x86insn->get_kind() != x86_int)
                continue;

            // Only the single-operand form "int 0x80" is of interest.
            const SgAsmExpressionPtrList &operands = x86insn->get_operandList()->get_operands();
            if (operands.size() != 1)
                continue;
            SgAsmExpression *vector_expr = operands[0];
            if (vector_expr == NULL || vector_expr->variantT() != V_SgAsmIntegerValueExpression)
                continue;
            if (0x80 != isSgAsmIntegerValueExpression(vector_expr)->get_value())
                continue;

            // Position of the interrupt within its block; equals stmts.size() if it is not found there.
            const SgAsmStatementPtrList &stmts = bblock->get_statementList();
            size_t int_n = 0;
            while (int_n < stmts.size() && isSgAsmInstruction(stmts[int_n]) != x86insn)
                ++int_n;

            // Semantically execute the basic block to find out which system call was called
            Policy policy;
            Semantics semantics(policy);
            try {
                semantics.processBlock(stmts, 0, int_n);
                if (policy.readRegister<32>("eax").is_known()) {
                    int nr = policy.readRegister<32>("eax").known_value();
                    boost::add_edge(caller_id, nr, *G);
                }
            } catch (const Semantics::Exception&) {
            } catch (const Policy::Exception&) {
            }
        }
    }
}
/** Traversal callback that unlinks the basic block located at @c removal_addr from its enclosing function.
 *
 *  Nodes that are not basic blocks, blocks without an enclosing SgAsmFunction, and blocks at other addresses are
 *  ignored.  A matching block is erased from the function's statement list and a message is written to standard
 *  output.  The traversal is not aborted after a removal. */
void visit(SgNode *node) {
    SgAsmBlock *bb = isSgAsmBlock(node);
    if (bb == NULL)
        return;

    SgAsmFunction *owner = SageInterface::getEnclosingNode<SgAsmFunction>(bb);
    if (owner == NULL || bb->get_address() != removal_addr)
        return;

    SgAsmStatementPtrList &stmts = owner->get_statementList();
    SgAsmStatementPtrList::iterator pos = std::find(stmts.begin(), stmts.end(), bb);
    ROSE_ASSERT(pos != stmts.end());
    stmts.erase(pos);
    std::cout <<"removed basic block " <<StringUtility::addrToString(removal_addr) <<std::endl;
    // throw 1 /* found the one-and-only block, so we can abandon the traversal */
}
/** Constructs the translation policy and collects address tables from the binary AST.
 *
 *  @param hooks                   Stored in the @c hooks member.
 *  @param minNumStepsToFindError  Lower bound (at least 1) on the step count at which errors are enabled.
 *  @param maxNumStepsToFindError  Upper bound on the step count at which errors are enabled; zero disables the
 *                                 upper bound.  Must be less than 0xFFFFFFFF.
 *  @param proj                    Root of the AST that is searched for functions, basic blocks and x86 instructions.
 *
 *  The constructor creates a 32-bit step counter that stops incrementing once it reaches
 *  maxNumStepsToFindError + 1, derives @c resetState and @c errorsEnabled from it, and then fills
 *  @c functionStarts, @c blockStarts, @c returnPoints and @c validIPs from the AST below @p proj. */
BtorTranslationPolicy::BtorTranslationPolicy(BtorTranslationHooks* hooks, uint32_t minNumStepsToFindError,
                                             uint32_t maxNumStepsToFindError, SgProject* proj)
  : problem(), hooks(hooks), regdict(NULL)
{
  assert (minNumStepsToFindError >= 1); // Can't find an error on the first step
  assert (maxNumStepsToFindError < 0xFFFFFFFFU); // Prevent overflows
  assert (minNumStepsToFindError <= maxNumStepsToFindError || maxNumStepsToFindError == 0);
  makeRegMap(origRegisterMap, "");
  makeRegMapZero(newRegisterMap);
  isValidIp = false_();
  validIPs.clear();

  // The solver expressions below are kept exactly as nested calls so that their construction order is unchanged.
  Comp stepCount = problem.build_var(32, "stepCount_saturating_at_" + boost::lexical_cast<std::string>(maxNumStepsToFindError + 1));
  addNext(stepCount, ite(problem.build_op_eq(stepCount, number<32>(maxNumStepsToFindError + 1)), number<32>(maxNumStepsToFindError + 1), problem.build_op_inc(stepCount)));
  resetState = problem.build_op_eq(stepCount, zero(32));
  errorsEnabled =
    problem.build_op_and(
      problem.build_op_ugte(stepCount, number<32>(minNumStepsToFindError)),
      (maxNumStepsToFindError == 0 ?
       true_() :
       problem.build_op_ulte(stepCount, number<32>(maxNumStepsToFindError))));

  // Entry address of every function.
  {
    vector<SgNode*> functions = NodeQuery::querySubTree(proj, V_SgAsmFunction);
    for (size_t i = 0; i < functions.size(); ++i) {
      functionStarts.push_back(isSgAsmFunction(functions[i])->get_address());
      // fprintf(stderr, "functionStarts 0x%"PRIx64"\n", isSgAsmFunction(functions[i])->get_address());
    }
  }

  // Address of every block whose first statement is an x86 instruction.
  {
    vector<SgNode*> blocks = NodeQuery::querySubTree(proj, V_SgAsmBlock);
    for (size_t i = 0; i < blocks.size(); ++i) {
      SgAsmBlock* b = isSgAsmBlock(blocks[i]);
      if (!b->get_statementList().empty() && isSgAsmX86Instruction(b->get_statementList().front())) {
        blockStarts.push_back(b->get_address());
        // fprintf(stderr, "blockStarts 0x%"PRIx64"\n", b->get_address());
      }
    }
  }

  // One pass over all x86 instructions fills both tables (the AST used to be queried twice for the same node
  // type).  The relative order of the entries within each vector is the same as before.
  {
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmX86Instruction* insn = isSgAsmX86Instruction(instructions[i]);
      validIPs.push_back(insn->get_address());
      if (insn->get_kind() == x86_call) {
        // The return point of a call is the address immediately following the call instruction.
        returnPoints.push_back(insn->get_address() + insn->get_raw_bytes().size());
        // fprintf(stderr, "returnPoints 0x%"PRIx64"\n", insn->get_address() + insn->get_raw_bytes().size());
      }
    }
  }
}
/** Traversal callback that symbolically executes each basic block and prints the machine state.
 *
 *  For every SgAsmBlock that has instructions, a fresh symbolic RISC-operators object (i386 register dictionary,
 *  all definers tracked) and a 32-bit x86 dispatcher are created.  Each x86 instruction of the block is then
 *  printed with its address, processed by the dispatcher, and followed by a dump of the operators' state. */
void visit(SgNode *node) {
    SgAsmBlock *bblock = isSgAsmBlock(node);
    if (bblock == NULL || !bblock->has_instructions())
        return;

    using namespace rose::BinaryAnalysis::InstructionSemantics2;

    const RegisterDictionary *regdict = RegisterDictionary::dictionary_i386();
    SymbolicSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(regdict);
    ops->computingDefiners(SymbolicSemantics::TRACK_ALL_DEFINERS); // only used so we can test that it works
    BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(ops, 32);

    const SgAsmStatementPtrList &stmts = bblock->get_statementList();
    SgAsmStatementPtrList::const_iterator cur = stmts.begin();
    while (cur != stmts.end()) {
        // Statements that are not x86 instructions are skipped.
        if (SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(*cur)) {
            std::cout <<unparseInstructionWithAddress(x86insn) <<"\n";
            dispatcher->processInstruction(x86insn);
            std::cout <<*ops <<"\n";
        }
        ++cur;
    }
}
/** Registers every basic block of the traversal in @c blockMap, keyed by the block's address.
 *
 *  Nodes that are not SgAsmBlock are ignored.  For a block, its address is printed and a new map entry is created
 *  that pairs the block with a newly allocated TraceStructType.  It is asserted that no entry exists yet for that
 *  address. */
void
BlockTraversal::visit(SgNode* n)
   {
     SgAsmBlock* asmBlock = isSgAsmBlock(n);
     if (asmBlock != NULL)
        {
       // Save the address of the SgAsmBlock.
          rose_addr_t asmBlockAddress = asmBlock->get_address();

       // The address is an integer, not a pointer, so it must not be passed to "%p"; print it as hexadecimal
       // through an explicitly sized integer conversion instead.
          printf ("asmBlockAddress = 0x%llx \n",static_cast<unsigned long long>(asmBlockAddress));

          bool blockExists = blockMap.find(asmBlockAddress) != blockMap.end();
          ROSE_ASSERT(blockExists == false);

          if (blockExists == false)
             {
            // Need to add the block address.
            // blockMap[asmBlockAddress] = asmBlock;
               blockMap[asmBlockAddress] = pair<SgAsmBlock*,TraceStructType*>(asmBlock,new TraceStructType(asmBlock));
             }
        }
   }
/** Traversal callback that runs the symbolic x86 instruction semantics over each basic block.
 *
 *  For every SgAsmBlock that has instructions, a new policy (constructed without an SMT solver) and a semantics
 *  object are created.  Each x86 instruction of the block is printed with its address, processed, and followed by
 *  a dump of the policy. */
void visit(SgNode *node) {
    SgAsmBlock *bblock = isSgAsmBlock(node);
    if (bblock == NULL || !bblock->has_instructions())
        return;

    using namespace BinaryAnalysis::InstructionSemantics;
    typedef SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> Policy;
    typedef X86InstructionSemantics<Policy, SymbolicSemantics::ValueType> Semantics;

    Policy policy(NULL/*no SMT solver*/);
    Semantics semantics(policy);

    const SgAsmStatementPtrList &stmts = bblock->get_statementList();
    SgAsmStatementPtrList::const_iterator cur = stmts.begin();
    while (cur != stmts.end()) {
        // Statements that are not x86 instructions are skipped.
        if (SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(*cur)) {
            std::cout <<unparseInstructionWithAddress(x86insn) <<"\n";
            semantics.processInstruction(x86insn);
            std::cout <<policy;
        }
        ++cur;
    }
}
/** Traversal callback that checks the immediate-dominator pointer of a basic block.
 *
 *  Only blocks that have an enclosing function are checked.  A block fails the check when
 *   - it is the entry block of its function and has an immediate dominator, or
 *   - it is not the entry block and either has no immediate dominator or its immediate dominator belongs to a
 *     different function.
 *  A failing block is inserted into @c bad_blocks (when that pointer is non-null) and @c failed is set. */
void visit(SgNode *node) {
    SgAsmBlock *bblock = isSgAsmBlock(node);
    if (bblock == NULL)
        return;
    SgAsmFunction *owner = bblock->get_enclosing_function();
    if (owner == NULL)
        return;

    const bool is_entry = (bblock == owner->get_entry_block());
    SgAsmBlock *idom = bblock->get_immediate_dominator();

    bool violation;
    if (is_entry) {
        violation = (idom != NULL);
    } else {
        violation = (idom == NULL) || (idom->get_enclosing_function() != owner);
    }

    if (violation) {
        if (bad_blocks)
            bad_blocks->insert(bblock);
        failed = true;
    }
}
/** Flags "mov reg, mem" reads of memory for which no earlier "mov mem, ..." write was seen in the same function.
 *
 *  Visiting an SgAsmFunction resets the per-function sets @c memoryWrites and @c memoryRead.  Visiting an x86 mov
 *  instruction classifies it by its operands:
 *   - destination is memory and the source is a register, a value, or memory: the evaluated destination address is
 *     recorded in @c memoryWrites;
 *   - destination is a register and the source is memory that references the BP register: if the evaluated source
 *     address is neither in @c memoryWrites nor already in @c memoryRead, a message is stored in @c result for the
 *     instruction and the address is added to @c memoryRead.
 *
 *  When @c project is set the instruction's parent is expected to be a block whose parent is the function;
 *  otherwise (IDA) the instruction's parent is expected to be the function itself.  Instructions that do not match
 *  the expected layout are ignored. */
void
InitPointerToNull::visit(SgNode* node) {
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
    return;
  }

  SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
  if (inst == NULL || inst->get_kind() != x86_mov)
    return;

  // this is the address of the mov instruction prior to the call
  //rose_addr_t resolveAddr=0;
  SgNode* instBlock = NULL;
  if (project)
    instBlock = isSgAsmBlock(inst->get_parent());
  else //we run IDA, this is different
    instBlock = inst;

  if (instBlock == NULL)
    return;
  SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
  if (instFunc == NULL)
    return;

  // we have found a mov instruction
  // we need to check if it is a
  //   mov mem, (value or reg)   // assignment of variable
  //   mov mem, mem              // (was forgotten originally)
  // or we find a
  //   mov reg, mem              // usage of variable
  // make sure a variable is assigned before used
  SgAsmOperandList* ops = inst->get_operandList();
  SgAsmExpressionPtrList& opsList = ops->get_operands();
  SgAsmMemoryReferenceExpression* memL = NULL;
  SgAsmMemoryReferenceExpression* memR = NULL;
  SgAsmRegisterReferenceExpression* regL = NULL;
  SgAsmRegisterReferenceExpression* regR = NULL;
  SgAsmValueExpression* Val = NULL;

  // The first operand is the left hand side; every following operand is treated as the right hand side (the last
  // one wins).
  bool seenLeft = false;
  for (SgAsmExpressionPtrList::iterator itOP = opsList.begin(); itOP != opsList.end(); ++itOP) {
    SgAsmExpression* exp = *itOP;
    ROSE_ASSERT(exp);
    if (!seenLeft) {
      // left hand side
      memL = isSgAsmMemoryReferenceExpression(exp);
      regL = isSgAsmRegisterReferenceExpression(exp);
      seenLeft = true;
    } else {
      // right hand side
      memR = isSgAsmMemoryReferenceExpression(exp);
      regR = isSgAsmRegisterReferenceExpression(exp);
      Val = isSgAsmValueExpression(exp);
    }
  }

  if (memL && (regR || Val || memR)) {
    // could be assignment to address
    rose_addr_t addr = BinQSupport::evaluateMemoryExpression(inst, memL);
    // apparently the reference to memory does not always have to be BP but
    // can also be IP if it is a static variable. How will we handle global variables?
    // For that reason writes are recorded regardless of whether they reference BP.
    //bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memL);
    // remember this memory location as a write
    if (debug)
      cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<"  "<<unparseInstruction(inst)<<endl;
    memoryWrites.insert(addr);
  } else if (regL && memR) {
    // could be usage of address
    rose_addr_t addr = BinQSupport::evaluateMemoryExpression(inst, memR);
    bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memR);
    if (!containsBP)
      return;

    // this is memory read with offset to BP
    // did we see a write for this? If not, it is not initialized!
    if (memoryWrites.find(addr) != memoryWrites.end()) {
      // found write, everything is good
      if (debug)
        cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<"  "<<unparseInstruction(inst)<<endl;
    } else if (memoryRead.find(addr) != memoryRead.end()) {
      // found this case before; it was already reported
    } else {
      if (debug)
        cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<"  "<< unparseInstruction(inst) << endl;

      // Use the function resolved above.  Looking it up again through the instruction's parent block gave an
      // empty name in the IDA layout, where the instruction's parent is the function rather than a block.
      string funcname = instFunc->get_name();

      string res = "Possibly uninitialized variable: ";
      res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
        " <"+inst->get_comment()+">  in function: "+funcname;
      result[inst]= res;
      memoryRead.insert(addr);
    }
  }
}
/** Traversal callback that clears the immediate-dominator pointer of every basic block; other nodes are ignored. */
void visit(SgNode *node) {
    SgAsmBlock *bblock = isSgAsmBlock(node);
    if (bblock == NULL)
        return;
    bblock->set_immediate_dominator(NULL);
}
/** Returns a label for the value. The label consists of the base object name (if available) or address, followed by a plus * sign or minus sign, followed by the offset from that object. The empty string is returned if this integer value expression * has no base object (i.e., it's absolute). * * If the base object has no name and the integer value points directly at the object (offset=0) then one of two things * happen: if @p quiet is true, the empty string is returned, otherwise the label is the name of the node type enclosed in an * extra set of angle brackets. This is useful to indicate that a value is relative rather than absolute. For instance, the * instruction listing "call 0x004126bb" is ambiguous as to whether 0x004126bb points to a known, unnamed function, a non-entry * instruction within a function, or some memory location we didn't disassemble. But when labeled with @p quiet being false, * the output will be: * * <ul> * <li>call 0x004126bb<main>; points to a function with a name</li> * <li>call 0x004126bb<<Func>>; points to a function without a name</li> * <li>call 0x004126bb<<Insn>>; points to an instruction that's not a function entry point</li> * <li>call 0x004126bb; points to something that's not been disassembled</li> * </ul> */ std::string SgAsmIntegerValueExpression::get_label(bool quiet/*=false*/) const { SgNode *node = get_base_node(); if (!node) return ""; // Get the name of the base object if possible. 
std::string retval; std::string refkind; if (isSgAsmFunction(node)) { retval = isSgAsmFunction(node)->get_name(); refkind = "Func"; } else if (isSgAsmGenericSymbol(node)) { retval = isSgAsmGenericSymbol(node)->get_name()->get_string(); refkind = "Sym"; } else if (isSgAsmPEImportItem(node)) { retval = isSgAsmPEImportItem(node)->get_name()->get_string(); refkind = "Import"; } else if (isSgAsmGenericSection(node)) { retval = isSgAsmGenericSection(node)->get_short_name(); refkind = "Section"; } else if (isSgAsmInstruction(node)) { refkind = "Insn"; } else if (isSgAsmStaticData(node)) { SgAsmBlock *blk = SageInterface::getEnclosingNode<SgAsmBlock>(node); if (blk && 0!=(blk->get_reason() & SgAsmBlock::BLK_JUMPTABLE)) { refkind = "JumpTable"; } else { refkind = "StaticData"; } } else if (isSgAsmBlock(node)) { refkind = "BBlock"; } else if (isSgAsmPERVASizePair(node)) { refkind = "Rva/Size"; } else { refkind = "Reference"; } // If it has no name, then use something fairly generic. That way we can at least indicate that the value is relative. int64_t offset = (int64_t)get_relative_value(); if (retval.empty()) { retval = "<" + refkind; // extra level of brackets to indicate that it's not a real name if (offset) retval += "@" + StringUtility::addrToString(virtual_address(node), 32); retval += ">"; } // Append the offset, but consider it to be signed. Disregard the number of significant bits in the absolute value and use // a smaller bit width if possible. But don't use the minimum bit width since this makes it hard to tell how many bits // there are at a glance (use only 8, 16, 32, or 64). 
size_t nbits = 0; if (offset > 0xffffffffll) { nbits = 64; retval += "+"; } else if (offset > 0xffffll) { nbits = 32; retval += "+"; } else if (offset > 0xffll) { nbits = 16; retval += "+"; } else if (offset > 9) { nbits = 8; retval += "+"; } else if (offset > 0) { char buf[64]; snprintf(buf, sizeof buf, "+%"PRId64, offset); retval += buf; } else if (offset==0) { /*void*/ } else if (-offset > 0xffffffffll) { nbits = 64; offset = -offset; retval += "-"; } else if (-offset > 0xffffll) { nbits = 32; offset = -offset; retval += "-"; } else if (-offset > 0xffll) { nbits = 16; offset = -offset; retval += "-"; } else if (-offset > 9) { nbits = 8; offset = -offset; retval += "-"; } else { char buf[64]; snprintf(buf, sizeof buf, "%"PRId64, offset); retval += buf; } if (nbits!=0) retval += StringUtility::addrToString(offset, nbits); return retval; }