BinaryAnalysis::Disassembler::AddressSet SgAsmX86Instruction::getSuccessors(const std::vector<SgAsmInstruction*>& insns, bool *complete, const MemoryMap::Ptr &initial_memory) { Stream debug(mlog[DEBUG]); using namespace Rose::BinaryAnalysis::InstructionSemantics2; if (debug) { debug <<"SgAsmX86Instruction::getSuccessors(" <<StringUtility::addrToString(insns.front()->get_address()) <<" for " <<insns.size() <<" instruction" <<(1==insns.size()?"":"s") <<"):" <<"\n"; } BinaryAnalysis::Disassembler::AddressSet successors = SgAsmInstruction::getSuccessors(insns, complete); /* If we couldn't determine all the successors, or a cursory analysis couldn't narrow it down to a single successor then * we'll do a more thorough analysis now. In the case where the cursory analysis returned a complete set containing two * successors, a thorough analysis might be able to narrow it down to a single successor. We should not make special * assumptions about CALL and FARCALL instructions -- their only successor is the specified address operand. 
*/ if (!*complete || successors.size()>1) { const RegisterDictionary *regdict; if (SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(this)) { regdict = RegisterDictionary::dictionary_for_isa(interp); } else { switch (get_baseSize()) { case x86_insnsize_16: regdict = RegisterDictionary::dictionary_i286(); break; case x86_insnsize_32: regdict = RegisterDictionary::dictionary_pentium4(); break; case x86_insnsize_64: regdict = RegisterDictionary::dictionary_amd64(); break; default: ASSERT_not_reachable("invalid x86 instruction size"); } } const RegisterDescriptor IP = regdict->findLargestRegister(x86_regclass_ip, 0); PartialSymbolicSemantics::RiscOperatorsPtr ops = PartialSymbolicSemantics::RiscOperators::instance(regdict); ops->set_memory_map(initial_memory); BaseSemantics::DispatcherPtr cpu = DispatcherX86::instance(ops, IP.get_nbits(), regdict); try { BOOST_FOREACH (SgAsmInstruction *insn, insns) { cpu->processInstruction(insn); SAWYER_MESG(debug) <<" state after " <<insn->toString() <<"\n" <<*ops; } BaseSemantics::SValuePtr ip = ops->readRegister(IP); if (ip->is_number()) { successors.clear(); successors.insert(ip->get_number()); *complete = true; } } catch(const BaseSemantics::Exception &e) {
/** Return control flow successors for a sequence of M68k instructions.
 *
 *  The base-class SgAsmInstruction::getSuccessors(insns, complete) is consulted first. When that result is incomplete,
 *  or contains more than one address, every instruction in @p insns is run through the partial symbolic semantics
 *  dispatcher and the resulting program counter is examined: if it is a concrete number, it replaces the successor set
 *  and @p complete is set to true.
 *
 *  @param insns           Instructions to process, in order.  May be empty; only the debug header looks at the first
 *                         element and that access is guarded.
 *  @param complete        Output flag; written by the base-class call and set to true when the semantic analysis
 *                         resolves the program counter to a single concrete value.
 *  @param initial_memory  Memory map handed to the RISC operators via set_memory_map() before any instruction is
 *                         processed.
 *  @return                The set of successor addresses.
 *
 *  A BaseSemantics::Exception raised while processing instructions is caught and written to the debug stream; the
 *  result of the base-class call is returned unchanged in that case. */
BinaryAnalysis::Disassembler::AddressSet
SgAsmM68kInstruction::getSuccessors(const std::vector<SgAsmInstruction*>& insns, bool *complete,
                                    const BinaryAnalysis::MemoryMap::Ptr &initial_memory)
{
    using namespace Rose::BinaryAnalysis::InstructionSemantics2;
    Stream debug(mlog[DEBUG]);

    if (debug) {
        debug <<"SgAsmM68kInstruction::getSuccessors(";
        // front() on an empty vector is undefined behavior, so the starting address is printed only when one exists.
        if (!insns.empty())
            debug <<StringUtility::addrToString(insns.front()->get_address());
        debug <<" for " <<insns.size() <<" instruction" <<(1==insns.size()?"":"s") <<"):" <<"\n";
    }

    BinaryAnalysis::Disassembler::AddressSet successors = SgAsmInstruction::getSuccessors(insns, complete);

    // If we couldn't determine all the successors, or a cursory analysis couldn't narrow it down to a single successor
    // then we'll do a more thorough analysis now. In the case where the cursory analysis returned a complete set
    // containing two successors, a thorough analysis might be able to narrow it down to a single successor. We should
    // not make special assumptions about function call instructions -- their only successor is the specified address
    // operand.
    if (!*complete || successors.size()>1) {
        using namespace Rose::BinaryAnalysis::InstructionSemantics2::PartialSymbolicSemantics;

        const RegisterDictionary *regdict = RegisterDictionary::dictionary_coldfire_emac();
        RiscOperatorsPtr ops = RiscOperators::instance(regdict);
        ops->set_memory_map(initial_memory);
        // NOTE(review): the literal 32 is presumably the address width in bits -- confirm against DispatcherM68k.
        DispatcherM68kPtr dispatcher = DispatcherM68k::instance(ops, 32);

        try {
            for (size_t i=0; i<insns.size(); ++i) {
                dispatcher->processInstruction(insns[i]);
                if (debug)
                    debug << " state after " <<insns[i]->toString() <<"\n" <<*ops;
            }
            SValuePtr ip = SValue::promote(ops->readRegister(dispatcher->REG_PC));
            if (ip->is_number()) {
                successors.clear();
                successors.insert(ip->get_number());
                *complete = true; /*this is the complete set of successors*/
            }
        } catch(const BaseSemantics::Exception& e) {
            /* Abandon entire basic block if we hit an instruction that's not implemented. */
            debug <<e <<"\n";
        }
    }

    if (debug) {
        debug <<" successors:";
        BOOST_FOREACH (rose_addr_t va, successors)
            debug <<" " <<StringUtility::addrToString(va);
        debug <<(*complete?"":"...") <<"\n";
    }

    return successors;
}
/** Return control flow successors. See base class for full documentation.
 *
 *  The fall-through address is computed as get_address()+4.
 *
 *  @param complete  Output flag: true when the returned set is the complete set of successors, false when at least one
 *                   successor could not be determined.
 *  @return          The successor addresses that could be determined from this instruction alone. */
BinaryAnalysis::Disassembler::AddressSet
SgAsmArmInstruction::getSuccessors(bool *complete) {
    BinaryAnalysis::Disassembler::AddressSet retval;
    const std::vector<SgAsmExpression*> &exprs = get_operandList()->get_operands();
    *complete = true; /*assume retval is the complete set of successors for now*/

    switch (get_kind()) {
        case arm_b:
        case arm_bl:
        case arm_blx:
        case arm_bx: {
            /* Branch target */
            ROSE_ASSERT(exprs.size()==1);
            SgAsmExpression *dest = exprs[0];
            if (isSgAsmValueExpression(dest)) {
                rose_addr_t target_va = SageInterface::getAsmConstant(isSgAsmValueExpression(dest));
                retval.insert(target_va);
            } else {
                /* Could also be a register reference expression, but we don't know the successor in that case. */
                *complete = false;
            }

            /* Fall-through address */
            if (get_condition()!=arm_cond_al)
                retval.insert(get_address()+4);
            break;
        }

        case arm_bxj: {
            /* First argument is the register that holds the next instruction pointer value to use in the case that
             * Jazelle is not available. We only know the successor if the register is the instruction pointer, in
             * which case the successor is the fall-through address. */
            ROSE_ASSERT(exprs.size()==1);
            SgAsmRegisterReferenceExpression *rre = isSgAsmRegisterReferenceExpression(exprs[0]);
            ROSE_ASSERT(rre);
            if (rre->get_descriptor().get_major()==arm_regclass_gpr && rre->get_descriptor().get_minor()==15) {
                retval.insert(get_address()+4);
            } else {
                *complete = false;
            }
            break;
        }

        case arm_cmn:
        case arm_cmp:
        case arm_teq:
        case arm_tst:
            /* Comparison and test instructions don't ever affect the instruction pointer; they only fall through */
            retval.insert(get_address()+4);
            break;

        case arm_bkpt:
        case arm_swi:
        case arm_undefined:
        case arm_unknown_instruction:
            /* No known successors for interrupt-generating instructions */
            break;

        default:
            if (!modifies_ip(this)) {
                /* The instruction pointer is untouched, so execution always continues with the next instruction. */
                retval.insert(get_address()+4);
            } else {
                /* The instruction writes the instruction pointer with a value we cannot determine here, so the set
                 * can never be complete. A conditionally executed form additionally falls through when its condition
                 * does not hold. */
                *complete = false;
                if (get_condition()!=arm_cond_al)
                    retval.insert(get_address()+4);
            }
            break;
    }
    return retval;
}
/** Return the control flow successors of this single x86 instruction.
 *
 *  The instruction kind selects which of two candidate addresses are reported: the operand-specified branch target, as
 *  obtained from getBranchTarget(), and the fall-through address get_address()+get_size().
 *
 *  @param complete  Output flag: starts out true and is cleared whenever some successor cannot be determined here.
 *  @return          The successor addresses that could be determined from this instruction alone. */
BinaryAnalysis::Disassembler::AddressSet
SgAsmX86Instruction::getSuccessors(bool *complete) {
    BinaryAnalysis::Disassembler::AddressSet successors;
    bool usesBranchTarget = false;                      // successor comes from the address operand
    bool fallsThrough = false;                          // successor is the next instruction in memory
    *complete = true;                                   // optimistic until a case below says otherwise

    // Step 1: classify the instruction.
    switch (get_kind()) {
        // Unconditional transfer to the operand-specified address. A CALL is deliberately not assumed to return to
        // the fall-through address.
        case x86_call:
        case x86_farcall:
        case x86_jmp:
        case x86_farjmp:
            usesBranchTarget = true;
            break;

        // Conditional transfer: either the operand-specified address or the next instruction.
        case x86_ja:
        case x86_jae:
        case x86_jb:
        case x86_jbe:
        case x86_jcxz:
        case x86_jecxz:
        case x86_jrcxz:
        case x86_je:
        case x86_jg:
        case x86_jge:
        case x86_jl:
        case x86_jle:
        case x86_jne:
        case x86_jno:
        case x86_jns:
        case x86_jo:
        case x86_jpe:
        case x86_jpo:
        case x86_js:
        case x86_loop:
        case x86_loopnz:
        case x86_loopz:
            usesBranchTarget = true;
            fallsThrough = true;
            break;

        // Interrupts and system calls are assumed to return, so the fall-through address is the probable return
        // point, but the set is not complete.
        case x86_int:
        case x86_int1:
        case x86_int3:
        case x86_into:
        case x86_syscall:
            fallsThrough = true;
            *complete = false;
            break;

        // Unconditional transfer to an address that is only known at run time.
        case x86_ret:
        case x86_iret:
        case x86_rsm:
        case x86_sysret:
        case x86_ud2:
        case x86_retf:
            *complete = false;
            break;

        // No successor at all.
        case x86_hlt:
            break;

        // Successors are unknown.
        case x86_unknown_instruction:
            *complete = false;
            break;

        // Everything else always continues with the next instruction.
        default:
            fallsThrough = true;
            break;
    }

    // Step 2: collect the addresses selected above.
    if (usesBranchTarget) {
        rose_addr_t target;
        if (getBranchTarget(&target)) {
            successors.insert(target);
        } else {
            *complete = false;
        }
    }
    if (fallsThrough)
        successors.insert(get_address() + get_size());

    return successors;
}
/** Return control flow successors. See base class for full documentation. */ BinaryAnalysis::Disassembler::AddressSet SgAsmX86Instruction::getSuccessors(const std::vector<SgAsmInstruction*>& insns, bool *complete, const MemoryMap *initial_memory) { using namespace rose::BinaryAnalysis::InstructionSemantics; Stream debug(mlog[DEBUG]); if (debug) { debug <<"SgAsmX86Instruction::getSuccessors(" <<StringUtility::addrToString(insns.front()->get_address()) <<" for " <<insns.size() <<" instruction" <<(1==insns.size()?"":"s") <<"):" <<"\n"; } BinaryAnalysis::Disassembler::AddressSet successors = SgAsmInstruction::getSuccessors(insns, complete); /* If we couldn't determine all the successors, or a cursory analysis couldn't narrow it down to a single successor then * we'll do a more thorough analysis now. In the case where the cursory analysis returned a complete set containing two * successors, a thorough analysis might be able to narrow it down to a single successor. We should not make special * assumptions about CALL and FARCALL instructions -- their only successor is the specified address operand. */ if (!*complete || successors.size()>1) { #if 0 /* Use the most robust semantic analysis available. Warning: this can be very slow, especially when an SMT solver is * involved! 
*/ # if defined(ROSE_YICES) || defined(ROSE_HAVE_LIBYICES) YicesSolver yices; if (yices.available_linkage() & YicesSolver::LM_LIBRARY) { yices.set_linkage(YicesSolver::LM_LIBRARY); } else { yices.set_linkage(YicesSolver::LM_EXECUTABLE); } SMTSolver *solver = &yices; # else SMTSolver *solver = NULL; # endif if (debug && solver) solver->set_debug(stderr); typedef SymbolicSemantics::Policy<> Policy; typedef SymbolicSemantics::ValueType<32> RegisterType; typedef X86InstructionSemantics<Policy, SymbolicSemantics::ValueType> Semantics; Policy policy(solver); #else typedef PartialSymbolicSemantics::Policy<> Policy; typedef PartialSymbolicSemantics::ValueType<32> RegisterType; typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics; Policy policy; policy.set_map(initial_memory); #endif try { Semantics semantics(policy); for (size_t i=0; i<insns.size(); i++) { SgAsmX86Instruction* insn = isSgAsmX86Instruction(insns[i]); semantics.processInstruction(insn); if (debug) { debug << " state after " <<unparseInstructionWithAddress(insn) <<"\n" <<policy.get_state(); } } const RegisterType &newip = policy.get_ip(); if (newip.is_known()) { successors.clear(); successors.insert(newip.known_value()); *complete = true; /*this is the complete set of successors*/ } } catch(const Semantics::Exception& e) { /* Abandon entire basic block if we hit an instruction that's not implemented. */ debug <<e <<"\n"; } catch(const Policy::Exception& e) { /* Abandon entire basic block if the semantics policy cannot handle the instruction. */ debug <<e <<"\n"; } } if (debug) { debug <<" successors:"; for (BinaryAnalysis::Disassembler::AddressSet::const_iterator si=successors.begin(); si!=successors.end(); ++si) debug <<" " <<StringUtility::addrToString(*si); debug <<(*complete?"":"...") <<"\n"; } return successors; }
// Return the control flow successors of this single M68k instruction.
//
// The instruction kind selects which of two candidate addresses are reported: the branch target, as obtained from
// getBranchTarget(), and the fall-through address get_address()+get_size().
//
// complete: output flag; starts out true and is cleared whenever some successor cannot be determined here.
// Returns the successor addresses that could be determined from this instruction alone.
BinaryAnalysis::Disassembler::AddressSet
SgAsmM68kInstruction::getSuccessors(bool *complete) {
    BinaryAnalysis::Disassembler::AddressSet successors;
    bool usesBranchTarget = false;                      // successor comes from getBranchTarget()
    bool fallsThrough = false;                          // successor is the next instruction in memory
    *complete = true;

    // Step 1: classify the instruction.
    //
    // There is intentionally no case for instructions without successors; a "case m68k_halt:" label that would just
    // break remains disabled.
    switch (get_kind()) {
        // Successors are unknown.
        case m68k_unknown_instruction:
        case m68k_illegal:
        case m68k_trap:
        // A single successor exists but its address is unknown.
        case m68k_rtd:
        case m68k_rtm:
        case m68k_rtr:
        case m68k_rts:
            *complete = false;
            break;

        // The fall-through address plus one other (known or unknown) address.
        case m68k_bcc:
        case m68k_bcs:
        case m68k_beq:
        case m68k_bge:
        case m68k_bgt:
        case m68k_bhi:
        case m68k_ble:
        case m68k_bls:
        case m68k_blt:
        case m68k_bmi:
        case m68k_bne:
        case m68k_bpl:
        case m68k_bvc:
        case m68k_bvs:
        case m68k_bkpt:
        case m68k_chk:
        case m68k_chk2:
        case m68k_dbhi:
        case m68k_dbls:
        case m68k_dbcc:
        case m68k_dbcs:
        case m68k_dbne:
        case m68k_dbeq:
        case m68k_dbf:
        case m68k_dbvc:
        case m68k_dbvs:
        case m68k_dbpl:
        case m68k_dbmi:
        case m68k_dbge:
        case m68k_dblt:
        case m68k_dbgt:
        case m68k_dble:
        case m68k_fbeq:
        case m68k_fbne:
        case m68k_fbgt:
        case m68k_fbngt:
        case m68k_fbge:
        case m68k_fbnge:
        case m68k_fblt:
        case m68k_fbnlt:
        case m68k_fble:
        case m68k_fbnle:
        case m68k_fbgl:
        case m68k_fbngl:
        case m68k_fbgle:
        case m68k_fbngle:
        case m68k_fbogt:
        case m68k_fbule:
        case m68k_fboge:
        case m68k_fbult:
        case m68k_fbolt:
        case m68k_fbuge:
        case m68k_fbole:
        case m68k_fbugt:
        case m68k_fbogl:
        case m68k_fbueq:
        case m68k_fbor:
        case m68k_fbun:
        case m68k_fbf:
        case m68k_fbt:
        case m68k_fbsf:
        case m68k_fbst:
        case m68k_fbseq:
        case m68k_fbsne:
        case m68k_trapt:
        case m68k_traphi:
        case m68k_trapls:
        case m68k_trapcc:
        case m68k_trapcs:
        case m68k_trapne:
        case m68k_trapeq:
        case m68k_trapvc:
        case m68k_trapvs:
        case m68k_trappl:
        case m68k_trapmi:
        case m68k_trapge:
        case m68k_traplt:
        case m68k_trapgt:
        case m68k_traple:
        case m68k_trapv:
            usesBranchTarget = true;
            fallsThrough = true;
            break;

        // Unconditional branches: only the branch target.
        case m68k_bra:
        case m68k_bsr:
        case m68k_callm:
        case m68k_jmp:
        case m68k_jsr:
            usesBranchTarget = true;
            break;

        // Instructions that always only fall through.
        case m68k_dbt:                                  // no-op
        case m68k_trapf:                                // no-op
        default:
            fallsThrough = true;
            break;
    }

    // Step 2: collect the addresses selected above.
    if (usesBranchTarget) {
        rose_addr_t target;
        if (getBranchTarget(&target)) {
            successors.insert(target);
        } else {
            *complete = false;
        }
    }
    if (fallsThrough)
        successors.insert(get_address() + get_size());

    return successors;
}