// Print a single register, identified by its descriptor, as one line of the form
//     <prefix><abbr, left-justified in 8 columns>= { <value> }
// The value is read from the register state of `ops`' current state.
void operator()(const RegisterDescriptor &desc, const char *abbr) {
    BaseSemantics::RegisterStatePtr registers = ops->get_state()->get_register_state();

    // Label part. fmt.restore() runs before the value is printed (presumably undoing the width and
    // alignment manipulators used for the label -- confirm against FormatRestorer).
    FormatRestorer savedFormat(o);
    o <<prefix <<std::setw(8) <<std::left <<abbr <<"= { ";
    savedFormat.restore();

    // Value part.
    BaseSemantics::SValuePtr value = registers->readRegister(desc, ops.get());
    o <<*value <<" }\n";
}
// Produce the textual form of a semantic state after normalizing it.
//
// Returns an empty string when state_ is null. Otherwise the following adjustments are applied to a private clone
// (the state passed in is never edited; every erase below happens after state->clone()):
//   - if the register state is a RegisterStateGeneric that at least partly stores the instruction pointer and the
//     instruction pointer read through the RISC operators is a concrete number, that register is erased;
//   - if the memory state is a MemoryCellState, cells matching CellErasurePredicate are erased.
// The result is printed with latest-writer and property output disabled.
std::string
NoOperation::StateNormalizer::toString(const BaseSemantics::DispatcherPtr &cpu, const BaseSemantics::StatePtr &state_) {
    BaseSemantics::RiscOperatorsPtr ops = cpu->get_operators();
    if (!state_)
        return "";

    BaseSemantics::StatePtr state = state_;
    bool haveOwnCopy = false;                           // becomes true once `state` refers to our clone

    // If possible and appropriate, remove the instruction pointer register
    const RegisterDescriptor regIp = cpu->instructionPointerRegister();
    BaseSemantics::RegisterStateGenericPtr genericRegs =
        BaseSemantics::RegisterStateGeneric::promote(state->registerState());
    if (genericRegs && genericRegs->is_partly_stored(regIp)) {
        BaseSemantics::SValuePtr ipValue = ops->readRegister(regIp);
        if (ipValue->is_number()) {
            state = state->clone();
            haveOwnCopy = true;
            BaseSemantics::RegisterStateGeneric::promote(state->registerState())->erase_register(regIp, ops.get());
        }
    }

    // Get the memory state, cloning the state if not done so above.
    BaseSemantics::MemoryCellStatePtr cells =
        boost::dynamic_pointer_cast<BaseSemantics::MemoryCellState>(state->memoryState());
    if (cells) {
        if (!haveOwnCopy) {
            state = state->clone();
            haveOwnCopy = true;
            cells = BaseSemantics::MemoryCellState::promote(state->memoryState());
        }
    }

    // Erase memory that has never been written (i.e., cells that sprang into existence by reading an address) or which
    // appears to have been recently popped from the stack. The stack pointer is read here regardless of whether there
    // is any cell-based memory to filter.
    CellErasurePredicate shouldErase(ops, ops->readRegister(cpu->stackPointerRegister()), ignorePoppedMemory_);
    if (cells)
        cells->eraseMatchingCells(shouldErase);

    // Render without the latest-writer and property annotations.
    BaseSemantics::Formatter format;
    format.set_show_latest_writers(false);
    format.set_show_properties(false);
    std::ostringstream out;
    out <<(*state+format);
    return out.str();
}
int main(int argc, char *argv[]) { Diagnostics::initialize(); ::mlog = Diagnostics::Facility("tool", Diagnostics::destination); Diagnostics::mfacilities.insertAndAdjust(::mlog); // Parse the command-line Partitioner2::Engine engine; std::vector<std::string> specimenNames = parseCommandLine(argc, argv, engine); if (specimenNames.empty()) throw std::runtime_error("no specimen specified; see --help"); // Load specimen into memory MemoryMap map = engine.loadSpecimens(specimenNames); // Configure instruction semantics Partitioner2::Partitioner partitioner = engine.createPartitioner(); Disassembler *disassembler = engine.obtainDisassembler(); const RegisterDictionary *regdict = disassembler->get_registers(); if (disassembler->dispatcher() == NULL) throw std::runtime_error("no instruction semantics for this architecture"); BaseSemantics::RiscOperatorsPtr ops = InstructionSemantics2::ConcreteSemantics::RiscOperators::instance(regdict); BaseSemantics::DispatcherPtr cpu = disassembler->dispatcher()->create(ops); ConcreteSemantics::MemoryState::promote(ops->currentState()->memoryState())->memoryMap(map); // Find starting address rose_addr_t va = 0; if (settings.startVa) { va = *settings.startVa; } else if (engine.isaName() == "coldfire") { // Use the interrupt vector to initialize the stack pointer and instruction pointer. 
uint32_t sp, ip; if (4 != map.at(0).limit(4).read((uint8_t*)&sp).size()) throw std::runtime_error("cannot read stack pointer at address 0x00000000"); ops->writeRegister(disassembler->stackPointerRegister(), ops->number_(32, ByteOrder::be_to_host(sp))); if (4 != map.at(4).limit(4).read((uint8_t*)&ip).size()) throw std::runtime_error("cannot read instruction pointer at address 0x00000004"); va = ByteOrder::be_to_host(ip); } else if (!map.atOrAfter(0).require(MemoryMap::EXECUTABLE).next().assignTo(va)) { throw std::runtime_error("no starting address specified and none marked executable"); } ops->writeRegister(disassembler->instructionPointerRegister(), ops->number_(32, va)); // Execute map.dump(::mlog[INFO]); while (1) { va = ops->readRegister(disassembler->instructionPointerRegister())->get_number(); SgAsmInstruction *insn = partitioner.instructionProvider()[va]; SAWYER_MESG(::mlog[TRACE]) <<unparseInstructionWithAddress(insn, NULL, regdict) <<"\n"; try { cpu->processInstruction(insn); } catch (const BaseSemantics::Exception &e) { ::mlog[WARN] <<e <<"\n"; } } // std::cout <<"Final state:\n"; // std::cout <<*ops->currentState(); }
// Append a subdomain to this multi-domain RiscOperators and return its index.
//
//   subdomain  operators for the new subdomain; must not be null
//   name       name stored in the formatter's subdomain_names at the new index
//   activate   stored in the `active` list at the new index
//
// The subdomain's prototypical value is installed as the subvalue at the same index of this object's own
// prototypical value.
size_t
RiscOperators::add_subdomain(const BaseSemantics::RiscOperatorsPtr &subdomain, const std::string &name, bool activate) {
    ASSERT_not_null(subdomain);

    // The new entry goes at the end of the parallel containers.
    const size_t slot = subdomains.size();
    subdomains.push_back(subdomain);
    active.push_back(activate);

    // Grow the name table if it is too short to hold this slot, then record the name.
    if (formatter.subdomain_names.size() <= slot)
        formatter.subdomain_names.resize(slot+1, "");
    formatter.subdomain_names[slot] = name;

    SValue::promote(protoval())->set_subvalue(slot, subdomain->protoval());
    return slot;
}
// Build the state in which analysis of `insn` starts.
//
// When a normalizer is configured it supplies the state. Otherwise the state is a cleared clone of the CPU's current
// state whose instruction pointer holds the address of `insn`. In either case, if an initial stack pointer value has
// been configured, the stack pointer register is overwritten with that concrete value.
//
// Both `insn` and the CPU must be non-null.
BaseSemantics::StatePtr
NoOperation::initialState(SgAsmInstruction *insn) const {
    ASSERT_not_null(insn);
    ASSERT_not_null(cpu_);

    BaseSemantics::StatePtr result;
    if (normalizer_) {
        result = normalizer_->initialState(cpu_, insn);
    } else {
        result = cpu_->currentState()->clone();
        result->clear();
        const RegisterDescriptor regIp = cpu_->instructionPointerRegister();
        result->writeRegister(regIp, cpu_->number_(regIp.get_nbits(), insn->get_address()),
                              cpu_->get_operators().get());
    }

    // Set the stack pointer to a concrete value
    if (initialSp_) {
        BaseSemantics::RiscOperatorsPtr ops = cpu_->get_operators();
        const RegisterDescriptor regSp = cpu_->stackPointerRegister();
        result->writeRegister(regSp, ops->number_(regSp.get_nbits(), *initialSp_), ops.get());
    }

    return result;
}
/* Analyze a single interpretation a block at a time.
 *
 * All x86 instructions that belong to a function not flagged FUNC_LEFTOVERS are collected, keyed by address. The
 * lowest-addressed remaining instruction starts a "block"; instructions are then processed by following the concrete
 * value of EIP until one of the stop conditions in the inner loop is met. Each instruction is removed from the pool
 * when it is selected, so no instruction is processed twice. The machine state is printed to std::cout after every
 * instruction. */
static void
analyze_interp(SgAsmInterpretation *interp) {
    /* Get the set of all instructions except instructions that are part of left-over blocks. */
    struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmX86Instruction*> {
        void visit(SgNode *node) {
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(node);
            SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn);
            if (!func || 0 != (func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS))
                return;
            insert(std::make_pair(insn->get_address(), insn));
        }
    } insns;
    insns.traverse(interp, postorder);

    while (!insns.empty()) {
        std::cout <<"=====================================================================================\n"
                  <<"=== Starting a new basic block ===\n"
                  <<"=====================================================================================\n";

        /* Take the first remaining instruction as the start of the block. */
        AllInstructions::iterator cursor = insns.begin();
        SgAsmX86Instruction *insn = cursor->second;
        insns.erase(cursor);

        /* Fresh operators, formatter, and dispatcher for every block. */
        BaseSemantics::RiscOperatorsPtr operators = make_ops();
        BaseSemantics::Formatter formatter;
        formatter.set_suppress_initial_values();
        formatter.set_show_latest_writers(do_usedef);
        BaseSemantics::DispatcherPtr dispatcher;
        if (!do_trace) {
            dispatcher = DispatcherX86::instance(operators, 32);
        } else {
            // Enable RiscOperators tracing, but turn off a bunch of info that makes comparisons with a known good
            // answer difficult.
            Sawyer::Message::PrefixPtr prefix = Sawyer::Message::Prefix::instance();
            prefix->showProgramName(false);
            prefix->showThreadId(false);
            prefix->showElapsedTime(false);
            prefix->showFacilityName(Sawyer::Message::Prefix::NEVER);
            prefix->showImportance(false);
            Sawyer::Message::UnformattedSinkPtr sink = Sawyer::Message::StreamSink::instance(std::cout);
            sink->prefix(prefix);
            sink->defaultPropertiesNS().useColor = false;
            TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators);
            trace->stream().destination(sink);
            trace->stream().enable();
            dispatcher = DispatcherX86::instance(trace, 32);
        }
        operators->set_solver(make_solver());

        // The fpstatus_top register must have a concrete value if we'll use the x86 floating-point stack (e.g., st(0))
        if (const RegisterDescriptor *REG_FPSTATUS_TOP = regdict->lookup("fpstatus_top")) {
            BaseSemantics::SValuePtr st_top = operators->number_(REG_FPSTATUS_TOP->get_nbits(), 0);
            operators->writeRegister(*REG_FPSTATUS_TOP, st_top);
        }

#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        BaseSemantics::SValuePtr orig_esp;
        if (do_test_subst) {
            // Only request the orig_esp if we're going to use it later because it causes an esp value to be
            // instantiated in the state, which is printed in the output, and thus changes the answer.
            BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large();
            orig_esp = operators->readRegister(*regdict->lookup("esp"));
            std::cout <<"Original state:\n" <<*operators;
        }
#endif

        /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the
         * value of the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or
         * which has already been processed. */
        for (;;) {
            /* Analyze current instruction */
            std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n";
            try {
                dispatcher->processInstruction(insn);
#if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/
                show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output
#else
                std::cout <<(*operators + formatter);
#endif
            } catch (const BaseSemantics::Exception &e) {
                std::cout <<e <<"\n";
            }

            /* Never follow CALL instructions */
            if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall)
                break;

            /* Get next instruction of this block */
            BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip"));
            if (!ip->is_number())
                break;
            const rose_addr_t next_va = ip->get_number();
            cursor = insns.find(next_va);
            if (cursor == insns.end())
                break;
            insn = cursor->second;
            insns.erase(cursor);
        }

        // Test substitution on the symbolic state.
#if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN
        if (do_test_subst) {
            SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp);
            BaseSemantics::SValuePtr newvar = operators->undefined_(32);
            newvar->set_comment("frame_pointer");
            SymbolicSemantics::SValuePtr to =
                SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4)));
            std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n";
            SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to);
            std::cout <<"Substituted state:\n" <<(*operators+formatter);
        }
#endif
    }
}
// Print a register identified by name. The name is resolved through the register dictionary of `ops`' current
// register state and must exist there (checked with assert). The line is labelled with `abbr` when one is given and
// with `name` otherwise.
void operator()(const char *name, const char *abbr=NULL) {
    const RegisterDictionary *dictionary = ops->get_state()->get_register_state()->get_register_dictionary();
    const RegisterDescriptor *desc = dictionary->lookup(name);
    assert(desc);

    const char *label = abbr ? abbr : name;
    (*this)(*desc, label);
}
// Show the register state for BaseSemantics::RegisterStateGeneric in the same format as for RegisterStateX86. This is // for comparison of the two register states when verifying results. It's also close to the format used by the old binary // semantics API. void show_state(const BaseSemantics::RiscOperatorsPtr &ops) { #if SEMANTIC_DOMAIN == MULTI_DOMAIN std::cout <<*ops; return; #endif struct ShowReg { BaseSemantics::RiscOperatorsPtr ops; std::ostream &o; std::string prefix; ShowReg(const BaseSemantics::RiscOperatorsPtr &ops, std::ostream &o, const std::string &prefix) : ops(ops), o(o), prefix(prefix) {} void operator()(const char *name, const char *abbr=NULL) { const RegisterDictionary *regdict = ops->get_state()->get_register_state()->get_register_dictionary(); const RegisterDescriptor *desc = regdict->lookup(name); assert(desc); (*this)(*desc, abbr?abbr:name); } void operator()(const RegisterDescriptor &desc, const char *abbr) { BaseSemantics::RegisterStatePtr regstate = ops->get_state()->get_register_state(); FormatRestorer fmt(o); o <<prefix <<std::setw(8) <<std::left <<abbr <<"= { "; fmt.restore(); BaseSemantics::SValuePtr val = regstate->readRegister(desc, ops.get()); o <<*val <<" }\n"; } void operator()(unsigned majr, unsigned minr, unsigned offset, unsigned nbits, const char *abbr) { (*this)(RegisterDescriptor(majr, minr, offset, nbits), abbr); } } show(ops, std::cout, " "); std::cout <<"registers:\n"; show("eax", "ax"); show("ecx", "cx"); show("edx", "dx"); show("ebx", "bx"); show("esp", "sp"); show("ebp", "bp"); show("esi", "si"); show("edi", "di"); show("es"); show("cs"); show("ss"); show("ds"); show("fs"); show("gs"); show("cf"); show(x86_regclass_flags, 0, 1, 1, "?1"); show("pf"); show(x86_regclass_flags, 0, 3, 1, "?3"); show("af"); show(x86_regclass_flags, 0, 5, 1, "?5"); show("zf"); show("sf"); show("tf"); show("if"); show("df"); show("of"); show(x86_regclass_flags, 0, 12, 1, "iopl0"); show(x86_regclass_flags, 0, 13, 1, "iopl1"); show("nt"); 
show(x86_regclass_flags, 0, 15, 1, "?15"); show("rf"); show("vm"); show(x86_regclass_flags, 0, 18, 1, "ac"); show(x86_regclass_flags, 0, 19, 1, "vif"); show(x86_regclass_flags, 0, 20, 1, "vip"); show(x86_regclass_flags, 0, 21, 1, "id"); show(x86_regclass_flags, 0, 22, 1, "?22"); show(x86_regclass_flags, 0, 23, 1, "?23"); show(x86_regclass_flags, 0, 24, 1, "?24"); show(x86_regclass_flags, 0, 25, 1, "?25"); show(x86_regclass_flags, 0, 26, 1, "?26"); show(x86_regclass_flags, 0, 27, 1, "?27"); show(x86_regclass_flags, 0, 28, 1, "?28"); show(x86_regclass_flags, 0, 29, 1, "?29"); show(x86_regclass_flags, 0, 30, 1, "?30"); show(x86_regclass_flags, 0, 31, 1, "?31"); show("eip", "ip"); BaseSemantics::Formatter memfmt; memfmt.set_line_prefix(" "); std::cout <<"memory:\n"; ops->get_state()->print_memory(std::cout, memfmt); }
// Construct from a set of RISC operators. The RegisterStateGeneric base is initialized with the operators'
// prototypical value and with the register dictionary of the operators' current register state; the operators
// themselves are retained in ops_.
explicit State(const BaseSemantics::RiscOperatorsPtr &ops)
    : BaseSemantics::RegisterStateGeneric(ops->get_protoval(),
                                          ops->get_state()->get_register_state()->get_register_dictionary()),
      ops_(ops) {
}
/* Analyze a single interpretation a block at a time */ static void analyze_interp(SgAsmInterpretation *interp) { /* Get the set of all instructions except instructions that are part of left-over blocks. */ struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmx86Instruction*> { void visit(SgNode *node) { SgAsmx86Instruction *insn = isSgAsmx86Instruction(node); SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn); if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) insert(std::make_pair(insn->get_address(), insn)); } } insns; insns.traverse(interp, postorder); while (!insns.empty()) { std::cout <<"=====================================================================================\n" <<"=== Starting a new basic block ===\n" <<"=====================================================================================\n"; AllInstructions::iterator si = insns.begin(); SgAsmx86Instruction *insn = si->second; insns.erase(si); #if SEMANTIC_API == NEW_API BaseSemantics::RiscOperatorsPtr operators = make_ops(); BaseSemantics::Formatter formatter; formatter.set_suppress_initial_values(); BaseSemantics::DispatcherPtr dispatcher; if (do_trace) { TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators); trace->set_stream(stdout); dispatcher = DispatcherX86::instance(trace); } else { dispatcher = DispatcherX86::instance(operators); } operators->set_solver(make_solver()); #else // OLD_API typedef X86InstructionSemantics<MyPolicy, MyValueType> MyDispatcher; MyPolicy operators; MyDispatcher dispatcher(operators); # if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN operators.set_solver(make_solver()); SymbolicSemantics::Formatter formatter; formatter.expr_formatter.do_rename = true; formatter.expr_formatter.add_renames = true; # elif SEMANTIC_DOMAIN != FINDCONST_DOMAIN && SEMANTIC_DOMAIN != FINDCONSTABI_DOMAIN BaseSemantics::Formatter formatter; # endif #endif #if SEMANTIC_DOMAIN == 
SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API BaseSemantics::SValuePtr orig_esp; if (do_test_subst) { // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated // in the state, which is printed in the output, and thus changes the answer. BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large(); orig_esp = operators->readRegister(*regdict->lookup("esp")); std::cout <<"Original state:\n" <<*operators; } #endif /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already * been processed. */ while (1) { /* Analyze current instruction */ std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n"; #if SEMANTIC_API == NEW_API try { dispatcher->processInstruction(insn); # if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/ show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output # else std::cout <<(*operators + formatter); # endif } catch (const BaseSemantics::Exception &e) { std::cout <<e <<"\n"; } #else // OLD API try { dispatcher.processInstruction(insn); # if SEMANTIC_DOMAIN == FINDCONST_DOMAIN || SEMANTIC_DOMAIN == FINDCONSTABI_DOMAIN operators.print(std::cout); # else operators.print(std::cout, formatter); # endif } catch (const MyDispatcher::Exception &e) { std::cout <<e <<"\n"; break; # if SEMANTIC_DOMAIN == PARTSYM_DOMAIN } catch (const MyPolicy::Exception &e) { std::cout <<e <<"\n"; break; # endif } catch (const SMTSolver::Exception &e) { std::cout <<e <<" [ "<<unparseInstructionWithAddress(insn) <<"]\n"; break; } #endif /* Never follow CALL instructions */ if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall) break; /* Get next instruction of this block */ #if SEMANTIC_API == NEW_API BaseSemantics::SValuePtr ip = 
operators->readRegister(dispatcher->findRegister("eip")); if (!ip->is_number()) break; rose_addr_t next_addr = ip->get_number(); #else // OLD_API # if SEMANTIC_DOMAIN == PARTSYM_DOMAIN || SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN MyValueType<32> ip = operators.get_ip(); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # elif SEMANTIC_DOMAIN == NULL_DOMAIN || SEMANTIC_DOMAIN == INTERVAL_DOMAIN MyValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # elif SEMANTIC_DOMAIN == MULTI_DOMAIN PartialSymbolicSemantics::ValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP) .get_subvalue(MyMultiSemanticsClass::SP0()); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # else if (operators.newIp->get().name) break; rose_addr_t next_addr = operators.newIp->get().offset; # endif #endif si = insns.find(next_addr); if (si==insns.end()) break; insn = si->second; insns.erase(si); } // Test substitution on the symbolic state. #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API if (do_test_subst) { SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp); BaseSemantics::SValuePtr newvar = operators->undefined_(32); newvar->set_comment("frame_pointer"); SymbolicSemantics::SValuePtr to = SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4))); std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n"; SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to); std::cout <<"Substituted state:\n" <<(*operators+formatter); } #endif } }
// see base class bool SgAsmX86Instruction::isFunctionCallSlow(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va) { if (isFunctionCallFast(insns, target, return_va)) return true; // The following stuff works only if we have a relatively complete AST. static const size_t EXECUTION_LIMIT = 10; // max size of basic blocks for expensive analyses if (insns.empty()) return false; SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back()); if (!last) return false; SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(last); SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func); // Slow method: Emulate the instructions and then look at the EIP and stack. If the EIP points outside the current // function and the top of the stack holds an address of an instruction within the current function, then this must be a // function call. if (interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; const InstructionMap &imap = interp->get_instruction_map(); const RegisterDictionary *regdict = RegisterDictionary::dictionary_for_isa(interp); SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); ASSERT_not_null(ops); const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp); DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits()); SValuePtr orig_esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP)); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // If the next instruction address is concrete but does not point to a function entry point, then this is not a 
call. SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP)); if (eip->is_number()) { rose_addr_t target_va = eip->get_number(); SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(target_va, NULL)); if (!target_func || target_va!=target_func->get_entry_va()) return false; } // If nothing was pushed onto the stack, then this isn't a function call. const size_t spWidth = dispatcher->REG_anySP.get_nbits(); SValuePtr esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP)); SValuePtr stack_delta = SValue::promote(ops->add(esp, ops->negate(orig_esp))); SValuePtr stack_delta_sign = SValue::promote(ops->extract(stack_delta, spWidth-1, spWidth)); if (stack_delta_sign->is_number() && 0==stack_delta_sign->get_number()) return false; // If the top of the stack does not contain a concrete value or the top of the stack does not point to an instruction // in this basic block's function, then this is not a function call. const size_t ipWidth = dispatcher->REG_anyIP.get_nbits(); SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, esp, esp->undefined_(ipWidth), esp->boolean_(true))); if (top->is_number()) { rose_addr_t va = top->get_number(); SgAsmFunction *return_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(va, NULL)); if (!return_func || return_func!=func) { return false; } } else { return false; } // Since EIP might point to a function entry address and since the top of the stack contains a pointer to an // instruction in this function, we assume that this is a function call. if (target && eip->is_number()) *target = eip->get_number(); if (return_va && top->is_number()) *return_va = top->get_number(); return true; } // Similar to the above method, but works when all we have is the basic block (e.g., this case gets hit quite a bit from // the Partitioner). 
Returns true if, after executing the basic block, the top of the stack contains the fall-through // address of the basic block. We depend on our caller to figure out if EIP is reasonably a function entry address. if (!interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(insns.front()); ASSERT_not_null(x86insn); #if 1 // [Robb P. Matzke 2015-03-03]: FIXME[Robb P. Matzke 2015-03-03]: not ready yet; x86-64 semantics still under construction if (x86insn->get_addressSize() != x86_insnsize_32) return false; #endif const RegisterDictionary *regdict = registersForInstructionSize(x86insn->get_addressSize()); const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits()); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // Look at the top of the stack const size_t ipWidth = dispatcher->REG_anyIP.get_nbits(); SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, ops->readRegister(SP), ops->protoval()->undefined_(ipWidth), ops->protoval()->boolean_(true))); if (top->is_number() && top->get_number() == last->get_address()+last->get_size()) { if (target) { SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP)); if (eip->is_number()) *target = eip->get_number(); } if (return_va) *return_va = top->get_number(); return true; } } return false; }
//! [basicReadTest] static void basicReadTest(const P2::Partitioner &partitioner) { std::cout <<"\n" <<std::string(40, '=') <<"\nbasicReadTest\n" <<std::string(40, '=') <<"\n"; SymbolicSemantics::Formatter fmt; fmt.set_line_prefix(" "); // Create the RiscOperators and the initial state. const RegisterDictionary *regdict = partitioner.instructionProvider().registerDictionary(); const RegisterDescriptor REG = partitioner.instructionProvider().stackPointerRegister(); const std::string REG_NAME = RegisterNames(regdict)(REG); BaseSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(regdict); ops->currentState()->memoryState()->set_byteOrder(partitioner.instructionProvider().defaultByteOrder()); BaseSemantics::StatePtr initialState = ops->currentState()->clone(); ops->initialState(initialState); // lazily evaluated initial state std::cout <<"Initial state before reading:\n" <<(*initialState+fmt); // Read some memory and a register, which should cause them to spring into existence in both the current state and the // initial state. BaseSemantics::SValuePtr addr1 = ops->number_(32, 0); BaseSemantics::SValuePtr dflt1m = ops->number_(32, 0x11223344); BaseSemantics::SValuePtr read1m = ops->readMemory(RegisterDescriptor(), addr1, dflt1m, ops->boolean_(true)); BaseSemantics::SValuePtr dflt1r = ops->undefined_(REG.get_nbits()); BaseSemantics::SValuePtr read1r = ops->readRegister(REG, dflt1r); std::cout <<"Initial state after reading " <<*read1m <<" from address " <<*addr1 <<"\n" <<"and " <<*read1r <<" from " <<REG_NAME <<"\n" <<(*initialState+fmt); ASSERT_always_require(read1m->must_equal(dflt1m)); ASSERT_always_require(read1r->must_equal(dflt1r)); // Create a new current state and read again. We should get the same value even though the current state is empty. 
BaseSemantics::StatePtr curState = ops->currentState()->clone(); curState->clear(); ops->currentState(curState); BaseSemantics::SValuePtr dflt2m = ops->number_(32, 0x55667788); BaseSemantics::SValuePtr read2m = ops->readMemory(RegisterDescriptor(), addr1, dflt2m, ops->boolean_(true)); BaseSemantics::SValuePtr dflt2r = ops->undefined_(REG.get_nbits()); BaseSemantics::SValuePtr read2r = ops->readRegister(REG, dflt2r); std::cout <<"Initial state after reading " <<*read2m <<" from address " <<*addr1 <<"\n" <<"and " <<*read2r <<" from " <<REG_NAME <<"\n" <<(*initialState+fmt); ASSERT_always_require(read1m->must_equal(read2m)); ASSERT_always_require(read1r->must_equal(read2r)); // Disable the initial state. If we re-read the same address we'll still get the same result because it's now present in // the current state also. ops->initialState(BaseSemantics::StatePtr()); BaseSemantics::SValuePtr dflt3m = ops->number_(32, 0x99aabbcc); BaseSemantics::SValuePtr read3m = ops->readMemory(RegisterDescriptor(), addr1, dflt3m, ops->boolean_(true)); BaseSemantics::SValuePtr dflt3r = ops->undefined_(REG.get_nbits()); BaseSemantics::SValuePtr read3r = ops->readRegister(REG, dflt3r); ASSERT_always_require(read1m->must_equal(read3m)); ASSERT_always_require(read1r->must_equal(read3r)); }
int main(int argc, char *argv[]) { ROSE_INITIALIZE; Diagnostics::initAndRegister(&::mlog, "tool"); parseCommandLine(argc, argv); // Create the machine state const RegisterDictionary *registers = RegisterDictionary::dictionary_amd64(); const RegisterDescriptor EAX = *registers->lookup("eax"); SmtSolverPtr solver; BaseSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(registers, solver); ops->currentState()->memoryState()->set_byteOrder(ByteOrder::ORDER_LSB); // Initialize the machine state with some writes and get the string representation. BaseSemantics::SValuePtr eax = ops->number_(32, 1234); ops->writeRegister(EAX, eax); BaseSemantics::SValuePtr addr0 = ops->number_(32, 0x1000); BaseSemantics::SValuePtr mem0 = ops->number_(8, 123); ops->writeMemory(RegisterDescriptor(), addr0, mem0, ops->boolean_(true)); std::ostringstream s0; s0 <<*ops; // Peek at parts of the state that exist BaseSemantics::SValuePtr v1 = ops->peekRegister(EAX, ops->undefined_(32)); ASSERT_always_not_null(v1); ASSERT_always_require(v1->must_equal(eax, solver)); BaseSemantics::SValuePtr mem1 = ops->peekMemory(RegisterDescriptor(), addr0, ops->undefined_(8)); ASSERT_always_not_null(mem1); ASSERT_always_require(mem1->must_equal(mem0, solver)); std::ostringstream s1; s1 <<*ops; ASSERT_always_require2(s0.str() == s1.str(), s1.str()); // Peek at parts of the state that don't exist const RegisterDescriptor EBX = *registers->lookup("ebx"); BaseSemantics::SValuePtr ebx = ops->undefined_(32); BaseSemantics::SValuePtr v2 = ops->peekRegister(EBX, ebx); ASSERT_always_not_null(v2); ASSERT_always_require(v2->must_equal(ebx, solver)); BaseSemantics::SValuePtr addr2 = ops->number_(32, 0x2000); BaseSemantics::SValuePtr mem2init = ops->undefined_(8); BaseSemantics::SValuePtr mem2 = ops->peekMemory(RegisterDescriptor(), addr2, mem2init); ASSERT_always_not_null(mem2); ASSERT_always_require(mem2->must_equal(mem2init, solver)); std::ostringstream s2; s2 <<*ops; 
ASSERT_always_require2(s0.str() == s2.str(), s2.str()); // Peek at parts of the state that partly exist. const RegisterDescriptor RAX = *registers->lookup("rax"); BaseSemantics::SValuePtr zero64 = ops->number_(64, 0); BaseSemantics::SValuePtr v3 = ops->peekRegister(RAX, zero64); ASSERT_always_not_null(v3); ASSERT_always_require(v3->must_equal(ops->number_(64, 1234), solver)); BaseSemantics::SValuePtr zero32 = ops->number_(32, 0); BaseSemantics::SValuePtr mem3ans = ops->number_(32, 123); BaseSemantics::SValuePtr mem3 = ops->peekMemory(RegisterDescriptor(), addr0, zero32); ASSERT_always_not_null(mem3); ASSERT_always_require(mem3->must_equal(mem3ans, solver)); std::ostringstream s3; s3 <<*ops; ASSERT_always_require2(s0.str() == s3.str(), s3.str()); std::cout <<s3.str(); }
// see base class; don't modify target_va or return_va if they are not known bool SgAsmM68kInstruction::isFunctionCallSlow(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target_va, rose_addr_t *return_va) { if (isFunctionCallFast(insns, target_va, return_va)) return true; static const size_t EXECUTION_LIMIT = 25; // max size of basic blocks for expensive analyses if (insns.empty()) return false; SgAsmM68kInstruction *last = isSgAsmM68kInstruction(insns.back()); if (!last) return false; SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(last); SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func); // Slow method: Emulate the instructions and then look at the program counter (PC) and stack (A7). If the PC points // outside the current function and the top of the stack holds an address of an instruction within the current function, // then this must be a function call. if (interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; const InstructionMap &imap = interp->get_instruction_map(); const RegisterDictionary *regdict = RegisterDictionary::dictionary_for_isa(interp); SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); DispatcherM68kPtr dispatcher = DispatcherM68k::instance(ops, 32); SValuePtr orig_sp = SValue::promote(ops->readRegister(dispatcher->REG_A[7])); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // If the next instruction address is concrete but does not point to a function entry point, then this is not a call. 
SValuePtr ip = SValue::promote(ops->readRegister(dispatcher->REG_PC)); if (ip->is_number()) { rose_addr_t target_va = ip->get_number(); SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(target_va, NULL)); if (!target_func || target_va!=target_func->get_entry_va()) return false; } // If nothing was pushed onto the stack, then this isn't a function call. SValuePtr sp = SValue::promote(ops->readRegister(dispatcher->REG_A[7])); SValuePtr stack_delta = SValue::promote(ops->add(sp, ops->negate(orig_sp))); SValuePtr stack_delta_sign = SValue::promote(ops->extract(stack_delta, 31, 32)); if (stack_delta_sign->is_number() && 0==stack_delta_sign->get_number()) return false; // If the top of the stack does not contain a concrete value or the top of the stack does not point to an instruction // in this basic block's function, then this is not a function call. SValuePtr top = SValue::promote(ops->readMemory(RegisterDescriptor(), sp, sp->undefined_(32), sp->boolean_(true))); if (top->is_number()) { rose_addr_t va = top->get_number(); SgAsmFunction *return_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(va, NULL)); if (!return_func || return_func!=func) { return false; } } else { return false; } // Since the instruction pointer might point to a function entry address and since the top of the stack contains a // pointer to an instruction in this function, we assume that this is a function call. if (target_va && ip->is_number()) *target_va = ip->get_number(); if (return_va && top->is_number()) *return_va = top->get_number(); return true; } // Similar to the above method, but works when all we have is the basic block (e.g., this case gets hit quite a bit from // the Partitioner). Returns true if, after executing the basic block, the top of the stack contains the fall-through // address of the basic block. We depend on our caller to figure out if the instruction pointer is reasonably a function // entry address. 
if (!interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; const RegisterDictionary *regdict = RegisterDictionary::dictionary_coldfire_emac(); SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); DispatcherM68kPtr dispatcher = DispatcherM68k::instance(ops, 32); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // Look at the top of the stack SValuePtr top = SValue::promote(ops->readMemory(RegisterDescriptor(), ops->readRegister(dispatcher->REG_A[7]), ops->protoval()->undefined_(32), ops->protoval()->boolean_(true))); if (top->is_number() && top->get_number() == last->get_address()+last->get_size()) { if (target_va) { SValuePtr ip = SValue::promote(ops->readRegister(dispatcher->REG_PC)); if (ip->is_number()) *target_va = ip->get_number(); } if (return_va) *return_va = top->get_number(); return true; } } return false; }