//! [basicReadTest] static void basicReadTest(const P2::Partitioner &partitioner) { std::cout <<"\n" <<std::string(40, '=') <<"\nbasicReadTest\n" <<std::string(40, '=') <<"\n"; SymbolicSemantics::Formatter fmt; fmt.set_line_prefix(" "); // Create the RiscOperators and the initial state. const RegisterDictionary *regdict = partitioner.instructionProvider().registerDictionary(); const RegisterDescriptor REG = partitioner.instructionProvider().stackPointerRegister(); const std::string REG_NAME = RegisterNames(regdict)(REG); BaseSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(regdict); ops->currentState()->memoryState()->set_byteOrder(partitioner.instructionProvider().defaultByteOrder()); BaseSemantics::StatePtr initialState = ops->currentState()->clone(); ops->initialState(initialState); // lazily evaluated initial state std::cout <<"Initial state before reading:\n" <<(*initialState+fmt); // Read some memory and a register, which should cause them to spring into existence in both the current state and the // initial state. BaseSemantics::SValuePtr addr1 = ops->number_(32, 0); BaseSemantics::SValuePtr dflt1m = ops->number_(32, 0x11223344); BaseSemantics::SValuePtr read1m = ops->readMemory(RegisterDescriptor(), addr1, dflt1m, ops->boolean_(true)); BaseSemantics::SValuePtr dflt1r = ops->undefined_(REG.get_nbits()); BaseSemantics::SValuePtr read1r = ops->readRegister(REG, dflt1r); std::cout <<"Initial state after reading " <<*read1m <<" from address " <<*addr1 <<"\n" <<"and " <<*read1r <<" from " <<REG_NAME <<"\n" <<(*initialState+fmt); ASSERT_always_require(read1m->must_equal(dflt1m)); ASSERT_always_require(read1r->must_equal(dflt1r)); // Create a new current state and read again. We should get the same value even though the current state is empty. 
BaseSemantics::StatePtr curState = ops->currentState()->clone(); curState->clear(); ops->currentState(curState); BaseSemantics::SValuePtr dflt2m = ops->number_(32, 0x55667788); BaseSemantics::SValuePtr read2m = ops->readMemory(RegisterDescriptor(), addr1, dflt2m, ops->boolean_(true)); BaseSemantics::SValuePtr dflt2r = ops->undefined_(REG.get_nbits()); BaseSemantics::SValuePtr read2r = ops->readRegister(REG, dflt2r); std::cout <<"Initial state after reading " <<*read2m <<" from address " <<*addr1 <<"\n" <<"and " <<*read2r <<" from " <<REG_NAME <<"\n" <<(*initialState+fmt); ASSERT_always_require(read1m->must_equal(read2m)); ASSERT_always_require(read1r->must_equal(read2r)); // Disable the initial state. If we re-read the same address we'll still get the same result because it's now present in // the current state also. ops->initialState(BaseSemantics::StatePtr()); BaseSemantics::SValuePtr dflt3m = ops->number_(32, 0x99aabbcc); BaseSemantics::SValuePtr read3m = ops->readMemory(RegisterDescriptor(), addr1, dflt3m, ops->boolean_(true)); BaseSemantics::SValuePtr dflt3r = ops->undefined_(REG.get_nbits()); BaseSemantics::SValuePtr read3r = ops->readRegister(REG, dflt3r); ASSERT_always_require(read1m->must_equal(read3m)); ASSERT_always_require(read1r->must_equal(read3r)); }
// Maps a register descriptor's (offset, width) pair onto the RoseBin_support position enum.
//
// Descriptors starting at bit 0 are classified by width (8, 16, 32 or 64 bits; any other width yields
// x86_regpos_all). An 8-bit descriptor starting at bit 8 is the high byte. Everything else is unknown.
RoseBin_support::X86PositionInRegister
get_position_in_register(const RegisterDescriptor &rdesc) {
    const size_t offset = rdesc.get_offset();
    const size_t width = rdesc.get_nbits();

    // Parts that do not start at bit zero: only the 8-bit slice at offset 8 is recognized.
    if (offset != 0) {
        if (offset == 8 && width == 8)
            return RoseBin_support::x86_regpos_high_byte;
        return RoseBin_support::x86_regpos_unknown;
    }

    // Parts that start at bit zero are distinguished purely by width.
    if (width == 8)
        return RoseBin_support::x86_regpos_low_byte;
    if (width == 16)
        return RoseBin_support::x86_regpos_word;
    if (width == 32)
        return RoseBin_support::x86_regpos_dword;
    if (width == 64)
        return RoseBin_support::x86_regpos_qword;
    return RoseBin_support::x86_regpos_all;
}
// Reads the simulator's syscall-return register from this printer's thread, sign-extends the value from the
// register's width to 64 bits, and forwards it to the eret overload that takes the value explicitly.
Printer&
Printer::eret() {
    const RegisterDescriptor retReg = thread_->get_process()->get_simulator()->syscallReturnRegister();
    const uint64_t rawValue = thread_->operators()->readRegister(retReg)->get_number();

    // Keep the signed 64-bit intermediate so the same eret overload is selected.
    const int64_t extended = IntegerOps::signExtend2(rawValue, retReg.get_nbits(), 64);
    return eret(extended);
}
BinaryAnalysis::Disassembler::AddressSet SgAsmX86Instruction::getSuccessors(const std::vector<SgAsmInstruction*>& insns, bool *complete, const MemoryMap::Ptr &initial_memory) { Stream debug(mlog[DEBUG]); using namespace Rose::BinaryAnalysis::InstructionSemantics2; if (debug) { debug <<"SgAsmX86Instruction::getSuccessors(" <<StringUtility::addrToString(insns.front()->get_address()) <<" for " <<insns.size() <<" instruction" <<(1==insns.size()?"":"s") <<"):" <<"\n"; } BinaryAnalysis::Disassembler::AddressSet successors = SgAsmInstruction::getSuccessors(insns, complete); /* If we couldn't determine all the successors, or a cursory analysis couldn't narrow it down to a single successor then * we'll do a more thorough analysis now. In the case where the cursory analysis returned a complete set containing two * successors, a thorough analysis might be able to narrow it down to a single successor. We should not make special * assumptions about CALL and FARCALL instructions -- their only successor is the specified address operand. 
*/ if (!*complete || successors.size()>1) { const RegisterDictionary *regdict; if (SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(this)) { regdict = RegisterDictionary::dictionary_for_isa(interp); } else { switch (get_baseSize()) { case x86_insnsize_16: regdict = RegisterDictionary::dictionary_i286(); break; case x86_insnsize_32: regdict = RegisterDictionary::dictionary_pentium4(); break; case x86_insnsize_64: regdict = RegisterDictionary::dictionary_amd64(); break; default: ASSERT_not_reachable("invalid x86 instruction size"); } } const RegisterDescriptor IP = regdict->findLargestRegister(x86_regclass_ip, 0); PartialSymbolicSemantics::RiscOperatorsPtr ops = PartialSymbolicSemantics::RiscOperators::instance(regdict); ops->set_memory_map(initial_memory); BaseSemantics::DispatcherPtr cpu = DispatcherX86::instance(ops, IP.get_nbits(), regdict); try { BOOST_FOREACH (SgAsmInstruction *insn, insns) { cpu->processInstruction(insn); SAWYER_MESG(debug) <<" state after " <<insn->toString() <<"\n" <<*ops; } BaseSemantics::SValuePtr ip = ops->readRegister(IP); if (ip->is_number()) { successors.clear(); successors.insert(ip->get_number()); *complete = true; } } catch(const BaseSemantics::Exception &e) {
// Builds the state in which analysis of `insn` starts.
//
// With a normalizer installed, the normalizer produces the state. Otherwise the state is a cleared clone of the
// dispatcher's current state whose instruction pointer holds the address of `insn`. In both cases, when an
// initial stack pointer value has been configured it is written to the stack pointer register afterward.
BaseSemantics::StatePtr
NoOperation::initialState(SgAsmInstruction *insn) const {
    ASSERT_not_null(insn);
    ASSERT_not_null(cpu_);

    BaseSemantics::StatePtr result;
    if (normalizer_) {
        result = normalizer_->initialState(cpu_, insn);
    } else {
        result = cpu_->currentState()->clone();
        result->clear();
        const RegisterDescriptor ipReg = cpu_->instructionPointerRegister();
        result->writeRegister(ipReg,
                              cpu_->number_(ipReg.get_nbits(), insn->get_address()),
                              cpu_->get_operators().get());
    }

    // Optionally pin the stack pointer to the configured concrete value.
    if (initialSp_) {
        BaseSemantics::RiscOperatorsPtr spOps = cpu_->get_operators();
        const RegisterDescriptor spReg = cpu_->stackPointerRegister();
        result->writeRegister(spReg, spOps->number_(spReg.get_nbits(), *initialSp_), spOps.get());
    }

    return result;
}
// Sets up a concrete-semantics virtual machine for the partitioner's architecture.
//
// Creates concrete RiscOperators over the partitioner's register dictionary, wraps them in tracing operators when
// settings.traceSemantics is set, and asks the partitioner for a dispatcher. Throws std::runtime_error when the
// partitioner cannot provide one. The word size is taken from the width of the instruction pointer register.
VirtualMachine(const P2::Partitioner &partitioner, const Settings &settings)
    : wordSize_(0), stackVa_(settings.stackVa), returnMarker_(0xbeef0967) {
    const RegisterDictionary *registers = partitioner.instructionProvider().registerDictionary();
    ops_ = ConcreteSemantics::RiscOperators::instance(registers);

    // The dispatcher runs either directly on the concrete operators or on a tracing wrapper around them.
    if (!settings.traceSemantics) {
        cpu_ = partitioner.newDispatcher(ops_);
    } else {
        BaseSemantics::RiscOperatorsPtr tracing = TraceSemantics::RiscOperators::instance(ops_);
        cpu_ = partitioner.newDispatcher(tracing);
    }
    if (cpu_==NULL)
        throw std::runtime_error("no semantics for architecture");

    // Cache the registers this machine needs.
    regIp_ = partitioner.instructionProvider().instructionPointerRegister();
    regSp_ = partitioner.instructionProvider().stackPointerRegister();
    regSs_ = partitioner.instructionProvider().stackSegmentRegister();
    wordSize_ = regIp_.get_nbits();
}
/** Returns the name of an X86 register. * * We use the amd64 architecture because, since it's backward compatible with the 8086, it contains definitions for all the * registers from older architectures. */ std::string unparseX86Register(const RegisterDescriptor ®) { using namespace StringUtility; const RegisterDictionary *dict = RegisterDictionary::dictionary_amd64(); std::string name = dict->lookup(reg); if (name.empty()) { static bool dumped_dict = false; std::cerr <<"unparseX86Register(" <<reg <<"): register descriptor not found in dictionary.\n"; if (!dumped_dict) { std::cerr <<" FIXME: we might be using the amd64 register dictionary. [RPM 2011-03-02]\n"; //std::cerr <<*dict; dumped_dict = true; } return (std::string("BAD_REGISTER(") + numberToString(reg.get_major()) + "." + numberToString(reg.get_minor()) + "." + numberToString(reg.get_offset()) + "." + numberToString(reg.get_nbits()) + ")"); } return name; }
// Finds every contiguous run of instructions in `insns` that leaves the machine state unchanged.
//
// The instructions are processed in order as though they formed a basic block. A normalized, string-valued
// snapshot of the state is taken before each instruction and, if no semantic exception occurred, once more after
// the last one. Whenever snapshots i and j (i < j) are identical, instructions i through j-1 are reported as a
// no-op interval. Returns an empty result when there is no dispatcher or no instructions.
NoOperation::IndexIntervals
NoOperation::findNoopSubsequences(const std::vector<SgAsmInstruction*> &insns) const {
    IndexIntervals noops;
    Sawyer::Message::Stream debug(mlog[DEBUG]);
    if (debug) {
        debug <<"findNoopSubsequences(\n";
        for (size_t k = 0; k < insns.size(); ++k)
            debug <<" " <<unparseInstructionWithAddress(insns[k]) <<"\n";
        debug <<")\n";
    }

    // Without instruction semantics, every instruction is assumed to have an effect.
    if (!cpu_ || insns.empty())
        return noops;

    // snapshots[k] is the state before insns[k]; snapshots[k+1] is the state after it. States have no general
    // equality test, so they are compared through their normalized string form.
    // FIXME[Robb P. Matzke 2015-05-11]
    std::vector<std::string> snapshots;
    bool failed = false;
    cpu_->get_operators()->currentState(initialState(insns.front()));
    const RegisterDescriptor ipReg = cpu_->instructionPointerRegister();
    try {
        for (size_t k = 0; k < insns.size(); ++k) {
            SgAsmInstruction *insn = insns[k];

            // Force the instruction pointer to this instruction's address before taking the snapshot.
            BaseSemantics::RiscOperatorsPtr ops = cpu_->get_operators();
            ops->writeRegister(ipReg, ops->number_(ipReg.get_nbits(), insn->get_address()));

            snapshots.push_back(normalizeState(cpu_->currentState()));
            if (debug) {
                debug <<" normalized state #" <<snapshots.size()-1 <<":\n"
                      <<StringUtility::prefixLines(snapshots.back(), " ");
                debug <<" instruction: " <<unparseInstructionWithAddress(insn) <<"\n";
            }
            cpu_->processInstruction(insn);
        }
    } catch (const BaseSemantics::Exception &e) {
        failed = true;
        SAWYER_MESG(debug) <<" semantic exception: " <<e <<"\n";
    }

    // The final snapshot is only meaningful when every instruction was processed.
    if (!failed) {
        snapshots.push_back(normalizeState(cpu_->currentState()));
        if (debug) {
            debug <<" normalized state #" <<snapshots.size()-1 <<":\n"
                  <<StringUtility::prefixLines(snapshots.back(), " ");
        }
    }

    // Any two equal snapshots bracket a sequence of instructions with no net effect.
    for (size_t first = 0; first + 1 < snapshots.size(); ++first) {
        for (size_t past = first + 1; past < snapshots.size(); ++past) {
            if (snapshots[first] != snapshots[past])
                continue;
            noops.push_back(IndexInterval::hull(first, past-1));
            SAWYER_MESG(debug) <<" no-op: " <<first <<".." <<(past-1) <<"\n";
        }
    }
    return noops;
}
// see base class bool SgAsmX86Instruction::isFunctionCallSlow(const std::vector<SgAsmInstruction*>& insns, rose_addr_t *target, rose_addr_t *return_va) { if (isFunctionCallFast(insns, target, return_va)) return true; // The following stuff works only if we have a relatively complete AST. static const size_t EXECUTION_LIMIT = 10; // max size of basic blocks for expensive analyses if (insns.empty()) return false; SgAsmX86Instruction *last = isSgAsmX86Instruction(insns.back()); if (!last) return false; SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(last); SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func); // Slow method: Emulate the instructions and then look at the EIP and stack. If the EIP points outside the current // function and the top of the stack holds an address of an instruction within the current function, then this must be a // function call. if (interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; const InstructionMap &imap = interp->get_instruction_map(); const RegisterDictionary *regdict = RegisterDictionary::dictionary_for_isa(interp); SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); ASSERT_not_null(ops); const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp); DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits()); SValuePtr orig_esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP)); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // If the next instruction address is concrete but does not point to a function entry point, then this is not a 
call. SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP)); if (eip->is_number()) { rose_addr_t target_va = eip->get_number(); SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(target_va, NULL)); if (!target_func || target_va!=target_func->get_entry_va()) return false; } // If nothing was pushed onto the stack, then this isn't a function call. const size_t spWidth = dispatcher->REG_anySP.get_nbits(); SValuePtr esp = SValue::promote(ops->readRegister(dispatcher->REG_anySP)); SValuePtr stack_delta = SValue::promote(ops->add(esp, ops->negate(orig_esp))); SValuePtr stack_delta_sign = SValue::promote(ops->extract(stack_delta, spWidth-1, spWidth)); if (stack_delta_sign->is_number() && 0==stack_delta_sign->get_number()) return false; // If the top of the stack does not contain a concrete value or the top of the stack does not point to an instruction // in this basic block's function, then this is not a function call. const size_t ipWidth = dispatcher->REG_anyIP.get_nbits(); SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, esp, esp->undefined_(ipWidth), esp->boolean_(true))); if (top->is_number()) { rose_addr_t va = top->get_number(); SgAsmFunction *return_func = SageInterface::getEnclosingNode<SgAsmFunction>(imap.get_value_or(va, NULL)); if (!return_func || return_func!=func) { return false; } } else { return false; } // Since EIP might point to a function entry address and since the top of the stack contains a pointer to an // instruction in this function, we assume that this is a function call. if (target && eip->is_number()) *target = eip->get_number(); if (return_va && top->is_number()) *return_va = top->get_number(); return true; } // Similar to the above method, but works when all we have is the basic block (e.g., this case gets hit quite a bit from // the Partitioner). 
Returns true if, after executing the basic block, the top of the stack contains the fall-through // address of the basic block. We depend on our caller to figure out if EIP is reasonably a function entry address. if (!interp && insns.size()<=EXECUTION_LIMIT) { using namespace Rose::BinaryAnalysis; using namespace Rose::BinaryAnalysis::InstructionSemantics2; using namespace Rose::BinaryAnalysis::InstructionSemantics2::SymbolicSemantics; SmtSolverPtr solver = SmtSolver::instance(Rose::CommandLine::genericSwitchArgs.smtSolver); SgAsmX86Instruction *x86insn = isSgAsmX86Instruction(insns.front()); ASSERT_not_null(x86insn); #if 1 // [Robb P. Matzke 2015-03-03]: FIXME[Robb P. Matzke 2015-03-03]: not ready yet; x86-64 semantics still under construction if (x86insn->get_addressSize() != x86_insnsize_32) return false; #endif const RegisterDictionary *regdict = registersForInstructionSize(x86insn->get_addressSize()); const RegisterDescriptor SP = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp); BaseSemantics::RiscOperatorsPtr ops = RiscOperators::instance(regdict, solver); DispatcherX86Ptr dispatcher = DispatcherX86::instance(ops, SP.get_nbits()); try { for (size_t i=0; i<insns.size(); ++i) dispatcher->processInstruction(insns[i]); } catch (const BaseSemantics::Exception &e) { return false; } // Look at the top of the stack const size_t ipWidth = dispatcher->REG_anyIP.get_nbits(); SValuePtr top = SValue::promote(ops->readMemory(dispatcher->REG_SS, ops->readRegister(SP), ops->protoval()->undefined_(ipWidth), ops->protoval()->boolean_(true))); if (top->is_number() && top->get_number() == last->get_address()+last->get_size()) { if (target) { SValuePtr eip = SValue::promote(ops->readRegister(dispatcher->REG_anyIP)); if (eip->is_number()) *target = eip->get_number(); } if (return_va) *return_va = top->get_number(); return true; } } return false; }
// Custom version of the readRegister API that does not require a RiscOperators pointer. It // simply uses a global variable to fill in the missing parameter. This must be a global // variable because storing the smart pointer in the register state causes pointer reference // cycles that confuses the reference counter of the smart pointer and causes memory leaks. // A better solution should probably be identified. This method does NOT alter the "create // on access" behaviors of the the standard readRegister() method. SymbolicValuePtr read_register(RegisterDescriptor rd) { BaseRiscOperators* ops = (BaseRiscOperators*)global_rops.get(); return SymbolicValue::promote(RegisterStateGeneric::readRegister( rd, ops->undefined_(rd.get_nbits()), ops)); }