int main(int argc, char *argv[]) { Diagnostics::initialize(); ::mlog = Diagnostics::Facility("tool", Diagnostics::destination); Diagnostics::mfacilities.insertAndAdjust(::mlog); // Parse the command-line Partitioner2::Engine engine; std::vector<std::string> specimenNames = parseCommandLine(argc, argv, engine); if (specimenNames.empty()) throw std::runtime_error("no specimen specified; see --help"); // Load specimen into memory MemoryMap map = engine.loadSpecimens(specimenNames); // Configure instruction semantics Partitioner2::Partitioner partitioner = engine.createPartitioner(); Disassembler *disassembler = engine.obtainDisassembler(); const RegisterDictionary *regdict = disassembler->get_registers(); if (disassembler->dispatcher() == NULL) throw std::runtime_error("no instruction semantics for this architecture"); BaseSemantics::RiscOperatorsPtr ops = InstructionSemantics2::ConcreteSemantics::RiscOperators::instance(regdict); BaseSemantics::DispatcherPtr cpu = disassembler->dispatcher()->create(ops); ConcreteSemantics::MemoryState::promote(ops->currentState()->memoryState())->memoryMap(map); // Find starting address rose_addr_t va = 0; if (settings.startVa) { va = *settings.startVa; } else if (engine.isaName() == "coldfire") { // Use the interrupt vector to initialize the stack pointer and instruction pointer. uint32_t sp, ip; if (4 != map.at(0).limit(4).read((uint8_t*)&sp).size()) throw std::runtime_error("cannot read stack pointer at address 0x00000000"); ops->writeRegister(disassembler->stackPointerRegister(), ops->number_(32, ByteOrder::be_to_host(sp))); if (4 != map.at(4).limit(4).read((uint8_t*)&ip).size()) throw std::runtime_error("cannot read instruction pointer at address 0x00000004"); va = ByteOrder::be_to_host(ip); } else if (!map.atOrAfter(0).require(MemoryMap::EXECUTABLE).next().assignTo(va)) { throw std::runtime_error("no starting address specified and none marked executable"); } ops->writeRegister(disassembler->instructionPointerRegister(), ops->number_(32, va)); // Execute map.dump(::mlog[INFO]); while (1) { va = ops->readRegister(disassembler->instructionPointerRegister())->get_number(); SgAsmInstruction *insn = partitioner.instructionProvider()[va]; SAWYER_MESG(::mlog[TRACE]) <<unparseInstructionWithAddress(insn, NULL, regdict) <<"\n"; try { cpu->processInstruction(insn); } catch (const BaseSemantics::Exception &e) { ::mlog[WARN] <<e <<"\n"; } } // std::cout <<"Final state:\n"; // std::cout <<*ops->currentState(); }
BinaryAnalysis::Disassembler::AddressSet SgAsmX86Instruction::getSuccessors(const std::vector<SgAsmInstruction*>& insns, bool *complete, const MemoryMap::Ptr &initial_memory) { Stream debug(mlog[DEBUG]); using namespace Rose::BinaryAnalysis::InstructionSemantics2; if (debug) { debug <<"SgAsmX86Instruction::getSuccessors(" <<StringUtility::addrToString(insns.front()->get_address()) <<" for " <<insns.size() <<" instruction" <<(1==insns.size()?"":"s") <<"):" <<"\n"; } BinaryAnalysis::Disassembler::AddressSet successors = SgAsmInstruction::getSuccessors(insns, complete); /* If we couldn't determine all the successors, or a cursory analysis couldn't narrow it down to a single successor then * we'll do a more thorough analysis now. In the case where the cursory analysis returned a complete set containing two * successors, a thorough analysis might be able to narrow it down to a single successor. We should not make special * assumptions about CALL and FARCALL instructions -- their only successor is the specified address operand. */ if (!*complete || successors.size()>1) { const RegisterDictionary *regdict; if (SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(this)) { regdict = RegisterDictionary::dictionary_for_isa(interp); } else { switch (get_baseSize()) { case x86_insnsize_16: regdict = RegisterDictionary::dictionary_i286(); break; case x86_insnsize_32: regdict = RegisterDictionary::dictionary_pentium4(); break; case x86_insnsize_64: regdict = RegisterDictionary::dictionary_amd64(); break; default: ASSERT_not_reachable("invalid x86 instruction size"); } } const RegisterDescriptor IP = regdict->findLargestRegister(x86_regclass_ip, 0); PartialSymbolicSemantics::RiscOperatorsPtr ops = PartialSymbolicSemantics::RiscOperators::instance(regdict); ops->set_memory_map(initial_memory); BaseSemantics::DispatcherPtr cpu = DispatcherX86::instance(ops, IP.get_nbits(), regdict); try { BOOST_FOREACH (SgAsmInstruction *insn, insns) { cpu->processInstruction(insn); SAWYER_MESG(debug) <<" state after " <<insn->toString() <<"\n" <<*ops; } BaseSemantics::SValuePtr ip = ops->readRegister(IP); if (ip->is_number()) { successors.clear(); successors.insert(ip->get_number()); *complete = true; } } catch(const BaseSemantics::Exception &e) {
bool SymbolicExpansion::expandAarch64(SgAsmInstruction *rose_insn, BaseSemantics::RiscOperatorsPtr ops, const std::string &insn_dump) { SgAsmArmv8Instruction *insn = static_cast<SgAsmArmv8Instruction *>(rose_insn); BaseSemantics::DispatcherPtr cpu = DispatcherARM64::instance(ops, 64); try { cpu->processInstruction(insn); } catch (rose::BinaryAnalysis::InstructionSemantics2::BaseSemantics::Exception &e) { // fprintf(stderr, "Instruction processing threw exception for instruction: %s\n", insn_dump.c_str()); } return false; }
rose_addr_t run(const P2::Partitioner &partitioner, const Settings &settings, const std::set<rose_addr_t> &breakpoints = std::set<rose_addr_t>()) { for (size_t nInsns=0; nInsns<settings.insnLimit; ++nInsns) { rose_addr_t ip = ops_->readRegister(regIp_)->get_number(); if (ip == returnMarker_ || (nInsns>0 && breakpoints.find(ip)!=breakpoints.end())) return ip; SgAsmInstruction *insn = partitioner.instructionProvider()[ip]; if (!insn) throw std::runtime_error("no instruction at " + StringUtility::addrToString(ip)); if (settings.traceInsns && ::mlog[TRACE]) ::mlog[TRACE] <<unparseInstructionWithAddress(insn) <<"\n"; cpu_->processInstruction(insn); } throw std::runtime_error("execution limit exceeded ("+StringUtility::plural(settings.insnLimit, "instructions")+")"); }
// The actual analysis for a function call starting at a function call return point and terminating each path whenever we reach // another function call. Try to figure out if we ever read from EAX without first writing to it and return true if we do. static bool returnValueUsed(const Cfg &cfg, CfgVertex startVertex, const RegisterDictionary *regdict) { BaseSemantics::SValuePtr protoval = NullSemantics::SValue::instance(); RegisterStatePtr registers = RegisterState::instance(protoval, regdict); BaseSemantics::MemoryStatePtr memory = BaseSemantics::MemoryCellList::instance(protoval, protoval); BaseSemantics::StatePtr state = BaseSemantics::State::instance(registers, memory); BaseSemantics::RiscOperatorsPtr ops = NullSemantics::RiscOperators::instance(state); size_t addrWidth = regdict->findLargestRegister(x86_regclass_gpr, x86_gpr_sp).get_nbits(); BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(ops, addrWidth); WorkList<CfgVertex> worklist(true); Map<CfgVertex, size_t> seen; worklist.push(startVertex); while (!worklist.empty()) { CfgVertex v = worklist.pop(); seen[v] = 1; SgAsmBlock *bb = get_ast_node(cfg, v); std::vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(bb); // "Run" the basic block bool failed = false; BOOST_FOREACH (SgAsmInstruction *insn, insns) { try { dispatcher->processInstruction(insn); if (registers->readUninitialized()) return true; } catch (...) { failed = true; break; } } if (failed) continue; // Add new vertices to the work list, but only if none of the outgoing edges are function calls. bool isCall = false; CfgOutEdgeIterator ei, ei_end; for (boost::tie(ei, ei_end)=out_edges(v, cfg); ei!=ei_end && !isCall; ++ei) isCall = isFunctionCall(cfg, *ei); if (!isCall) { for (boost::tie(ei, ei_end)=out_edges(v, cfg); ei!=ei_end && !isCall; ++ei) { if (!seen.exists(v)) worklist.push(v); } } } return false; }
void visit(SgNode *node) { SgAsmBlock *block = isSgAsmBlock(node); if (block && block->has_instructions()) { using namespace rose::BinaryAnalysis::InstructionSemantics2; const RegisterDictionary *regdict = RegisterDictionary::dictionary_i386(); SymbolicSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(regdict); ops->computingDefiners(SymbolicSemantics::TRACK_ALL_DEFINERS); // only used so we can test that it works BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(ops, 32); const SgAsmStatementPtrList &stmts = block->get_statementList(); for (SgAsmStatementPtrList::const_iterator si=stmts.begin(); si!=stmts.end(); ++si) { SgAsmX86Instruction *insn = isSgAsmX86Instruction(*si); if (insn) { std::cout <<unparseInstructionWithAddress(insn) <<"\n"; dispatcher->processInstruction(insn); std::cout <<*ops <<"\n"; } } } }
/* Analyze a single interpretation a block at a time */ static void analyze_interp(SgAsmInterpretation *interp) { /* Get the set of all instructions except instructions that are part of left-over blocks. */ struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmX86Instruction*> { void visit(SgNode *node) { SgAsmX86Instruction *insn = isSgAsmX86Instruction(node); SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn); if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) insert(std::make_pair(insn->get_address(), insn)); } } insns; insns.traverse(interp, postorder); while (!insns.empty()) { std::cout <<"=====================================================================================\n" <<"=== Starting a new basic block ===\n" <<"=====================================================================================\n"; AllInstructions::iterator si = insns.begin(); SgAsmX86Instruction *insn = si->second; insns.erase(si); BaseSemantics::RiscOperatorsPtr operators = make_ops(); BaseSemantics::Formatter formatter; formatter.set_suppress_initial_values(); formatter.set_show_latest_writers(do_usedef); BaseSemantics::DispatcherPtr dispatcher; if (do_trace) { // Enable RiscOperators tracing, but turn off a bunch of info that makes comparisons with a known good answer // difficult. Sawyer::Message::PrefixPtr prefix = Sawyer::Message::Prefix::instance(); prefix->showProgramName(false); prefix->showThreadId(false); prefix->showElapsedTime(false); prefix->showFacilityName(Sawyer::Message::Prefix::NEVER); prefix->showImportance(false); Sawyer::Message::UnformattedSinkPtr sink = Sawyer::Message::StreamSink::instance(std::cout); sink->prefix(prefix); sink->defaultPropertiesNS().useColor = false; TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators); trace->stream().destination(sink); trace->stream().enable(); dispatcher = DispatcherX86::instance(trace, 32); } else { dispatcher = DispatcherX86::instance(operators, 32); } operators->set_solver(make_solver()); // The fpstatus_top register must have a concrete value if we'll use the x86 floating-point stack (e.g., st(0)) if (const RegisterDescriptor *REG_FPSTATUS_TOP = regdict->lookup("fpstatus_top")) { BaseSemantics::SValuePtr st_top = operators->number_(REG_FPSTATUS_TOP->get_nbits(), 0); operators->writeRegister(*REG_FPSTATUS_TOP, st_top); } #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN BaseSemantics::SValuePtr orig_esp; if (do_test_subst) { // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated // in the state, which is printed in the output, and thus changes the answer. BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large(); orig_esp = operators->readRegister(*regdict->lookup("esp")); std::cout <<"Original state:\n" <<*operators; } #endif /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already * been processed. */ while (1) { /* Analyze current instruction */ std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n"; try { dispatcher->processInstruction(insn); # if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/ show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output # else std::cout <<(*operators + formatter); # endif } catch (const BaseSemantics::Exception &e) { std::cout <<e <<"\n"; } /* Never follow CALL instructions */ if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall) break; /* Get next instruction of this block */ BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip")); if (!ip->is_number()) break; rose_addr_t next_addr = ip->get_number(); si = insns.find(next_addr); if (si==insns.end()) break; insn = si->second; insns.erase(si); } // Test substitution on the symbolic state. #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN if (do_test_subst) { SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp); BaseSemantics::SValuePtr newvar = operators->undefined_(32); newvar->set_comment("frame_pointer"); SymbolicSemantics::SValuePtr to = SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4))); std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n"; SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to); std::cout <<"Substituted state:\n" <<(*operators+formatter); } #endif } }
int main(int argc, char *argv[]) { SgProject *project = frontend(argc, argv); SgAsmInterpretation *interp = SageInterface::querySubTree<SgAsmInterpretation>(project).back(); AllInstructions insns(interp); SgAsmGenericHeader *header = interp->get_headers()->get_headers().front(); rose_addr_t start_va = header->get_base_va() + header->get_entry_rva(); #if SEMANTIC_API == OLD_API MyPolicy operators; X86InstructionSemantics<MyPolicy, MyValueType> dispatcher(operators); #else BaseSemantics::RiscOperatorsPtr operators = make_ops(); BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(operators); #endif struct sigaction sa; sa.sa_handler = alarm_handler; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(SIGALRM, &sa, NULL); alarm(timeout); struct timeval start_time; std::cout <<"test starting...\n"; gettimeofday(&start_time, NULL); size_t ninsns = 0; while (!had_alarm) { rose_addr_t va = start_va; while (SgAsmInstruction *insn = insns.fetch(va)) { //std::cerr <<unparseInstructionWithAddress(insn) <<"\n"; #if SEMANTIC_API == OLD_API dispatcher.processInstruction(isSgAsmx86Instruction(insn)); ++ninsns; #if SEMANTIC_DOMAIN == MULTI_DOMAIN // multi-semantics ValueType has no is_known() or get_known(). We need to invoke it on a specific subpolicy. PartialSymbolicSemantics::ValueType<32> ip = operators.readRegister<32>("eip") .get_subvalue(MyMultiSemanticsClass::SP0()); #else MyValueType<32> ip = operators.readRegister<32>("eip"); #endif if (!ip.is_known()) break; va = ip.known_value(); #else dispatcher->processInstruction(insn); ++ninsns; BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip")); if (!ip->is_number()) break; va = ip->get_number(); #endif if (had_alarm) break; } } #if SEMANTIC_API == OLD_API MyValueType<32> eax = operators.readRegister<32>("eax"); std::cerr <<"eax = " <<eax <<"\n"; #else BaseSemantics::SValuePtr eax = operators->readRegister(dispatcher->findRegister("eax")); #if SEMANTIC_DOMAIN == MULTI_DOMAIN // This is entirely optional, but the output looks better if it has the names of the subdomains. std::cerr <<"eax = " <<(*eax + MultiSemantics::RiscOperators::promote(operators)->get_formatter()) <<"\n"; #else std::cerr <<"eax = " <<*eax <<"\n"; #endif #endif struct timeval stop_time; gettimeofday(&stop_time, NULL); double elapsed = ((double)stop_time.tv_sec-start_time.tv_sec) + 1e-6*((double)stop_time.tv_usec-start_time.tv_usec); if (elapsed < timeout/4.0) std::cout <<"warning: test did not run for a sufficiently long time; output may contain a high degree of error.\n"; std::cout <<"number of instructions: " <<ninsns <<"\n" <<"elapsed time: " <<elapsed <<" seconds\n" <<"semantic execution rate: " <<(ninsns/elapsed) <<" instructions/second\n"; return 0; }
/* Analyze a single interpretation a block at a time */ static void analyze_interp(SgAsmInterpretation *interp) { /* Get the set of all instructions except instructions that are part of left-over blocks. */ struct AllInstructions: public SgSimpleProcessing, public std::map<rose_addr_t, SgAsmx86Instruction*> { void visit(SgNode *node) { SgAsmx86Instruction *insn = isSgAsmx86Instruction(node); SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn); if (func && 0==(func->get_reason() & SgAsmFunction::FUNC_LEFTOVERS)) insert(std::make_pair(insn->get_address(), insn)); } } insns; insns.traverse(interp, postorder); while (!insns.empty()) { std::cout <<"=====================================================================================\n" <<"=== Starting a new basic block ===\n" <<"=====================================================================================\n"; AllInstructions::iterator si = insns.begin(); SgAsmx86Instruction *insn = si->second; insns.erase(si); #if SEMANTIC_API == NEW_API BaseSemantics::RiscOperatorsPtr operators = make_ops(); BaseSemantics::Formatter formatter; formatter.set_suppress_initial_values(); BaseSemantics::DispatcherPtr dispatcher; if (do_trace) { TraceSemantics::RiscOperatorsPtr trace = TraceSemantics::RiscOperators::instance(operators); trace->set_stream(stdout); dispatcher = DispatcherX86::instance(trace); } else { dispatcher = DispatcherX86::instance(operators); } operators->set_solver(make_solver()); #else // OLD_API typedef X86InstructionSemantics<MyPolicy, MyValueType> MyDispatcher; MyPolicy operators; MyDispatcher dispatcher(operators); # if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN operators.set_solver(make_solver()); SymbolicSemantics::Formatter formatter; formatter.expr_formatter.do_rename = true; formatter.expr_formatter.add_renames = true; # elif SEMANTIC_DOMAIN != FINDCONST_DOMAIN && SEMANTIC_DOMAIN != FINDCONSTABI_DOMAIN BaseSemantics::Formatter formatter; # endif #endif #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API BaseSemantics::SValuePtr orig_esp; if (do_test_subst) { // Only request the orig_esp if we're going to use it later because it causes an esp value to be instantiated // in the state, which is printed in the output, and thus changes the answer. BaseSemantics::RegisterStateGeneric::promote(operators->get_state()->get_register_state())->initialize_large(); orig_esp = operators->readRegister(*regdict->lookup("esp")); std::cout <<"Original state:\n" <<*operators; } #endif /* Perform semantic analysis for each instruction in this block. The block ends when we no longer know the value of * the instruction pointer or the instruction pointer refers to an instruction that doesn't exist or which has already * been processed. */ while (1) { /* Analyze current instruction */ std::cout <<"\n" <<unparseInstructionWithAddress(insn) <<"\n"; #if SEMANTIC_API == NEW_API try { dispatcher->processInstruction(insn); # if 0 /*DEBUGGING [Robb P. Matzke 2013-05-01]*/ show_state(operators); // for comparing RegisterStateGeneric with the old RegisterStateX86 output # else std::cout <<(*operators + formatter); # endif } catch (const BaseSemantics::Exception &e) { std::cout <<e <<"\n"; } #else // OLD API try { dispatcher.processInstruction(insn); # if SEMANTIC_DOMAIN == FINDCONST_DOMAIN || SEMANTIC_DOMAIN == FINDCONSTABI_DOMAIN operators.print(std::cout); # else operators.print(std::cout, formatter); # endif } catch (const MyDispatcher::Exception &e) { std::cout <<e <<"\n"; break; # if SEMANTIC_DOMAIN == PARTSYM_DOMAIN } catch (const MyPolicy::Exception &e) { std::cout <<e <<"\n"; break; # endif } catch (const SMTSolver::Exception &e) { std::cout <<e <<" [ "<<unparseInstructionWithAddress(insn) <<"]\n"; break; } #endif /* Never follow CALL instructions */ if (insn->get_kind()==x86_call || insn->get_kind()==x86_farcall) break; /* Get next instruction of this block */ #if SEMANTIC_API == NEW_API BaseSemantics::SValuePtr ip = operators->readRegister(dispatcher->findRegister("eip")); if (!ip->is_number()) break; rose_addr_t next_addr = ip->get_number(); #else // OLD_API # if SEMANTIC_DOMAIN == PARTSYM_DOMAIN || SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN MyValueType<32> ip = operators.get_ip(); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # elif SEMANTIC_DOMAIN == NULL_DOMAIN || SEMANTIC_DOMAIN == INTERVAL_DOMAIN MyValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # elif SEMANTIC_DOMAIN == MULTI_DOMAIN PartialSymbolicSemantics::ValueType<32> ip = operators.readRegister<32>(dispatcher.REG_EIP) .get_subvalue(MyMultiSemanticsClass::SP0()); if (!ip.is_known()) break; rose_addr_t next_addr = ip.known_value(); # else if (operators.newIp->get().name) break; rose_addr_t next_addr = operators.newIp->get().offset; # endif #endif si = insns.find(next_addr); if (si==insns.end()) break; insn = si->second; insns.erase(si); } // Test substitution on the symbolic state. #if SEMANTIC_DOMAIN == SYMBOLIC_DOMAIN && SEMANTIC_API == NEW_API if (do_test_subst) { SymbolicSemantics::SValuePtr from = SymbolicSemantics::SValue::promote(orig_esp); BaseSemantics::SValuePtr newvar = operators->undefined_(32); newvar->set_comment("frame_pointer"); SymbolicSemantics::SValuePtr to = SymbolicSemantics::SValue::promote(operators->add(newvar, operators->number_(32, 4))); std::cout <<"Substituting from " <<*from <<" to " <<*to <<"\n"; SymbolicSemantics::RiscOperators::promote(operators)->substitute(from, to); std::cout <<"Substituted state:\n" <<(*operators+formatter); } #endif } }